You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

279 lines
5.5KB

  1. #pragma once
  2. #include <simd/Vector.hpp>
  3. #include <simd/sse_mathfun_extension.h>
  4. #include <common.hpp>
  5. #include <math.hpp>
  6. namespace rack {
  7. namespace simd {
  8. // Functions based on instructions
  9. /** `~a & b` */
  10. inline float_4 andnot(float_4 a, float_4 b) {
  11. return float_4(_mm_andnot_ps(a.v, b.v));
  12. }
  13. /** Returns an integer with each bit corresponding to the most significant bit of each element.
  14. For example, `movemask(float_4::mask())` returns 0xf.
  15. */
  16. inline int movemask(float_4 a) {
  17. return _mm_movemask_ps(a.v);
  18. }
  19. /** Returns an integer with each bit corresponding to the most significant bit of each element.
  20. For example, `movemask(int32_4::mask())` returns 0xf.
  21. */
  22. inline int movemask(int32_4 a) {
  23. return _mm_movemask_ps(_mm_castsi128_ps(a.v));
  24. }
  25. /** Returns the approximate reciprocal square root.
  26. Much faster than `1/sqrt(x)`.
  27. */
  28. inline float_4 rsqrt(float_4 x) {
  29. return float_4(_mm_rsqrt_ps(x.v));
  30. }
  31. /** Returns the approximate reciprocal.
  32. Much faster than `1/x`.
  33. */
  34. inline float_4 rcp(float_4 x) {
  35. return float_4(_mm_rcp_ps(x.v));
  36. }
  37. // Nonstandard convenience functions
  /** Scalar counterpart of the vector `ifelse`: yields `a` when `cond` is true, otherwise `b`. */
  inline float ifelse(bool cond, float a, float b) {
      if (cond) {
          return a;
      }
      return b;
  }
  41. /** Given a mask, returns a if mask is 0xffffffff per element, b if mask is 0x00000000 */
  42. inline float_4 ifelse(float_4 mask, float_4 a, float_4 b) {
  43. return (a & mask) | andnot(mask, b);
  44. }
  45. /** Returns a vector where element N is all 1's if the N'th bit of `a` is 1, or all 0's if the N'th bit of `a` is 0.
  46. */
  47. template <typename T>
  48. T movemaskInverse(int a);
  49. template <>
  50. inline int32_4 movemaskInverse<int32_4>(int a) {
  51. // Pick out N'th bit of `a` and check if it's 1.
  52. int32_4 mask1234 = int32_4(1, 2, 4, 8);
  53. return (mask1234 & int32_4(a)) == mask1234;
  54. }
  55. template <>
  56. inline float_4 movemaskInverse<float_4>(int a) {
  57. return float_4::cast(movemaskInverse<int32_4>(a));
  58. }
  59. // Standard math functions from std::
  60. /* Import std:: math functions into the simd namespace so you can use `sin(T)` etc in templated functions and get both the scalar and vector versions.
  61. Example:
  62. template <typename T>
  63. T sin_plus_cos(T x) {
  64. return simd::sin(x) + simd::cos(x);
  65. }
  66. */
  67. using std::fmax;
  68. inline float_4 fmax(float_4 x, float_4 b) {
  69. return float_4(_mm_max_ps(x.v, b.v));
  70. }
  71. using std::fmin;
  72. inline float_4 fmin(float_4 x, float_4 b) {
  73. return float_4(_mm_min_ps(x.v, b.v));
  74. }
  75. using std::sqrt;
  76. inline float_4 sqrt(float_4 x) {
  77. return float_4(_mm_sqrt_ps(x.v));
  78. }
  79. using std::log;
  80. inline float_4 log(float_4 x) {
  81. return float_4(sse_mathfun_log_ps(x.v));
  82. }
  83. using std::log10;
  84. inline float_4 log10(float_4 x) {
  85. return float_4(sse_mathfun_log_ps(x.v)) / std::log(10.f);
  86. }
  87. using std::log2;
  88. inline float_4 log2(float_4 x) {
  89. return float_4(sse_mathfun_log_ps(x.v)) / std::log(2.f);
  90. }
  91. using std::exp;
  92. inline float_4 exp(float_4 x) {
  93. return float_4(sse_mathfun_exp_ps(x.v));
  94. }
  95. using std::sin;
  96. inline float_4 sin(float_4 x) {
  97. return float_4(sse_mathfun_sin_ps(x.v));
  98. }
  99. using std::cos;
  100. inline float_4 cos(float_4 x) {
  101. return float_4(sse_mathfun_cos_ps(x.v));
  102. }
  103. using std::tan;
  104. inline float_4 tan(float_4 x) {
  105. return float_4(sse_mathfun_tan_ps(x.v));
  106. }
  107. using std::atan;
  108. inline float_4 atan(float_4 x) {
  109. return float_4(sse_mathfun_atan_ps(x.v));
  110. }
  111. using std::atan2;
  112. inline float_4 atan2(float_4 x, float_4 y) {
  113. return float_4(sse_mathfun_atan2_ps(x.v, y.v));
  114. }
  115. using std::trunc;
  116. // SIMDe defines _MM_FROUND_NO_EXC with a prefix
  117. #ifndef _MM_FROUND_NO_EXC
  118. #define _MM_FROUND_NO_EXC SIMDE_MM_FROUND_NO_EXC
  119. #endif
  120. inline float_4 trunc(float_4 a) {
  121. return float_4(_mm_round_ps(a.v, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC));
  122. }
  123. using std::floor;
  124. inline float_4 floor(float_4 a) {
  125. return float_4(_mm_round_ps(a.v, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC));
  126. }
  127. using std::ceil;
  128. inline float_4 ceil(float_4 a) {
  129. return float_4(_mm_round_ps(a.v, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC));
  130. }
  131. using std::round;
  132. inline float_4 round(float_4 a) {
  133. return float_4(_mm_round_ps(a.v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
  134. }
  135. using std::fmod;
  136. inline float_4 fmod(float_4 a, float_4 b) {
  137. return a - floor(a / b) * b;
  138. }
  139. using std::hypot;
  140. inline float_4 hypot(float_4 a, float_4 b) {
  141. return sqrt(a * a + b * b);
  142. }
  143. using std::fabs;
  144. inline float_4 fabs(float_4 a) {
  145. // Sign bit
  146. int32_4 mask = ~0x80000000;
  147. return a & float_4::cast(mask);
  148. }
  149. using std::abs;
  150. inline float_4 abs(float_4 a) {
  151. return fabs(a);
  152. }
  153. inline float_4 abs(std::complex<float_4> a) {
  154. return hypot(a.real(), a.imag());
  155. }
  156. using std::arg;
  157. inline float_4 arg(std::complex<float_4> a) {
  158. return atan2(a.imag(), a.real());
  159. }
  160. using std::pow;
  161. inline float_4 pow(float_4 a, float_4 b) {
  162. return exp(b * log(a));
  163. }
  164. inline float_4 pow(float a, float_4 b) {
  165. return exp(b * std::log(a));
  166. }
  /** Raises `a` to the integer power `b` by repeated squaring.
  Works for any `T` that can be initialized from 1 and supports `*=`.
  Returns 1 when `b` is zero or negative (negative exponents are not supported).
  */
  template <typename T>
  T pow(T a, int b) {
      // Optimal with `-O3 -funsafe-math-optimizations` when b is known at compile-time
      T p = 1;
      if (b <= 0)
          return p;
      // Consume the exponent bit by bit as an unsigned value. The previous version doubled
      // a signed counter until it exceeded `b`, which overflows once b >= 2^30.
      unsigned int remaining = static_cast<unsigned int>(b);
      for (;;) {
          if (remaining & 1u)
              p *= a;
          remaining >>= 1;
          if (remaining == 0)
              break;
          // Square only while higher bits remain, so an unused final square cannot
          // overflow an integer T.
          a *= a;
      }
      return p;
  }
  178. // From math.hpp
  179. using math::clamp;
  180. inline float_4 clamp(float_4 x, float_4 a = 0.f, float_4 b = 1.f) {
  181. return fmin(fmax(x, a), b);
  182. }
  183. using math::rescale;
  184. inline float_4 rescale(float_4 x, float_4 xMin, float_4 xMax, float_4 yMin, float_4 yMax) {
  185. return yMin + (x - xMin) / (xMax - xMin) * (yMax - yMin);
  186. }
  187. using math::crossfade;
  188. inline float_4 crossfade(float_4 a, float_4 b, float_4 p) {
  189. return a + (b - a) * p;
  190. }
  191. using math::sgn;
  192. inline float_4 sgn(float_4 x) {
  193. float_4 signbit = x & -0.f;
  194. float_4 nonzero = (x != 0.f);
  195. return signbit | (nonzero & 1.f);
  196. }
  197. } // namespace simd
  198. } // namespace rack