You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

278 lines
5.4KB

  1. #pragma once
  2. #include <simd/Vector.hpp>
  3. #include <simd/sse_mathfun_extension.h>
  4. #include <math.hpp>
  5. namespace rack {
  6. namespace simd {
  7. // Functions based on instructions
  8. /** `~a & b` */
  9. inline float_4 andnot(float_4 a, float_4 b) {
  10. return float_4(_mm_andnot_ps(a.v, b.v));
  11. }
  12. /** Returns an integer with each bit corresponding to the most significant bit of each element.
  13. For example, `movemask(float_4::mask())` returns 0xf.
  14. */
  15. inline int movemask(float_4 a) {
  16. return _mm_movemask_ps(a.v);
  17. }
  18. /** Returns an integer with each bit corresponding to the most significant bit of each element.
  19. For example, `movemask(int32_4::mask())` returns 0xf.
  20. */
  21. inline int movemask(int32_4 a) {
  22. return _mm_movemask_ps(_mm_castsi128_ps(a.v));
  23. }
  24. /** Returns the approximate reciprocal square root.
  25. Much faster than `1/sqrt(x)`.
  26. */
  27. inline float_4 rsqrt(float_4 x) {
  28. return float_4(_mm_rsqrt_ps(x.v));
  29. }
  30. /** Returns the approximate reciprocal.
  31. Much faster than `1/x`.
  32. */
  33. inline float_4 rcp(float_4 x) {
  34. return float_4(_mm_rcp_ps(x.v));
  35. }
  36. // Nonstandard convenience functions
  /** Scalar counterpart of the vector `ifelse()`: yields `a` when `cond` is true, otherwise `b`. */
  inline float ifelse(bool cond, float a, float b) {
    if (cond)
      return a;
    return b;
  }
  40. /** Given a mask, returns a if mask is 0xffffffff per element, b if mask is 0x00000000 */
  41. inline float_4 ifelse(float_4 mask, float_4 a, float_4 b) {
  42. return (a & mask) | andnot(mask, b);
  43. }
  44. /** Returns a vector where element N is all 1's if the N'th bit of `a` is 1, or all 0's if the N'th bit of `a` is 0.
  45. */
  46. template <typename T>
  47. T movemaskInverse(int a);
  48. template <>
  49. inline int32_4 movemaskInverse<int32_4>(int a) {
  50. // Pick out N'th bit of `a` and check if it's 1.
  51. int32_4 mask1234 = int32_4(1, 2, 4, 8);
  52. return (mask1234 & int32_4(a)) == mask1234;
  53. }
  54. template <>
  55. inline float_4 movemaskInverse<float_4>(int a) {
  56. return float_4::cast(movemaskInverse<int32_4>(a));
  57. }
  58. // Standard math functions from std::
  59. /* Import std:: math functions into the simd namespace so you can use `sin(T)` etc in templated functions and get both the scalar and vector versions.
  60. Example:
  61. template <typename T>
  62. T sin_plus_cos(T x) {
  63. return simd::sin(x) + simd::cos(x);
  64. }
  65. */
  66. using std::fmax;
  67. inline float_4 fmax(float_4 x, float_4 b) {
  68. return float_4(_mm_max_ps(x.v, b.v));
  69. }
  70. using std::fmin;
  71. inline float_4 fmin(float_4 x, float_4 b) {
  72. return float_4(_mm_min_ps(x.v, b.v));
  73. }
  74. using std::sqrt;
  75. inline float_4 sqrt(float_4 x) {
  76. return float_4(_mm_sqrt_ps(x.v));
  77. }
  78. using std::log;
  79. inline float_4 log(float_4 x) {
  80. return float_4(sse_mathfun_log_ps(x.v));
  81. }
  82. using std::log10;
  83. inline float_4 log10(float_4 x) {
  84. return float_4(sse_mathfun_log_ps(x.v)) / std::log(10.f);
  85. }
  86. using std::log2;
  87. inline float_4 log2(float_4 x) {
  88. return float_4(sse_mathfun_log_ps(x.v)) / std::log(2.f);
  89. }
  90. using std::exp;
  91. inline float_4 exp(float_4 x) {
  92. return float_4(sse_mathfun_exp_ps(x.v));
  93. }
  94. using std::sin;
  95. inline float_4 sin(float_4 x) {
  96. return float_4(sse_mathfun_sin_ps(x.v));
  97. }
  98. using std::cos;
  99. inline float_4 cos(float_4 x) {
  100. return float_4(sse_mathfun_cos_ps(x.v));
  101. }
  102. using std::tan;
  103. inline float_4 tan(float_4 x) {
  104. return float_4(sse_mathfun_tan_ps(x.v));
  105. }
  106. using std::atan;
  107. inline float_4 atan(float_4 x) {
  108. return float_4(sse_mathfun_atan_ps(x.v));
  109. }
  110. using std::atan2;
  111. inline float_4 atan2(float_4 x, float_4 y) {
  112. return float_4(sse_mathfun_atan2_ps(x.v, y.v));
  113. }
  114. using std::trunc;
  115. // SIMDe defines _MM_FROUND_NO_EXC with a prefix
  116. #ifndef _MM_FROUND_NO_EXC
  117. #define _MM_FROUND_NO_EXC SIMDE_MM_FROUND_NO_EXC
  118. #endif
  119. inline float_4 trunc(float_4 a) {
  120. return float_4(_mm_round_ps(a.v, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC));
  121. }
  122. using std::floor;
  123. inline float_4 floor(float_4 a) {
  124. return float_4(_mm_round_ps(a.v, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC));
  125. }
  126. using std::ceil;
  127. inline float_4 ceil(float_4 a) {
  128. return float_4(_mm_round_ps(a.v, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC));
  129. }
  130. using std::round;
  131. inline float_4 round(float_4 a) {
  132. return float_4(_mm_round_ps(a.v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
  133. }
  134. using std::fmod;
  135. inline float_4 fmod(float_4 a, float_4 b) {
  136. return a - floor(a / b) * b;
  137. }
  138. using std::hypot;
  139. inline float_4 hypot(float_4 a, float_4 b) {
  140. return sqrt(a * a + b * b);
  141. }
  142. using std::fabs;
  143. inline float_4 fabs(float_4 a) {
  144. // Sign bit
  145. int32_4 mask = ~0x80000000;
  146. return a & float_4::cast(mask);
  147. }
  148. using std::abs;
  149. inline float_4 abs(float_4 a) {
  150. return fabs(a);
  151. }
  152. inline float_4 abs(std::complex<float_4> a) {
  153. return hypot(a.real(), a.imag());
  154. }
  155. using std::arg;
  156. inline float_4 arg(std::complex<float_4> a) {
  157. return atan2(a.imag(), a.real());
  158. }
  159. using std::pow;
  160. inline float_4 pow(float_4 a, float_4 b) {
  161. return exp(b * log(a));
  162. }
  163. inline float_4 pow(float a, float_4 b) {
  164. return exp(b * std::log(a));
  165. }
  /** Raises `a` to the integer power `b` by repeated squaring.
  Works for any `T` that is constructible from `1` and supports `*=` (and `/` when `b` is negative).
  A negative `b` yields the reciprocal `1 / a^|b|`; for integer `T` that division is an integer division.
  `b == 0` yields 1.
  Intended to be unrolled by the optimizer when `b` is known at compile-time.
  */
  template <typename T>
  T pow(T a, int b) {
    // Iterate over the unsigned magnitude of the exponent.
    // Shifting it down (instead of doubling a signed probe bit) cannot overflow, even for b >= 2^30 or b == INT_MIN.
    unsigned int remaining = (b < 0) ? 0u - static_cast<unsigned int>(b) : static_cast<unsigned int>(b);
    T p = 1;
    while (remaining != 0u) {
      if (remaining & 1u)
        p *= a;
      remaining >>= 1;
      // Square only while more bits remain: the last squaring would be unused and could overflow integer types.
      if (remaining != 0u)
        a *= a;
    }
    if (b < 0) {
      T one = 1;
      return one / p;
    }
    return p;
  }
  177. // From math.hpp
  178. using math::clamp;
  179. inline float_4 clamp(float_4 x, float_4 a = 0.f, float_4 b = 1.f) {
  180. return fmin(fmax(x, a), b);
  181. }
  182. using math::rescale;
  183. inline float_4 rescale(float_4 x, float_4 xMin, float_4 xMax, float_4 yMin, float_4 yMax) {
  184. return yMin + (x - xMin) / (xMax - xMin) * (yMax - yMin);
  185. }
  186. using math::crossfade;
  187. inline float_4 crossfade(float_4 a, float_4 b, float_4 p) {
  188. return a + (b - a) * p;
  189. }
  190. using math::sgn;
  191. inline float_4 sgn(float_4 x) {
  192. float_4 signbit = x & -0.f;
  193. float_4 nonzero = (x != 0.f);
  194. return signbit | (nonzero & 1.f);
  195. }
  196. } // namespace simd
  197. } // namespace rack