You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

257 lines
4.6KB

  1. #pragma once
  2. #include <simd/vector.hpp>
  3. #include <simd/sse_mathfun_extension.h>
  4. #include <common.hpp>
  5. #include <math.hpp>
  6. namespace rack {
  7. namespace simd {
  8. // Nonstandard functions
  /** Scalar select: yields `a` when `cond` is true, otherwise `b`. */
  inline float ifelse(bool cond, float a, float b) {
      if (cond) {
          return a;
      }
      return b;
  }
  12. /** Given a mask, returns a if mask is 0xffffffff per element, b if mask is 0x00000000 */
  13. inline float_4 ifelse(float_4 mask, float_4 a, float_4 b) {
  14. return (a & mask) | andnot(mask, b);
  15. }
  16. /** Returns the approximate reciprocal square root.
  17. Much faster than `1/sqrt(x)`.
  18. */
  19. inline float_4 rsqrt(float_4 x) {
  20. return float_4(_mm_rsqrt_ps(x.v));
  21. }
  22. /** Returns the approximate reciprocal.
  23. Much faster than `1/x`.
  24. */
  25. inline float_4 rcp(float_4 x) {
  26. return float_4(_mm_rcp_ps(x.v));
  27. }
  28. /** Returns a vector where element N is all 1's if the N'th bit of `a` is 1, or all 0's if the N'th bit of `a` is 0.
  29. */
  30. template <typename T>
  31. T movemaskInverse(int a);
  32. template <>
  33. inline int32_4 movemaskInverse<int32_4>(int a) {
  34. int32_4 msk8421 = int32_4(1, 2, 4, 8);
  35. return (msk8421 & int32_4(a)) == msk8421;
  36. }
  37. template <>
  38. inline float_4 movemaskInverse<float_4>(int a) {
  39. return float_4::cast(movemaskInverse<int32_4>(a));
  40. }
  41. // Standard math functions from std::
  42. /* Import std:: math functions into the simd namespace so you can use `sin(T)` etc in templated functions and get both the scalar and vector versions.
  43. Example:
  44. template <typename T>
  45. T sin_plus_cos(T x) {
  46. return simd::sin(x) + simd::cos(x);
  47. }
  48. */
  49. using std::fmax;
  50. inline float_4 fmax(float_4 x, float_4 b) {
  51. return float_4(_mm_max_ps(x.v, b.v));
  52. }
  53. using std::fmin;
  54. inline float_4 fmin(float_4 x, float_4 b) {
  55. return float_4(_mm_min_ps(x.v, b.v));
  56. }
  57. using std::sqrt;
  58. inline float_4 sqrt(float_4 x) {
  59. return float_4(_mm_sqrt_ps(x.v));
  60. }
  61. using std::log;
  62. inline float_4 log(float_4 x) {
  63. return float_4(sse_mathfun_log_ps(x.v));
  64. }
  65. using std::log10;
  66. inline float_4 log10(float_4 x) {
  67. return float_4(sse_mathfun_log_ps(x.v)) / std::log(10.f);
  68. }
  69. using std::log2;
  70. inline float_4 log2(float_4 x) {
  71. return float_4(sse_mathfun_log_ps(x.v)) / std::log(2.f);
  72. }
  73. using std::exp;
  74. inline float_4 exp(float_4 x) {
  75. return float_4(sse_mathfun_exp_ps(x.v));
  76. }
  77. using std::sin;
  78. inline float_4 sin(float_4 x) {
  79. return float_4(sse_mathfun_sin_ps(x.v));
  80. }
  81. using std::cos;
  82. inline float_4 cos(float_4 x) {
  83. return float_4(sse_mathfun_cos_ps(x.v));
  84. }
  85. using std::tan;
  86. inline float_4 tan(float_4 x) {
  87. return float_4(sse_mathfun_tan_ps(x.v));
  88. }
  89. using std::atan;
  90. inline float_4 atan(float_4 x) {
  91. return float_4(sse_mathfun_atan_ps(x.v));
  92. }
  93. using std::atan2;
  94. inline float_4 atan2(float_4 x, float_4 y) {
  95. return float_4(sse_mathfun_atan2_ps(x.v, y.v));
  96. }
  97. using std::trunc;
  98. inline float_4 trunc(float_4 a) {
  99. return float_4(_mm_cvtepi32_ps(_mm_cvttps_epi32(a.v)));
  100. }
  101. using std::floor;
  102. inline float_4 floor(float_4 a) {
  103. float_4 b = trunc(a);
  104. b -= (b > a) & 1.f;
  105. return b;
  106. }
  107. using std::ceil;
  108. inline float_4 ceil(float_4 a) {
  109. float_4 b = trunc(a);
  110. b += (b < a) & 1.f;
  111. return b;
  112. }
  113. using std::round;
  114. inline float_4 round(float_4 a) {
  115. a += ifelse(a < 0, -0.5f, 0.5f);
  116. float_4 b = trunc(a);
  117. return b;
  118. }
  119. using std::fmod;
  120. inline float_4 fmod(float_4 a, float_4 b) {
  121. return a - trunc(a / b) * b;
  122. }
  123. using std::hypot;
  124. inline float_4 hypot(float_4 a, float_4 b) {
  125. return sqrt(a * a + b * b);
  126. }
  127. using std::fabs;
  128. inline float_4 fabs(float_4 a) {
  129. // Sign bit
  130. int32_4 mask = ~0x80000000;
  131. return a & float_4::cast(mask);
  132. }
  133. using std::abs;
  134. inline float_4 abs(float_4 a) {
  135. return fabs(a);
  136. }
  137. inline float_4 abs(std::complex<float_4> a) {
  138. return hypot(a.real(), a.imag());
  139. }
  140. using std::arg;
  141. inline float_4 arg(std::complex<float_4> a) {
  142. return atan2(a.imag(), a.real());
  143. }
  144. using std::pow;
  145. inline float_4 pow(float_4 a, float_4 b) {
  146. return exp(b * log(a));
  147. }
  148. inline float_4 pow(float a, float_4 b) {
  149. return exp(b * std::log(a));
  150. }
  /** Raises `a` to the non-negative integer power `b` by repeated squaring.
  Returns 1 when `b` is zero or negative.
  `T` must be initializable from `1` and support `*=`.
  With optimization enabled and `b` known at compile time, the loop is expected to
  reduce to a fixed sequence of multiplications.
  */
  template <typename T>
  T pow(T a, int b) {
      // Consume the exponent bit by bit. Shifting an unsigned copy cannot overflow,
      // unlike a doubling signed counter, which overflowed for b >= 2^30.
      unsigned int remaining = (b > 0) ? static_cast<unsigned int>(b) : 0u;
      T p = 1;
      while (remaining != 0u) {
          if (remaining & 1u)
              p *= a;
          remaining >>= 1;
          // Square only if a higher bit still needs it; the last squaring is never
          // used and could overflow for integral T.
          if (remaining != 0u)
              a *= a;
      }
      return p;
  }
  162. // From math.hpp
  163. using math::clamp;
  164. inline float_4 clamp(float_4 x, float_4 a, float_4 b) {
  165. return fmin(fmax(x, a), b);
  166. }
  167. using math::rescale;
  168. inline float_4 rescale(float_4 x, float_4 xMin, float_4 xMax, float_4 yMin, float_4 yMax) {
  169. return yMin + (x - xMin) / (xMax - xMin) * (yMax - yMin);
  170. }
  171. using math::crossfade;
  172. inline float_4 crossfade(float_4 a, float_4 b, float_4 p) {
  173. return a + (b - a) * p;
  174. }
  175. using math::sgn;
  176. inline float_4 sgn(float_4 x) {
  177. float_4 signbit = x & -0.f;
  178. float_4 nonzero = (x != 0.f);
  179. return signbit | (nonzero & 1.f);
  180. }
  181. } // namespace simd
  182. } // namespace rack