You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

232 lines
4.2KB

  1. #pragma once
  2. #include <simd/vector.hpp>
  3. #include <simd/sse_mathfun_extension.h>
  4. #include <common.hpp>
  5. #include <math.hpp>
  6. namespace rack {
  7. namespace simd {
  8. // Nonstandard functions
  /** Scalar counterpart of the vector `ifelse`: yields `a` when `cond` is true, otherwise `b`. */
  inline float ifelse(bool cond, float a, float b) {
      if (cond)
          return a;
      return b;
  }
  12. /** Given a mask, returns a if mask is 0xffffffff per element, b if mask is 0x00000000 */
  13. inline float_4 ifelse(float_4 mask, float_4 a, float_4 b) {
  14. return (a & mask) | andnot(mask, b);
  15. }
  16. /** Returns the approximate reciprocal square root.
  17. Much faster than `1/sqrt(x)`.
  18. */
  19. inline float_4 rsqrt(float_4 x) {
  20. return float_4(_mm_rsqrt_ps(x.v));
  21. }
  22. /** Returns the approximate reciprocal.
  23. Much faster than `1/x`.
  24. */
  25. inline float_4 rcp(float_4 x) {
  26. return float_4(_mm_rcp_ps(x.v));
  27. }
  28. /** Given a mask `a`, returns a vector with each element either 0's or 1's depending on the mask bit.
  29. */
  30. template <typename T>
  31. T movemaskInverse(int a);
  32. template <>
  33. inline float_4 movemaskInverse<float_4>(int x) {
  34. __m128i msk8421 = _mm_set_epi32(8, 4, 2, 1);
  35. __m128i x_bc = _mm_set1_epi32(x);
  36. __m128i t = _mm_and_si128(x_bc, msk8421);
  37. return float_4(_mm_castsi128_ps(_mm_cmpeq_epi32(x_bc, t)));
  38. }
  39. // Standard math functions from std::
  40. /* Import std:: math functions into the simd namespace so you can use `sin(T)` etc in templated functions and get both the scalar and vector versions.
  41. Example:
  42. template <typename T>
  43. T sin_plus_cos(T x) {
  44. return simd::sin(x) + simd::cos(x);
  45. }
  46. */
  47. using std::fmax;
  48. inline float_4 fmax(float_4 x, float_4 b) {
  49. return float_4(_mm_max_ps(x.v, b.v));
  50. }
  51. using std::fmin;
  52. inline float_4 fmin(float_4 x, float_4 b) {
  53. return float_4(_mm_min_ps(x.v, b.v));
  54. }
  55. using std::sqrt;
  56. inline float_4 sqrt(float_4 x) {
  57. return float_4(_mm_sqrt_ps(x.v));
  58. }
  59. using std::log;
  60. inline float_4 log(float_4 x) {
  61. return float_4(sse_mathfun_log_ps(x.v));
  62. }
  63. using std::log10;
  64. inline float_4 log10(float_4 x) {
  65. return float_4(sse_mathfun_log_ps(x.v)) / std::log(10.f);
  66. }
  67. using std::log2;
  68. inline float_4 log2(float_4 x) {
  69. return float_4(sse_mathfun_log_ps(x.v)) / std::log(2.f);
  70. }
  71. using std::exp;
  72. inline float_4 exp(float_4 x) {
  73. return float_4(sse_mathfun_exp_ps(x.v));
  74. }
  75. using std::sin;
  76. inline float_4 sin(float_4 x) {
  77. return float_4(sse_mathfun_sin_ps(x.v));
  78. }
  79. using std::cos;
  80. inline float_4 cos(float_4 x) {
  81. return float_4(sse_mathfun_cos_ps(x.v));
  82. }
  83. using std::tan;
  84. inline float_4 tan(float_4 x) {
  85. return float_4(sse_mathfun_tan_ps(x.v));
  86. }
  87. using std::atan;
  88. inline float_4 atan(float_4 x) {
  89. return float_4(sse_mathfun_atan_ps(x.v));
  90. }
  91. using std::atan2;
  92. inline float_4 atan2(float_4 x, float_4 y) {
  93. return float_4(sse_mathfun_atan2_ps(x.v, y.v));
  94. }
  95. using std::trunc;
  96. inline float_4 trunc(float_4 a) {
  97. return float_4(_mm_cvtepi32_ps(_mm_cvttps_epi32(a.v)));
  98. }
  99. using std::floor;
  100. inline float_4 floor(float_4 a) {
  101. float_4 b = trunc(a);
  102. b -= (b > a) & 1.f;
  103. return b;
  104. }
  105. using std::ceil;
  106. inline float_4 ceil(float_4 a) {
  107. float_4 b = trunc(a);
  108. b += (b < a) & 1.f;
  109. return b;
  110. }
  111. using std::round;
  112. inline float_4 round(float_4 a) {
  113. a += ifelse(a < 0, -0.5f, 0.5f);
  114. float_4 b = trunc(a);
  115. return b;
  116. }
  117. using std::fmod;
  118. inline float_4 fmod(float_4 a, float_4 b) {
  119. return a - trunc(a / b) * b;
  120. }
  121. using std::fabs;
  122. inline float_4 fabs(float_4 a) {
  123. // Sign bit
  124. int32_4 mask = ~0x80000000;
  125. return a & float_4::cast(mask);
  126. }
  127. using std::pow;
  128. inline float_4 pow(float_4 a, float_4 b) {
  129. return exp(b * log(a));
  130. }
  131. inline float_4 pow(float a, float_4 b) {
  132. return exp(b * std::log(a));
  133. }
  /** Raises `a` to an integer power using exponentiation by squaring.
  Works for any `T` that supports construction from 1, `*=` and `/` (scalars and vector types alike).
  A zero exponent yields 1. A negative exponent yields the reciprocal of the positive power.
  The loop has at most one iteration per bit of `b`, so compilers can unroll it when `b` is a compile-time constant.
  */
  template <typename T>
  T pow(T a, int b) {
      // Work on the unsigned magnitude: shifting it right cannot overflow (unlike doubling a
      // signed counter), and negating via unsigned arithmetic is well-defined even for INT_MIN.
      unsigned int e = (b < 0) ? (0u - static_cast<unsigned int>(b)) : static_cast<unsigned int>(b);
      T p = 1;
      while (e != 0) {
          if (e & 1u)
              p *= a;
          e >>= 1;
          // Square only while more bits remain, so an unused final squaring cannot overflow.
          if (e != 0)
              a *= a;
      }
      if (b < 0)
          p = T(1) / p;
      return p;
  }
  145. // From math.hpp
  146. using math::clamp;
  147. inline float_4 clamp(float_4 x, float_4 a, float_4 b) {
  148. return fmin(fmax(x, a), b);
  149. }
  150. using math::rescale;
  151. inline float_4 rescale(float_4 x, float_4 xMin, float_4 xMax, float_4 yMin, float_4 yMax) {
  152. return yMin + (x - xMin) / (xMax - xMin) * (yMax - yMin);
  153. }
  154. using math::crossfade;
  155. inline float_4 crossfade(float_4 a, float_4 b, float_4 p) {
  156. return a + (b - a) * p;
  157. }
  158. using math::sgn;
  159. inline float_4 sgn(float_4 x) {
  160. float_4 signbit = x & -0.f;
  161. float_4 nonzero = (x != 0.f);
  162. return signbit | (nonzero & 1.f);
  163. }
  164. } // namespace simd
  165. } // namespace rack