You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

functions.hpp 4.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. #pragma once
  2. #include <simd/vector.hpp>
  3. #include <simd/sse_mathfun_extension.h>
  4. #include <common.hpp>
  5. #include <math.hpp>
  6. namespace rack {
  7. namespace simd {
  8. // Standard math functions from std::
  9. /* Import std:: math functions into the simd namespace so you can use `sin(T)` etc in templated functions and get both the scalar and vector versions.
  10. Example:
  11. template <typename T>
  12. T sin_plus_cos(T x) {
  13. return simd::sin(x) + simd::cos(x);
  14. }
  15. */
  16. using std::fmax;
  17. inline float_4 fmax(float_4 x, float_4 b) {
  18. return float_4(_mm_max_ps(x.v, b.v));
  19. }
  20. using std::fmin;
  21. inline float_4 fmin(float_4 x, float_4 b) {
  22. return float_4(_mm_min_ps(x.v, b.v));
  23. }
  24. using std::sqrt;
  25. inline float_4 sqrt(float_4 x) {
  26. return float_4(_mm_sqrt_ps(x.v));
  27. }
  28. using std::log;
  29. inline float_4 log(float_4 x) {
  30. return float_4(sse_mathfun_log_ps(x.v));
  31. }
  32. using std::log10;
  33. inline float_4 log10(float_4 x) {
  34. return float_4(sse_mathfun_log_ps(x.v)) / std::log(10.f);
  35. }
  36. using std::log2;
  37. inline float_4 log2(float_4 x) {
  38. return float_4(sse_mathfun_log_ps(x.v)) / std::log(2.f);
  39. }
  40. using std::exp;
  41. inline float_4 exp(float_4 x) {
  42. return float_4(sse_mathfun_exp_ps(x.v));
  43. }
  44. using std::sin;
  45. inline float_4 sin(float_4 x) {
  46. return float_4(sse_mathfun_sin_ps(x.v));
  47. }
  48. using std::cos;
  49. inline float_4 cos(float_4 x) {
  50. return float_4(sse_mathfun_cos_ps(x.v));
  51. }
  52. using std::tan;
  53. inline float_4 tan(float_4 x) {
  54. return float_4(sse_mathfun_tan_ps(x.v));
  55. }
  56. using std::atan;
  57. inline float_4 atan(float_4 x) {
  58. return float_4(sse_mathfun_atan_ps(x.v));
  59. }
  60. using std::atan2;
  61. inline float_4 atan2(float_4 x, float_4 y) {
  62. return float_4(sse_mathfun_atan2_ps(x.v, y.v));
  63. }
  64. using std::floor;
  65. inline float_4 floor(float_4 a) {
  66. return float_4(sse_mathfun_floor_ps(a.v));
  67. }
  68. using std::ceil;
  69. inline float_4 ceil(float_4 a) {
  70. return float_4(sse_mathfun_ceil_ps(a.v));
  71. }
  72. using std::round;
  73. inline float_4 round(float_4 a) {
  74. return float_4(sse_mathfun_round_ps(a.v));
  75. }
  76. using std::fmod;
  77. inline float_4 fmod(float_4 a, float_4 b) {
  78. return float_4(sse_mathfun_fmod_ps(a.v, b.v));
  79. }
  80. using std::fabs;
  81. inline float_4 fabs(float_4 a) {
  82. return float_4(sse_mathfun_fabs_ps(a.v));
  83. }
  84. using std::trunc;
  85. inline float_4 trunc(float_4 a) {
  86. return float_4(sse_mathfun_trunc_ps(a.v));
  87. }
  88. using std::pow;
  89. inline float_4 pow(float_4 a, float_4 b) {
  90. return exp(b * log(a));
  91. }
  92. inline float_4 pow(float a, float_4 b) {
  93. return exp(b * std::log(a));
  94. }
  /** Raises `a` to the integer power `b` by repeated squaring.
  Works for any `T` that can be initialized from `1` and supports `*=`.
  Negative exponents are not supported: for `b <= 0` the loop does not run and 1 is returned.
  */
  template <typename T>
  T pow(T a, int b) {
      // Original note: optimal with `-O3 -funsafe-math-optimizations` when b is known at compile-time
      T p = 1;
      // Walk the exponent by shifting it right rather than doubling a probe bit.
      // A doubling probe overflows `int` (undefined behavior) once b >= 2^30.
      // The sequence of multiplications is the same as before.
      for (int e = b; e > 0; e >>= 1) {
          if (e & 1)
              p *= a;
          a *= a;
      }
      return p;
  }
  106. // Nonstandard functions
  /** Scalar select: yields `a` when `cond` is true, otherwise `b`. */
  inline float ifelse(bool cond, float a, float b) {
      if (cond)
          return a;
      return b;
  }
  110. /** Given a mask, returns a if mask is 0xffffffff per element, b if mask is 0x00000000 */
  111. inline float_4 ifelse(float_4 mask, float_4 a, float_4 b) {
  112. return (a & mask) | andnot(mask, b);
  113. }
  114. /** Returns the approximate reciprocal square root.
  115. Much faster than `1/sqrt(x)`.
  116. */
  117. inline float_4 rsqrt(float_4 x) {
  118. return float_4(_mm_rsqrt_ps(x.v));
  119. }
  120. /** Returns the approximate reciprocal.
  121. Much faster than `1/x`.
  122. */
  123. inline float_4 rcp(float_4 x) {
  124. return float_4(_mm_rcp_ps(x.v));
  125. }
  126. // From math.hpp
  127. using math::clamp;
  128. inline float_4 clamp(float_4 x, float_4 a, float_4 b) {
  129. return fmin(fmax(x, a), b);
  130. }
  131. using math::rescale;
  132. inline float_4 rescale(float_4 x, float_4 xMin, float_4 xMax, float_4 yMin, float_4 yMax) {
  133. return yMin + (x - xMin) / (xMax - xMin) * (yMax - yMin);
  134. }
  135. using math::crossfade;
  136. inline float_4 crossfade(float_4 a, float_4 b, float_4 p) {
  137. return a + (b - a) * p;
  138. }
  139. using math::sgn;
  140. inline float_4 sgn(float_4 x) {
  141. float_4 signbit = x & -0.f;
  142. float_4 nonzero = (x != 0.f);
  143. return signbit | (nonzero & 1.f);
  144. }
  145. /** Given a mask `a`, returns a vector with each element either 0's or 1's depending on the mask bit. */
  146. template <typename T>
  147. inline T movemaskInverse(int a);
  148. template <>
  149. inline float_4 movemaskInverse<float_4>(int x) {
  150. __m128i msk8421 = _mm_set_epi32(8, 4, 2, 1);
  151. __m128i x_bc = _mm_set1_epi32(x);
  152. __m128i t = _mm_and_si128(x_bc, msk8421);
  153. return float_4(_mm_castsi128_ps(_mm_cmpeq_epi32(x_bc, t)));
  154. }
  155. } // namespace simd
  156. } // namespace rack