You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

176 lines
3.0KB

  1. #pragma once
  2. #include "vector.hpp"
  3. #include "sse_mathfun.h"
  4. #include "math.hpp"
  5. #include <cmath>
  6. namespace rack {
  7. namespace simd {
  8. // Standard math functions from std::
  9. /* Import std:: math functions into the simd namespace so you can use `sin(T)` etc in templated functions and get both the scalar and vector versions.
  10. Example:
  11. template <typename T>
  12. T sin_plus_cos(T x) {
  13. using namespace simd;
  14. return sin(x) + cos(x);
  15. }
  16. */
  17. using std::fmax;
  18. inline f32_4 fmax(f32_4 x, f32_4 b) {
  19. return f32_4(_mm_max_ps(x.v, b.v));
  20. }
  21. using std::fmin;
  22. inline f32_4 fmin(f32_4 x, f32_4 b) {
  23. return f32_4(_mm_min_ps(x.v, b.v));
  24. }
  25. using std::sqrt;
  26. inline f32_4 sqrt(f32_4 x) {
  27. return f32_4(_mm_sqrt_ps(x.v));
  28. }
  29. using std::log;
  30. inline f32_4 log(f32_4 x) {
  31. return f32_4(sse_mathfun_log_ps(x.v));
  32. }
  33. using std::log10;
  34. inline f32_4 log10(f32_4 x) {
  35. return f32_4(sse_mathfun_log_ps(x.v)) / std::log(10.f);
  36. }
  37. using std::log2;
  38. inline f32_4 log2(f32_4 x) {
  39. return f32_4(sse_mathfun_log_ps(x.v)) / std::log(2.f);
  40. }
  41. using std::exp;
  42. inline f32_4 exp(f32_4 x) {
  43. return f32_4(sse_mathfun_exp_ps(x.v));
  44. }
  45. using std::sin;
  46. inline f32_4 sin(f32_4 x) {
  47. return f32_4(sse_mathfun_sin_ps(x.v));
  48. }
  49. using std::cos;
  50. inline f32_4 cos(f32_4 x) {
  51. return f32_4(sse_mathfun_cos_ps(x.v));
  52. }
  53. using std::floor;
  54. inline f32_4 floor(f32_4 a) {
  55. return f32_4(sse_mathfun_floor_ps(a.v));
  56. }
  57. using std::ceil;
  58. inline f32_4 ceil(f32_4 a) {
  59. return f32_4(sse_mathfun_ceil_ps(a.v));
  60. }
  61. using std::round;
  62. inline f32_4 round(f32_4 a) {
  63. return f32_4(sse_mathfun_round_ps(a.v));
  64. }
  65. using std::fmod;
  66. inline f32_4 fmod(f32_4 a, f32_4 b) {
  67. return f32_4(sse_mathfun_fmod_ps(a.v, b.v));
  68. }
  69. using std::fabs;
  70. inline f32_4 fabs(f32_4 a) {
  71. return f32_4(sse_mathfun_fabs_ps(a.v));
  72. }
  73. using std::trunc;
  74. inline f32_4 trunc(f32_4 a) {
  75. return f32_4(sse_mathfun_trunc_ps(a.v));
  76. }
  77. using std::pow;
  78. inline f32_4 pow(f32_4 a, f32_4 b) {
  79. return exp(b * log(a));
  80. }
  81. inline f32_4 pow(float a, f32_4 b) {
  82. return exp(b * std::log(a));
  83. }
  84. // Nonstandard functions
  /** Scalar selection: returns `a` when `cond` is true, otherwise `b`. */
  inline float ifelse(bool cond, float a, float b) {
      if (cond) {
          return a;
      }
      return b;
  }
  88. /** Given a mask, returns a if mask is 0xffffffff per element, b if mask is 0x00000000 */
  89. inline f32_4 ifelse(f32_4 mask, f32_4 a, f32_4 b) {
  90. return (a & mask) | andnot(mask, b);
  91. }
  92. /** Returns the approximate reciprocal square root.
  93. Much faster than `1/sqrt(x)`.
  94. */
  95. inline f32_4 rsqrt(f32_4 x) {
  96. return f32_4(_mm_rsqrt_ps(x.v));
  97. }
  98. /** Returns the approximate reciprocal.
  99. Much faster than `1/x`.
  100. */
  101. inline f32_4 rcp(f32_4 x) {
  102. return f32_4(_mm_rcp_ps(x.v));
  103. }
  104. // From math.hpp
  105. using math::clamp;
  106. inline f32_4 clamp(f32_4 x, f32_4 a, f32_4 b) {
  107. return fmin(fmax(x, a), b);
  108. }
  109. using math::rescale;
  110. inline f32_4 rescale(f32_4 x, f32_4 xMin, f32_4 xMax, f32_4 yMin, f32_4 yMax) {
  111. return yMin + (x - xMin) / (xMax - xMin) * (yMax - yMin);
  112. }
  113. using math::sgn;
  114. inline f32_4 sgn(f32_4 x) {
  115. f32_4 signbit = x & -0.f;
  116. f32_4 nonzero = (x != 0.f);
  117. return signbit | (nonzero & 1.f);
  118. }
  119. } // namespace simd
  120. } // namespace rack