You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

107 lines
2.4KB

  1. #include <x86intrin.h>
  2. namespace rack {
  3. namespace simd {
  4. template <int N>
  5. struct f32;
  6. /** Wrapper for `__m128` representing a vector of 4 single-precision float values. */
  7. template <>
  8. struct f32<4> {
  9. __m128 v;
  10. f32<4>() {}
  11. f32<4>(__m128 v) : v(v) {}
  12. f32<4>(float x) {
  13. v = _mm_set_ps1(x);
  14. }
  15. /** Reads an array of 4 values */
  16. f32<4>(const float *x) {
  17. v = _mm_loadu_ps(x);
  18. }
  19. /** Writes an array of 4 values */
  20. void store(float *x) {
  21. _mm_storeu_ps(x, v);
  22. }
  23. };
  24. typedef f32<4> f32_4;
  25. // Operator overloads
  26. #define DECLARE_F32_4_OPERATOR_INFIX(operator, func) \
  27. inline f32_4 operator(f32_4 a, f32_4 b) { \
  28. return f32_4(func(a.v, b.v)); \
  29. } \
  30. template <typename T> \
  31. f32_4 operator(T a, f32_4 b) { \
  32. return operator(f32_4(a), b); \
  33. } \
  34. template <typename T> \
  35. f32_4 operator(f32_4 a, T b) { \
  36. return operator(a, f32_4(b)); \
  37. }
  38. DECLARE_F32_4_OPERATOR_INFIX(operator+, _mm_add_ps)
  39. DECLARE_F32_4_OPERATOR_INFIX(operator-, _mm_sub_ps)
  40. DECLARE_F32_4_OPERATOR_INFIX(operator*, _mm_mul_ps)
  41. DECLARE_F32_4_OPERATOR_INFIX(operator/, _mm_div_ps)
  42. DECLARE_F32_4_OPERATOR_INFIX(operator==, _mm_cmpeq_ps)
  43. DECLARE_F32_4_OPERATOR_INFIX(operator>=, _mm_cmpge_ps)
  44. DECLARE_F32_4_OPERATOR_INFIX(operator>, _mm_cmpgt_ps)
  45. DECLARE_F32_4_OPERATOR_INFIX(operator<=, _mm_cmple_ps)
  46. DECLARE_F32_4_OPERATOR_INFIX(operator<, _mm_cmplt_ps)
  47. DECLARE_F32_4_OPERATOR_INFIX(operator!=, _mm_cmpneq_ps)
  48. #define DECLARE_F32_4_OPERATOR_INCREMENT(operator, func) \
  49. inline f32_4 &operator(f32_4 &a, f32_4 b) { \
  50. a.v = func(a.v, b.v); \
  51. return a; \
  52. } \
  53. template <typename T> \
  54. f32_4 &operator(f32_4 &a, T b) { \
  55. return operator(a, f32_4(b)); \
  56. }
  57. DECLARE_F32_4_OPERATOR_INCREMENT(operator+=, _mm_add_ps);
  58. DECLARE_F32_4_OPERATOR_INCREMENT(operator-=, _mm_sub_ps);
  59. DECLARE_F32_4_OPERATOR_INCREMENT(operator*=, _mm_mul_ps);
  60. DECLARE_F32_4_OPERATOR_INCREMENT(operator/=, _mm_div_ps);
  61. inline f32_4 rsqrt(f32_4 a) {
  62. return f32_4(_mm_rsqrt_ps(a.v));
  63. }
  64. inline f32_4 rcp(f32_4 a) {
  65. return f32_4(_mm_rcp_ps(a.v));
  66. }
  67. } // namespace simd
  68. } // namespace rack
  69. namespace std {
  70. inline rack::simd::f32_4 max(rack::simd::f32_4 a, rack::simd::f32_4 b) {
  71. return rack::simd::f32_4(_mm_max_ps(a.v, b.v));
  72. }
  73. inline rack::simd::f32_4 min(rack::simd::f32_4 a, rack::simd::f32_4 b) {
  74. return rack::simd::f32_4(_mm_min_ps(a.v, b.v));
  75. }
  76. inline rack::simd::f32_4 sqrt(rack::simd::f32_4 a) {
  77. return rack::simd::f32_4(_mm_sqrt_ps(a.v));
  78. }
  79. } // namespace std