You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

vector.hpp 4.0KB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. #pragma once
  2. #include <cstring>
  3. #include <emmintrin.h>
  4. namespace rack {
  5. namespace simd {
  /** Reinterprets the object representation of `x` as a value of type TO.
  No numeric conversion takes place: the bytes of `x` are copied verbatim.
  The interface mirrors `std::bit_cast` from C++20.

  TO and FROM must have the same size; this is enforced at compile time.

  Example:

      printf("%08x\n", bit_cast<int>(1.f)); // Prints 3f800000
  */
  template <typename TO, typename FROM>
  TO bit_cast(const FROM &x) {
      static_assert(sizeof(TO) == sizeof(FROM), "types must have equal size");
      // memcpy is the well-defined way to type-pun; compilers typically reduce it to plain register moves.
      TO result;
      std::memcpy(&result, &x, sizeof(TO));
      return result;
  }
  19. /** Generic class for vector float types.
  20. This class is designed to be used just like `float` scalars, with extra features for handling bitwise logic, conditions, loading, and storing.
  21. Usage example:
  22. float in[4], out[4];
  23. f32_4 a = f32_4::load(in);
  24. f32_4 b = 2.f * a / (1 - a);
  25. b *= sin(2 * M_PI * a);
  26. b.store(out);
  27. */
  template <int N>
  struct f32;


  /** Wrapper for `__m128` holding 4 single-precision float values.

  Element 0 is the lowest lane of the register, i.e. the value that `load()` reads from `x[0]` and `store()` writes to `x[0]`.
  The converting constructors are intentionally non-explicit so that scalars and raw `__m128` values can be mixed with vectors in expressions.
  */
  template <>
  struct f32<4> {
      /** The underlying SSE register value. */
      __m128 v;

      /** Constructs an uninitialized vector. */
      f32() {}

      /** Constructs a vector from a native `__m128` type. */
      f32(__m128 v) : v(v) {}

      /** Constructs a vector with all elements set to `x`. */
      f32(float x) : v(_mm_set_ps1(x)) {}

      /** Constructs a vector from four values.
      `x1` becomes element 0 and `x4` becomes element 3, matching the order used by `load()` and `store()`.
      */
      f32(float x1, float x2, float x3, float x4)
          // _mm_setr_ps takes its arguments lowest element first; _mm_set_ps would reverse them.
          : v(_mm_setr_ps(x1, x2, x3, x4)) {}

      /** Reads an array of 4 values.
      Uses an unaligned load, so `x` does not need to be 16-byte aligned.
      */
      static f32 load(const float *x) {
          return f32(_mm_loadu_ps(x));
      }

      /** Returns a vector initialized to zero. */
      static f32 zero() {
          return f32(_mm_setzero_ps());
      }

      /** Writes an array of 4 values.
      Uses an unaligned store, so `x` does not need to be 16-byte aligned.
      */
      void store(float *x) const {
          _mm_storeu_ps(x, v);
      }
  };


  typedef f32<4> f32_4;
  61. // Operator overloads
  62. /** `a @ b` */
  63. #define DECLARE_F32_4_OPERATOR_INFIX(operator, func) \
  64. inline f32_4 operator(const f32_4 &a, const f32_4 &b) { \
  65. return f32_4(func(a.v, b.v)); \
  66. }
  67. /** `a @= b` */
  68. #define DECLARE_F32_4_OPERATOR_INCREMENT(operator, opfunc) \
  69. inline f32_4 &operator(f32_4 &a, const f32_4 &b) { \
  70. a = opfunc(a, b); \
  71. return a; \
  72. }
  73. DECLARE_F32_4_OPERATOR_INFIX(operator+, _mm_add_ps)
  74. DECLARE_F32_4_OPERATOR_INFIX(operator-, _mm_sub_ps)
  75. DECLARE_F32_4_OPERATOR_INFIX(operator*, _mm_mul_ps)
  76. DECLARE_F32_4_OPERATOR_INFIX(operator/, _mm_div_ps)
  77. /**
  78. Use these to apply logic, bit masks, and conditions to elements.
  79. Examples:
  80. Subtract 1 from value if greater than or equal to 1.
  81. x -= (x >= 1.f) & 1.f;
  82. */
  83. DECLARE_F32_4_OPERATOR_INFIX(operator^, _mm_xor_ps)
  84. DECLARE_F32_4_OPERATOR_INFIX(operator&, _mm_and_ps)
  85. DECLARE_F32_4_OPERATOR_INFIX(operator|, _mm_mul_ps)
  86. /** Boolean (comparison) operators on vectors, i.e. `==`, `!=`, `<`, `<=`, `>` and `>=`, give 0x00000000 for false and 0xffffffff for true, for each vector element.
  87. */
  88. DECLARE_F32_4_OPERATOR_INCREMENT(operator+=, operator+);
  89. DECLARE_F32_4_OPERATOR_INCREMENT(operator-=, operator-);
  90. DECLARE_F32_4_OPERATOR_INCREMENT(operator*=, operator*);
  91. DECLARE_F32_4_OPERATOR_INCREMENT(operator/=, operator/);
  92. DECLARE_F32_4_OPERATOR_INCREMENT(operator^=, operator^);
  93. DECLARE_F32_4_OPERATOR_INCREMENT(operator&=, operator&);
  94. DECLARE_F32_4_OPERATOR_INCREMENT(operator|=, operator|);
  95. /** `+a` */
  96. inline f32_4 operator+(const f32_4 &a) {
  97. return a;
  98. }
  99. /** `-a` */
  100. inline f32_4 operator-(const f32_4 &a) {
  101. return 0.f - a;
  102. }
  103. /** `++a` */
  104. inline f32_4 &operator++(f32_4 &a) {
  105. a += 1.f;
  106. return a;
  107. }
  108. /** `--a` */
  109. inline f32_4 &operator--(f32_4 &a) {
  110. a -= 1.f;
  111. return a;
  112. }
  113. /** `a++` */
  114. inline f32_4 operator++(f32_4 &a, int) {
  115. f32_4 b = a;
  116. ++a;
  117. return b;
  118. }
  119. /** `a--` */
  120. inline f32_4 operator--(f32_4 &a, int) {
  121. f32_4 b = a;
  122. --a;
  123. return b;
  124. }
  125. /** `~a` */
  126. inline f32_4 operator~(const f32_4 &a) {
  127. return f32_4(_mm_xor_ps(a.v, _mm_cmpeq_ps(a.v, a.v)));
  128. }
  129. DECLARE_F32_4_OPERATOR_INFIX(operator==, _mm_cmpeq_ps)
  130. DECLARE_F32_4_OPERATOR_INFIX(operator>=, _mm_cmpge_ps)
  131. DECLARE_F32_4_OPERATOR_INFIX(operator>, _mm_cmpgt_ps)
  132. DECLARE_F32_4_OPERATOR_INFIX(operator<=, _mm_cmple_ps)
  133. DECLARE_F32_4_OPERATOR_INFIX(operator<, _mm_cmplt_ps)
  134. DECLARE_F32_4_OPERATOR_INFIX(operator!=, _mm_cmpneq_ps)
  135. } // namespace simd
  136. } // namespace rack