You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

vector.hpp 4.0KB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. #pragma once
  2. #include <cstring>
  3. #include <emmintrin.h>
  4. namespace rack {
  5. /** Abstraction of byte-aligned values for SIMD CPU acceleration. */
  6. namespace simd {
  /** Reinterprets the raw bits of `x` as a value of type TO.
  No numeric conversion is performed; the bytes of `x` are copied verbatim.
  The interface mirrors `std::bit_cast` from C++20.
  FROM and TO must have the same size, which is enforced at compile time.
  Usage example:
    printf("%08x\n", bit_cast<int>(1.f)); // Prints 3f800000
  */
  template <typename TO, typename FROM>
  TO bit_cast(const FROM &x) {
    static_assert(sizeof(FROM) == sizeof(TO), "types must have equal size");
    // The copy length is a compile-time constant, so this is expected to be
    // optimized down to a couple of `mov` instructions.
    TO result;
    std::memcpy(&result, &x, sizeof(FROM));
    return result;
  }
  /** Generic class for vector float types.
  This class is designed to be used just like `float` scalars, with extra features for handling bitwise logic, conditions, loading, and storing.
  Usage example:
    float in[4], out[4];
    f32_4 a = f32_4::load(in);
    f32_4 b = 2.f * a / (1 - a);
    b *= sin(2 * M_PI * a);
    b.store(out);
  */
  template <int N>
  struct f32;

  /** Wrapper for `__m128` representing an aligned vector of 4 single-precision float values.
  Element 0 is the lowest lane of the register; it corresponds to index 0 of the arrays used by `load()` and `store()`.
  */
  template <>
  struct f32<4> {
    /** The wrapped SSE register. */
    __m128 v;

    /** Constructs an uninitialized vector. */
    f32() {}

    /** Constructs a vector from a native `__m128` type. */
    f32(__m128 v) : v(v) {}

    /** Constructs a vector with all elements set to `x`.
    Left implicit on purpose: the operators in this file rely on a `float` converting to a vector (e.g. `0.f - a`, `a += 1.f`).
    */
    f32(float x) : v(_mm_set1_ps(x)) {}

    /** Constructs a vector from four values.
    `x1` becomes element 0 and `x4` becomes element 3, the same order as the array read by `load()` and written by `store()`.
    `_mm_setr_ps` is used because `_mm_set_ps` takes its arguments from the highest lane down, which would reverse the elements.
    */
    f32(float x1, float x2, float x3, float x4) : v(_mm_setr_ps(x1, x2, x3, x4)) {}

    /** Reads an array of 4 values.
    Uses an unaligned load, so `x` does not need any particular alignment.
    */
    static f32 load(const float *x) {
      return f32(_mm_loadu_ps(x));
    }

    /** Returns a vector initialized to zero. */
    static f32 zero() {
      return f32(_mm_setzero_ps());
    }

    /** Writes an array of 4 values.
    Uses an unaligned store, so `x` does not need any particular alignment.
    Does not modify the vector, so it may be called on const objects.
    */
    void store(float *x) const {
      _mm_storeu_ps(x, v);
    }
  };

  /** Shorthand for the 4-element float vector. */
  using f32_4 = f32<4>;
  62. // Operator overloads
  /** `a @ b`
  Defines the free function `op_name` (e.g. `operator+`) taking two vectors and returning a vector built from `intrinsic(lhs.v, rhs.v)`.
  */
  #define DECLARE_F32_4_OPERATOR_INFIX(op_name, intrinsic) \
    inline f32_4 op_name(const f32_4 &lhs, const f32_4 &rhs) { \
      return f32_4(intrinsic(lhs.v, rhs.v)); \
    }
  /** `a @= b`
  Defines the compound-assignment function `op_name` (e.g. `operator+=`).
  It stores `binary_op(target, operand)` into `target` and returns a reference to `target`.
  */
  #define DECLARE_F32_4_OPERATOR_INCREMENT(op_name, binary_op) \
    inline f32_4 &op_name(f32_4 &target, const f32_4 &operand) { \
      return target = binary_op(target, operand); \
    }
  74. DECLARE_F32_4_OPERATOR_INFIX(operator+, _mm_add_ps)
  75. DECLARE_F32_4_OPERATOR_INFIX(operator-, _mm_sub_ps)
  76. DECLARE_F32_4_OPERATOR_INFIX(operator*, _mm_mul_ps)
  77. DECLARE_F32_4_OPERATOR_INFIX(operator/, _mm_div_ps)
  78. /**
  79. Use these to apply logic, bit masks, and conditions to elements.
  80. Examples:
  81. Subtract 1 from value if greater than or equal to 1.
  82. x -= (x >= 1.f) & 1.f;
  83. */
  84. DECLARE_F32_4_OPERATOR_INFIX(operator^, _mm_xor_ps)
  85. DECLARE_F32_4_OPERATOR_INFIX(operator&, _mm_and_ps)
  86. DECLARE_F32_4_OPERATOR_INFIX(operator|, _mm_mul_ps)
  87. /** Boolean operators on vectors give 0x00000000 for false and 0xffffffff for true, for each vector element.
  88. */
  89. DECLARE_F32_4_OPERATOR_INCREMENT(operator+=, operator+);
  90. DECLARE_F32_4_OPERATOR_INCREMENT(operator-=, operator-);
  91. DECLARE_F32_4_OPERATOR_INCREMENT(operator*=, operator*);
  92. DECLARE_F32_4_OPERATOR_INCREMENT(operator/=, operator/);
  93. DECLARE_F32_4_OPERATOR_INCREMENT(operator^=, operator^);
  94. DECLARE_F32_4_OPERATOR_INCREMENT(operator&=, operator&);
  95. DECLARE_F32_4_OPERATOR_INCREMENT(operator|=, operator|);
  96. /** `+a` */
  97. inline f32_4 operator+(const f32_4 &a) {
  98. return a;
  99. }
  100. /** `-a` */
  101. inline f32_4 operator-(const f32_4 &a) {
  102. return 0.f - a;
  103. }
  104. /** `++a` */
  105. inline f32_4 &operator++(f32_4 &a) {
  106. a += 1.f;
  107. return a;
  108. }
  109. /** `--a` */
  110. inline f32_4 &operator--(f32_4 &a) {
  111. a -= 1.f;
  112. return a;
  113. }
  114. /** `a++` */
  115. inline f32_4 operator++(f32_4 &a, int) {
  116. f32_4 b = a;
  117. ++a;
  118. return b;
  119. }
  120. /** `a--` */
  121. inline f32_4 operator--(f32_4 &a, int) {
  122. f32_4 b = a;
  123. --a;
  124. return b;
  125. }
  126. /** `~a` */
  127. inline f32_4 operator~(const f32_4 &a) {
  128. return f32_4(_mm_xor_ps(a.v, _mm_cmpeq_ps(a.v, a.v)));
  129. }
  130. DECLARE_F32_4_OPERATOR_INFIX(operator==, _mm_cmpeq_ps)
  131. DECLARE_F32_4_OPERATOR_INFIX(operator>=, _mm_cmpge_ps)
  132. DECLARE_F32_4_OPERATOR_INFIX(operator>, _mm_cmpgt_ps)
  133. DECLARE_F32_4_OPERATOR_INFIX(operator<=, _mm_cmple_ps)
  134. DECLARE_F32_4_OPERATOR_INFIX(operator<, _mm_cmplt_ps)
  135. DECLARE_F32_4_OPERATOR_INFIX(operator!=, _mm_cmpneq_ps)
  136. } // namespace simd
  137. } // namespace rack