You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

vector.hpp 4.7KB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. #pragma once
  2. #include <cstring>
  3. #include <pmmintrin.h>
  4. #include <type_traits>
  5. namespace rack {
  6. /** Abstraction of byte-aligned values for SIMD CPU acceleration. */
  7. namespace simd {
  /** Reinterprets the bytes of `x` as a value of type TO, without any numeric conversion.
  The API mirrors `std::bit_cast` from C++20.

  Requirements, all checked at compile time:
  - FROM and TO have the same size.
  - FROM and TO are trivially copyable, since the bytes are transferred with `memcpy`.
  TO must also be default-constructible, because the result starts out as a default-initialized TO.

  Usage example:
      printf("%08x\n", bit_cast<int>(1.f)); // Prints 3f800000
  */
  template <typename TO, typename FROM>
  TO bit_cast(const FROM &x) {
      static_assert(sizeof(FROM) == sizeof(TO), "types must have equal size");
      // Copying the object representation is only well-defined for trivially copyable types.
      static_assert(std::is_trivially_copyable<FROM>::value, "source type must be trivially copyable");
      static_assert(std::is_trivially_copyable<TO>::value, "destination type must be trivially copyable");
      // Should be optimized to two `mov` instructions
      TO y;
      std::memcpy(&y, &x, sizeof(x));
      return y;
  }
  /** Generic class for vector types.
  This class is designed to be used just like you use scalars, with extra features for handling bitwise logic, conditions, loading, and storing.
  Only the declaration lives here; each supported (T, N) pair is provided as an explicit specialization.

  Usage example:
      float in[4], out[4];
      float_4 a = float_4::load(in);
      float_4 b = 2.f * a / (1 - a);
      b *= sin(2 * M_PI * a);
      b.store(out);
  */
  template <typename T, int N>
  struct Vector;

  /** Wrapper for `__m128` representing an aligned vector of 4 single-precision float values.
  Element 0 is the lowest lane of the register, i.e. the first float read by `load()` and written by `store()`.
  */
  template <>
  struct Vector<float, 4> {
      union {
          /** The native SSE register value. */
          __m128 v;
          /** The same four elements viewed as scalars.
          Accessing this array of scalars is slow and defeats the purpose of vectorizing.
          */
          float s[4];
      };

      /** Constructs an uninitialized vector. */
      Vector() {}

      /** Constructs a vector from a native `__m128` type. */
      Vector(__m128 v) : v(v) {}

      /** Constructs a vector with all elements set to `x`. */
      Vector(float x) {
          v = _mm_set_ps1(x);
      }

      /** Constructs a vector from four values, so that `x1` becomes element 0 and `x4` becomes element 3.
      This is the same element order used by `load()`, `store()`, and `s[]`.
      */
      Vector(float x1, float x2, float x3, float x4) {
          // _mm_set_ps() expects the highest lane first; _mm_setr_ps() takes the lanes in memory order.
          v = _mm_setr_ps(x1, x2, x3, x4);
      }

      /** Returns a vector initialized to zero. */
      static Vector zero() {
          return Vector(_mm_setzero_ps());
      }

      /** Returns a vector with all 1 bits. */
      static Vector mask() {
          // Comparing a value with itself for equality sets every bit of every lane.
          __m128 zero = _mm_setzero_ps();
          return Vector(_mm_cmpeq_ps(zero, zero));
      }

      /** Reads an array of 4 values.
      Uses an unaligned load, so `x` does not need to be 16-byte aligned.
      */
      static Vector load(const float *x) {
          return Vector(_mm_loadu_ps(x));
      }

      /** Writes an array of 4 values.
      Uses an unaligned store, so `x` does not need to be 16-byte aligned.
      Does not modify the vector, so it may be called on const objects.
      */
      void store(float *x) const {
          _mm_storeu_ps(x, v);
      }
  };
  72. // Typedefs
  73. typedef Vector<float, 4> float_4;
  74. // typedef Vector<double, 2> double_2;
  75. // typedef Vector<int32_t, 4> int32_4;
  76. // Operator overloads
  /** Defines the binary operator `a @ b` for float_4.
  `op` is the full operator name (e.g. `operator+`).
  `intrinsic` is called with the two underlying `__m128` values and its result is wrapped in a float_4.
  */
  #define DECLARE_FLOAT_4_OPERATOR_INFIX(op, intrinsic) \
      inline float_4 op(const float_4 &a, const float_4 &b) { \
          return float_4(intrinsic(a.v, b.v)); \
      }
  /** Defines the compound assignment operator `a @= b` for float_4.
  `op` is the full operator name (e.g. `operator+=`).
  `binaryOp` is the matching binary operator function (e.g. `operator+`); its result is assigned to `a`, and `a` is returned by reference.
  */
  #define DECLARE_FLOAT_4_OPERATOR_INCREMENT(op, binaryOp) \
      inline float_4 &op(float_4 &a, const float_4 &b) { \
          return a = binaryOp(a, b); \
      }
  88. DECLARE_FLOAT_4_OPERATOR_INFIX(operator+, _mm_add_ps)
  89. DECLARE_FLOAT_4_OPERATOR_INFIX(operator-, _mm_sub_ps)
  90. DECLARE_FLOAT_4_OPERATOR_INFIX(operator*, _mm_mul_ps)
  91. DECLARE_FLOAT_4_OPERATOR_INFIX(operator/, _mm_div_ps)
  92. /* Use these to apply logic, bit masks, and conditions to elements.
  93. Boolean operators on vectors give 0x00000000 for false and 0xffffffff for true, for each vector element.
  94. Examples:
  95. Subtract 1 from value if greater than or equal to 1.
  96. x -= (x >= 1.f) & 1.f;
  97. */
  98. DECLARE_FLOAT_4_OPERATOR_INFIX(operator^, _mm_xor_ps)
  99. DECLARE_FLOAT_4_OPERATOR_INFIX(operator&, _mm_and_ps)
  100. DECLARE_FLOAT_4_OPERATOR_INFIX(operator|, _mm_or_ps)
  101. DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator+=, operator+);
  102. DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator-=, operator-);
  103. DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator*=, operator*);
  104. DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator/=, operator/);
  105. DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator^=, operator^);
  106. DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator&=, operator&);
  107. DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator|=, operator|);
  108. DECLARE_FLOAT_4_OPERATOR_INFIX(operator==, _mm_cmpeq_ps)
  109. DECLARE_FLOAT_4_OPERATOR_INFIX(operator>=, _mm_cmpge_ps)
  110. DECLARE_FLOAT_4_OPERATOR_INFIX(operator>, _mm_cmpgt_ps)
  111. DECLARE_FLOAT_4_OPERATOR_INFIX(operator<=, _mm_cmple_ps)
  112. DECLARE_FLOAT_4_OPERATOR_INFIX(operator<, _mm_cmplt_ps)
  113. DECLARE_FLOAT_4_OPERATOR_INFIX(operator!=, _mm_cmpneq_ps)
  114. /** `+a` */
  115. inline float_4 operator+(const float_4 &a) {
  116. return a;
  117. }
  118. /** `-a` */
  119. inline float_4 operator-(const float_4 &a) {
  120. return 0.f - a;
  121. }
  122. /** `++a` */
  123. inline float_4 &operator++(float_4 &a) {
  124. a += 1.f;
  125. return a;
  126. }
  127. /** `--a` */
  128. inline float_4 &operator--(float_4 &a) {
  129. a -= 1.f;
  130. return a;
  131. }
  132. /** `a++` */
  133. inline float_4 operator++(float_4 &a, int) {
  134. float_4 b = a;
  135. ++a;
  136. return b;
  137. }
  138. /** `a--` */
  139. inline float_4 operator--(float_4 &a, int) {
  140. float_4 b = a;
  141. --a;
  142. return b;
  143. }
  144. /** `~a` */
  145. inline float_4 operator~(const float_4 &a) {
  146. return a ^ float_4::mask();
  147. }
  148. // Instructions not available as operators
  149. /** `~a & b` */
  150. inline float_4 andnot(const float_4 &a, const float_4 &b) {
  151. return float_4(_mm_andnot_ps(a.v, b.v));
  152. }
  153. } // namespace simd
  154. } // namespace rack