You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

196 lines
4.4KB

  1. #pragma once
  2. #include <cstring>
  3. #include <x86intrin.h>
  4. #include <type_traits>
  5. namespace rack {
  6. /** Abstraction of byte-aligned values for SIMD CPU acceleration. */
  7. namespace simd {
  /** Reinterprets the bytes of `x` as a value of type TO without any value conversion.
  API modelled on C++20 `std::bit_cast`.

  Requirements, all checked at compile time:
  - FROM and TO have the same size.
  - FROM and TO are trivially copyable, because the bytes are transferred with `std::memcpy`.
  TO must also be default-constructible, since a temporary TO receives the bytes.

  Usage example:
      printf("%08x\n", bit_cast<int>(1.f)); // Prints 3f800000

  @param x  Object whose object representation is copied.
  @return   A TO holding exactly the bytes of `x`.
  */
  template <typename TO, typename FROM>
  TO bit_cast(const FROM &x) {
      static_assert(sizeof(FROM) == sizeof(TO), "types must have equal size");
      static_assert(std::is_trivially_copyable<FROM>::value, "source type must be trivially copyable");
      static_assert(std::is_trivially_copyable<TO>::value, "destination type must be trivially copyable");
      // A fixed-size memcpy like this is expected to be optimized down to plain register moves.
      TO y;
      std::memcpy(&y, &x, sizeof(TO));
      return y;
  }
  /** Generic class for vector types.
  This class is designed to be used just like you use scalars, with extra features for handling bitwise logic, conditions, loading, and storing.

  Usage example (assumes a `sin` overload for float4 is available):
      float in[4], out[4];
      float4 a = float4::load(in);
      float4 b = 2.f * a / (1 - a);
      b *= sin(2 * M_PI * a);
      b.store(out);
  */
  template <typename T, int N>
  struct Vector;

  /** Wrapper for `__m128` representing a vector of 4 single-precision float values.
  Element 0 is the value that `store()` writes to `x[0]` and that `load()` reads from `x[0]`.
  */
  template <>
  struct Vector<float, 4> {
      /** The underlying SSE register value. */
      __m128 v;

      /** Constructs an uninitialized vector. */
      Vector() {}

      /** Constructs a vector from a native `__m128` type. */
      Vector(__m128 v) : v(v) {}

      /** Constructs a vector with all elements set to `x`.
      Intentionally implicit so scalars can be mixed with vectors in expressions.
      */
      Vector(float x) : v(_mm_set_ps1(x)) {}

      /** Constructs a vector from four values, `x1` being element 0 and `x4` element 3.
      `_mm_setr_ps` is used because `_mm_set_ps` takes its arguments highest element first, which would reverse the order seen by `store()`.
      */
      Vector(float x1, float x2, float x3, float x4) : v(_mm_setr_ps(x1, x2, x3, x4)) {}

      /** Returns a vector initialized to zero. */
      static Vector zero() {
          return Vector(_mm_setzero_ps());
      }

      /** Reads an array of 4 values.
      Uses the unaligned load intrinsic, so `x` does not need 16-byte alignment.
      */
      static Vector load(const float *x) {
          return Vector(_mm_loadu_ps(x));
      }

      /** Writes an array of 4 values.
      Uses the unaligned store intrinsic, so `x` does not need 16-byte alignment.
      */
      void store(float *x) const {
          _mm_storeu_ps(x, v);
      }
  };
  62. // Typedefs
  63. typedef Vector<float, 4> float4;
  // Operator overloads

  /** Defines a binary operator `a @ b` on float4.
  `op` is the full operator name (e.g. `operator+`).
  `intrinsic` is called with the two operands' `__m128` values and its result is wrapped in a float4.
  */
  #define DECLARE_FLOAT4_OPERATOR_INFIX(op, intrinsic) \
      inline float4 op(const float4 &lhs, const float4 &rhs) { \
          return float4(intrinsic(lhs.v, rhs.v)); \
      }
  /** Defines a compound-assignment operator `a @= b` on float4.
  `op` is the full operator name (e.g. `operator+=`).
  `binary` is the matching binary operator function (e.g. `operator+`), which must already be declared; its result is assigned back to the left operand, which is then returned by reference.
  */
  #define DECLARE_FLOAT4_OPERATOR_INCREMENT(op, binary) \
      inline float4 &op(float4 &lhs, const float4 &rhs) { \
          lhs = binary(lhs, rhs); \
          return lhs; \
      }
  76. DECLARE_FLOAT4_OPERATOR_INFIX(operator+, _mm_add_ps)
  77. DECLARE_FLOAT4_OPERATOR_INFIX(operator-, _mm_sub_ps)
  78. DECLARE_FLOAT4_OPERATOR_INFIX(operator*, _mm_mul_ps)
  79. DECLARE_FLOAT4_OPERATOR_INFIX(operator/, _mm_div_ps)
  80. /* Use these to apply logic, bit masks, and conditions to elements.
  81. Boolean operators on vectors give 0x00000000 for false and 0xffffffff for true, for each vector element.
  82. Examples:
  83. Subtract 1 from value if greater than or equal to 1.
  84. x -= (x >= 1.f) & 1.f;
  85. */
  86. DECLARE_FLOAT4_OPERATOR_INFIX(operator^, _mm_xor_ps)
  87. DECLARE_FLOAT4_OPERATOR_INFIX(operator&, _mm_and_ps)
  88. DECLARE_FLOAT4_OPERATOR_INFIX(operator|, _mm_or_ps)
  89. DECLARE_FLOAT4_OPERATOR_INCREMENT(operator+=, operator+);
  90. DECLARE_FLOAT4_OPERATOR_INCREMENT(operator-=, operator-);
  91. DECLARE_FLOAT4_OPERATOR_INCREMENT(operator*=, operator*);
  92. DECLARE_FLOAT4_OPERATOR_INCREMENT(operator/=, operator/);
  93. DECLARE_FLOAT4_OPERATOR_INCREMENT(operator^=, operator^);
  94. DECLARE_FLOAT4_OPERATOR_INCREMENT(operator&=, operator&);
  95. DECLARE_FLOAT4_OPERATOR_INCREMENT(operator|=, operator|);
  96. DECLARE_FLOAT4_OPERATOR_INFIX(operator==, _mm_cmpeq_ps)
  97. DECLARE_FLOAT4_OPERATOR_INFIX(operator>=, _mm_cmpge_ps)
  98. DECLARE_FLOAT4_OPERATOR_INFIX(operator>, _mm_cmpgt_ps)
  99. DECLARE_FLOAT4_OPERATOR_INFIX(operator<=, _mm_cmple_ps)
  100. DECLARE_FLOAT4_OPERATOR_INFIX(operator<, _mm_cmplt_ps)
  101. DECLARE_FLOAT4_OPERATOR_INFIX(operator!=, _mm_cmpneq_ps)
  102. /** `+a` */
  103. inline float4 operator+(const float4 &a) {
  104. return a;
  105. }
  106. /** `-a` */
  107. inline float4 operator-(const float4 &a) {
  108. return 0.f - a;
  109. }
  110. /** `++a` */
  111. inline float4 &operator++(float4 &a) {
  112. a += 1.f;
  113. return a;
  114. }
  115. /** `--a` */
  116. inline float4 &operator--(float4 &a) {
  117. a -= 1.f;
  118. return a;
  119. }
  120. /** `a++` */
  121. inline float4 operator++(float4 &a, int) {
  122. float4 b = a;
  123. ++a;
  124. return b;
  125. }
  126. /** `a--` */
  127. inline float4 operator--(float4 &a, int) {
  128. float4 b = a;
  129. --a;
  130. return b;
  131. }
  132. /** `~a` */
  133. inline float4 operator~(const float4 &a) {
  134. float4 mask = float4::zero();
  135. mask = (mask == mask);
  136. return a ^ mask;
  137. }
  138. // Instructions not available as operators
  139. /** `~a & b` */
  140. inline float4 andnot(const float4 &a, const float4 &b) {
  141. return float4(_mm_andnot_ps(a.v, b.v));
  142. }
  143. } // namespace simd
  144. } // namespace rack