| @@ -1,7 +1,6 @@ | |||
| #pragma once | |||
| #include <cstring> | |||
| #include <pmmintrin.h> | |||
| #include <type_traits> | |||
| namespace rack { | |||
| @@ -11,23 +10,6 @@ namespace rack { | |||
| namespace simd { | |||
| /** Casts the literal bits of FROM to TO without type conversion. | |||
| API copied from C++20. | |||
| Usage example: | |||
| printf("%08x\n", bit_cast<int>(1.f)); // Prints 3f800000 | |||
| */ | |||
| template <typename TO, typename FROM> | |||
| TO bit_cast(const FROM &x) { | |||
| static_assert(sizeof(FROM) == sizeof(TO), "types must have equal size"); | |||
| // Should be optimized to two `mov` instructions | |||
| TO y; | |||
| std::memcpy(&y, &x, sizeof(x)); | |||
| return y; | |||
| } | |||
| /** Generic class for vector types. | |||
| This class is designed to be used just like you use scalars, with extra features for handling bitwise logic, conditions, loading, and storing. | |||
| @@ -63,7 +45,7 @@ struct Vector<float, 4> { | |||
| /** Constructs a vector with all elements set to `x`. */ | |||
| Vector(float x) { | |||
| v = _mm_set_ps1(x); | |||
| v = _mm_set1_ps(x); | |||
| } | |||
| /** Constructs a vector from four values. */ | |||
| @@ -78,8 +60,7 @@ struct Vector<float, 4> { | |||
| /** Returns a vector with all 1 bits. */ | |||
| static Vector mask() { | |||
| __m128 zero = _mm_setzero_ps(); | |||
| return Vector(_mm_cmpeq_ps(zero, zero)); | |||
| return _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); | |||
| } | |||
| /** Reads an array of 4 values. */ | |||
| @@ -99,34 +80,76 @@ struct Vector<float, 4> { | |||
| }; | |||
| // Typedefs | |||
| template <> | |||
| struct Vector<int32_t, 4> { | |||
| union { | |||
| __m128i v; | |||
| int32_t s[4]; | |||
| }; | |||
| Vector() {} | |||
| Vector(__m128i v) : v(v) {} | |||
| Vector(int32_t x) { | |||
| v = _mm_set1_epi32(x); | |||
| } | |||
| Vector(int32_t x1, int32_t x2, int32_t x3, int32_t x4) { | |||
| v = _mm_set_epi32(x1, x2, x3, x4); | |||
| } | |||
| static Vector zero() { | |||
| return Vector(_mm_setzero_si128()); | |||
| } | |||
| static Vector mask() { | |||
| return Vector(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); | |||
| } | |||
| static Vector load(const int32_t *x) { | |||
| // HACK | |||
| // Use _mm_loadu_si128() because GCC doesn't support _mm_loadu_si32() | |||
| return Vector(_mm_loadu_si128((__m128i*) x)); | |||
| } | |||
| void store(int32_t *x) { | |||
| // HACK | |||
| // Use _mm_storeu_si128() because GCC doesn't support _mm_storeu_si32() | |||
| _mm_storeu_si128((__m128i*) x, v); | |||
| } | |||
| }; | |||
| typedef Vector<float, 4> float_4; | |||
| // typedef Vector<double, 2> double_2; | |||
| // typedef Vector<int32_t, 4> int32_4; | |||
| // Instructions not available as operators | |||
| /** `~a & b` */ | |||
| inline Vector<float, 4> andnot(const Vector<float, 4> &a, const Vector<float, 4> &b) { | |||
| return Vector<float, 4>(_mm_andnot_ps(a.v, b.v)); | |||
| } | |||
| // Operator overloads | |||
| /** `a @ b` */ | |||
| #define DECLARE_FLOAT_4_OPERATOR_INFIX(operator, func) \ | |||
| inline float_4 operator(const float_4 &a, const float_4 &b) { \ | |||
| return float_4(func(a.v, b.v)); \ | |||
| #define DECLARE_VECTOR_OPERATOR_INFIX(t, s, operator, func) \ | |||
| inline Vector<t, s> operator(const Vector<t, s> &a, const Vector<t, s> &b) { \ | |||
| return Vector<t, s>(func(a.v, b.v)); \ | |||
| } | |||
| /** `a @= b` */ | |||
| #define DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator, opfunc) \ | |||
| inline float_4 &operator(float_4 &a, const float_4 &b) { \ | |||
| #define DECLARE_VECTOR_OPERATOR_INCREMENT(t, s, operator, opfunc) \ | |||
| inline Vector<t, s> &operator(Vector<t, s> &a, const Vector<t, s> &b) { \ | |||
| a = opfunc(a, b); \ | |||
| return a; \ | |||
| } | |||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator+, _mm_add_ps) | |||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator-, _mm_sub_ps) | |||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator*, _mm_mul_ps) | |||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator/, _mm_div_ps) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator+, _mm_add_ps) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator+, _mm_add_epi32) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator-, _mm_sub_ps) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator-, _mm_sub_epi32) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator*, _mm_mul_ps) | |||
| // DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator*, NOT AVAILABLE IN SSE3) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator/, _mm_div_ps) | |||
| // DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator/, NOT AVAILABLE IN SSE3) | |||
| /* Use these to apply logic, bit masks, and conditions to elements. | |||
| Boolean operators on vectors give 0x00000000 for false and 0xffffffff for true, for each vector element. | |||
| @@ -137,74 +160,134 @@ Subtract 1 from value if greater than or equal to 1. | |||
| x -= (x >= 1.f) & 1.f; | |||
| */ | |||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator^, _mm_xor_ps) | |||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator&, _mm_and_ps) | |||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator|, _mm_or_ps) | |||
| DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator+=, operator+); | |||
| DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator-=, operator-); | |||
| DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator*=, operator*); | |||
| DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator/=, operator/); | |||
| DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator^=, operator^); | |||
| DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator&=, operator&); | |||
| DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator|=, operator|); | |||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator==, _mm_cmpeq_ps) | |||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator>=, _mm_cmpge_ps) | |||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator>, _mm_cmpgt_ps) | |||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator<=, _mm_cmple_ps) | |||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator<, _mm_cmplt_ps) | |||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator!=, _mm_cmpneq_ps) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator^, _mm_xor_ps) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator^, _mm_xor_si128) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator&, _mm_and_ps) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator&, _mm_and_si128) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator|, _mm_or_ps) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator|, _mm_or_si128) | |||
| DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator+=, operator+) | |||
| DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator+=, operator+) | |||
| DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator-=, operator-) | |||
| DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator-=, operator-) | |||
| DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator*=, operator*) | |||
| // DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator*=, NOT AVAILABLE IN SSE3) | |||
| DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator/=, operator/) | |||
| // DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator/=, NOT AVAILABLE IN SSE3) | |||
| DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator^=, operator^) | |||
| DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator^=, operator^) | |||
| DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator&=, operator&) | |||
| DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator&=, operator&) | |||
| DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator|=, operator|) | |||
| DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator|=, operator|) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator==, _mm_cmpeq_ps) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator==, _mm_cmpeq_epi32) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator>=, _mm_cmpge_ps) | |||
| inline Vector<int32_t, 4> operator>=(const Vector<int32_t, 4> &a, const Vector<int32_t, 4> &b) { | |||
| return Vector<int32_t, 4>(_mm_cmpgt_epi32(a.v, b.v)) ^ Vector<int32_t, 4>::mask(); | |||
| } | |||
| DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator>, _mm_cmpgt_ps) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator>, _mm_cmpgt_epi32) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator<=, _mm_cmple_ps) | |||
| inline Vector<int32_t, 4> operator<=(const Vector<int32_t, 4> &a, const Vector<int32_t, 4> &b) { | |||
| return Vector<int32_t, 4>(_mm_cmplt_epi32(a.v, b.v)) ^ Vector<int32_t, 4>::mask(); | |||
| } | |||
| DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator<, _mm_cmplt_ps) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator<, _mm_cmplt_epi32) | |||
| DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator!=, _mm_cmpneq_ps) | |||
| inline Vector<int32_t, 4> operator!=(const Vector<int32_t, 4> &a, const Vector<int32_t, 4> &b) { | |||
| return Vector<int32_t, 4>(_mm_cmpeq_epi32(a.v, b.v)) ^ Vector<int32_t, 4>::mask(); | |||
| } | |||
| /** `+a` */ | |||
| inline float_4 operator+(const float_4 &a) { | |||
| inline Vector<float, 4> operator+(const Vector<float, 4> &a) { | |||
| return a; | |||
| } | |||
| inline Vector<int32_t, 4> operator+(const Vector<int32_t, 4> &a) { | |||
| return a; | |||
| } | |||
| /** `-a` */ | |||
| inline float_4 operator-(const float_4 &a) { | |||
| inline Vector<float, 4> operator-(const Vector<float, 4> &a) { | |||
| return 0.f - a; | |||
| } | |||
| inline Vector<int32_t, 4> operator-(const Vector<int32_t, 4> &a) { | |||
| return 0 - a; | |||
| } | |||
| /** `++a` */ | |||
| inline float_4 &operator++(float_4 &a) { | |||
| inline Vector<float, 4> &operator++(Vector<float, 4> &a) { | |||
| a += 1.f; | |||
| return a; | |||
| } | |||
| inline Vector<int32_t, 4> &operator++(Vector<int32_t, 4> &a) { | |||
| a += 1; | |||
| return a; | |||
| } | |||
| /** `--a` */ | |||
| inline float_4 &operator--(float_4 &a) { | |||
| inline Vector<float, 4> &operator--(Vector<float, 4> &a) { | |||
| a -= 1.f; | |||
| return a; | |||
| } | |||
| inline Vector<int32_t, 4> &operator--(Vector<int32_t, 4> &a) { | |||
| a -= 1; | |||
| return a; | |||
| } | |||
| /** `a++` */ | |||
| inline float_4 operator++(float_4 &a, int) { | |||
| float_4 b = a; | |||
| inline Vector<float, 4> operator++(Vector<float, 4> &a, int) { | |||
| Vector<float, 4> b = a; | |||
| ++a; | |||
| return b; | |||
| } | |||
| inline Vector<int32_t, 4> operator++(Vector<int32_t, 4> &a, int) { | |||
| Vector<int32_t, 4> b = a; | |||
| ++a; | |||
| return b; | |||
| } | |||
| /** `a--` */ | |||
| inline float_4 operator--(float_4 &a, int) { | |||
| float_4 b = a; | |||
| inline Vector<float, 4> operator--(Vector<float, 4> &a, int) { | |||
| Vector<float, 4> b = a; | |||
| --a; | |||
| return b; | |||
| } | |||
| inline Vector<int32_t, 4> operator--(Vector<int32_t, 4> &a, int) { | |||
| Vector<int32_t, 4> b = a; | |||
| --a; | |||
| return b; | |||
| } | |||
| /** `~a` */ | |||
| inline float_4 operator~(const float_4 &a) { | |||
| return a ^ float_4::mask(); | |||
| inline Vector<float, 4> operator~(const Vector<float, 4> &a) { | |||
| return a ^ Vector<float, 4>::mask(); | |||
| } | |||
| inline Vector<int32_t, 4> operator~(const Vector<int32_t, 4> &a) { | |||
| return a ^ Vector<int32_t, 4>::mask(); | |||
| } | |||
| // Instructions not available as operators | |||
| // Typedefs | |||
| /** `~a & b` */ | |||
| inline float_4 andnot(const float_4 &a, const float_4 &b) { | |||
| return float_4(_mm_andnot_ps(a.v, b.v)); | |||
| } | |||
| typedef Vector<float, 4> float_4; | |||
| typedef Vector<int32_t, 4> int32_4; | |||
| } // namespace simd | |||