|
|
@@ -1,7 +1,6 @@ |
|
|
|
#pragma once |
|
|
|
#include <cstring> |
|
|
|
#include <pmmintrin.h> |
|
|
|
#include <type_traits> |
|
|
|
|
|
|
|
|
|
|
|
namespace rack { |
|
|
@@ -11,23 +10,6 @@ namespace rack { |
|
|
|
namespace simd { |
|
|
|
|
|
|
|
|
|
|
|
/** Reinterprets the object representation of `x` as a value of type TO.

No numeric conversion takes place: the bytes of `x` are copied verbatim into a new TO.
The interface mirrors C++20 `std::bit_cast`, so FROM and TO must have the same size
and both must be trivially copyable. Violating either requirement is a compile-time error.
TO must also be default-constructible, because the result is built in a local object.

Usage example:

	printf("%08x\n", bit_cast<int>(1.f)); // Prints 3f800000
*/
template <typename TO, typename FROM>
TO bit_cast(const FROM &x) {
	static_assert(sizeof(FROM) == sizeof(TO), "types must have equal size");
	// Copying bytes with memcpy is only defined for trivially copyable types.
	static_assert(std::is_trivially_copyable<FROM>::value, "FROM must be trivially copyable");
	static_assert(std::is_trivially_copyable<TO>::value, "TO must be trivially copyable");
	// Should be optimized to two `mov` instructions
	TO y;
	std::memcpy(&y, &x, sizeof(TO));
	return y;
}
|
|
|
|
|
|
|
|
|
|
|
/** Generic class for vector types. |
|
|
|
|
|
|
|
This class is designed to be used just like you use scalars, with extra features for handling bitwise logic, conditions, loading, and storing. |
|
|
@@ -63,7 +45,7 @@ struct Vector<float, 4> { |
|
|
|
|
|
|
|
/** Constructs a vector with all elements set to `x`. */ |
|
|
|
Vector(float x) { |
|
|
|
v = _mm_set_ps1(x); |
|
|
|
v = _mm_set1_ps(x); |
|
|
|
} |
|
|
|
|
|
|
|
/** Constructs a vector from four values. */ |
|
|
@@ -78,8 +60,7 @@ struct Vector<float, 4> { |
|
|
|
|
|
|
|
/** Returns a vector with all 1 bits. */ |
|
|
|
static Vector mask() { |
|
|
|
__m128 zero = _mm_setzero_ps(); |
|
|
|
return Vector(_mm_cmpeq_ps(zero, zero)); |
|
|
|
return _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); |
|
|
|
} |
|
|
|
|
|
|
|
/** Reads an array of 4 values. */ |
|
|
@@ -99,34 +80,76 @@ struct Vector<float, 4> { |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
// Typedefs |
|
|
|
template <> |
|
|
|
struct Vector<int32_t, 4> { |
|
|
|
union { |
|
|
|
__m128i v; |
|
|
|
int32_t s[4]; |
|
|
|
}; |
|
|
|
|
|
|
|
Vector() {} |
|
|
|
Vector(__m128i v) : v(v) {} |
|
|
|
Vector(int32_t x) { |
|
|
|
v = _mm_set1_epi32(x); |
|
|
|
} |
|
|
|
Vector(int32_t x1, int32_t x2, int32_t x3, int32_t x4) { |
|
|
|
v = _mm_set_epi32(x1, x2, x3, x4); |
|
|
|
} |
|
|
|
static Vector zero() { |
|
|
|
return Vector(_mm_setzero_si128()); |
|
|
|
} |
|
|
|
static Vector mask() { |
|
|
|
return Vector(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); |
|
|
|
} |
|
|
|
static Vector load(const int32_t *x) { |
|
|
|
// HACK |
|
|
|
// Use _mm_loadu_si128() because GCC doesn't support _mm_loadu_si32() |
|
|
|
return Vector(_mm_loadu_si128((__m128i*) x)); |
|
|
|
} |
|
|
|
void store(int32_t *x) { |
|
|
|
// HACK |
|
|
|
// Use _mm_storeu_si128() because GCC doesn't support _mm_storeu_si32() |
|
|
|
_mm_storeu_si128((__m128i*) x, v); |
|
|
|
} |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
typedef Vector<float, 4> float_4; |
|
|
|
// typedef Vector<double, 2> double_2; |
|
|
|
// typedef Vector<int32_t, 4> int32_4; |
|
|
|
// Instructions not available as operators |
|
|
|
|
|
|
|
|
|
|
|
/** `~a & b` */ |
|
|
|
inline Vector<float, 4> andnot(const Vector<float, 4> &a, const Vector<float, 4> &b) { |
|
|
|
return Vector<float, 4>(_mm_andnot_ps(a.v, b.v)); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Operator overloads |
|
|
|
|
|
|
|
|
|
|
|
/** `a @ b` */ |
|
|
|
#define DECLARE_FLOAT_4_OPERATOR_INFIX(operator, func) \ |
|
|
|
inline float_4 operator(const float_4 &a, const float_4 &b) { \ |
|
|
|
return float_4(func(a.v, b.v)); \ |
|
|
|
#define DECLARE_VECTOR_OPERATOR_INFIX(t, s, operator, func) \ |
|
|
|
inline Vector<t, s> operator(const Vector<t, s> &a, const Vector<t, s> &b) { \ |
|
|
|
return Vector<t, s>(func(a.v, b.v)); \ |
|
|
|
} |
|
|
|
|
|
|
|
/** `a @= b` */ |
|
|
|
#define DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator, opfunc) \ |
|
|
|
inline float_4 &operator(float_4 &a, const float_4 &b) { \ |
|
|
|
#define DECLARE_VECTOR_OPERATOR_INCREMENT(t, s, operator, opfunc) \ |
|
|
|
inline Vector<t, s> &operator(Vector<t, s> &a, const Vector<t, s> &b) { \ |
|
|
|
a = opfunc(a, b); \ |
|
|
|
return a; \ |
|
|
|
} |
|
|
|
|
|
|
|
DECLARE_FLOAT_4_OPERATOR_INFIX(operator+, _mm_add_ps) |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INFIX(operator-, _mm_sub_ps) |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INFIX(operator*, _mm_mul_ps) |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INFIX(operator/, _mm_div_ps) |
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator+, _mm_add_ps) |
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator+, _mm_add_epi32) |
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator-, _mm_sub_ps) |
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator-, _mm_sub_epi32) |
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator*, _mm_mul_ps) |
|
|
|
// DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator*, NOT AVAILABLE IN SSE3) |
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator/, _mm_div_ps) |
|
|
|
// DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator/, NOT AVAILABLE IN SSE3) |
|
|
|
|
|
|
|
/* Use these to apply logic, bit masks, and conditions to elements. |
|
|
|
Boolean operators on vectors give 0x00000000 for false and 0xffffffff for true, for each vector element. |
|
|
@@ -137,74 +160,134 @@ Subtract 1 from value if greater than or equal to 1. |
|
|
|
|
|
|
|
x -= (x >= 1.f) & 1.f; |
|
|
|
*/ |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INFIX(operator^, _mm_xor_ps) |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INFIX(operator&, _mm_and_ps) |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INFIX(operator|, _mm_or_ps) |
|
|
|
|
|
|
|
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator+=, operator+); |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator-=, operator-); |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator*=, operator*); |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator/=, operator/); |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator^=, operator^); |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator&=, operator&); |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator|=, operator|); |
|
|
|
|
|
|
|
DECLARE_FLOAT_4_OPERATOR_INFIX(operator==, _mm_cmpeq_ps) |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INFIX(operator>=, _mm_cmpge_ps) |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INFIX(operator>, _mm_cmpgt_ps) |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INFIX(operator<=, _mm_cmple_ps) |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INFIX(operator<, _mm_cmplt_ps) |
|
|
|
DECLARE_FLOAT_4_OPERATOR_INFIX(operator!=, _mm_cmpneq_ps) |
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator^, _mm_xor_ps) |
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator^, _mm_xor_si128) |
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator&, _mm_and_ps) |
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator&, _mm_and_si128) |
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator|, _mm_or_ps) |
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator|, _mm_or_si128) |
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator+=, operator+) |
|
|
|
DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator+=, operator+) |
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator-=, operator-) |
|
|
|
DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator-=, operator-) |
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator*=, operator*) |
|
|
|
// DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator*=, NOT AVAILABLE IN SSE3) |
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator/=, operator/) |
|
|
|
// DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator/=, NOT AVAILABLE IN SSE3) |
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator^=, operator^) |
|
|
|
DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator^=, operator^) |
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator&=, operator&) |
|
|
|
DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator&=, operator&) |
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator|=, operator|) |
|
|
|
DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator|=, operator|) |
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator==, _mm_cmpeq_ps) |
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator==, _mm_cmpeq_epi32) |
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator>=, _mm_cmpge_ps) |
|
|
|
/** `a >= b` for each element.

Only equal, greater-than and less-than integer compares are used in this file,
so the result is formed as the complement of `a < b`.
Complementing `a > b` instead would yield `a <= b`.
*/
inline Vector<int32_t, 4> operator>=(const Vector<int32_t, 4> &a, const Vector<int32_t, 4> &b) {
	return Vector<int32_t, 4>(_mm_cmplt_epi32(a.v, b.v)) ^ Vector<int32_t, 4>::mask();
}
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator>, _mm_cmpgt_ps) |
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator>, _mm_cmpgt_epi32) |
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator<=, _mm_cmple_ps) |
|
|
|
/** `a <= b` for each element.

Only equal, greater-than and less-than integer compares are used in this file,
so the result is formed as the complement of `a > b`.
Complementing `a < b` instead would yield `a >= b`.
*/
inline Vector<int32_t, 4> operator<=(const Vector<int32_t, 4> &a, const Vector<int32_t, 4> &b) {
	return Vector<int32_t, 4>(_mm_cmpgt_epi32(a.v, b.v)) ^ Vector<int32_t, 4>::mask();
}
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator<, _mm_cmplt_ps) |
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator<, _mm_cmplt_epi32) |
|
|
|
|
|
|
|
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator!=, _mm_cmpneq_ps) |
|
|
|
/** `a != b` for each element, computed as the complement of the equality mask. */
inline Vector<int32_t, 4> operator!=(const Vector<int32_t, 4> &a, const Vector<int32_t, 4> &b) {
	const Vector<int32_t, 4> equalMask(_mm_cmpeq_epi32(a.v, b.v));
	return equalMask ^ Vector<int32_t, 4>::mask();
}
|
|
|
|
|
|
|
/** `+a` */ |
|
|
|
inline float_4 operator+(const float_4 &a) { |
|
|
|
inline Vector<float, 4> operator+(const Vector<float, 4> &a) { |
|
|
|
return a; |
|
|
|
} |
|
|
|
/** `+a` for integer vectors: returns a copy of `a`. */
inline Vector<int32_t, 4> operator+(const Vector<int32_t, 4> &a) {
	const Vector<int32_t, 4> same(a);
	return same;
}
|
|
|
|
|
|
|
/** `-a` */ |
|
|
|
inline float_4 operator-(const float_4 &a) { |
|
|
|
inline Vector<float, 4> operator-(const Vector<float, 4> &a) { |
|
|
|
return 0.f - a; |
|
|
|
} |
|
|
|
/** `-a` for integer vectors: subtracts each element from zero. */
inline Vector<int32_t, 4> operator-(const Vector<int32_t, 4> &a) {
	const Vector<int32_t, 4> zeros(0);
	return zeros - a;
}
|
|
|
|
|
|
|
/** `++a` */ |
|
|
|
inline float_4 &operator++(float_4 &a) { |
|
|
|
inline Vector<float, 4> &operator++(Vector<float, 4> &a) { |
|
|
|
a += 1.f; |
|
|
|
return a; |
|
|
|
} |
|
|
|
/** `++a` for integer vectors: adds 1 to every element and returns the updated vector. */
inline Vector<int32_t, 4> &operator++(Vector<int32_t, 4> &a) {
	return a += 1;
}
|
|
|
|
|
|
|
/** `--a` */ |
|
|
|
inline float_4 &operator--(float_4 &a) { |
|
|
|
inline Vector<float, 4> &operator--(Vector<float, 4> &a) { |
|
|
|
a -= 1.f; |
|
|
|
return a; |
|
|
|
} |
|
|
|
/** `--a` for integer vectors: subtracts 1 from every element and returns the updated vector. */
inline Vector<int32_t, 4> &operator--(Vector<int32_t, 4> &a) {
	return a -= 1;
}
|
|
|
|
|
|
|
/** `a++` */ |
|
|
|
inline float_4 operator++(float_4 &a, int) { |
|
|
|
float_4 b = a; |
|
|
|
inline Vector<float, 4> operator++(Vector<float, 4> &a, int) { |
|
|
|
Vector<float, 4> b = a; |
|
|
|
++a; |
|
|
|
return b; |
|
|
|
} |
|
|
|
/** `a++` for integer vectors: increments every element and returns the prior value. */
inline Vector<int32_t, 4> operator++(Vector<int32_t, 4> &a, int) {
	const Vector<int32_t, 4> previous(a);
	++a;
	return previous;
}
|
|
|
|
|
|
|
/** `a--` */ |
|
|
|
inline float_4 operator--(float_4 &a, int) { |
|
|
|
float_4 b = a; |
|
|
|
inline Vector<float, 4> operator--(Vector<float, 4> &a, int) { |
|
|
|
Vector<float, 4> b = a; |
|
|
|
--a; |
|
|
|
return b; |
|
|
|
} |
|
|
|
/** `a--` for integer vectors: decrements every element and returns the prior value. */
inline Vector<int32_t, 4> operator--(Vector<int32_t, 4> &a, int) {
	const Vector<int32_t, 4> previous(a);
	--a;
	return previous;
}
|
|
|
|
|
|
|
/** `~a` */ |
|
|
|
inline float_4 operator~(const float_4 &a) { |
|
|
|
return a ^ float_4::mask(); |
|
|
|
inline Vector<float, 4> operator~(const Vector<float, 4> &a) { |
|
|
|
return a ^ Vector<float, 4>::mask(); |
|
|
|
} |
|
|
|
/** `~a` for integer vectors: flips every bit by XOR-ing with the all-ones mask. */
inline Vector<int32_t, 4> operator~(const Vector<int32_t, 4> &a) {
	const Vector<int32_t, 4> allOnes = Vector<int32_t, 4>::mask();
	return a ^ allOnes;
}
|
|
|
|
|
|
|
|
|
|
|
// Instructions not available as operators |
|
|
|
// Typedefs |
|
|
|
|
|
|
|
|
|
|
|
/** `~a & b` */ |
|
|
|
inline float_4 andnot(const float_4 &a, const float_4 &b) { |
|
|
|
return float_4(_mm_andnot_ps(a.v, b.v)); |
|
|
|
} |
|
|
|
typedef Vector<float, 4> float_4; |
|
|
|
typedef Vector<int32_t, 4> int32_4; |
|
|
|
|
|
|
|
|
|
|
|
} // namespace simd |
|
|
|