Browse Source

Add simd::int32_4

tags/v1.0.0
Andrew Belt 5 years ago
parent
commit
262c6a0406
1 changed files with 150 additions and 67 deletions
  1. +150
    -67
      include/simd/vector.hpp

+ 150
- 67
include/simd/vector.hpp View File

@@ -1,7 +1,6 @@
#pragma once #pragma once
#include <cstring> #include <cstring>
#include <pmmintrin.h> #include <pmmintrin.h>
#include <type_traits>




namespace rack { namespace rack {
@@ -11,23 +10,6 @@ namespace rack {
namespace simd { namespace simd {




/** Reinterprets the literal bits of `x` as type TO without any value conversion.
API copied from C++20 (`std::bit_cast`).

FROM and TO must have the same size and must both be trivially copyable, because the bits are transferred with `memcpy`.
TO must also be default-constructible.
Violating the size or trivially-copyable requirements is a compile-time error.

Usage example:

	printf("%08x\n", bit_cast<int>(1.f)); // Prints 3f800000
*/
template <typename TO, typename FROM>
TO bit_cast(const FROM &x) {
	static_assert(sizeof(FROM) == sizeof(TO), "types must have equal size");
	// memcpy on a non-trivially-copyable object is undefined behavior, so reject it up front.
	static_assert(std::is_trivially_copyable<FROM>::value, "FROM must be trivially copyable");
	static_assert(std::is_trivially_copyable<TO>::value, "TO must be trivially copyable");
	// Should be optimized to two `mov` instructions
	TO y;
	std::memcpy(&y, &x, sizeof(TO));
	return y;
}


/** Generic class for vector types. /** Generic class for vector types.


This class is designed to be used just like you use scalars, with extra features for handling bitwise logic, conditions, loading, and storing. This class is designed to be used just like you use scalars, with extra features for handling bitwise logic, conditions, loading, and storing.
@@ -63,7 +45,7 @@ struct Vector<float, 4> {


/** Constructs a vector with all elements set to `x`. */ /** Constructs a vector with all elements set to `x`. */
Vector(float x) { Vector(float x) {
v = _mm_set_ps1(x);
v = _mm_set1_ps(x);
} }


/** Constructs a vector from four values. */ /** Constructs a vector from four values. */
@@ -78,8 +60,7 @@ struct Vector<float, 4> {


/** Returns a vector with all 1 bits. */ /** Returns a vector with all 1 bits. */
static Vector mask() { static Vector mask() {
__m128 zero = _mm_setzero_ps();
return Vector(_mm_cmpeq_ps(zero, zero));
return _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
} }


/** Reads an array of 4 values. */ /** Reads an array of 4 values. */
@@ -99,34 +80,76 @@ struct Vector<float, 4> {
}; };




// Typedefs
/** Vector of four 32-bit signed integers stored in a single SSE `__m128i` register. */
template <>
struct Vector<int32_t, 4> {
	union {
		/** The packed native value. */
		__m128i v;
		/** Per-element view of the same storage. */
		int32_t s[4];
	};

	/** Constructs an uninitialized vector. */
	Vector() {}

	/** Constructs a vector from a native `__m128i` value. */
	Vector(__m128i v) : v(v) {}

	/** Constructs a vector with all elements set to `x`. */
	Vector(int32_t x) {
		v = _mm_set1_epi32(x);
	}

	/** Constructs a vector from four values, in memory order (`x1` is element 0).
	`_mm_setr_epi32()` is used instead of `_mm_set_epi32()` because the latter takes its arguments highest element first, which would reverse the elements relative to load() and store().
	*/
	Vector(int32_t x1, int32_t x2, int32_t x3, int32_t x4) {
		v = _mm_setr_epi32(x1, x2, x3, x4);
	}

	/** Returns a vector with all 0 bits. */
	static Vector zero() {
		return Vector(_mm_setzero_si128());
	}

	/** Returns a vector with all 1 bits. */
	static Vector mask() {
		// Comparing a register with itself for equality sets every bit of every element.
		return Vector(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
	}

	/** Reads an array of 4 values.
	Uses an unaligned load, so `x` does not need to be 16-byte aligned.
	*/
	static Vector load(const int32_t *x) {
		return Vector(_mm_loadu_si128(reinterpret_cast<const __m128i*>(x)));
	}

	/** Writes an array of 4 values.
	Uses an unaligned store, so `x` does not need to be 16-byte aligned.
	*/
	void store(int32_t *x) const {
		_mm_storeu_si128(reinterpret_cast<__m128i*>(x), v);
	}
};




typedef Vector<float, 4> float_4;
// typedef Vector<double, 2> double_2;
// typedef Vector<int32_t, 4> int32_4;
// Instructions not available as operators


/** Bitwise `~a & b`: clears in `b` every bit that is set in `a`. */
inline Vector<float, 4> andnot(const Vector<float, 4> &a, const Vector<float, 4> &b) {
	const __m128 cleared = _mm_andnot_ps(a.v, b.v);
	return Vector<float, 4>(cleared);
}




// Operator overloads // Operator overloads




/** `a @ b` */ /** `a @ b` */
#define DECLARE_FLOAT_4_OPERATOR_INFIX(operator, func) \
inline float_4 operator(const float_4 &a, const float_4 &b) { \
return float_4(func(a.v, b.v)); \
#define DECLARE_VECTOR_OPERATOR_INFIX(t, s, operator, func) \
inline Vector<t, s> operator(const Vector<t, s> &a, const Vector<t, s> &b) { \
return Vector<t, s>(func(a.v, b.v)); \
} }


/** `a @= b` */ /** `a @= b` */
#define DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator, opfunc) \
inline float_4 &operator(float_4 &a, const float_4 &b) { \
#define DECLARE_VECTOR_OPERATOR_INCREMENT(t, s, operator, opfunc) \
inline Vector<t, s> &operator(Vector<t, s> &a, const Vector<t, s> &b) { \
a = opfunc(a, b); \ a = opfunc(a, b); \
return a; \ return a; \
} }


DECLARE_FLOAT_4_OPERATOR_INFIX(operator+, _mm_add_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator-, _mm_sub_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator*, _mm_mul_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator/, _mm_div_ps)
// Element-wise addition
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator+, _mm_add_ps)
DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator+, _mm_add_epi32)

// Element-wise subtraction
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator-, _mm_sub_ps)
DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator-, _mm_sub_epi32)

// Element-wise multiplication (float only; the int32_t variant is intentionally left disabled)
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator*, _mm_mul_ps)
// DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator*, NOT AVAILABLE IN SSE3)

// Element-wise division (float only; the int32_t variant is intentionally left disabled)
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator/, _mm_div_ps)
// DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator/, NOT AVAILABLE IN SSE3)


/* Use these to apply logic, bit masks, and conditions to elements. /* Use these to apply logic, bit masks, and conditions to elements.
Boolean operators on vectors give 0x00000000 for false and 0xffffffff for true, for each vector element. Boolean operators on vectors give 0x00000000 for false and 0xffffffff for true, for each vector element.
@@ -137,74 +160,134 @@ Subtract 1 from value if greater than or equal to 1.


x -= (x >= 1.f) & 1.f; x -= (x >= 1.f) & 1.f;
*/ */
DECLARE_FLOAT_4_OPERATOR_INFIX(operator^, _mm_xor_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator&, _mm_and_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator|, _mm_or_ps)

DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator+=, operator+);
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator-=, operator-);
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator*=, operator*);
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator/=, operator/);
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator^=, operator^);
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator&=, operator&);
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator|=, operator|);

DECLARE_FLOAT_4_OPERATOR_INFIX(operator==, _mm_cmpeq_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator>=, _mm_cmpge_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator>, _mm_cmpgt_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator<=, _mm_cmple_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator<, _mm_cmplt_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator!=, _mm_cmpneq_ps)
// Bitwise XOR
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator^, _mm_xor_ps)
DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator^, _mm_xor_si128)

// Bitwise AND
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator&, _mm_and_ps)
DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator&, _mm_and_si128)

// Bitwise OR
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator|, _mm_or_ps)
DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator|, _mm_or_si128)

// Compound assignments, each implemented in terms of the matching infix operator above.
DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator+=, operator+)
DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator+=, operator+)

DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator-=, operator-)
DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator-=, operator-)

// No int32_t `*=` or `/=` because the corresponding infix operators are not declared for int32_t.
DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator*=, operator*)
// DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator*=, NOT AVAILABLE IN SSE3)

DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator/=, operator/)
// DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator/=, NOT AVAILABLE IN SSE3)

DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator^=, operator^)
DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator^=, operator^)

DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator&=, operator&)
DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator&=, operator&)

DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator|=, operator|)
DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator|=, operator|)

// Element-wise equality comparison; the result is a per-element bit mask rather than a bool.
DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator==, _mm_cmpeq_ps)
DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator==, _mm_cmpeq_epi32)

DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator>=, _mm_cmpge_ps)
/** `a >= b` for each element, giving 0xffffffff for true and 0x00000000 for false.
There is no packed integer "greater or equal" comparison used here, so this is computed as NOT (a < b).
*/
inline Vector<int32_t, 4> operator>=(const Vector<int32_t, 4> &a, const Vector<int32_t, 4> &b) {
	// XOR with the all-ones mask inverts the "less than" result.
	return Vector<int32_t, 4>(_mm_cmplt_epi32(a.v, b.v)) ^ Vector<int32_t, 4>::mask();
}

DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator>, _mm_cmpgt_ps)
DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator>, _mm_cmpgt_epi32)

DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator<=, _mm_cmple_ps)
/** `a <= b` for each element, giving 0xffffffff for true and 0x00000000 for false.
There is no packed integer "less or equal" comparison used here, so this is computed as NOT (a > b).
*/
inline Vector<int32_t, 4> operator<=(const Vector<int32_t, 4> &a, const Vector<int32_t, 4> &b) {
	// XOR with the all-ones mask inverts the "greater than" result.
	return Vector<int32_t, 4>(_mm_cmpgt_epi32(a.v, b.v)) ^ Vector<int32_t, 4>::mask();
}

DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator<, _mm_cmplt_ps)
DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator<, _mm_cmplt_epi32)

DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator!=, _mm_cmpneq_ps)
/** `a != b` for each element, computed as the bitwise inverse of the equality mask. */
inline Vector<int32_t, 4> operator!=(const Vector<int32_t, 4> &a, const Vector<int32_t, 4> &b) {
	const Vector<int32_t, 4> equal(_mm_cmpeq_epi32(a.v, b.v));
	// XOR with all 1 bits flips every bit of the equality mask.
	return equal ^ Vector<int32_t, 4>::mask();
}


/** `+a` */ /** `+a` */
inline float_4 operator+(const float_4 &a) {
inline Vector<float, 4> operator+(const Vector<float, 4> &a) {
return a;
}
inline Vector<int32_t, 4> operator+(const Vector<int32_t, 4> &a) {
return a; return a;
} }


/** `-a` */ /** `-a` */
inline float_4 operator-(const float_4 &a) {
inline Vector<float, 4> operator-(const Vector<float, 4> &a) {
return 0.f - a; return 0.f - a;
} }
/** `-a` for each integer element, computed as zero minus `a`. */
inline Vector<int32_t, 4> operator-(const Vector<int32_t, 4> &a) {
	return Vector<int32_t, 4>::zero() - a;
}


/** `++a` */ /** `++a` */
inline float_4 &operator++(float_4 &a) {
inline Vector<float, 4> &operator++(Vector<float, 4> &a) {
a += 1.f; a += 1.f;
return a; return a;
} }
/** `++a` for integer vectors: adds 1 to every element and returns the updated vector. */
inline Vector<int32_t, 4> &operator++(Vector<int32_t, 4> &a) {
	return a += 1;
}


/** `--a` */ /** `--a` */
inline float_4 &operator--(float_4 &a) {
inline Vector<float, 4> &operator--(Vector<float, 4> &a) {
a -= 1.f; a -= 1.f;
return a; return a;
} }
/** `--a` for integer vectors: subtracts 1 from every element and returns the updated vector. */
inline Vector<int32_t, 4> &operator--(Vector<int32_t, 4> &a) {
	return a -= 1;
}


/** `a++` */ /** `a++` */
inline float_4 operator++(float_4 &a, int) {
float_4 b = a;
inline Vector<float, 4> operator++(Vector<float, 4> &a, int) {
Vector<float, 4> b = a;
++a;
return b;
}
inline Vector<int32_t, 4> operator++(Vector<int32_t, 4> &a, int) {
Vector<int32_t, 4> b = a;
++a; ++a;
return b; return b;
} }


/** `a--` */ /** `a--` */
inline float_4 operator--(float_4 &a, int) {
float_4 b = a;
inline Vector<float, 4> operator--(Vector<float, 4> &a, int) {
Vector<float, 4> b = a;
--a;
return b;
}
inline Vector<int32_t, 4> operator--(Vector<int32_t, 4> &a, int) {
Vector<int32_t, 4> b = a;
--a; --a;
return b; return b;
} }


/** `~a` */ /** `~a` */
inline float_4 operator~(const float_4 &a) {
return a ^ float_4::mask();
inline Vector<float, 4> operator~(const Vector<float, 4> &a) {
return a ^ Vector<float, 4>::mask();
}
inline Vector<int32_t, 4> operator~(const Vector<int32_t, 4> &a) {
return a ^ Vector<int32_t, 4>::mask();
} }




// Instructions not available as operators
// Typedefs




/** `~a & b` */
inline float_4 andnot(const float_4 &a, const float_4 &b) {
return float_4(_mm_andnot_ps(a.v, b.v));
}
/** Four packed single-precision floats. */
using float_4 = Vector<float, 4>;
/** Four packed 32-bit signed integers. */
using int32_4 = Vector<int32_t, 4>;




} // namespace simd } // namespace simd


Loading…
Cancel
Save