Browse Source

Rename f32_4 to float4

tags/v1.0.0
Andrew Belt 5 years ago
parent
commit
1b168ce3a3
2 changed files with 103 additions and 101 deletions
  1. +42
    -43
      include/simd/functions.hpp
  2. +61
    -58
      include/simd/vector.hpp

+ 42
- 43
include/simd/functions.hpp View File

@@ -24,101 +24,101 @@ Example:

using std::fmax;

inline f32_4 fmax(f32_4 x, f32_4 b) {
return f32_4(_mm_max_ps(x.v, b.v));
inline float4 fmax(float4 x, float4 b) {
return float4(_mm_max_ps(x.v, b.v));
}

using std::fmin;

inline f32_4 fmin(f32_4 x, f32_4 b) {
return f32_4(_mm_min_ps(x.v, b.v));
inline float4 fmin(float4 x, float4 b) {
return float4(_mm_min_ps(x.v, b.v));
}

using std::sqrt;

inline f32_4 sqrt(f32_4 x) {
return f32_4(_mm_sqrt_ps(x.v));
inline float4 sqrt(float4 x) {
return float4(_mm_sqrt_ps(x.v));
}

using std::log;

inline f32_4 log(f32_4 x) {
return f32_4(sse_mathfun_log_ps(x.v));
inline float4 log(float4 x) {
return float4(sse_mathfun_log_ps(x.v));
}

using std::log10;

inline f32_4 log10(f32_4 x) {
return f32_4(sse_mathfun_log_ps(x.v)) / std::log(10.f);
inline float4 log10(float4 x) {
return float4(sse_mathfun_log_ps(x.v)) / std::log(10.f);
}

using std::log2;

inline f32_4 log2(f32_4 x) {
return f32_4(sse_mathfun_log_ps(x.v)) / std::log(2.f);
inline float4 log2(float4 x) {
return float4(sse_mathfun_log_ps(x.v)) / std::log(2.f);
}

using std::exp;

inline f32_4 exp(f32_4 x) {
return f32_4(sse_mathfun_exp_ps(x.v));
inline float4 exp(float4 x) {
return float4(sse_mathfun_exp_ps(x.v));
}

using std::sin;

inline f32_4 sin(f32_4 x) {
return f32_4(sse_mathfun_sin_ps(x.v));
inline float4 sin(float4 x) {
return float4(sse_mathfun_sin_ps(x.v));
}

using std::cos;

inline f32_4 cos(f32_4 x) {
return f32_4(sse_mathfun_cos_ps(x.v));
inline float4 cos(float4 x) {
return float4(sse_mathfun_cos_ps(x.v));
}

using std::floor;

inline f32_4 floor(f32_4 a) {
return f32_4(sse_mathfun_floor_ps(a.v));
inline float4 floor(float4 a) {
return float4(sse_mathfun_floor_ps(a.v));
}

using std::ceil;

inline f32_4 ceil(f32_4 a) {
return f32_4(sse_mathfun_ceil_ps(a.v));
inline float4 ceil(float4 a) {
return float4(sse_mathfun_ceil_ps(a.v));
}

using std::round;

inline f32_4 round(f32_4 a) {
return f32_4(sse_mathfun_round_ps(a.v));
inline float4 round(float4 a) {
return float4(sse_mathfun_round_ps(a.v));
}

using std::fmod;

inline f32_4 fmod(f32_4 a, f32_4 b) {
return f32_4(sse_mathfun_fmod_ps(a.v, b.v));
inline float4 fmod(float4 a, float4 b) {
return float4(sse_mathfun_fmod_ps(a.v, b.v));
}

using std::fabs;

inline f32_4 fabs(f32_4 a) {
return f32_4(sse_mathfun_fabs_ps(a.v));
inline float4 fabs(float4 a) {
return float4(sse_mathfun_fabs_ps(a.v));
}

using std::trunc;

inline f32_4 trunc(f32_4 a) {
return f32_4(sse_mathfun_trunc_ps(a.v));
inline float4 trunc(float4 a) {
return float4(sse_mathfun_trunc_ps(a.v));
}

using std::pow;

inline f32_4 pow(f32_4 a, f32_4 b) {
inline float4 pow(float4 a, float4 b) {
return exp(b * log(a));
}

inline f32_4 pow(float a, f32_4 b) {
inline float4 pow(float a, float4 b) {
return exp(b * std::log(a));
}

@@ -129,44 +129,43 @@ inline float ifelse(bool cond, float a, float b) {
}

/** Given a mask, returns a if mask is 0xffffffff per element, b if mask is 0x00000000 */
inline f32_4 ifelse(f32_4 mask, f32_4 a, f32_4 b) {
inline float4 ifelse(float4 mask, float4 a, float4 b) {
return (a & mask) | andnot(mask, b);
}


/** Returns the approximate reciprocal square root.
Much faster than `1/sqrt(x)`.
*/
inline f32_4 rsqrt(f32_4 x) {
return f32_4(_mm_rsqrt_ps(x.v));
inline float4 rsqrt(float4 x) {
return float4(_mm_rsqrt_ps(x.v));
}

/** Returns the approximate reciprocal.
Much faster than `1/x`.
*/
inline f32_4 rcp(f32_4 x) {
return f32_4(_mm_rcp_ps(x.v));
inline float4 rcp(float4 x) {
return float4(_mm_rcp_ps(x.v));
}

// From math.hpp

using math::clamp;

inline f32_4 clamp(f32_4 x, f32_4 a, f32_4 b) {
inline float4 clamp(float4 x, float4 a, float4 b) {
return fmin(fmax(x, a), b);
}

using math::rescale;

inline f32_4 rescale(f32_4 x, f32_4 xMin, f32_4 xMax, f32_4 yMin, f32_4 yMax) {
inline float4 rescale(float4 x, float4 xMin, float4 xMax, float4 yMin, float4 yMax) {
return yMin + (x - xMin) / (xMax - xMin) * (yMax - yMin);
}

using math::sgn;

inline f32_4 sgn(f32_4 x) {
f32_4 signbit = x & -0.f;
f32_4 nonzero = (x != 0.f);
inline float4 sgn(float4 x) {
float4 signbit = x & -0.f;
float4 nonzero = (x != 0.f);
return signbit | (nonzero & 1.f);
}



+ 61
- 58
include/simd/vector.hpp View File

@@ -28,52 +28,52 @@ TO bit_cast(const FROM &x) {
}


/** Generic class for vector float types.
/** Generic class for vector types.

This class is designed to be used just like `float` scalars, with extra features for handling bitwise logic, conditions, loading, and storing.
This class is designed to be used just like you use scalars, with extra features for handling bitwise logic, conditions, loading, and storing.

Usage example:

float in[4], out[4];
f32_4 a = f32_4::load(in);
f32_4 b = 2.f * a / (1 - a);
float4 a = float4::load(in);
float4 b = 2.f * a / (1 - a);
b *= sin(2 * M_PI * a);
b.store(out);
*/
template <int N>
struct f32;
template <typename T, int N>
struct Vector;


/** Wrapper for `__m128` representing an aligned vector of 4 single-precision float values.
*/
template <>
struct f32<4> {
struct Vector<float, 4> {
__m128 v;

/** Constructs an uninitialized vector. */
f32<4>() {}
Vector<float, 4>() {}

/** Constructs a vector from a native `__m128` type. */
f32<4>(__m128 v) : v(v) {}
Vector<float, 4>(__m128 v) : v(v) {}

/** Constructs a vector with all elements set to `x`. */
f32<4>(float x) {
Vector<float, 4>(float x) {
v = _mm_set_ps1(x);
}

/** Constructs a vector from four values.
NOTE(review): `_mm_set_ps` takes its arguments from the highest element down to the lowest, so `x1` is placed in element 3 and `x4` in element 0.
This is the reverse of the element order used by `load()`, which reads `x[0]` into element 0 via `_mm_loadu_ps`.
If `x1` is meant to be element 0, `_mm_setr_ps` is the intrinsic with that argument order — confirm against callers before changing.
*/
f32<4>(float x1, float x2, float x3, float x4) {
Vector<float, 4>(float x1, float x2, float x3, float x4) {
v = _mm_set_ps(x1, x2, x3, x4);
}

/** Reads an array of 4 values. */
static f32<4> load(const float *x) {
return f32<4>(_mm_loadu_ps(x));
/** Returns a vector initialized to zero. */
static Vector<float, 4> zero() {
return Vector<float, 4>(_mm_setzero_ps());
}

/** Returns a vector initialized to zero. */
static f32<4> zero() {
return f32<4>(_mm_setzero_ps());
/** Reads an array of 4 values. */
static Vector<float, 4> load(const float *x) {
return Vector<float, 4>(_mm_loadu_ps(x));
}

/** Writes an array of 4 values. */
@@ -83,29 +83,32 @@ struct f32<4> {
};


typedef f32<4> f32_4;
// Typedefs


typedef Vector<float, 4> float4;


// Operator overloads


/** `a @ b` */
#define DECLARE_F32_4_OPERATOR_INFIX(operator, func) \
inline f32_4 operator(const f32_4 &a, const f32_4 &b) { \
return f32_4(func(a.v, b.v)); \
#define DECLARE_FLOAT4_OPERATOR_INFIX(operator, func) \
inline float4 operator(const float4 &a, const float4 &b) { \
return float4(func(a.v, b.v)); \
}

/** `a @= b` */
#define DECLARE_F32_4_OPERATOR_INCREMENT(operator, opfunc) \
inline f32_4 &operator(f32_4 &a, const f32_4 &b) { \
#define DECLARE_FLOAT4_OPERATOR_INCREMENT(operator, opfunc) \
inline float4 &operator(float4 &a, const float4 &b) { \
a = opfunc(a, b); \
return a; \
}

DECLARE_F32_4_OPERATOR_INFIX(operator+, _mm_add_ps)
DECLARE_F32_4_OPERATOR_INFIX(operator-, _mm_sub_ps)
DECLARE_F32_4_OPERATOR_INFIX(operator*, _mm_mul_ps)
DECLARE_F32_4_OPERATOR_INFIX(operator/, _mm_div_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator+, _mm_add_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator-, _mm_sub_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator*, _mm_mul_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator/, _mm_div_ps)

/* Use these to apply logic, bit masks, and conditions to elements.
Boolean operators on vectors give 0x00000000 for false and 0xffffffff for true, for each vector element.
@@ -116,75 +119,75 @@ Subtract 1 from value if greater than or equal to 1.

x -= (x >= 1.f) & 1.f;
*/
DECLARE_F32_4_OPERATOR_INFIX(operator^, _mm_xor_ps)
DECLARE_F32_4_OPERATOR_INFIX(operator&, _mm_and_ps)
DECLARE_F32_4_OPERATOR_INFIX(operator|, _mm_or_ps)
DECLARE_F32_4_OPERATOR_INCREMENT(operator+=, operator+);
DECLARE_F32_4_OPERATOR_INCREMENT(operator-=, operator-);
DECLARE_F32_4_OPERATOR_INCREMENT(operator*=, operator*);
DECLARE_F32_4_OPERATOR_INCREMENT(operator/=, operator/);
DECLARE_F32_4_OPERATOR_INCREMENT(operator^=, operator^);
DECLARE_F32_4_OPERATOR_INCREMENT(operator&=, operator&);
DECLARE_F32_4_OPERATOR_INCREMENT(operator|=, operator|);
DECLARE_F32_4_OPERATOR_INFIX(operator==, _mm_cmpeq_ps)
DECLARE_F32_4_OPERATOR_INFIX(operator>=, _mm_cmpge_ps)
DECLARE_F32_4_OPERATOR_INFIX(operator>, _mm_cmpgt_ps)
DECLARE_F32_4_OPERATOR_INFIX(operator<=, _mm_cmple_ps)
DECLARE_F32_4_OPERATOR_INFIX(operator<, _mm_cmplt_ps)
DECLARE_F32_4_OPERATOR_INFIX(operator!=, _mm_cmpneq_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator^, _mm_xor_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator&, _mm_and_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator|, _mm_or_ps)
DECLARE_FLOAT4_OPERATOR_INCREMENT(operator+=, operator+);
DECLARE_FLOAT4_OPERATOR_INCREMENT(operator-=, operator-);
DECLARE_FLOAT4_OPERATOR_INCREMENT(operator*=, operator*);
DECLARE_FLOAT4_OPERATOR_INCREMENT(operator/=, operator/);
DECLARE_FLOAT4_OPERATOR_INCREMENT(operator^=, operator^);
DECLARE_FLOAT4_OPERATOR_INCREMENT(operator&=, operator&);
DECLARE_FLOAT4_OPERATOR_INCREMENT(operator|=, operator|);
DECLARE_FLOAT4_OPERATOR_INFIX(operator==, _mm_cmpeq_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator>=, _mm_cmpge_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator>, _mm_cmpgt_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator<=, _mm_cmple_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator<, _mm_cmplt_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator!=, _mm_cmpneq_ps)

/** `+a` */
inline f32_4 operator+(const f32_4 &a) {
inline float4 operator+(const float4 &a) {
return a;
}

/** `-a` */
inline f32_4 operator-(const f32_4 &a) {
inline float4 operator-(const float4 &a) {
return 0.f - a;
}

/** `++a` */
inline f32_4 &operator++(f32_4 &a) {
inline float4 &operator++(float4 &a) {
a += 1.f;
return a;
}

/** `--a` */
inline f32_4 &operator--(f32_4 &a) {
inline float4 &operator--(float4 &a) {
a -= 1.f;
return a;
}

/** `a++` */
inline f32_4 operator++(f32_4 &a, int) {
f32_4 b = a;
inline float4 operator++(float4 &a, int) {
float4 b = a;
++a;
return b;
}

/** `a--` */
inline f32_4 operator--(f32_4 &a, int) {
f32_4 b = a;
inline float4 operator--(float4 &a, int) {
float4 b = a;
--a;
return b;
}

/** `~a` */
inline f32_4 operator~(const f32_4 &a) {
f32_4 mask = f32_4::zero();
inline float4 operator~(const float4 &a) {
float4 mask = float4::zero();
mask = (mask == mask);
return a ^ mask;
}


// helpful idioms
// Instructions not available as operators


/** `~a & b` */
inline f32_4 andnot(const f32_4 &a, const f32_4 &b) {
return f32_4(_mm_andnot_ps(a.v, b.v));
inline float4 andnot(const float4 &a, const float4 &b) {
return float4(_mm_andnot_ps(a.v, b.v));
}




Loading…
Cancel
Save