Browse Source

Rename float4 to float_4.

tags/v1.0.0
Andrew Belt 6 years ago
parent
commit
26abab6de3
2 changed files with 85 additions and 84 deletions
  1. +42
    -42
      include/simd/functions.hpp
  2. +43
    -42
      include/simd/vector.hpp

+ 42
- 42
include/simd/functions.hpp View File

@@ -24,101 +24,101 @@ Example:


using std::fmax; using std::fmax;


inline float4 fmax(float4 x, float4 b) {
return float4(_mm_max_ps(x.v, b.v));
inline float_4 fmax(float_4 x, float_4 b) {
return float_4(_mm_max_ps(x.v, b.v));
} }


using std::fmin; using std::fmin;


inline float4 fmin(float4 x, float4 b) {
return float4(_mm_min_ps(x.v, b.v));
inline float_4 fmin(float_4 x, float_4 b) {
return float_4(_mm_min_ps(x.v, b.v));
} }


using std::sqrt; using std::sqrt;


inline float4 sqrt(float4 x) {
return float4(_mm_sqrt_ps(x.v));
inline float_4 sqrt(float_4 x) {
return float_4(_mm_sqrt_ps(x.v));
} }


using std::log; using std::log;


inline float4 log(float4 x) {
return float4(sse_mathfun_log_ps(x.v));
inline float_4 log(float_4 x) {
return float_4(sse_mathfun_log_ps(x.v));
} }


using std::log10; using std::log10;


inline float4 log10(float4 x) {
return float4(sse_mathfun_log_ps(x.v)) / std::log(10.f);
inline float_4 log10(float_4 x) {
return float_4(sse_mathfun_log_ps(x.v)) / std::log(10.f);
} }


using std::log2; using std::log2;


inline float4 log2(float4 x) {
return float4(sse_mathfun_log_ps(x.v)) / std::log(2.f);
inline float_4 log2(float_4 x) {
return float_4(sse_mathfun_log_ps(x.v)) / std::log(2.f);
} }


using std::exp; using std::exp;


inline float4 exp(float4 x) {
return float4(sse_mathfun_exp_ps(x.v));
inline float_4 exp(float_4 x) {
return float_4(sse_mathfun_exp_ps(x.v));
} }


using std::sin; using std::sin;


inline float4 sin(float4 x) {
return float4(sse_mathfun_sin_ps(x.v));
inline float_4 sin(float_4 x) {
return float_4(sse_mathfun_sin_ps(x.v));
} }


using std::cos; using std::cos;


inline float4 cos(float4 x) {
return float4(sse_mathfun_cos_ps(x.v));
inline float_4 cos(float_4 x) {
return float_4(sse_mathfun_cos_ps(x.v));
} }


using std::floor; using std::floor;


inline float4 floor(float4 a) {
return float4(sse_mathfun_floor_ps(a.v));
inline float_4 floor(float_4 a) {
return float_4(sse_mathfun_floor_ps(a.v));
} }


using std::ceil; using std::ceil;


inline float4 ceil(float4 a) {
return float4(sse_mathfun_ceil_ps(a.v));
inline float_4 ceil(float_4 a) {
return float_4(sse_mathfun_ceil_ps(a.v));
} }


using std::round; using std::round;


inline float4 round(float4 a) {
return float4(sse_mathfun_round_ps(a.v));
inline float_4 round(float_4 a) {
return float_4(sse_mathfun_round_ps(a.v));
} }


using std::fmod; using std::fmod;


inline float4 fmod(float4 a, float4 b) {
return float4(sse_mathfun_fmod_ps(a.v, b.v));
inline float_4 fmod(float_4 a, float_4 b) {
return float_4(sse_mathfun_fmod_ps(a.v, b.v));
} }


using std::fabs; using std::fabs;


inline float4 fabs(float4 a) {
return float4(sse_mathfun_fabs_ps(a.v));
inline float_4 fabs(float_4 a) {
return float_4(sse_mathfun_fabs_ps(a.v));
} }


using std::trunc; using std::trunc;


inline float4 trunc(float4 a) {
return float4(sse_mathfun_trunc_ps(a.v));
inline float_4 trunc(float_4 a) {
return float_4(sse_mathfun_trunc_ps(a.v));
} }


using std::pow; using std::pow;


inline float4 pow(float4 a, float4 b) {
inline float_4 pow(float_4 a, float_4 b) {
return exp(b * log(a)); return exp(b * log(a));
} }


inline float4 pow(float a, float4 b) {
inline float_4 pow(float a, float_4 b) {
return exp(b * std::log(a)); return exp(b * std::log(a));
} }


@@ -129,43 +129,43 @@ inline float ifelse(bool cond, float a, float b) {
} }


/** Given a mask, returns a if mask is 0xffffffff per element, b if mask is 0x00000000 */ /** Given a mask, returns a if mask is 0xffffffff per element, b if mask is 0x00000000 */
inline float4 ifelse(float4 mask, float4 a, float4 b) {
inline float_4 ifelse(float_4 mask, float_4 a, float_4 b) {
return (a & mask) | andnot(mask, b); return (a & mask) | andnot(mask, b);
} }


/** Returns the approximate reciprocal square root. /** Returns the approximate reciprocal square root.
Much faster than `1/sqrt(x)`. Much faster than `1/sqrt(x)`.
*/ */
inline float4 rsqrt(float4 x) {
return float4(_mm_rsqrt_ps(x.v));
inline float_4 rsqrt(float_4 x) {
return float_4(_mm_rsqrt_ps(x.v));
} }


/** Returns the approximate reciprocal. /** Returns the approximate reciprocal.
Much faster than `1/x`. Much faster than `1/x`.
*/ */
inline float4 rcp(float4 x) {
return float4(_mm_rcp_ps(x.v));
inline float_4 rcp(float_4 x) {
return float_4(_mm_rcp_ps(x.v));
} }


// From math.hpp // From math.hpp


using math::clamp; using math::clamp;


inline float4 clamp(float4 x, float4 a, float4 b) {
inline float_4 clamp(float_4 x, float_4 a, float_4 b) {
return fmin(fmax(x, a), b); return fmin(fmax(x, a), b);
} }


using math::rescale; using math::rescale;


inline float4 rescale(float4 x, float4 xMin, float4 xMax, float4 yMin, float4 yMax) {
inline float_4 rescale(float_4 x, float_4 xMin, float_4 xMax, float_4 yMin, float_4 yMax) {
return yMin + (x - xMin) / (xMax - xMin) * (yMax - yMin); return yMin + (x - xMin) / (xMax - xMin) * (yMax - yMin);
} }


using math::sgn; using math::sgn;


inline float4 sgn(float4 x) {
float4 signbit = x & -0.f;
float4 nonzero = (x != 0.f);
inline float_4 sgn(float_4 x) {
float_4 signbit = x & -0.f;
float_4 nonzero = (x != 0.f);
return signbit | (nonzero & 1.f); return signbit | (nonzero & 1.f);
} }




+ 43
- 42
include/simd/vector.hpp View File

@@ -35,8 +35,8 @@ This class is designed to be used just like you use scalars, with extra features
Usage example: Usage example:


float a[4], b[4]; float a[4], b[4];
float4 a = float4::load(in);
float4 b = 2.f * a / (1 - a);
float_4 a = float_4::load(in);
float_4 b = 2.f * a / (1 - a);
b *= sin(2 * M_PI * a); b *= sin(2 * M_PI * a);
b.store(out); b.store(out);
*/ */
@@ -86,29 +86,30 @@ struct Vector<float, 4> {
// Typedefs // Typedefs




typedef Vector<float, 4> float4;
typedef Vector<float, 4> float_4;
// typedef Vector<int32_t, 4> int32_4;




// Operator overloads // Operator overloads




/** `a @ b` */ /** `a @ b` */
#define DECLARE_FLOAT4_OPERATOR_INFIX(operator, func) \
inline float4 operator(const float4 &a, const float4 &b) { \
return float4(func(a.v, b.v)); \
#define DECLARE_FLOAT_4_OPERATOR_INFIX(operator, func) \
inline float_4 operator(const float_4 &a, const float_4 &b) { \
return float_4(func(a.v, b.v)); \
} }


/** `a @= b` */ /** `a @= b` */
#define DECLARE_FLOAT4_OPERATOR_INCREMENT(operator, opfunc) \
inline float4 &operator(float4 &a, const float4 &b) { \
#define DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator, opfunc) \
inline float_4 &operator(float_4 &a, const float_4 &b) { \
a = opfunc(a, b); \ a = opfunc(a, b); \
return a; \ return a; \
} }


DECLARE_FLOAT4_OPERATOR_INFIX(operator+, _mm_add_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator-, _mm_sub_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator*, _mm_mul_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator/, _mm_div_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator+, _mm_add_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator-, _mm_sub_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator*, _mm_mul_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator/, _mm_div_ps)


/* Use these to apply logic, bit masks, and conditions to elements. /* Use these to apply logic, bit masks, and conditions to elements.
Boolean operators on vectors give 0x00000000 for false and 0xffffffff for true, for each vector element. Boolean operators on vectors give 0x00000000 for false and 0xffffffff for true, for each vector element.
@@ -119,64 +120,64 @@ Subtract 1 from value if greater than or equal to 1.


x -= (x >= 1.f) & 1.f; x -= (x >= 1.f) & 1.f;
*/ */
DECLARE_FLOAT4_OPERATOR_INFIX(operator^, _mm_xor_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator&, _mm_and_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator|, _mm_or_ps)
DECLARE_FLOAT4_OPERATOR_INCREMENT(operator+=, operator+);
DECLARE_FLOAT4_OPERATOR_INCREMENT(operator-=, operator-);
DECLARE_FLOAT4_OPERATOR_INCREMENT(operator*=, operator*);
DECLARE_FLOAT4_OPERATOR_INCREMENT(operator/=, operator/);
DECLARE_FLOAT4_OPERATOR_INCREMENT(operator^=, operator^);
DECLARE_FLOAT4_OPERATOR_INCREMENT(operator&=, operator&);
DECLARE_FLOAT4_OPERATOR_INCREMENT(operator|=, operator|);
DECLARE_FLOAT4_OPERATOR_INFIX(operator==, _mm_cmpeq_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator>=, _mm_cmpge_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator>, _mm_cmpgt_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator<=, _mm_cmple_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator<, _mm_cmplt_ps)
DECLARE_FLOAT4_OPERATOR_INFIX(operator!=, _mm_cmpneq_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator^, _mm_xor_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator&, _mm_and_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator|, _mm_or_ps)
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator+=, operator+);
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator-=, operator-);
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator*=, operator*);
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator/=, operator/);
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator^=, operator^);
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator&=, operator&);
DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator|=, operator|);
DECLARE_FLOAT_4_OPERATOR_INFIX(operator==, _mm_cmpeq_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator>=, _mm_cmpge_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator>, _mm_cmpgt_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator<=, _mm_cmple_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator<, _mm_cmplt_ps)
DECLARE_FLOAT_4_OPERATOR_INFIX(operator!=, _mm_cmpneq_ps)


/** `+a` */ /** `+a` */
inline float4 operator+(const float4 &a) {
inline float_4 operator+(const float_4 &a) {
return a; return a;
} }


/** `-a` */ /** `-a` */
inline float4 operator-(const float4 &a) {
inline float_4 operator-(const float_4 &a) {
return 0.f - a; return 0.f - a;
} }


/** `++a` */ /** `++a` */
inline float4 &operator++(float4 &a) {
inline float_4 &operator++(float_4 &a) {
a += 1.f; a += 1.f;
return a; return a;
} }


/** `--a` */ /** `--a` */
inline float4 &operator--(float4 &a) {
inline float_4 &operator--(float_4 &a) {
a -= 1.f; a -= 1.f;
return a; return a;
} }


/** `a++` */ /** `a++` */
inline float4 operator++(float4 &a, int) {
float4 b = a;
inline float_4 operator++(float_4 &a, int) {
float_4 b = a;
++a; ++a;
return b; return b;
} }


/** `a--` */ /** `a--` */
inline float4 operator--(float4 &a, int) {
float4 b = a;
inline float_4 operator--(float_4 &a, int) {
float_4 b = a;
--a; --a;
return b; return b;
} }


/** `~a` */ /** `~a` */
inline float4 operator~(const float4 &a) {
float4 mask = float4::zero();
inline float_4 operator~(const float_4 &a) {
float_4 mask = float_4::zero();
mask = (mask == mask); mask = (mask == mask);
return a ^ mask; return a ^ mask;
} }
@@ -186,8 +187,8 @@ inline float4 operator~(const float4 &a) {




/** `~a & b` */ /** `~a & b` */
inline float4 andnot(const float4 &a, const float4 &b) {
return float4(_mm_andnot_ps(a.v, b.v));
inline float_4 andnot(const float_4 &a, const float_4 &b) {
return float_4(_mm_andnot_ps(a.v, b.v));
} }






Loading…
Cancel
Save