| @@ -24,101 +24,101 @@ Example: | |||||
| using std::fmax; | using std::fmax; | ||||
| inline float4 fmax(float4 x, float4 b) { | |||||
| return float4(_mm_max_ps(x.v, b.v)); | |||||
| inline float_4 fmax(float_4 x, float_4 b) { | |||||
| return float_4(_mm_max_ps(x.v, b.v)); | |||||
| } | } | ||||
| using std::fmin; | using std::fmin; | ||||
| inline float4 fmin(float4 x, float4 b) { | |||||
| return float4(_mm_min_ps(x.v, b.v)); | |||||
| inline float_4 fmin(float_4 x, float_4 b) { | |||||
| return float_4(_mm_min_ps(x.v, b.v)); | |||||
| } | } | ||||
| using std::sqrt; | using std::sqrt; | ||||
| inline float4 sqrt(float4 x) { | |||||
| return float4(_mm_sqrt_ps(x.v)); | |||||
| inline float_4 sqrt(float_4 x) { | |||||
| return float_4(_mm_sqrt_ps(x.v)); | |||||
| } | } | ||||
| using std::log; | using std::log; | ||||
| inline float4 log(float4 x) { | |||||
| return float4(sse_mathfun_log_ps(x.v)); | |||||
| inline float_4 log(float_4 x) { | |||||
| return float_4(sse_mathfun_log_ps(x.v)); | |||||
| } | } | ||||
| using std::log10; | using std::log10; | ||||
| inline float4 log10(float4 x) { | |||||
| return float4(sse_mathfun_log_ps(x.v)) / std::log(10.f); | |||||
| inline float_4 log10(float_4 x) { | |||||
| return float_4(sse_mathfun_log_ps(x.v)) / std::log(10.f); | |||||
| } | } | ||||
| using std::log2; | using std::log2; | ||||
| inline float4 log2(float4 x) { | |||||
| return float4(sse_mathfun_log_ps(x.v)) / std::log(2.f); | |||||
| inline float_4 log2(float_4 x) { | |||||
| return float_4(sse_mathfun_log_ps(x.v)) / std::log(2.f); | |||||
| } | } | ||||
| using std::exp; | using std::exp; | ||||
| inline float4 exp(float4 x) { | |||||
| return float4(sse_mathfun_exp_ps(x.v)); | |||||
| inline float_4 exp(float_4 x) { | |||||
| return float_4(sse_mathfun_exp_ps(x.v)); | |||||
| } | } | ||||
| using std::sin; | using std::sin; | ||||
| inline float4 sin(float4 x) { | |||||
| return float4(sse_mathfun_sin_ps(x.v)); | |||||
| inline float_4 sin(float_4 x) { | |||||
| return float_4(sse_mathfun_sin_ps(x.v)); | |||||
| } | } | ||||
| using std::cos; | using std::cos; | ||||
| inline float4 cos(float4 x) { | |||||
| return float4(sse_mathfun_cos_ps(x.v)); | |||||
| inline float_4 cos(float_4 x) { | |||||
| return float_4(sse_mathfun_cos_ps(x.v)); | |||||
| } | } | ||||
| using std::floor; | using std::floor; | ||||
| inline float4 floor(float4 a) { | |||||
| return float4(sse_mathfun_floor_ps(a.v)); | |||||
| inline float_4 floor(float_4 a) { | |||||
| return float_4(sse_mathfun_floor_ps(a.v)); | |||||
| } | } | ||||
| using std::ceil; | using std::ceil; | ||||
| inline float4 ceil(float4 a) { | |||||
| return float4(sse_mathfun_ceil_ps(a.v)); | |||||
| inline float_4 ceil(float_4 a) { | |||||
| return float_4(sse_mathfun_ceil_ps(a.v)); | |||||
| } | } | ||||
| using std::round; | using std::round; | ||||
| inline float4 round(float4 a) { | |||||
| return float4(sse_mathfun_round_ps(a.v)); | |||||
| inline float_4 round(float_4 a) { | |||||
| return float_4(sse_mathfun_round_ps(a.v)); | |||||
| } | } | ||||
| using std::fmod; | using std::fmod; | ||||
| inline float4 fmod(float4 a, float4 b) { | |||||
| return float4(sse_mathfun_fmod_ps(a.v, b.v)); | |||||
| inline float_4 fmod(float_4 a, float_4 b) { | |||||
| return float_4(sse_mathfun_fmod_ps(a.v, b.v)); | |||||
| } | } | ||||
| using std::fabs; | using std::fabs; | ||||
| inline float4 fabs(float4 a) { | |||||
| return float4(sse_mathfun_fabs_ps(a.v)); | |||||
| inline float_4 fabs(float_4 a) { | |||||
| return float_4(sse_mathfun_fabs_ps(a.v)); | |||||
| } | } | ||||
| using std::trunc; | using std::trunc; | ||||
| inline float4 trunc(float4 a) { | |||||
| return float4(sse_mathfun_trunc_ps(a.v)); | |||||
| inline float_4 trunc(float_4 a) { | |||||
| return float_4(sse_mathfun_trunc_ps(a.v)); | |||||
| } | } | ||||
| using std::pow; | using std::pow; | ||||
| inline float4 pow(float4 a, float4 b) { | |||||
| inline float_4 pow(float_4 a, float_4 b) { | |||||
| return exp(b * log(a)); | return exp(b * log(a)); | ||||
| } | } | ||||
| inline float4 pow(float a, float4 b) { | |||||
| inline float_4 pow(float a, float_4 b) { | |||||
| return exp(b * std::log(a)); | return exp(b * std::log(a)); | ||||
| } | } | ||||
| @@ -129,43 +129,43 @@ inline float ifelse(bool cond, float a, float b) { | |||||
| } | } | ||||
| /** Given a mask, returns a if mask is 0xffffffff per element, b if mask is 0x00000000 */ | /** Given a mask, returns a if mask is 0xffffffff per element, b if mask is 0x00000000 */ | ||||
| inline float4 ifelse(float4 mask, float4 a, float4 b) { | |||||
| inline float_4 ifelse(float_4 mask, float_4 a, float_4 b) { | |||||
| return (a & mask) | andnot(mask, b); | return (a & mask) | andnot(mask, b); | ||||
| } | } | ||||
| /** Returns the approximate reciprocal square root. | /** Returns the approximate reciprocal square root. | ||||
| Much faster than `1/sqrt(x)`. | Much faster than `1/sqrt(x)`. | ||||
| */ | */ | ||||
| inline float4 rsqrt(float4 x) { | |||||
| return float4(_mm_rsqrt_ps(x.v)); | |||||
| inline float_4 rsqrt(float_4 x) { | |||||
| return float_4(_mm_rsqrt_ps(x.v)); | |||||
| } | } | ||||
| /** Returns the approximate reciprocal. | /** Returns the approximate reciprocal. | ||||
| Much faster than `1/x`. | Much faster than `1/x`. | ||||
| */ | */ | ||||
| inline float4 rcp(float4 x) { | |||||
| return float4(_mm_rcp_ps(x.v)); | |||||
| inline float_4 rcp(float_4 x) { | |||||
| return float_4(_mm_rcp_ps(x.v)); | |||||
| } | } | ||||
| // From math.hpp | // From math.hpp | ||||
| using math::clamp; | using math::clamp; | ||||
| inline float4 clamp(float4 x, float4 a, float4 b) { | |||||
| inline float_4 clamp(float_4 x, float_4 a, float_4 b) { | |||||
| return fmin(fmax(x, a), b); | return fmin(fmax(x, a), b); | ||||
| } | } | ||||
| using math::rescale; | using math::rescale; | ||||
| inline float4 rescale(float4 x, float4 xMin, float4 xMax, float4 yMin, float4 yMax) { | |||||
| inline float_4 rescale(float_4 x, float_4 xMin, float_4 xMax, float_4 yMin, float_4 yMax) { | |||||
| return yMin + (x - xMin) / (xMax - xMin) * (yMax - yMin); | return yMin + (x - xMin) / (xMax - xMin) * (yMax - yMin); | ||||
| } | } | ||||
| using math::sgn; | using math::sgn; | ||||
| inline float4 sgn(float4 x) { | |||||
| float4 signbit = x & -0.f; | |||||
| float4 nonzero = (x != 0.f); | |||||
| inline float_4 sgn(float_4 x) { | |||||
| float_4 signbit = x & -0.f; | |||||
| float_4 nonzero = (x != 0.f); | |||||
| return signbit | (nonzero & 1.f); | return signbit | (nonzero & 1.f); | ||||
| } | } | ||||
| @@ -35,8 +35,8 @@ This class is designed to be used just like you use scalars, with extra features | |||||
| Usage example: | Usage example: | ||||
| float a[4], b[4]; | float a[4], b[4]; | ||||
| float4 a = float4::load(in); | |||||
| float4 b = 2.f * a / (1 - a); | |||||
| float_4 a = float_4::load(in); | |||||
| float_4 b = 2.f * a / (1 - a); | |||||
| b *= sin(2 * M_PI * a); | b *= sin(2 * M_PI * a); | ||||
| b.store(out); | b.store(out); | ||||
| */ | */ | ||||
| @@ -86,29 +86,30 @@ struct Vector<float, 4> { | |||||
| // Typedefs | // Typedefs | ||||
| typedef Vector<float, 4> float4; | |||||
| typedef Vector<float, 4> float_4; | |||||
| // typedef Vector<int32_t, 4> int32_4; | |||||
| // Operator overloads | // Operator overloads | ||||
| /** `a @ b` */ | /** `a @ b` */ | ||||
| #define DECLARE_FLOAT4_OPERATOR_INFIX(operator, func) \ | |||||
| inline float4 operator(const float4 &a, const float4 &b) { \ | |||||
| return float4(func(a.v, b.v)); \ | |||||
| #define DECLARE_FLOAT_4_OPERATOR_INFIX(operator, func) \ | |||||
| inline float_4 operator(const float_4 &a, const float_4 &b) { \ | |||||
| return float_4(func(a.v, b.v)); \ | |||||
| } | } | ||||
| /** `a @= b` */ | /** `a @= b` */ | ||||
| #define DECLARE_FLOAT4_OPERATOR_INCREMENT(operator, opfunc) \ | |||||
| inline float4 &operator(float4 &a, const float4 &b) { \ | |||||
| #define DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator, opfunc) \ | |||||
| inline float_4 &operator(float_4 &a, const float_4 &b) { \ | |||||
| a = opfunc(a, b); \ | a = opfunc(a, b); \ | ||||
| return a; \ | return a; \ | ||||
| } | } | ||||
| DECLARE_FLOAT4_OPERATOR_INFIX(operator+, _mm_add_ps) | |||||
| DECLARE_FLOAT4_OPERATOR_INFIX(operator-, _mm_sub_ps) | |||||
| DECLARE_FLOAT4_OPERATOR_INFIX(operator*, _mm_mul_ps) | |||||
| DECLARE_FLOAT4_OPERATOR_INFIX(operator/, _mm_div_ps) | |||||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator+, _mm_add_ps) | |||||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator-, _mm_sub_ps) | |||||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator*, _mm_mul_ps) | |||||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator/, _mm_div_ps) | |||||
| /* Use these to apply logic, bit masks, and conditions to elements. | /* Use these to apply logic, bit masks, and conditions to elements. | ||||
| Boolean operators on vectors give 0x00000000 for false and 0xffffffff for true, for each vector element. | Boolean operators on vectors give 0x00000000 for false and 0xffffffff for true, for each vector element. | ||||
| @@ -119,64 +120,64 @@ Subtract 1 from value if greater than or equal to 1. | |||||
| x -= (x >= 1.f) & 1.f; | x -= (x >= 1.f) & 1.f; | ||||
| */ | */ | ||||
| DECLARE_FLOAT4_OPERATOR_INFIX(operator^, _mm_xor_ps) | |||||
| DECLARE_FLOAT4_OPERATOR_INFIX(operator&, _mm_and_ps) | |||||
| DECLARE_FLOAT4_OPERATOR_INFIX(operator|, _mm_or_ps) | |||||
| DECLARE_FLOAT4_OPERATOR_INCREMENT(operator+=, operator+); | |||||
| DECLARE_FLOAT4_OPERATOR_INCREMENT(operator-=, operator-); | |||||
| DECLARE_FLOAT4_OPERATOR_INCREMENT(operator*=, operator*); | |||||
| DECLARE_FLOAT4_OPERATOR_INCREMENT(operator/=, operator/); | |||||
| DECLARE_FLOAT4_OPERATOR_INCREMENT(operator^=, operator^); | |||||
| DECLARE_FLOAT4_OPERATOR_INCREMENT(operator&=, operator&); | |||||
| DECLARE_FLOAT4_OPERATOR_INCREMENT(operator|=, operator|); | |||||
| DECLARE_FLOAT4_OPERATOR_INFIX(operator==, _mm_cmpeq_ps) | |||||
| DECLARE_FLOAT4_OPERATOR_INFIX(operator>=, _mm_cmpge_ps) | |||||
| DECLARE_FLOAT4_OPERATOR_INFIX(operator>, _mm_cmpgt_ps) | |||||
| DECLARE_FLOAT4_OPERATOR_INFIX(operator<=, _mm_cmple_ps) | |||||
| DECLARE_FLOAT4_OPERATOR_INFIX(operator<, _mm_cmplt_ps) | |||||
| DECLARE_FLOAT4_OPERATOR_INFIX(operator!=, _mm_cmpneq_ps) | |||||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator^, _mm_xor_ps) | |||||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator&, _mm_and_ps) | |||||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator|, _mm_or_ps) | |||||
| DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator+=, operator+); | |||||
| DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator-=, operator-); | |||||
| DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator*=, operator*); | |||||
| DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator/=, operator/); | |||||
| DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator^=, operator^); | |||||
| DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator&=, operator&); | |||||
| DECLARE_FLOAT_4_OPERATOR_INCREMENT(operator|=, operator|); | |||||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator==, _mm_cmpeq_ps) | |||||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator>=, _mm_cmpge_ps) | |||||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator>, _mm_cmpgt_ps) | |||||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator<=, _mm_cmple_ps) | |||||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator<, _mm_cmplt_ps) | |||||
| DECLARE_FLOAT_4_OPERATOR_INFIX(operator!=, _mm_cmpneq_ps) | |||||
| /** `+a` */ | /** `+a` */ | ||||
| inline float4 operator+(const float4 &a) { | |||||
| inline float_4 operator+(const float_4 &a) { | |||||
| return a; | return a; | ||||
| } | } | ||||
| /** `-a` */ | /** `-a` */ | ||||
| inline float4 operator-(const float4 &a) { | |||||
| inline float_4 operator-(const float_4 &a) { | |||||
| return 0.f - a; | return 0.f - a; | ||||
| } | } | ||||
| /** `++a` */ | /** `++a` */ | ||||
| inline float4 &operator++(float4 &a) { | |||||
| inline float_4 &operator++(float_4 &a) { | |||||
| a += 1.f; | a += 1.f; | ||||
| return a; | return a; | ||||
| } | } | ||||
| /** `--a` */ | /** `--a` */ | ||||
| inline float4 &operator--(float4 &a) { | |||||
| inline float_4 &operator--(float_4 &a) { | |||||
| a -= 1.f; | a -= 1.f; | ||||
| return a; | return a; | ||||
| } | } | ||||
| /** `a++` */ | /** `a++` */ | ||||
| inline float4 operator++(float4 &a, int) { | |||||
| float4 b = a; | |||||
| inline float_4 operator++(float_4 &a, int) { | |||||
| float_4 b = a; | |||||
| ++a; | ++a; | ||||
| return b; | return b; | ||||
| } | } | ||||
| /** `a--` */ | /** `a--` */ | ||||
| inline float4 operator--(float4 &a, int) { | |||||
| float4 b = a; | |||||
| inline float_4 operator--(float_4 &a, int) { | |||||
| float_4 b = a; | |||||
| --a; | --a; | ||||
| return b; | return b; | ||||
| } | } | ||||
| /** `~a` */ | /** `~a` */ | ||||
| inline float4 operator~(const float4 &a) { | |||||
| float4 mask = float4::zero(); | |||||
| inline float_4 operator~(const float_4 &a) { | |||||
| float_4 mask = float_4::zero(); | |||||
| mask = (mask == mask); | mask = (mask == mask); | ||||
| return a ^ mask; | return a ^ mask; | ||||
| } | } | ||||
| @@ -186,8 +187,8 @@ inline float4 operator~(const float4 &a) { | |||||
| /** `~a & b` */ | /** `~a & b` */ | ||||
| inline float4 andnot(const float4 &a, const float4 &b) { | |||||
| return float4(_mm_andnot_ps(a.v, b.v)); | |||||
| inline float_4 andnot(const float_4 &a, const float_4 &b) { | |||||
| return float_4(_mm_andnot_ps(a.v, b.v)); | |||||
| } | } | ||||