Browse Source

Use the SSE4.1 roundps intrinsic (_mm_round_ps) in simd::trunc, floor, ceil, and round. Use floor() in fmod() instead of trunc().

tags/v2.4.1
Andrew Belt 1 year ago
parent
commit
993a27efc9
1 changed file with 5 additions and 11 deletions
  1. +5
    -11
      include/simd/functions.hpp

+ 5
- 11
include/simd/functions.hpp View File

@@ -161,37 +161,31 @@ inline float_4 atan2(float_4 x, float_4 y) {
using std::trunc; using std::trunc;


inline float_4 trunc(float_4 a) { inline float_4 trunc(float_4 a) {
- return float_4(_mm_cvtepi32_ps(_mm_cvttps_epi32(a.v)));
+ return float_4(_mm_round_ps(a.v, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC));
} }


using std::floor; using std::floor;


inline float_4 floor(float_4 a) { inline float_4 floor(float_4 a) {
- float_4 b = trunc(a);
- b -= (b > a) & 1.f;
- return b;
+ return float_4(_mm_round_ps(a.v, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC));
} }


using std::ceil; using std::ceil;


inline float_4 ceil(float_4 a) { inline float_4 ceil(float_4 a) {
- float_4 b = trunc(a);
- b += (b < a) & 1.f;
- return b;
+ return float_4(_mm_round_ps(a.v, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC));
} }


using std::round; using std::round;


inline float_4 round(float_4 a) { inline float_4 round(float_4 a) {
- a += ifelse(a < 0, -0.5f, 0.5f);
- float_4 b = trunc(a);
- return b;
+ return float_4(_mm_round_ps(a.v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
} }


using std::fmod; using std::fmod;


inline float_4 fmod(float_4 a, float_4 b) { inline float_4 fmod(float_4 a, float_4 b) {
- return a - trunc(a / b) * b;
+ return a - floor(a / b) * b;
} }


using std::hypot; using std::hypot;


Loading…
Cancel
Save