Browse Source

Add float_4 implementation of approxExp2Floor.

tags/v1.1.4
Andrew Belt 5 years ago
parent
commit
a27b57fe10
2 changed files with 30 additions and 5 deletions
  1. +27
    -3
      include/dsp/approx.hpp
  2. +3
    -2
      include/simd/functions.hpp

+ 27
- 3
include/dsp/approx.hpp View File

@@ -18,6 +18,32 @@ https://en.wikipedia.org/wiki/Estrin%27s_scheme
*/


/** Returns 2^floor(x), assuming that x >= 0.
If `xf` is non-NULL, it is set to the fractional part of x.
*/
template <typename T>
T approxExp2Floor(T x, T* xf);

template <>
inline simd::float_4 approxExp2Floor(simd::float_4 x, simd::float_4* xf) {
simd::int32_4 xi = x;
if (xf)
*xf = x - simd::float_4(xi);
// Set float exponent directly
// https://stackoverflow.com/a/57454528/272642
simd::int32_4 y = (xi + 127) << 23;
return simd::float_4::cast(y);
}

template <>
inline float approxExp2Floor<float>(float x, float* xf) {
int xi = x;
if (xf)
*xf = x - xi;
return 1 << xi;
}


/** Returns 2^x, assuming that x >= 0.
Maximum 0.00024% error.
Roughly 7x faster than `std::pow(2, x)`.
@@ -29,9 +55,7 @@ If negative powers are needed, you may use a lower bound and rescale.
template <typename T>
T approxExp2_taylor5(T x) {
// Use bit-shifting for integer part of x.
int xi = x;
x -= xi;
T y = 1 << xi;
T y = approxExp2Floor(x, &x);
// 5th order expansion of 2^x around 0.4752 in Horner form.
// The center is chosen so that the endpoints of [0, 1] have equal error, creating no discontinuity at integers.
y *= T(0.9999976457798443) + x * (T(0.6931766804601935) + x * (T(0.2400729486415728) + x * (T(0.05592817518644387) + x * (T(0.008966320633544) + x * T(0.001853512473884202)))));


+ 3
- 2
include/simd/functions.hpp View File

@@ -204,9 +204,10 @@ inline float_4 sgn(float_4 x) {
return signbit | (nonzero & 1.f);
}

/** Given a mask `a`, returns a vector with each element either 0's or 1's depending on the mask bit. */
/** Given a mask `a`, returns a vector with each element either 0's or 1's depending on the mask bit.
*/
template <typename T>
inline T movemaskInverse(int a);
T movemaskInverse(int a);

template <>
inline float_4 movemaskInverse<float_4>(int x) {


Loading…
Cancel
Save