From a27b57fe10af20a93a9ad3e77daa4279f82fba45 Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Sun, 11 Aug 2019 21:42:13 -0400 Subject: [PATCH] Add float_4 implementation of approxExp2Floor. --- include/dsp/approx.hpp | 30 +++++++++++++++++++++++++++--- include/simd/functions.hpp | 5 +++-- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/include/dsp/approx.hpp b/include/dsp/approx.hpp index e4fb6d91..22b9ef6c 100644 --- a/include/dsp/approx.hpp +++ b/include/dsp/approx.hpp @@ -18,6 +18,32 @@ https://en.wikipedia.org/wiki/Estrin%27s_scheme */ +/** Returns 2^floor(x), assuming that x >= 0. +If `xf` is non-NULL, it is set to the fractional part of x. +*/ +template +T approxExp2Floor(T x, T* xf); + +template <> +inline simd::float_4 approxExp2Floor(simd::float_4 x, simd::float_4* xf) { + simd::int32_4 xi = x; + if (xf) + *xf = x - simd::float_4(xi); + // Set float exponent directly + // https://stackoverflow.com/a/57454528/272642 + simd::int32_4 y = (xi + 127) << 23; + return simd::float_4::cast(y); +} + +template <> +inline float approxExp2Floor(float x, float* xf) { + int xi = x; + if (xf) + *xf = x - xi; + return 1 << xi; +} + + /** Returns 2^x, assuming that x >= 0. Maximum 0.00024% error. Roughly 7x faster than `std::pow(2, x)`. @@ -29,9 +55,7 @@ If negative powers are needed, you may use a lower bound and rescale. template T approxExp2_taylor5(T x) { // Use bit-shifting for integer part of x. - int xi = x; - x -= xi; - T y = 1 << xi; + T y = approxExp2Floor(x, &x); // 5th order expansion of 2^x around 0.4752 in Horner form. // The center is chosen so that the endpoints of [0, 1] have equal error, creating no discontinuity at integers. y *= T(0.9999976457798443) + x * (T(0.6931766804601935) + x * (T(0.2400729486415728) + x * (T(0.05592817518644387) + x * (T(0.008966320633544) + x * T(0.001853512473884202))))); diff --git a/include/simd/functions.hpp b/include/simd/functions.hpp index 63c9e298..f58b3856 100644 --- a/include/simd/functions.hpp +++ b/include/simd/functions.hpp @@ -204,9 +204,10 @@ inline float_4 sgn(float_4 x) { return signbit | (nonzero & 1.f); } -/** Given a mask `a`, returns a vector with each element either 0's or 1's depending on the mask bit. */ +/** Given a mask `a`, returns a vector with each element either 0's or 1's depending on the mask bit. +*/ template -inline T movemaskInverse(int a); +T movemaskInverse(int a); template <> inline float_4 movemaskInverse(int x) {