From a27b57fe10af20a93a9ad3e77daa4279f82fba45 Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Sun, 11 Aug 2019 21:42:13 -0400
Subject: [PATCH] Add float_4 implementation of approxExp2Floor.

---
 include/dsp/approx.hpp     | 30 +++++++++++++++++++++++++++---
 include/simd/functions.hpp |  5 +++--
 2 files changed, 30 insertions(+), 5 deletions(-)
diff --git a/include/dsp/approx.hpp b/include/dsp/approx.hpp
index e4fb6d91..22b9ef6c 100644
--- a/include/dsp/approx.hpp
+++ b/include/dsp/approx.hpp
@@ -18,6 +18,32 @@ https://en.wikipedia.org/wiki/Estrin%27s_scheme
 */
 
 
+/** Returns 2^floor(x), assuming that x >= 0.
+If `xf` is non-NULL, it is set to the fractional part of x.
+*/
+template <typename T>
+T approxExp2Floor(T x, T* xf);
+
+template <>
+inline simd::float_4 approxExp2Floor(simd::float_4 x, simd::float_4* xf) {
+	simd::int32_4 xi = x;
+	if (xf)
+		*xf = x - simd::float_4(xi);
+	// Set float exponent directly
+	// https://stackoverflow.com/a/57454528/272642
+	simd::int32_4 y = (xi + 127) << 23;
+	return simd::float_4::cast(y);
+}
+
+template <>
+inline float approxExp2Floor<float>(float x, float* xf) {
+	int xi = x;
+	if (xf)
+		*xf = x - xi;
+	return 1 << xi;
+}
+
+
 /** Returns 2^x, assuming that x >= 0.
 Maximum 0.00024% error.
 Roughly 7x faster than `std::pow(2, x)`.
@@ -29,9 +55,7 @@ If negative powers are needed, you may use a lower bound and rescale.
 template <typename T>
 T approxExp2_taylor5(T x) {
 	// Use bit-shifting for integer part of x.
-	int xi = x;
-	x -= xi;
-	T y = 1 << xi;
+	T y = approxExp2Floor(x, &x);
 	// 5th order expansion of 2^x around 0.4752 in Horner form.
 	// The center is chosen so that the endpoints of [0, 1] have equal error, creating no discontinuity at integers.
 	y *= T(0.9999976457798443) + x * (T(0.6931766804601935) + x * (T(0.2400729486415728) + x * (T(0.05592817518644387) + x * (T(0.008966320633544) + x * T(0.001853512473884202)))));
diff --git a/include/simd/functions.hpp b/include/simd/functions.hpp
index 63c9e298..f58b3856 100644
--- a/include/simd/functions.hpp
+++ b/include/simd/functions.hpp
@@ -204,9 +204,10 @@ inline float_4 sgn(float_4 x) {
 	return signbit | (nonzero & 1.f);
 }
 
-/** Given a mask `a`, returns a vector with each element either 0's or 1's depending on the mask bit. */
+/** Given a mask `a`, returns a vector with each element either 0's or 1's depending on the mask bit.
+*/
 template <typename T>
-inline T movemaskInverse(int a);
+T movemaskInverse(int a);
 
 template <>
 inline float_4 movemaskInverse<float_4>(int x) {