From 1c523ae37396ee80b3814457cb898d9471a16ebb Mon Sep 17 00:00:00 2001
From: Andrew Belt <andrewpbelt@gmail.com>
Date: Fri, 10 Feb 2023 05:16:17 -0500
Subject: [PATCH] Add dsp::polyDirect(), polyHorner(), and polyEstrin(). Rename
 approxExp2Floor() to exp2Floor() and make it correctly handle negative `x`.
 Rename approxExp2_taylor5() to exp2_taylor5() and improve polynomial
 coefficients.

---
 include/dsp/approx.hpp | 131 +++++++++++++++++++++++++++++++----------
 1 file changed, 101 insertions(+), 30 deletions(-)
diff --git a/include/dsp/approx.hpp b/include/dsp/approx.hpp
index e9c1da8d..d2d75ee7 100644
--- a/include/dsp/approx.hpp
+++ b/include/dsp/approx.hpp
@@ -7,9 +7,6 @@ namespace dsp {
 
 
 /*
-In this header, function names are divided into two or more parts, separated by "_".
-The functionality is the first part, and the approximation methods are the following parts.
-
 Glossary:
 https://en.wikipedia.org/wiki/Taylor_series
 https://en.wikipedia.org/wiki/Chebyshev_polynomials
@@ -20,50 +17,124 @@ https://en.wikipedia.org/wiki/CORDIC
 */
 
 
-/** Returns 2^floor(x), assuming that x >= 0.
-If `xf` is non-NULL, it is set to the fractional part of x.
+/** Evaluates a polynomial with coefficients `a[n]` at `x`.
+Uses naive direct evaluation.
+*/
+template <typename T, size_t N>
+T polyDirect(const T (&a)[N], T x) {
+	T y = 0;
+	T xn = 1;
+	for (size_t n = 0; n < N; n++) {
+		y += a[n] * xn;
+		xn *= x;
+	}
+	return y;
+}
+
+/** Evaluates a polynomial with coefficients `a[n]` at `x`.
+Uses Horner's method.
+https://en.wikipedia.org/wiki/Horner%27s_method
+*/
+template <typename T, size_t N>
+T polyHorner(const T (&a)[N], T x) {
+	if (N == 0)
+		return 0;
+
+	T y = a[N - 1];
+	for (size_t n = 1; n < N; n++) {
+		y = a[N - 1 - n] + y * x;
+	}
+	return y;
+}
+
+/** Evaluates a polynomial with coefficients `a[n]` at `x`.
+Uses Estrin's method.
+https://en.wikipedia.org/wiki/Estrin%27s_scheme
+*/
+template <typename T, size_t N>
+T polyEstrin(const T (&a)[N], T x) {
+	if (N == 0)
+		return 0;
+	if (N == 1)
+		return a[0];
+
+	const size_t M = (N + 1) / 2;
+	T b[M];
+	for (size_t i = 0; i < M; i++) {
+		b[i] = a[2 * i];
+		if (2 * i + 1 < N)
+			b[i] += a[2 * i + 1] * x;
+	}
+	return polyEstrin(b, x * x);
+}
+
+
+/** Returns `2^floor(x)`.
+If xf is given, sets it to the fractional part of x.
+This is useful in the computation `2^x = 2^floor(x) * 2^frac(x)`.
 */
 template <typename T>
-T approxExp2Floor(T x, T* xf);
+T exp2Floor(T x, T* xf);
 
 template <>
-inline simd::float_4 approxExp2Floor(simd::float_4 x, simd::float_4* xf) {
-	simd::int32_4 xi = x;
+inline float exp2Floor(float x, float* xf) {
+	x += 127;
+	// x should be positive now, so this always truncates towards -inf.
+	int32_t xi = x;
 	if (xf)
-		*xf = x - simd::float_4(xi);
-	// Set float exponent directly
-	// https://stackoverflow.com/a/57454528/272642
-	simd::int32_4 y = (xi + 127) << 23;
-	return simd::float_4::cast(y);
+		*xf = x - xi;
+	// Set mantissa of float
+	union {
+		float yi;
+		int32_t yii;
+	};
+	yii = xi << 23;
+	return yi;
 }
 
 template <>
-inline float approxExp2Floor(float x, float* xf) {
-	int32_t xi = x;
+inline simd::float_4 exp2Floor(simd::float_4 x, simd::float_4* xf) {
+	x += 127;
+	simd::int32_4 xi = x;
 	if (xf)
-		*xf = x - xi;
-	int32_t y = (xi + 127) << 23;
-	return bitCast<float>(y);
+		*xf = x - simd::float_4(xi);
+	simd::int32_4 yii = xi << 23;
+	return simd::float_4::cast(yii);
 }
 
+/** Deprecated alias of exp2Floor() */
+template <typename T>
+T approxExp2Floor(T x, T* xf) {
+	return exp2Floor(x, xf);
+}
 
-/** Returns 2^x, assuming that x >= 0.
-Maximum 0.00024% error.
-For float, roughly 3x faster than `std::pow(2.f, x)`.
-For float_4, roughly 2x faster than `simd::pow(2.f, x)`.
 
-If negative powers are needed, you may use a lower bound and rescale.
+/** Returns 2^x with at most 6e-06 relative error.
 
-	approxExp2(x + 20) / 1048576
+Polynomial coefficients are chosen to minimize relative error while maintaining continuity and giving exact values at integer values of `x`.
+Thanks to Andy Simper for coefficients.
 */
 template <typename T>
+T exp2_taylor5(T x) {
+	T xf;
+	T yi = exp2Floor(x, &xf);
+
+	const T a[] = {
+		1.0,
+		0.69315169353961,
+		0.2401595990753,
+		0.055817908652,
+		0.008991698010,
+		0.001879100722,
+	};
+	T yf = polyHorner(a, xf);
+	return yi * yf;
+}
+
+/** Deprecated alias of exp2_taylor5() */
+template <typename T>
 T approxExp2_taylor5(T x) {
-	// Use bit-shifting for integer part of x.
-	T y = approxExp2Floor(x, &x);
-	// 5th order expansion of 2^x around 0.4752 in Horner form.
-	// The center is chosen so that the endpoints of [0, 1] have equal error, creating no discontinuity at integers.
-	y *= T(0.9999976457798443) + x * (T(0.6931766804601935) + x * (T(0.2400729486415728) + x * (T(0.05592817518644387) + x * (T(0.008966320633544) + x * T(0.001853512473884202)))));
-	return y;
+	return exp2_taylor5(x);
 }