Add dsp::polyDirect(), polyHorner(), and polyEstrin(). Rename approxExp2Floor() to exp2Floor() and make it correctly handle negative `x`. Rename approxExp2_taylor5() to exp2_taylor5() and improve polynomial coefficients.

2 years ago · 1c523ae373
--- a/include/dsp/approx.hpp
+++ b/include/dsp/approx.hpp
@@ -7,9 +7,6 @@ namespace dsp {


 /*
 In this header, function names are divided into two or more parts, separated by "_".
 The functionality is the first part, and the approximation methods are the following parts.

 Glossary:
 https://en.wikipedia.org/wiki/Taylor_series
 https://en.wikipedia.org/wiki/Chebyshev_polynomials
@@ -20,50 +17,124 @@ https://en.wikipedia.org/wiki/CORDIC
 */


 /** Returns 2^floor(x), assuming that x >= 0.
 If `xf` is non-NULL, it is set to the fractional part of x.
 /** Evaluates a polynomial with coefficients `a[n]` at `x`.
 Uses naive direct evaluation.
 */
 template <typename T, size_t N>
 T polyDirect(const T (&a)[N], T x) {
 	T y = 0;
 	T xn = 1;
 	for (size_t n = 0; n < N; n++) {
 		y += a[n] * xn;
 		xn *= x;
 	}
 	return y;
 }

 /** Evaluates a polynomial with coefficients `a[n]` at `x`.
 Uses Horner's method.
 https://en.wikipedia.org/wiki/Horner%27s_method
 */
 template <typename T, size_t N>
 T polyHorner(const T (&a)[N], T x) {
 	if (N == 0)
 		return 0;

 	T y = a[N - 1];
 	for (size_t n = 1; n < N; n++) {
 		y = a[N - 1 - n] + y * x;
 	}
 	return y;
 }

 /** Evaluates a polynomial with coefficients `a[n]` at `x`.
 Uses Estrin's method.
 https://en.wikipedia.org/wiki/Estrin%27s_scheme
 */
 template <typename T, size_t N>
 T polyEstrin(const T (&a)[N], T x) {
 	if (N == 0)
 		return 0;
 	if (N == 1)
 		return a[0];

 	const size_t M = (N + 1) / 2;
 	T b[M];
 	for (size_t i = 0; i < M; i++) {
 		b[i] = a[2 * i];
 		if (2 * i + 1 < N)
 			b[i] += a[2 * i + 1] * x;
 	}
 	return polyEstrin(b, x * x);
 }


 /** Returns `2^floor(x)`.
 If xf is given, sets it to the fractional part of x.
 This is useful in the computation `2^x = 2^floor(x) * 2^frac(x)`.
 */
 template <typename T>
 T approxExp2Floor(T x, T* xf);
 T exp2Floor(T x, T* xf);

 template <>
 inline simd::float_4 approxExp2Floor(simd::float_4 x, simd::float_4* xf) {
 	simd::int32_4 xi = x;
 inline float exp2Floor(float x, float* xf) {
 	x += 127;
 	// x should be positive now, so this always truncates towards -inf.
 	int32_t xi = x;
 	if (xf)
 		*xf = x - simd::float_4(xi);
 	// Set float exponent directly
 	// https://stackoverflow.com/a/57454528/272642
 	simd::int32_4 y = (xi + 127) << 23;
 	return simd::float_4::cast(y);
 		*xf = x - xi;
 	// Set mantissa of float
 	union {
 		float yi;
 		int32_t yii;
 	};
 	yii = xi << 23;
 	return yi;
 }

 template <>
 inline float approxExp2Floor(float x, float* xf) {
 	int32_t xi = x;
 inline simd::float_4 exp2Floor(simd::float_4 x, simd::float_4* xf) {
 	x += 127;
 	simd::int32_4 xi = x;
 	if (xf)
 		*xf = x - xi;
 	int32_t y = (xi + 127) << 23;
 	return bitCast<float>(y);
 		*xf = x - simd::float_4(xi);
 	simd::int32_4 yii = xi << 23;
 	return simd::float_4::cast(yii);
 }

 /** Deprecated alias of exp2Floor() */
 template <typename T>
 T approxExp2Floor(T x, T* xf) {
 	return exp2Floor(x, xf);
 }

 /** Returns 2^x, assuming that x >= 0.
 Maximum 0.00024% error.
 For float, roughly 3x faster than `std::pow(2.f, x)`.
 For float_4, roughly 2x faster than `simd::pow(2.f, x)`.

 If negative powers are needed, you may use a lower bound and rescale.
 /** Returns 2^x with at most 6e-06 relative error.

 	approxExp2(x + 20) / 1048576
 Polynomial coefficients are chosen to minimize relative error while maintaining continuity and giving exact values at integer values of `x`.
 Thanks to Andy Simper for coefficients.
 */
 template <typename T>
 T exp2_taylor5(T x) {
 	T xf;
 	T yi = exp2Floor(x, &xf);

 	const T a[] = {
 		1.0,
 		0.69315169353961,
 		0.2401595990753,
 		0.055817908652,
 		0.008991698010,
 		0.001879100722,
 	};
 	T yf = polyHorner(a, xf);
 	return yi * yf;
 }

 /** Deprecated alias of exp2_taylor5() */
 template <typename T>
 T approxExp2_taylor5(T x) {
 	// Use bit-shifting for integer part of x.
 	T y = approxExp2Floor(x, &x);
 	// 5th order expansion of 2^x around 0.4752 in Horner form.
 	// The center is chosen so that the endpoints of [0, 1] have equal error, creating no discontinuity at integers.
 	y *= T(0.9999976457798443) + x * (T(0.6931766804601935) + x * (T(0.2400729486415728) + x * (T(0.05592817518644387) + x * (T(0.008966320633544) + x * T(0.001853512473884202)))));
 	return y;
 	return exp2_taylor5(x);
 }