From 1c523ae37396ee80b3814457cb898d9471a16ebb Mon Sep 17 00:00:00 2001 From: Andrew Belt Date: Fri, 10 Feb 2023 05:16:17 -0500 Subject: [PATCH] Add dsp::polyDirect(), polyHorner(), and polyEstrin(). Rename approxExp2Floor() to exp2Floor() and make it correctly handle negative `x`. Rename approxExp2_taylor5() to exp2_taylor5() and improve polynomial coefficients. --- include/dsp/approx.hpp | 131 +++++++++++++++++++++++++++++++---------- 1 file changed, 101 insertions(+), 30 deletions(-) diff --git a/include/dsp/approx.hpp b/include/dsp/approx.hpp index e9c1da8d..d2d75ee7 100644 --- a/include/dsp/approx.hpp +++ b/include/dsp/approx.hpp @@ -7,9 +7,6 @@ namespace dsp { /* -In this header, function names are divided into two or more parts, separated by "_". -The functionality is the first part, and the approximation methods are the following parts. - Glossary: https://en.wikipedia.org/wiki/Taylor_series https://en.wikipedia.org/wiki/Chebyshev_polynomials @@ -20,50 +17,124 @@ https://en.wikipedia.org/wiki/CORDIC */ -/** Returns 2^floor(x), assuming that x >= 0. -If `xf` is non-NULL, it is set to the fractional part of x. +/** Evaluates a polynomial with coefficients `a[n]` at `x`. +Uses naive direct evaluation. +*/ +template +T polyDirect(const T (&a)[N], T x) { + T y = 0; + T xn = 1; + for (size_t n = 0; n < N; n++) { + y += a[n] * xn; + xn *= x; + } + return y; +} + +/** Evaluates a polynomial with coefficients `a[n]` at `x`. +Uses Horner's method. +https://en.wikipedia.org/wiki/Horner%27s_method +*/ +template +T polyHorner(const T (&a)[N], T x) { + if (N == 0) + return 0; + + T y = a[N - 1]; + for (size_t n = 1; n < N; n++) { + y = a[N - 1 - n] + y * x; + } + return y; +} + +/** Evaluates a polynomial with coefficients `a[n]` at `x`. +Uses Estrin's method. +https://en.wikipedia.org/wiki/Estrin%27s_scheme +*/ +template +T polyEstrin(const T (&a)[N], T x) { + if (N == 0) + return 0; + if (N == 1) + return a[0]; + + const size_t M = (N + 1) / 2; + T b[M]; + for (size_t i = 0; i < M; i++) { + b[i] = a[2 * i]; + if (2 * i + 1 < N) + b[i] += a[2 * i + 1] * x; + } + return polyEstrin(b, x * x); +} + + +/** Returns `2^floor(x)`. +If xf is given, sets it to the fractional part of x. +This is useful in the computation `2^x = 2^floor(x) * 2^frac(x)`. */ template -T approxExp2Floor(T x, T* xf); +T exp2Floor(T x, T* xf); template <> -inline simd::float_4 approxExp2Floor(simd::float_4 x, simd::float_4* xf) { - simd::int32_4 xi = x; +inline float exp2Floor(float x, float* xf) { + x += 127; + // x should be positive now, so this always truncates towards -inf. + int32_t xi = x; if (xf) - *xf = x - simd::float_4(xi); - // Set float exponent directly - // https://stackoverflow.com/a/57454528/272642 - simd::int32_4 y = (xi + 127) << 23; - return simd::float_4::cast(y); + *xf = x - xi; + // Set mantissa of float + union { + float yi; + int32_t yii; + }; + yii = xi << 23; + return yi; } template <> -inline float approxExp2Floor(float x, float* xf) { - int32_t xi = x; +inline simd::float_4 exp2Floor(simd::float_4 x, simd::float_4* xf) { + x += 127; + simd::int32_4 xi = x; if (xf) - *xf = x - xi; - int32_t y = (xi + 127) << 23; - return bitCast(y); + *xf = x - simd::float_4(xi); + simd::int32_4 yii = xi << 23; + return simd::float_4::cast(yii); } +/** Deprecated alias of exp2Floor() */ +template +T approxExp2Floor(T x, T* xf) { + return exp2Floor(x, xf); +} -/** Returns 2^x, assuming that x >= 0. -Maximum 0.00024% error. -For float, roughly 3x faster than `std::pow(2.f, x)`. -For float_4, roughly 2x faster than `simd::pow(2.f, x)`. -If negative powers are needed, you may use a lower bound and rescale. +/** Returns 2^x with at most 6e-06 relative error. - approxExp2(x + 20) / 1048576 +Polynomial coefficients are chosen to minimize relative error while maintaining continuity and giving exact values at integer values of `x`. +Thanks to Andy Simper for coefficients. */ template +T exp2_taylor5(T x) { + T xf; + T yi = exp2Floor(x, &xf); + + const T a[] = { + 1.0, + 0.69315169353961, + 0.2401595990753, + 0.055817908652, + 0.008991698010, + 0.001879100722, + }; + T yf = polyHorner(a, xf); + return yi * yf; +} + +/** Deprecated alias of exp2_taylor5() */ +template T approxExp2_taylor5(T x) { - // Use bit-shifting for integer part of x. - T y = approxExp2Floor(x, &x); - // 5th order expansion of 2^x around 0.4752 in Horner form. - // The center is chosen so that the endpoints of [0, 1] have equal error, creating no discontinuity at integers. - y *= T(0.9999976457798443) + x * (T(0.6931766804601935) + x * (T(0.2400729486415728) + x * (T(0.05592817518644387) + x * (T(0.008966320633544) + x * T(0.001853512473884202))))); - return y; + return exp2_taylor5(x); }