| @@ -7,9 +7,6 @@ namespace dsp { | |||
| /* | |||
| In this header, function names are divided into two or more parts, separated by "_". | |||
| The functionality is the first part, and the approximation methods are the following parts. | |||
| Glossary: | |||
| https://en.wikipedia.org/wiki/Taylor_series | |||
| https://en.wikipedia.org/wiki/Chebyshev_polynomials | |||
| @@ -20,50 +17,124 @@ https://en.wikipedia.org/wiki/CORDIC | |||
| */ | |||
| /** Returns 2^floor(x), assuming that x >= 0. | |||
| If `xf` is non-NULL, it is set to the fractional part of x. | |||
| /** Evaluates a polynomial with coefficients `a[n]` at `x`. | |||
| Uses naive direct evaluation. | |||
| */ | |||
| template <typename T, size_t N> | |||
| T polyDirect(const T (&a)[N], T x) { | |||
| T y = 0; | |||
| T xn = 1; | |||
| for (size_t n = 0; n < N; n++) { | |||
| y += a[n] * xn; | |||
| xn *= x; | |||
| } | |||
| return y; | |||
| } | |||
| /** Evaluates a polynomial with coefficients `a[n]` at `x`. | |||
| Uses Horner's method. | |||
| https://en.wikipedia.org/wiki/Horner%27s_method | |||
| */ | |||
| template <typename T, size_t N> | |||
| T polyHorner(const T (&a)[N], T x) { | |||
| if (N == 0) | |||
| return 0; | |||
| T y = a[N - 1]; | |||
| for (size_t n = 1; n < N; n++) { | |||
| y = a[N - 1 - n] + y * x; | |||
| } | |||
| return y; | |||
| } | |||
| /** Evaluates a polynomial with coefficients `a[n]` at `x`. | |||
| Uses Estrin's method. | |||
| https://en.wikipedia.org/wiki/Estrin%27s_scheme | |||
| */ | |||
| template <typename T, size_t N> | |||
| T polyEstrin(const T (&a)[N], T x) { | |||
| if (N == 0) | |||
| return 0; | |||
| if (N == 1) | |||
| return a[0]; | |||
| const size_t M = (N + 1) / 2; | |||
| T b[M]; | |||
| for (size_t i = 0; i < M; i++) { | |||
| b[i] = a[2 * i]; | |||
| if (2 * i + 1 < N) | |||
| b[i] += a[2 * i + 1] * x; | |||
| } | |||
| return polyEstrin(b, x * x); | |||
| } | |||
| /** Returns `2^floor(x)`. | |||
| If xf is given, sets it to the fractional part of x. | |||
| This is useful in the computation `2^x = 2^floor(x) * 2^frac(x)`. | |||
| */ | |||
| template <typename T> | |||
| T approxExp2Floor(T x, T* xf); | |||
| T exp2Floor(T x, T* xf); | |||
| template <> | |||
| inline simd::float_4 approxExp2Floor(simd::float_4 x, simd::float_4* xf) { | |||
| simd::int32_4 xi = x; | |||
| inline float exp2Floor(float x, float* xf) { | |||
| x += 127; | |||
| // x should be positive now, so this always truncates towards -inf. | |||
| int32_t xi = x; | |||
| if (xf) | |||
| *xf = x - simd::float_4(xi); | |||
| // Set float exponent directly | |||
| // https://stackoverflow.com/a/57454528/272642 | |||
| simd::int32_4 y = (xi + 127) << 23; | |||
| return simd::float_4::cast(y); | |||
| *xf = x - xi; | |||
| // Set mantissa of float | |||
| union { | |||
| float yi; | |||
| int32_t yii; | |||
| }; | |||
| yii = xi << 23; | |||
| return yi; | |||
| } | |||
| template <> | |||
| inline float approxExp2Floor(float x, float* xf) { | |||
| int32_t xi = x; | |||
| inline simd::float_4 exp2Floor(simd::float_4 x, simd::float_4* xf) { | |||
| x += 127; | |||
| simd::int32_4 xi = x; | |||
| if (xf) | |||
| *xf = x - xi; | |||
| int32_t y = (xi + 127) << 23; | |||
| return bitCast<float>(y); | |||
| *xf = x - simd::float_4(xi); | |||
| simd::int32_4 yii = xi << 23; | |||
| return simd::float_4::cast(yii); | |||
| } | |||
| /** Deprecated alias of exp2Floor() */ | |||
| template <typename T> | |||
| T approxExp2Floor(T x, T* xf) { | |||
| return exp2Floor(x, xf); | |||
| } | |||
| /** Returns 2^x, assuming that x >= 0. | |||
| Maximum 0.00024% error. | |||
| For float, roughly 3x faster than `std::pow(2.f, x)`. | |||
| For float_4, roughly 2x faster than `simd::pow(2.f, x)`. | |||
| If negative powers are needed, you may use a lower bound and rescale. | |||
| /** Returns 2^x with at most 6e-06 relative error. | |||
| approxExp2(x + 20) / 1048576 | |||
| Polynomial coefficients are chosen to minimize relative error while maintaining continuity and giving exact values at integer values of `x`. | |||
| Thanks to Andy Simper for coefficients. | |||
| */ | |||
| template <typename T> | |||
| T exp2_taylor5(T x) { | |||
| T xf; | |||
| T yi = exp2Floor(x, &xf); | |||
| const T a[] = { | |||
| 1.0, | |||
| 0.69315169353961, | |||
| 0.2401595990753, | |||
| 0.055817908652, | |||
| 0.008991698010, | |||
| 0.001879100722, | |||
| }; | |||
| T yf = polyHorner(a, xf); | |||
| return yi * yf; | |||
| } | |||
| /** Deprecated alias of exp2_taylor5() */ | |||
| template <typename T> | |||
| T approxExp2_taylor5(T x) { | |||
| // Use bit-shifting for integer part of x. | |||
| T y = approxExp2Floor(x, &x); | |||
| // 5th order expansion of 2^x around 0.4752 in Horner form. | |||
| // The center is chosen so that the endpoints of [0, 1] have equal error, creating no discontinuity at integers. | |||
| y *= T(0.9999976457798443) + x * (T(0.6931766804601935) + x * (T(0.2400729486415728) + x * (T(0.05592817518644387) + x * (T(0.008966320633544) + x * T(0.001853512473884202))))); | |||
| return y; | |||
| return exp2_taylor5(x); | |||
| } | |||