Browse Source

Add dsp::polyDirect(), polyHorner(), and polyEstrin(). Rename approxExp2Floor() to exp2Floor() and make it correctly handle negative `x`. Rename approxExp2_taylor5() to exp2_taylor5() and improve polynomial coefficients.

tags/v2.3.0
Andrew Belt 1 year ago
parent
commit
1c523ae373
1 changed files with 101 additions and 30 deletions
  1. +101
    -30
      include/dsp/approx.hpp

+ 101
- 30
include/dsp/approx.hpp View File

@@ -7,9 +7,6 @@ namespace dsp {


/*
In this header, function names are divided into two or more parts, separated by "_".
The functionality is the first part, and the approximation methods are the following parts.

Glossary:
https://en.wikipedia.org/wiki/Taylor_series
https://en.wikipedia.org/wiki/Chebyshev_polynomials
@@ -20,50 +17,124 @@ https://en.wikipedia.org/wiki/CORDIC
*/


/** Returns 2^floor(x), assuming that x >= 0.
If `xf` is non-NULL, it is set to the fractional part of x.
/** Evaluates a polynomial with coefficients `a[n]` at `x`.
Uses naive direct evaluation.
*/
template <typename T, size_t N>
T polyDirect(const T (&a)[N], T x) {
T y = 0;
T xn = 1;
for (size_t n = 0; n < N; n++) {
y += a[n] * xn;
xn *= x;
}
return y;
}

/** Evaluates a polynomial with coefficients `a[n]` at `x`.
Uses Horner's method.
https://en.wikipedia.org/wiki/Horner%27s_method
*/
template <typename T, size_t N>
T polyHorner(const T (&a)[N], T x) {
if (N == 0)
return 0;

T y = a[N - 1];
for (size_t n = 1; n < N; n++) {
y = a[N - 1 - n] + y * x;
}
return y;
}

/** Evaluates a polynomial with coefficients `a[n]` at `x`.
Uses Estrin's method.
https://en.wikipedia.org/wiki/Estrin%27s_scheme
*/
template <typename T, size_t N>
T polyEstrin(const T (&a)[N], T x) {
if (N == 0)
return 0;
if (N == 1)
return a[0];

const size_t M = (N + 1) / 2;
T b[M];
for (size_t i = 0; i < M; i++) {
b[i] = a[2 * i];
if (2 * i + 1 < N)
b[i] += a[2 * i + 1] * x;
}
return polyEstrin(b, x * x);
}


/** Returns `2^floor(x)`.
If xf is given, sets it to the fractional part of x.
This is useful in the computation `2^x = 2^floor(x) * 2^frac(x)`.
*/
template <typename T>
T approxExp2Floor(T x, T* xf);
T exp2Floor(T x, T* xf);

template <>
inline simd::float_4 approxExp2Floor(simd::float_4 x, simd::float_4* xf) {
simd::int32_4 xi = x;
inline float exp2Floor(float x, float* xf) {
x += 127;
// x should be positive now, so this always truncates towards -inf.
int32_t xi = x;
if (xf)
*xf = x - simd::float_4(xi);
// Set float exponent directly
// https://stackoverflow.com/a/57454528/272642
simd::int32_4 y = (xi + 127) << 23;
return simd::float_4::cast(y);
*xf = x - xi;
// Set mantissa of float
union {
float yi;
int32_t yii;
};
yii = xi << 23;
return yi;
}

template <>
inline float approxExp2Floor(float x, float* xf) {
int32_t xi = x;
inline simd::float_4 exp2Floor(simd::float_4 x, simd::float_4* xf) {
x += 127;
simd::int32_4 xi = x;
if (xf)
*xf = x - xi;
int32_t y = (xi + 127) << 23;
return bitCast<float>(y);
*xf = x - simd::float_4(xi);
simd::int32_4 yii = xi << 23;
return simd::float_4::cast(yii);
}

/** Deprecated alias of exp2Floor() */
template <typename T>
T approxExp2Floor(T x, T* xf) {
return exp2Floor(x, xf);
}

/** Returns 2^x, assuming that x >= 0.
Maximum 0.00024% error.
For float, roughly 3x faster than `std::pow(2.f, x)`.
For float_4, roughly 2x faster than `simd::pow(2.f, x)`.

If negative powers are needed, you may use a lower bound and rescale.
/** Returns 2^x with at most 6e-06 relative error.

approxExp2(x + 20) / 1048576
Polynomial coefficients are chosen to minimize relative error while maintaining continuity and giving exact values at integer values of `x`.
Thanks to Andy Simper for coefficients.
*/
template <typename T>
T exp2_taylor5(T x) {
T xf;
T yi = exp2Floor(x, &xf);

const T a[] = {
1.0,
0.69315169353961,
0.2401595990753,
0.055817908652,
0.008991698010,
0.001879100722,
};
T yf = polyHorner(a, xf);
return yi * yf;
}

/** Deprecated alias of exp2_taylor5() */
template <typename T>
T approxExp2_taylor5(T x) {
// Use bit-shifting for integer part of x.
T y = approxExp2Floor(x, &x);
// 5th order expansion of 2^x around 0.4752 in Horner form.
// The center is chosen so that the endpoints of [0, 1] have equal error, creating no discontinuity at integers.
y *= T(0.9999976457798443) + x * (T(0.6931766804601935) + x * (T(0.2400729486415728) + x * (T(0.05592817518644387) + x * (T(0.008966320633544) + x * T(0.001853512473884202)))));
return y;
return exp2_taylor5(x);
}




Loading…
Cancel
Save