|
|
@@ -7,9 +7,6 @@ namespace dsp { |
|
|
|
|
|
|
|
|
|
|
|
/* |
|
|
|
In this header, function names are divided into two or more parts, separated by "_". |
|
|
|
The functionality is the first part, and the approximation methods are the following parts. |
|
|
|
|
|
|
|
Glossary: |
|
|
|
https://en.wikipedia.org/wiki/Taylor_series |
|
|
|
https://en.wikipedia.org/wiki/Chebyshev_polynomials |
|
|
@@ -20,50 +17,124 @@ https://en.wikipedia.org/wiki/CORDIC |
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
|
|
/** Evaluates the polynomial `a[0] + a[1]*x + ... + a[N-1]*x^(N-1)` at `x`.

Uses naive direct evaluation: each coefficient is multiplied by a running power of `x` and the products are summed in ascending order.
*/
template <typename T, size_t N>
T polyDirect(const T (&a)[N], T x) {
	T sum = 0;
	// Running power of x, starting at x^0.
	T power = 1;
	for (const T& coeff : a) {
		sum += coeff * power;
		power *= x;
	}
	return sum;
}
|
|
|
|
|
|
|
/** Evaluates the polynomial `a[0] + a[1]*x + ... + a[N-1]*x^(N-1)` at `x`.

Uses Horner's method: starts from the highest-order coefficient and repeatedly multiplies by `x` and adds the next lower coefficient.
https://en.wikipedia.org/wiki/Horner%27s_method
*/
template <typename T, size_t N>
T polyHorner(const T (&a)[N], T x) {
	if (N == 0)
		return 0;

	T acc = a[N - 1];
	// Walk the remaining coefficients from a[N-2] down to a[0].
	for (size_t k = N - 1; k > 0; k--) {
		acc = a[k - 1] + acc * x;
	}
	return acc;
}
|
|
|
|
|
|
|
/** Evaluates the polynomial `a[0] + a[1]*x + ... + a[N-1]*x^(N-1)` at `x`.

Uses Estrin's method: adjacent coefficients are combined as `a[2k] + a[2k+1]*x`, and the resulting half-length polynomial is evaluated recursively at `x^2`.
https://en.wikipedia.org/wiki/Estrin%27s_scheme
*/
template <typename T, size_t N>
T polyEstrin(const T (&a)[N], T x) {
	if (N == 0)
		return 0;
	if (N == 1)
		return a[0];

	// Number of coefficients of the reduced polynomial, and how many of them come from a complete (even, odd) pair.
	constexpr size_t HALF = (N + 1) / 2;
	constexpr size_t FULL_PAIRS = N / 2;

	T pairs[HALF];
	for (size_t k = 0; k < FULL_PAIRS; k++) {
		pairs[k] = a[2 * k];
		pairs[k] += a[2 * k + 1] * x;
	}
	// With an odd coefficient count, the last coefficient has no partner and is carried over unchanged.
	if (N % 2 == 1)
		pairs[HALF - 1] = a[N - 1];

	return polyEstrin(pairs, x * x);
}
|
|
|
|
|
|
|
|
|
|
|
/** Returns `2^floor(x)`.

If `xf` is non-null, it is set to the fractional part of `x`.
This is useful in the computation `2^x = 2^floor(x) * 2^frac(x)`.

The result is produced by writing the biased exponent field of a 32-bit float directly, so `floor(x) + 127` must stay within the valid exponent range (roughly `-126 <= x < 128`). Outside that range the returned value is not a power of two.
*/
template <typename T>
T exp2Floor(T x, T* xf);

template <>
inline float exp2Floor(float x, float* xf) {
	// Add the single-precision exponent bias up front.
	x += 127;
	// x should be positive now, so this always truncates towards -inf.
	int32_t xi = x;
	if (xf)
		*xf = x - xi;
	// Write the biased exponent into bits 23..30, leaving the sign and mantissa bits zero.
	// NOTE: reading `yi` after writing `yii` relies on the compiler supporting type punning through unions.
	union {
		float yi;
		int32_t yii;
	};
	yii = xi << 23;
	return yi;
}
|
|
|
|
|
|
|
template <> |
|
|
|
inline float approxExp2Floor(float x, float* xf) { |
|
|
|
int32_t xi = x; |
|
|
|
inline simd::float_4 exp2Floor(simd::float_4 x, simd::float_4* xf) { |
|
|
|
x += 127; |
|
|
|
simd::int32_4 xi = x; |
|
|
|
if (xf) |
|
|
|
*xf = x - xi; |
|
|
|
int32_t y = (xi + 127) << 23; |
|
|
|
return bitCast<float>(y); |
|
|
|
*xf = x - simd::float_4(xi); |
|
|
|
simd::int32_4 yii = xi << 23; |
|
|
|
return simd::float_4::cast(yii); |
|
|
|
} |
|
|
|
|
|
|
|
/** Deprecated alias of exp2Floor() */ |
|
|
|
template <typename T> |
|
|
|
T approxExp2Floor(T x, T* xf) { |
|
|
|
return exp2Floor(x, xf); |
|
|
|
} |
|
|
|
|
|
|
|
/** Returns 2^x with at most 6e-06 relative error.

The integer part of the exponent is handled by exp2Floor(), and the fractional part by a 5th-degree polynomial evaluated with polyHorner().
Polynomial coefficients are chosen to minimize relative error while maintaining continuity and giving exact values at integer values of `x`.
Thanks to Andy Simper for coefficients.
*/
template <typename T>
T exp2_taylor5(T x) {
	// Split into 2^floor(x) and the fractional remainder of x.
	T frac;
	const T intPower = exp2Floor(x, &frac);

	// Coefficients in ascending order of power, approximating 2^frac.
	const T coeffs[] = {
		1.0,
		0.69315169353961,
		0.2401595990753,
		0.055817908652,
		0.008991698010,
		0.001879100722,
	};
	return intPower * polyHorner(coeffs, frac);
}
|
|
|
|
|
|
|
/** Deprecated alias of exp2_taylor5().

Forwards `x` unchanged and returns the result of exp2_taylor5().
*/
template <typename T>
T approxExp2_taylor5(T x) {
	return exp2_taylor5(x);
}
|
|
|
|
|
|
|
|
|
|
|