You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

72 lines
2.0KB

  1. #pragma once
  2. #include <dsp/common.hpp>
  3. namespace rack {
  4. namespace dsp {
  5. /*
  6. In this header, function names are divided into two or more parts, separated by "_".
  7. The functionality is the first part, and the approximation methods are the following parts.
  8. Glossary:
  9. https://en.wikipedia.org/wiki/Taylor_series
  10. https://en.wikipedia.org/wiki/Chebyshev_polynomials
  11. https://en.wikipedia.org/wiki/Pad%C3%A9_approximant
  12. https://en.wikipedia.org/wiki/Horner%27s_method
  13. https://en.wikipedia.org/wiki/Estrin%27s_scheme
  14. https://en.wikipedia.org/wiki/CORDIC
  15. */
  16. /** Returns 2^floor(x), assuming that x >= 0.
  17. If `xf` is non-NULL, it is set to the fractional part of x.
  18. */
  19. template <typename T>
  20. T approxExp2Floor(T x, T* xf);
  21. template <>
  22. inline simd::float_4 approxExp2Floor(simd::float_4 x, simd::float_4* xf) {
  23. simd::int32_4 xi = x;
  24. if (xf)
  25. *xf = x - simd::float_4(xi);
  26. // Set float exponent directly
  27. // https://stackoverflow.com/a/57454528/272642
  28. simd::int32_4 y = (xi + 127) << 23;
  29. return simd::float_4::cast(y);
  30. }
  31. template <>
  32. inline float approxExp2Floor(float x, float* xf) {
  33. int32_t xi = x;
  34. if (xf)
  35. *xf = x - xi;
  36. int32_t y = (xi + 127) << 23;
  37. return bitCast<float>(y);
  38. }
  39. /** Returns 2^x, assuming that x >= 0.
  40. Maximum 0.00024% error.
  41. For float, roughly 3x faster than `std::pow(2.f, x)`.
  42. For float_4, roughly 2x faster than `simd::pow(2.f, x)`.
  43. If negative powers are needed, you may use a lower bound and rescale.
  44. approxExp2(x + 20) / 1048576
  45. */
  46. template <typename T>
  47. T approxExp2_taylor5(T x) {
  48. // Use bit-shifting for integer part of x.
  49. T y = approxExp2Floor(x, &x);
  50. // 5th order expansion of 2^x around 0.4752 in Horner form.
  51. // The center is chosen so that the endpoints of [0, 1] have equal error, creating no discontinuity at integers.
  52. y *= T(0.9999976457798443) + x * (T(0.6931766804601935) + x * (T(0.2400729486415728) + x * (T(0.05592817518644387) + x * (T(0.008966320633544) + x * T(0.001853512473884202)))));
  53. return y;
  54. }
  55. } // namespace dsp
  56. } // namespace rack