You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

335 lines
12KB

  1. /*
  2. Implementation of the Lambert W function
  3. Copyright (C) 2011 Darko Veberic, darko.veberic@ijs.si
  4. This program is free software: you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation, either version 3 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. */
  15. #include <cmath>
  16. #include "Horner.h"
  17. namespace dsp {
  18. // fork macro to keep the tree balanced
  19. #define Y2(d1, c12, d2) \
  20. ((c12) ? (d1) : (d2))
  21. #define Y3(d1, c12, d2, c23, d3) \
  22. Y2(Y2(d1, c12, d2), c23, d3)
  23. #define Y4(d1, c12, d2, c23, d3, c34, d4) \
  24. Y3(d1, c12, d2, c23, Y2(d3, c34, d4))
  25. #define Y5(d1, c12, d2, c23, d3, c34, d4, c45, d5) \
  26. Y4(Y2(d1, c12, d2), c23, d3, c34, d4, c45, d5)
  27. #define Y6(d1, c12, d2, c23, d3, c34, d4, c45, d5, c56, d6) \
  28. Y5(d1, c12, d2, c23, d3, c34, d4, c45, Y2(d5, c56, d6))
  29. #define Y7(d1, c12, d2, c23, d3, c34, d4, c45, d5, c56, d6, c67, d7) \
  30. Y6(d1, c12, d2, c23, Y2(d3, c34, d4), c45, d5, c56, d6, c67, d7)
  31. class BranchPoint {
  32. };
  33. HORNER_COEFF(BranchPoint, 0, -1);
  34. HORNER_COEFF(BranchPoint, 1, 1);
  35. HORNER_COEFF(BranchPoint, 2, -0.333333333333333333e0);
  36. HORNER_COEFF(BranchPoint, 3, 0.152777777777777777e0);
  37. HORNER_COEFF(BranchPoint, 4, -0.79629629629629630e-1);
  38. HORNER_COEFF(BranchPoint, 5, 0.44502314814814814e-1);
  39. HORNER_COEFF(BranchPoint, 6, -0.25984714873603760e-1);
  40. HORNER_COEFF(BranchPoint, 7, 0.15635632532333920e-1);
  41. HORNER_COEFF(BranchPoint, 8, -0.96168920242994320e-2);
  42. HORNER_COEFF(BranchPoint, 9, 0.60145432529561180e-2);
  43. HORNER_COEFF(BranchPoint, 10, -0.38112980348919993e-2);
  44. HORNER_COEFF(BranchPoint, 11, 0.24408779911439826e-2);
  45. HORNER_COEFF(BranchPoint, 12, -0.15769303446867841e-2);
  46. HORNER_COEFF(BranchPoint, 13, 0.10262633205076071e-2);
  47. HORNER_COEFF(BranchPoint, 14, -0.67206163115613620e-3);
  48. HORNER_COEFF(BranchPoint, 15, 0.44247306181462090e-3);
  49. HORNER_COEFF(BranchPoint, 16, -0.29267722472962746e-3);
  50. HORNER_COEFF(BranchPoint, 17, 0.19438727605453930e-3);
  51. HORNER_COEFF(BranchPoint, 18, -0.12957426685274883e-3);
  52. HORNER_COEFF(BranchPoint, 19, 0.86650358052081260e-4);
  53. template<unsigned int order>
  54. class AsymptoticPolynomialB {
  55. };
  56. //HORNER_COEFF(AsymptoticPolynomialB<0>, 0, 0);
  57. //HORNER_COEFF(AsymptoticPolynomialB<0>, 1, -1);
  58. //HORNER_COEFF(AsymptoticPolynomialB<1>, 0, 0);
  59. //HORNER_COEFF(AsymptoticPolynomialB<1>, 1, 1);
  60. HORNER_COEFF(AsymptoticPolynomialB<2>, 0, 0);
  61. HORNER_COEFF(AsymptoticPolynomialB<2>, 1, -1);
  62. HORNER_COEFF(AsymptoticPolynomialB<2>, 2, 1./2);
  63. HORNER_COEFF(AsymptoticPolynomialB<3>, 0, 0);
  64. HORNER_COEFF(AsymptoticPolynomialB<3>, 1, 1);
  65. HORNER_COEFF(AsymptoticPolynomialB<3>, 2, -3./2);
  66. HORNER_COEFF(AsymptoticPolynomialB<3>, 3, 1./3);
  67. HORNER_COEFF(AsymptoticPolynomialB<4>, 0, 0);
  68. HORNER_COEFF(AsymptoticPolynomialB<4>, 1, -1);
  69. HORNER_COEFF(AsymptoticPolynomialB<4>, 2, 3);
  70. HORNER_COEFF(AsymptoticPolynomialB<4>, 3, -11./6);
  71. HORNER_COEFF(AsymptoticPolynomialB<4>, 4, 1./4);
  72. HORNER_COEFF(AsymptoticPolynomialB<5>, 0, 0);
  73. HORNER_COEFF(AsymptoticPolynomialB<5>, 1, 1);
  74. HORNER_COEFF(AsymptoticPolynomialB<5>, 2, -5);
  75. HORNER_COEFF(AsymptoticPolynomialB<5>, 3, 35./6);
  76. HORNER_COEFF(AsymptoticPolynomialB<5>, 4, -25./12);
  77. HORNER_COEFF(AsymptoticPolynomialB<5>, 5, 1./5);
  78. class AsymptoticPolynomialA {
  79. };
  80. //HORNER_COEFF2(AsymptoticPolynomialA, 0, (Horner<Float, AsymptoticPolynomialB<0>, 1>::Eval(y)));
  81. HORNER_COEFF2(AsymptoticPolynomialA, 0, -y);
  82. //HORNER_COEFF2(AsymptoticPolynomialA, 1, (Horner<Float, AsymptoticPolynomialB<1>, 1>::Eval(y)));
  83. HORNER_COEFF2(AsymptoticPolynomialA, 1, y);
  84. HORNER_COEFF2(AsymptoticPolynomialA, 2, (Horner<Float, AsymptoticPolynomialB<2>, 2>::Eval(y)));
  85. HORNER_COEFF2(AsymptoticPolynomialA, 3, (Horner<Float, AsymptoticPolynomialB<3>, 3>::Eval(y)));
  86. HORNER_COEFF2(AsymptoticPolynomialA, 4, (Horner<Float, AsymptoticPolynomialB<4>, 4>::Eval(y)));
  87. HORNER_COEFF2(AsymptoticPolynomialA, 5, (Horner<Float, AsymptoticPolynomialB<5>, 5>::Eval(y)));
  88. template<typename Float, int order>
  89. inline
  90. Float
  91. AsymptoticExpansionImpl(const Float a, const Float b) {
  92. return a + Horner<Float, AsymptoticPolynomialA, order>::Eval(1 / a, b);
  93. }
  94. template<typename Float, int branch, int order>
  95. struct LogRecursionImpl {
  96. enum {
  97. eSign = 2 * branch + 1
  98. };
  99. static Float Step(const Float logsx) { return logsx - log(eSign * LogRecursionImpl<Float, branch, order - 1>::Step(logsx)); }
  100. };
  101. template<typename Float, int branch>
  102. struct LogRecursionImpl<Float, branch, 0> {
  103. static Float Step(const Float logsx) { return logsx; }
  104. };
  105. template<typename Float, int branch>
  106. class Branch {
  107. public:
  108. template<int order>
  109. static Float BranchPointExpansion(const Float x) {
  110. return Horner<Float, BranchPoint, order>::Eval(eSign * sqrt(2 * (Float(M_E) * x + 1)));
  111. }
  112. // Asymptotic expansion: Corless et al. 1996, de Bruijn (1981)
  113. template<int order>
  114. static
  115. Float
  116. AsymptoticExpansion(const Float x) {
  117. const Float logsx = log(eSign * x);
  118. const Float logslogsx = log(eSign * logsx);
  119. return AsymptoticExpansionImpl<Float, order>(logsx, logslogsx);
  120. }
  121. // Logarithmic recursion
  122. template<int order>
  123. static Float LogRecursion(const Float x) { return LogRecursionImpl<Float, branch, order>::Step(log(eSign * x)); }
  124. private:
  125. enum {
  126. eSign = 2 * branch + 1
  127. };
  128. };
  129. // iterations
  130. template<typename Float>
  131. inline
  132. Float
  133. HalleyStep(const Float x, const Float w) {
  134. const Float ew = exp(w);
  135. const Float wew = w * ew;
  136. const Float wewx = wew - x;
  137. const Float w1 = w + 1;
  138. return w - wewx / (ew * w1 - (w + 2) * wewx/(2*w1));
  139. }
  140. template<typename Float>
  141. inline
  142. Float
  143. FritschStep(const Float x, const Float w) {
  144. const Float z = log(x/w) - w;
  145. const Float w1 = w + 1;
  146. const Float q = 2 * w1 * (w1 + Float(2./3)*z);
  147. const Float eps = z / w1 * (q - z) / (q - 2*z);
  148. return w * (1 + eps);
  149. }
  150. template<typename Float>
  151. inline
  152. Float
  153. SchroederStep(const Float x, const Float w) {
  154. const Float y = x * exp(-w);
  155. const Float f0 = w - y;
  156. const Float f1 = 1 + y;
  157. const Float f00 = f0*f0;
  158. const Float f11 = f1*f1;
  159. const Float f0y = f0*y;
  160. return w - 4*f0*(6*f1*(f11 + f0y) + f00*y) / (f11*(24*f11 + 36*f0y) + f00*(6*y*y + 8*f1*y + f0y));
  161. }
  162. template<
  163. typename Float,
  164. double IterationStep(const Float x, const Float w)
  165. >
  166. struct Iterator {
  167. template<int n, class = void>
  168. struct Depth {
  169. static Float Recurse(const Float x, Float w) { return Depth<n - 1>::Recurse(x, IterationStep(x, w)); }
  170. };
  171. // stop condition
  172. template<class T>
  173. struct Depth<1, T> {
  174. static Float Recurse(const Float x, const Float w) { return IterationStep(x, w); }
  175. };
  176. // identity
  177. template<class T>
  178. struct Depth<0, T> {
  179. static Float Recurse(const Float x, const Float w) { return w; }
  180. };
  181. };
  182. // Rational approximations
  183. template<typename Float, int branch, int n>
  184. struct Pade {
  185. static inline Float Approximation(const Float x);
  186. };
  187. template<typename Float>
  188. struct Pade<Float, 0, 1> {
  189. static inline Float Approximation(const Float x) {
  190. return x * HORNER4(Float, x, 0.07066247420543414, 2.4326814530577687, 6.39672835731526, 4.663365025836821, 0.99999908757381) /
  191. HORNER4(Float, x, 1.2906660139511692, 7.164571775410987, 10.559985088953114, 5.66336307375819, 1);
  192. }
  193. };
  194. template<typename Float>
  195. struct Pade<Float, 0, 2> {
  196. static inline Float Approximation(const Float x) {
  197. const Float y = log(Float(0.5) * x) - 2;
  198. return 2 + y * HORNER3(Float, y, 0.00006979269679670452, 0.017110368846615806, 0.19338607770900237, 0.6666648896499793) /
  199. HORNER2(Float, y, 0.0188060684652668, 0.23451269827133317, 1);
  200. }
  201. };
  202. template<typename Float>
  203. struct Pade<Float, -1, 4> {
  204. static inline Float Approximation(const Float x) {
  205. return HORNER4(Float, x, -2793.4565508841197, -1987.3632221106518, 385.7992853617571, 277.2362778379572, -7.840776922133643) /
  206. HORNER4(Float, x, 280.6156995997829, 941.9414019982657, 190.64429338894644, -63.93540494358966, 1);
  207. }
  208. };
  209. template<typename Float>
  210. struct Pade<Float, -1, 5> {
  211. static inline Float Approximation(const Float x) {
  212. const Float y = log(-x);
  213. return -exp(
  214. HORNER3(Float, y, 0.16415668298255184, -3.334873920301941, 2.4831415860003747, 4.173424474574879) /
  215. HORNER3(Float, y, 0.031239411487374164, -1.2961659693400076, 4.517178492772906, 1)
  216. );
  217. }
  218. };
  219. template<typename Float>
  220. struct Pade<Float, -1, 6> {
  221. static inline Float Approximation(const Float x) {
  222. const Float y = log(-x);
  223. return -exp(
  224. HORNER4(Float, y, 0.000026987243254533254, -0.007692106448267341, 0.28793461719300206, -1.5267058884647018,
  225. -0.5370669268991288) /
  226. HORNER4(Float, y, 3.6006502104930343e-6, -0.0015552463555591487, 0.08801194682489769, -0.8973922357575583, 1)
  227. );
  228. }
  229. };
  230. template<typename Float>
  231. struct Pade<Float, -1, 7> {
  232. static inline Float Approximation(const Float x) {
  233. return -1 - sqrt(
  234. HORNER4(Float, x, 988.0070769375508, 1619.8111957356814, 989.2017745708083, 266.9332506485452, 26.875022558546036) /
  235. HORNER4(Float, x, -205.50469464210596, -270.0440832897079, -109.554245632316, -11.275355431307334, 1)
  236. );
  237. }
  238. };
  239. template<int branch>
  240. double LambertW(const double x);
  241. template<>
  242. double
  243. LambertW<0>(const double x) {
  244. typedef double d;
  245. return Y5(
  246. (Branch<d, 0>::BranchPointExpansion<8>(x)),
  247. x < -0.367679,
  248. (Iterator<d, HalleyStep<d> >::Depth<1>::Recurse(x, Branch<d, 0>::BranchPointExpansion<10>(x))),
  249. x < -0.311,
  250. (Iterator<d, HalleyStep<d> >::Depth<1>::Recurse(x, Pade<d, 0, 1>::Approximation(x))),
  251. x < 1.38,
  252. (Iterator<d, HalleyStep<d> >::Depth<1>::Recurse(x, Pade<d, 0, 2>::Approximation(x))),
  253. x < 236,
  254. (Iterator<d, HalleyStep<d> >::Depth<1>::Recurse(x, Branch<d, 0>::AsymptoticExpansion<6-1>(x)))
  255. );
  256. }
  257. template<>
  258. double
  259. LambertW<-1>(const double x) {
  260. typedef double d;
  261. return Y7(
  262. (Branch<d, -1>::BranchPointExpansion<8>(x)),
  263. x < -0.367579,
  264. (Iterator<d, HalleyStep<d> >::Depth<1>::Recurse(x, Branch<d, -1>::BranchPointExpansion<4>(x))),
  265. x < -0.366079,
  266. (Iterator<d, HalleyStep<d> >::Depth<1>::Recurse(x, Pade<d, -1, 7>::Approximation(x))),
  267. x < -0.289379,
  268. (Iterator<d, HalleyStep<d> >::Depth<1>::Recurse(x, Pade<d, -1, 4>::Approximation(x))),
  269. x < -0.0509,
  270. (Iterator<d, HalleyStep<d> >::Depth<1>::Recurse(x, Pade<d, -1, 5>::Approximation(x))),
  271. x < -0.000131826,
  272. (Iterator<d, HalleyStep<d> >::Depth<1>::Recurse(x, Pade<d, -1, 6>::Approximation(x))),
  273. x < -6.30957e-31,
  274. (Iterator<d, HalleyStep<d> >::Depth<1>::Recurse(x, Branch<d, -1>::LogRecursion<3>(x)))
  275. );
  276. }
  277. // instantiations
  278. template double LambertW<0>(const double x);
  279. template double LambertW<-1>(const double x);
  280. }