Audio plugin host https://kx.studio/carla
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

384 lines
9.6KB

  1. // ----------------------------------------------------------------------------
  2. //
  3. // Copyright (C) 2006-2023 Fons Adriaensen <fons@linuxaudio.org>
  4. //
  5. // This program is free software; you can redistribute it and/or modify
  6. // it under the terms of the GNU General Public License as published by
  7. // the Free Software Foundation; either version 3 of the License, or
  8. // (at your option) any later version.
  9. //
  10. // This program is distributed in the hope that it will be useful,
  11. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. // GNU General Public License for more details.
  14. //
  15. // You should have received a copy of the GNU General Public License
  16. // along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. //
  18. // ----------------------------------------------------------------------------
  19. #include <stdlib.h>
  20. #include <stdio.h>
  21. #include <string.h>
  22. #include <math.h>
  23. #include "vresampler.h"
// Decide whether the 4-wide SIMD support is enabled for this build.
// ENABLE_VEC4 is defined only when CARLA_OS_WIN is not defined and the
// compiler advertises either SSE2 math (__SSE2_MATH__) or ARM NEON
// (__ARM_NEON / __ARM_NEON__); the matching intrinsics header is pulled in at
// the same time. When ENABLE_VEC4 is defined, setup() rounds the filter
// half-length up to a multiple of 4 and allocates its buffers with 16-byte
// alignment, and clear() releases them with free() instead of delete[].
#undef ENABLE_VEC4
#ifndef CARLA_OS_WIN
# if defined(__SSE2_MATH__)
#  define ENABLE_VEC4
#  include <xmmintrin.h>
# elif defined(__ARM_NEON) || defined(__ARM_NEON__)
#  define ENABLE_VEC4
#  include <arm_neon.h>
# endif
#endif
  34. VResampler::VResampler (void) noexcept :
  35. _table (0),
  36. _nchan (0),
  37. _buff (0),
  38. _c1 (0),
  39. _c2 (0)
  40. {
  41. reset ();
  42. }
  43. VResampler::~VResampler (void)
  44. {
  45. clear ();
  46. }
  47. bool VResampler::setup (double ratio,
  48. unsigned int nchan,
  49. unsigned int hlen)
  50. {
  51. return setup (ratio, nchan, hlen, 1.0 - 2.6 / hlen);
  52. }
  53. bool VResampler::setup (double ratio,
  54. unsigned int nchan,
  55. unsigned int hlen,
  56. double frel)
  57. {
  58. unsigned int hl, mi, n;
  59. double dp;
  60. Resampler_table *T = 0;
  61. if (!nchan || (hlen < 8) || (hlen > 96) || (64 * ratio < 1) || (ratio > 256))
  62. {
  63. clear ();
  64. return false;
  65. }
  66. dp = NPHASE / ratio;
  67. hl = hlen;
  68. mi = 32;
  69. if (ratio < 1.0)
  70. {
  71. frel *= ratio;
  72. hl = (unsigned int)(ceil (hl / ratio));
  73. mi = (unsigned int)(ceil (mi / ratio));
  74. }
  75. #ifdef ENABLE_VEC4
  76. hl = (hl + 3) & ~3;
  77. #endif
  78. T = Resampler_table::create (frel, hl, NPHASE);
  79. clear ();
  80. if (T)
  81. {
  82. _table = T;
  83. n = nchan * (2 * hl + mi);
  84. #ifdef ENABLE_VEC4
  85. posix_memalign ((void **)(&_buff), 16, n * sizeof (float));
  86. posix_memalign ((void **)(&_c1), 16, hl * sizeof (float));
  87. posix_memalign ((void **)(&_c2), 16, hl * sizeof (float));
  88. #else
  89. _buff = new float [n];
  90. _c1 = new float [hl];
  91. _c2 = new float [hl];
  92. #endif
  93. _nchan = nchan;
  94. _ratio = ratio;
  95. _inmax = mi;
  96. _pstep = dp;
  97. _qstep = dp;
  98. _wstep = 1;
  99. return reset ();
  100. }
  101. else return false;
  102. }
  103. void VResampler::clear (void)
  104. {
  105. Resampler_table::destroy (_table);
  106. #ifdef ENABLE_VEC4
  107. free (_buff);
  108. free (_c1);
  109. free (_c2);
  110. #else
  111. delete[] _buff;
  112. delete[] _c1;
  113. delete[] _c2;
  114. #endif
  115. _buff = 0;
  116. _c1 = 0;
  117. _c2 = 0;
  118. _table = 0;
  119. _nchan = 0;
  120. _inmax = 0;
  121. _pstep = 0;
  122. _qstep = 0;
  123. _wstep = 1;
  124. reset ();
  125. }
  126. void VResampler::set_phase (double p)
  127. {
  128. if (!_table) return;
  129. _phase = (p - floor (p)) * _table->_np;
  130. }
  131. void VResampler::set_rrfilt (double t)
  132. {
  133. if (!_table) return;
  134. _wstep = (t < 1) ? 1 : 1 - exp (-1 / t);
  135. }
  136. void VResampler::set_rratio (double r)
  137. {
  138. if (!_table) return;
  139. if (r > 16.0) r = 16.0;
  140. if (r < 0.95) r = 0.95;
  141. _qstep = _table->_np / (_ratio * r);
  142. }
  143. double VResampler::inpdist (void) const noexcept
  144. {
  145. if (!_table) return 0;
  146. return (int)(_table->_hl + 1 - _nread) - _phase / _table->_np;
  147. }
  148. int VResampler::inpsize (void) const noexcept
  149. {
  150. if (!_table) return 0;
  151. return 2 * _table->_hl;
  152. }
  153. bool VResampler::reset (void) noexcept
  154. {
  155. if (!_table) return false;
  156. inp_count = 0;
  157. out_count = 0;
  158. inp_data = 0;
  159. out_data = 0;
  160. _index = 0;
  161. _nread = 0;
  162. _nzero = 0;
  163. _phase = 0;
  164. if (_table)
  165. {
  166. _nread = 2 * _table->_hl;
  167. return true;
  168. }
  169. return false;
  170. }
  171. bool VResampler::process (void)
  172. {
  173. int nr, np, hl, nz, di, i, n;
  174. unsigned int in, j;
  175. double ph, dp, dd;
  176. float a, b, *p1, *p2, *q1, *q2;
  177. if (!_table) return false;
  178. hl = _table->_hl;
  179. np = _table->_np;
  180. in = _index;
  181. nr = _nread;
  182. nz = _nzero;
  183. ph = _phase;
  184. dp = _pstep;
  185. p1 = _buff + in;
  186. p2 = p1 + 2 * hl - nr;
  187. di = 2 * hl + _inmax;
  188. while (out_count)
  189. {
  190. while (nr && inp_count)
  191. {
  192. if (inp_data)
  193. {
  194. for (j = 0; j < _nchan; j++) p2 [j * di] = inp_data [j];
  195. inp_data += _nchan;
  196. nz = 0;
  197. }
  198. else
  199. {
  200. for (j = 0; j < _nchan; j++) p2 [j * di] = 0;
  201. if (nz < 2 * hl) nz++;
  202. }
  203. p2++;
  204. nr--;
  205. inp_count--;
  206. }
  207. if (nr) break;
  208. if (out_data)
  209. {
  210. if (nz < 2 * hl)
  211. {
  212. n = (unsigned int) ph;
  213. b = (float)(ph - n);
  214. a = 1.0f - b;
  215. q1 = _table->_ctab + hl * n;
  216. q2 = _table->_ctab + hl * (np - n);
  217. #if defined(__SSE2_MATH__)
  218. __m128 C1, C2, Q1, Q2, S;
  219. C1 = _mm_load1_ps (&a);
  220. C2 = _mm_load1_ps (&b);
  221. for (i = 0; i < hl; i += 4)
  222. {
  223. Q1 = _mm_load_ps (q1 + i);
  224. Q2 = _mm_load_ps (q1 + i + hl);
  225. S = _mm_add_ps (_mm_mul_ps (Q1, C1), _mm_mul_ps (Q2, C2));
  226. _mm_store_ps (_c1 + i, S);
  227. Q1 = _mm_load_ps (q2 + i);
  228. Q2 = _mm_load_ps (q2 + i - hl);
  229. S = _mm_add_ps (_mm_mul_ps (Q1, C1), _mm_mul_ps (Q2, C2));
  230. _mm_store_ps (_c2 + i, S);
  231. }
  232. for (j = 0; j < _nchan; j++)
  233. {
  234. q1 = p1 + j * di;
  235. q2 = p2 + j * di;
  236. S = _mm_setzero_ps ();
  237. for (i = 0; i < hl; i += 4)
  238. {
  239. C1 = _mm_load_ps (_c1 + i);
  240. Q1 = _mm_loadu_ps (q1);
  241. q2 -= 4;
  242. S = _mm_add_ps (S, _mm_mul_ps (C1, Q1));
  243. C2 = _mm_loadr_ps (_c2 + i);
  244. Q2 = _mm_loadu_ps (q2);
  245. q1 += 4;
  246. S = _mm_add_ps (S, _mm_mul_ps (C2, Q2));
  247. }
  248. *out_data++ = S [0] + S [1] + S [2] + S [3];
  249. }
  250. #elif defined(__ARM_NEON) || defined(__ARM_NEON__)
  251. // ARM64 version by Nicolas Belin <nbelin@baylibre.com>
  252. float32x4_t *C1 = (float32x4_t *)_c1;
  253. float32x4_t *C2 = (float32x4_t *)_c2;
  254. float32x4_t S, T;
  255. for (i = 0; i < (hl>>2); i++)
  256. {
  257. T = vmulq_n_f32 (vld1q_f32 (q1 + hl), b);
  258. C1 [i] = vmlaq_n_f32 (T, vld1q_f32 (q1), a);
  259. T = vmulq_n_f32 (vld1q_f32 (q2 - hl), b);
  260. C2 [i] = vmlaq_n_f32 (T, vld1q_f32 (q2), a);
  261. q2 += 4;
  262. q1 += 4;
  263. }
  264. for (j = 0; j < _nchan; j++)
  265. {
  266. q1 = p1 + j * di;
  267. q2 = p2 + j * di - 4;
  268. T = vrev64q_f32 (vld1q_f32 (q2));
  269. S = vmulq_f32 (vextq_f32 (T, T, 2), C2 [0]);
  270. S = vmlaq_f32 (S, vld1q_f32 (q1), C1 [0]);
  271. for (i = 1; i < (hl>>2); i++)
  272. {
  273. q2 -= 4;
  274. q1 += 4;
  275. T = vrev64q_f32 (vld1q_f32 (q2));
  276. S = vmlaq_f32 (S, vextq_f32 (T, T, 2), C2 [i]);
  277. S = vmlaq_f32 (S, vld1q_f32 (q1), C1 [i]);
  278. }
  279. *out_data++ = vaddvq_f32 (S);
  280. }
  281. #else
  282. float s;
  283. for (i = 0; i < hl; i++)
  284. {
  285. _c1 [i] = a * q1 [i] + b * q1 [i + hl];
  286. _c2 [i] = a * q2 [i] + b * q2 [i - hl];
  287. }
  288. for (j = 0; j < _nchan; j++)
  289. {
  290. q1 = p1 + j * di;
  291. q2 = p2 + j * di;
  292. s = 1e-30f;
  293. for (i = 0; i < hl; i++)
  294. {
  295. q2--;
  296. s += *q1 * _c1 [i] + *q2 * _c2 [i];
  297. q1++;
  298. }
  299. *out_data++ = s - 1e-30f;
  300. }
  301. #endif
  302. }
  303. else
  304. {
  305. for (j = 0; j < _nchan; j++) *out_data++ = 0;
  306. }
  307. }
  308. out_count--;
  309. dd = _qstep - dp;
  310. if (fabs (dd) < 1e-20) dp = _qstep;
  311. else dp += _wstep * dd;
  312. ph += dp;
  313. if (ph >= np)
  314. {
  315. nr = (unsigned int) floor (ph / np);
  316. ph -= nr * np;;
  317. in += nr;
  318. p1 += nr;
  319. if (in >= _inmax)
  320. {
  321. n = 2 * hl - nr;
  322. p2 = _buff;
  323. for (j = 0; j < _nchan; j++)
  324. {
  325. memmove (p2 + j * di, p1 + j * di, n * sizeof (float));
  326. }
  327. in = 0;
  328. p1 = _buff;
  329. p2 = p1 + n;
  330. }
  331. }
  332. }
  333. _index = in;
  334. _nread = nr;
  335. _phase = ph;
  336. _pstep = dp;
  337. _nzero = nz;
  338. return true;
  339. }