|
- // ----------------------------------------------------------------------------
- //
- // Copyright (C) 2006-2023 Fons Adriaensen <fons@linuxaudio.org>
- //
- // This program is free software; you can redistribute it and/or modify
- // it under the terms of the GNU General Public License as published by
- // the Free Software Foundation; either version 3 of the License, or
- // (at your option) any later version.
- //
- // This program is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU General Public License for more details.
- //
- // You should have received a copy of the GNU General Public License
- // along with this program. If not, see <http://www.gnu.org/licenses/>.
- //
- // ----------------------------------------------------------------------------
-
-
- #include <stdlib.h>
- #include <stdio.h>
- #include <string.h>
- #include <math.h>
-
- #include "vresampler.h"
-
- #undef ENABLE_VEC4
- #ifndef CARLA_OS_WIN
- # if defined(__SSE2_MATH__)
- # define ENABLE_VEC4
- # include <xmmintrin.h>
- # elif defined(__ARM_NEON) || defined(__ARM_NEON__)
- # define ENABLE_VEC4
- # include <arm_neon.h>
- # endif
- #endif
-
-
- VResampler::VResampler (void) noexcept :
- _table (0),
- _nchan (0),
- _buff (0),
- _c1 (0),
- _c2 (0)
- {
- reset ();
- }
-
-
- VResampler::~VResampler (void)
- {
- clear ();
- }
-
-
- bool VResampler::setup (double ratio,
- unsigned int nchan,
- unsigned int hlen)
- {
- return setup (ratio, nchan, hlen, 1.0 - 2.6 / hlen);
- }
-
-
- bool VResampler::setup (double ratio,
- unsigned int nchan,
- unsigned int hlen,
- double frel)
- {
- unsigned int hl, mi, n;
- double dp;
- Resampler_table *T = 0;
-
- if (!nchan || (hlen < 8) || (hlen > 96) || (64 * ratio < 1) || (ratio > 256))
- {
- clear ();
- return false;
- }
-
- dp = NPHASE / ratio;
- hl = hlen;
- mi = 32;
- if (ratio < 1.0)
- {
- frel *= ratio;
- hl = (unsigned int)(ceil (hl / ratio));
- mi = (unsigned int)(ceil (mi / ratio));
- }
- #ifdef ENABLE_VEC4
- hl = (hl + 3) & ~3;
- #endif
- T = Resampler_table::create (frel, hl, NPHASE);
- clear ();
- if (T)
- {
- _table = T;
- n = nchan * (2 * hl + mi);
- #ifdef ENABLE_VEC4
- posix_memalign ((void **)(&_buff), 16, n * sizeof (float));
- posix_memalign ((void **)(&_c1), 16, hl * sizeof (float));
- posix_memalign ((void **)(&_c2), 16, hl * sizeof (float));
- #else
- _buff = new float [n];
- _c1 = new float [hl];
- _c2 = new float [hl];
- #endif
- _nchan = nchan;
- _ratio = ratio;
- _inmax = mi;
- _pstep = dp;
- _qstep = dp;
- _wstep = 1;
- return reset ();
- }
- else return false;
- }
-
-
- void VResampler::clear (void)
- {
- Resampler_table::destroy (_table);
- #ifdef ENABLE_VEC4
- free (_buff);
- free (_c1);
- free (_c2);
- #else
- delete[] _buff;
- delete[] _c1;
- delete[] _c2;
- #endif
- _buff = 0;
- _c1 = 0;
- _c2 = 0;
- _table = 0;
- _nchan = 0;
- _inmax = 0;
- _pstep = 0;
- _qstep = 0;
- _wstep = 1;
- reset ();
- }
-
-
- void VResampler::set_phase (double p)
- {
- if (!_table) return;
- _phase = (p - floor (p)) * _table->_np;
- }
-
-
- void VResampler::set_rrfilt (double t)
- {
- if (!_table) return;
- _wstep = (t < 1) ? 1 : 1 - exp (-1 / t);
- }
-
-
- void VResampler::set_rratio (double r)
- {
- if (!_table) return;
- if (r > 16.0) r = 16.0;
- if (r < 0.95) r = 0.95;
- _qstep = _table->_np / (_ratio * r);
- }
-
-
- double VResampler::inpdist (void) const noexcept
- {
- if (!_table) return 0;
- return (int)(_table->_hl + 1 - _nread) - _phase / _table->_np;
- }
-
-
- int VResampler::inpsize (void) const noexcept
- {
- if (!_table) return 0;
- return 2 * _table->_hl;
- }
-
-
- bool VResampler::reset (void) noexcept
- {
- if (!_table) return false;
-
- inp_count = 0;
- out_count = 0;
- inp_data = 0;
- out_data = 0;
- _index = 0;
- _nread = 0;
- _nzero = 0;
- _phase = 0;
- if (_table)
- {
- _nread = 2 * _table->_hl;
- return true;
- }
- return false;
- }
-
-
- bool VResampler::process (void)
- {
- int nr, np, hl, nz, di, i, n;
- unsigned int in, j;
- double ph, dp, dd;
- float a, b, *p1, *p2, *q1, *q2;
-
- if (!_table) return false;
-
- hl = _table->_hl;
- np = _table->_np;
- in = _index;
- nr = _nread;
- nz = _nzero;
- ph = _phase;
- dp = _pstep;
-
- p1 = _buff + in;
- p2 = p1 + 2 * hl - nr;
- di = 2 * hl + _inmax;
-
- while (out_count)
- {
- while (nr && inp_count)
- {
- if (inp_data)
- {
- for (j = 0; j < _nchan; j++) p2 [j * di] = inp_data [j];
- inp_data += _nchan;
- nz = 0;
- }
- else
- {
- for (j = 0; j < _nchan; j++) p2 [j * di] = 0;
- if (nz < 2 * hl) nz++;
- }
- p2++;
- nr--;
- inp_count--;
- }
- if (nr) break;
-
- if (out_data)
- {
- if (nz < 2 * hl)
- {
- n = (unsigned int) ph;
- b = (float)(ph - n);
- a = 1.0f - b;
- q1 = _table->_ctab + hl * n;
- q2 = _table->_ctab + hl * (np - n);
-
- #if defined(__SSE2_MATH__) && !defined(CARLA_OS_WIN)
- __m128 C1, C2, Q1, Q2, S;
- C1 = _mm_load1_ps (&a);
- C2 = _mm_load1_ps (&b);
- for (i = 0; i < hl; i += 4)
- {
- Q1 = _mm_load_ps (q1 + i);
- Q2 = _mm_load_ps (q1 + i + hl);
- S = _mm_add_ps (_mm_mul_ps (Q1, C1), _mm_mul_ps (Q2, C2));
- _mm_store_ps (_c1 + i, S);
- Q1 = _mm_load_ps (q2 + i);
- Q2 = _mm_load_ps (q2 + i - hl);
- S = _mm_add_ps (_mm_mul_ps (Q1, C1), _mm_mul_ps (Q2, C2));
- _mm_store_ps (_c2 + i, S);
- }
- for (j = 0; j < _nchan; j++)
- {
- q1 = p1 + j * di;
- q2 = p2 + j * di;
- S = _mm_setzero_ps ();
- for (i = 0; i < hl; i += 4)
- {
- C1 = _mm_load_ps (_c1 + i);
- Q1 = _mm_loadu_ps (q1);
- q2 -= 4;
- S = _mm_add_ps (S, _mm_mul_ps (C1, Q1));
- C2 = _mm_loadr_ps (_c2 + i);
- Q2 = _mm_loadu_ps (q2);
- q1 += 4;
- S = _mm_add_ps (S, _mm_mul_ps (C2, Q2));
- }
- *out_data++ = S [0] + S [1] + S [2] + S [3];
- }
-
- #elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && !defined(CARLA_OS_WIN)
- // ARM64 version by Nicolas Belin <nbelin@baylibre.com>
- float32x4_t *C1 = (float32x4_t *)_c1;
- float32x4_t *C2 = (float32x4_t *)_c2;
- float32x4_t S, T;
- for (i = 0; i < (hl>>2); i++)
- {
- T = vmulq_n_f32 (vld1q_f32 (q1 + hl), b);
- C1 [i] = vmlaq_n_f32 (T, vld1q_f32 (q1), a);
- T = vmulq_n_f32 (vld1q_f32 (q2 - hl), b);
- C2 [i] = vmlaq_n_f32 (T, vld1q_f32 (q2), a);
- q2 += 4;
- q1 += 4;
- }
- for (j = 0; j < _nchan; j++)
- {
- q1 = p1 + j * di;
- q2 = p2 + j * di - 4;
- T = vrev64q_f32 (vld1q_f32 (q2));
- S = vmulq_f32 (vextq_f32 (T, T, 2), C2 [0]);
- S = vmlaq_f32 (S, vld1q_f32 (q1), C1 [0]);
- for (i = 1; i < (hl>>2); i++)
- {
- q2 -= 4;
- q1 += 4;
- T = vrev64q_f32 (vld1q_f32 (q2));
- S = vmlaq_f32 (S, vextq_f32 (T, T, 2), C2 [i]);
- S = vmlaq_f32 (S, vld1q_f32 (q1), C1 [i]);
- }
- *out_data++ = S [0] + S [1] + S [2] + S [3];
- }
-
- #else
- float s;
- for (i = 0; i < hl; i++)
- {
- _c1 [i] = a * q1 [i] + b * q1 [i + hl];
- _c2 [i] = a * q2 [i] + b * q2 [i - hl];
- }
- for (j = 0; j < _nchan; j++)
- {
- q1 = p1 + j * di;
- q2 = p2 + j * di;
- s = 1e-30f;
- for (i = 0; i < hl; i++)
- {
- q2--;
- s += *q1 * _c1 [i] + *q2 * _c2 [i];
- q1++;
- }
- *out_data++ = s - 1e-30f;
- }
- #endif
- }
- else
- {
- for (j = 0; j < _nchan; j++) *out_data++ = 0;
- }
- }
- out_count--;
-
- dd = _qstep - dp;
- if (fabs (dd) < 1e-20) dp = _qstep;
- else dp += _wstep * dd;
- ph += dp;
- if (ph >= np)
- {
- nr = (unsigned int) floor (ph / np);
- ph -= nr * np;;
- in += nr;
- p1 += nr;
-
- if (in >= _inmax)
- {
- n = 2 * hl - nr;
- p2 = _buff;
- for (j = 0; j < _nchan; j++)
- {
- memmove (p2 + j * di, p1 + j * di, n * sizeof (float));
- }
- in = 0;
- p1 = _buff;
- p2 = p1 + n;
- }
- }
- }
-
- _index = in;
- _nread = nr;
- _phase = ph;
- _pstep = dp;
- _nzero = nz;
-
- return true;
- }
|