From 67cc40dcfdc92e5ba861b89d0bd70e710827b31e Mon Sep 17 00:00:00 2001 From: falkTX Date: Tue, 29 Aug 2023 18:42:57 +0200 Subject: [PATCH] Update internal zita-resampler Signed-off-by: falkTX --- source/modules/zita-resampler/cresampler.cc | 118 +++--- .../modules/zita-resampler/resampler-table.cc | 25 +- .../modules/zita-resampler/resampler-table.h | 2 +- source/modules/zita-resampler/resampler.cc | 281 +++++++++----- source/modules/zita-resampler/resampler.h | 25 +- source/modules/zita-resampler/vresampler.cc | 361 ++++++++++++------ source/modules/zita-resampler/vresampler.h | 25 +- 7 files changed, 522 insertions(+), 315 deletions(-) diff --git a/source/modules/zita-resampler/cresampler.cc b/source/modules/zita-resampler/cresampler.cc index ea27e1f7d..6fa8d092f 100644 --- a/source/modules/zita-resampler/cresampler.cc +++ b/source/modules/zita-resampler/cresampler.cc @@ -116,70 +116,70 @@ int CResampler::process (void) while (out_count) { - if (nr) - { - if (inp_count == 0) break; + if (nr) + { + if (inp_count == 0) break; n = (4 - nr) * _nchan; - if (inp_data) - { + if (inp_data) + { for (c = 0; c < _nchan; c++) pb [n + c] = inp_data [c]; - inp_data += _nchan; - nz = 0; - } - else - { + inp_data += _nchan; + nz = 0; + } + else + { for (c = 0; c < _nchan; c++) pb [n + c] = 0; - if (nz < 4) nz++; - } - nr--; - inp_count--; - } - else - { - n = _nchan; - if (out_data) - { - if (nz < 4) - { - a = ph; - b = 1 - a; - d = a * b / 2; - m0 = -d * b; - m1 = b + (3 * b - 1) * d; - m2 = a + (3 * a - 1) * d; - m3 = -d * a; - for (c = 0; c < n; c++) - { - *out_data++ = m0 * pb [0] + if (nz < 4) nz++; + } + nr--; + inp_count--; + } + else + { + n = _nchan; + if (out_data) + { + if (nz < 4) + { + a = ph; + b = 1 - a; + d = a * b / 2; + m0 = -d * b; + m1 = b + (3 * b - 1) * d; + m2 = a + (3 * a - 1) * d; + m3 = -d * a; + for (c = 0; c < n; c++) + { + *out_data++ = m0 * pb [0] + m1 * pb [n] + m2 * pb [2 * n] - + m3 * pb [3 * n]; - pb++; - } - pb -= n; - } - else - { - for (c = 
0; c < n; c++) *out_data++ = 0; - } - } - out_count--; - - ph += _pstep; - if (ph >= 1.0) - { - nr = (unsigned int) floor (ph); - ph -= nr; - in += nr; - pb += nr * _nchan; - if (in >= _inmax) - { - memcpy (_buff, pb, (4 - nr) * _nchan * sizeof (float)); - in = 0; - pb = _buff; - } - } - } + + m3 * pb [3 * n]; + pb++; + } + pb -= n; + } + else + { + for (c = 0; c < n; c++) *out_data++ = 0; + } + } + out_count--; + + ph += _pstep; + if (ph >= 1.0) + { + nr = (unsigned int) floor (ph); + ph -= nr; + in += nr; + pb += nr * _nchan; + if (in >= _inmax) + { + memcpy (_buff, pb, (4 - nr) * _nchan * sizeof (float)); + in = 0; + pb = _buff; + } + } + } } _index = in; diff --git a/source/modules/zita-resampler/resampler-table.cc b/source/modules/zita-resampler/resampler-table.cc index ad02685e8..c41b99b12 100644 --- a/source/modules/zita-resampler/resampler-table.cc +++ b/source/modules/zita-resampler/resampler-table.cc @@ -1,7 +1,7 @@ // ---------------------------------------------------------------------------- // -// Copyright (C) 2006-2012 Fons Adriaensen -// +// Copyright (C) 2006-2023 Fons Adriaensen +// // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 3 of the License, or @@ -24,6 +24,13 @@ #include #include "resampler-table.h" + +#undef ENABLE_VEC4 +#if defined(__SSE2_MATH__) || defined(__ARM_NEON) || defined(__ARM_NEON__) +# define ENABLE_VEC4 +#endif + + static double sinc (double x) { x = fabs (x); @@ -42,7 +49,6 @@ static double wind (double x) } - Resampler_table *Resampler_table::_list = 0; Resampler_mutex Resampler_table::_mutex; @@ -54,11 +60,16 @@ Resampler_table::Resampler_table (double fr, unsigned int hl, unsigned int np) : _hl (hl), _np (np) { - unsigned int i, j; + unsigned int i, j, n; double t; float *p; - _ctab = new float [hl * (np + 1)]; + n = hl * (np + 1); +#ifdef ENABLE_VEC4 + posix_memalign ((void **) 
&_ctab, 16, n * sizeof (float)); +#else + _ctab = new float [n]; +#endif p = _ctab; for (j = 0; j <= np; j++) { @@ -75,7 +86,11 @@ Resampler_table::Resampler_table (double fr, unsigned int hl, unsigned int np) : Resampler_table::~Resampler_table (void) { +#ifdef ENABLE_VEC4 + free (_ctab); +#else delete[] _ctab; +#endif } diff --git a/source/modules/zita-resampler/resampler-table.h b/source/modules/zita-resampler/resampler-table.h index 2c6493dd8..db28e6ba8 100644 --- a/source/modules/zita-resampler/resampler-table.h +++ b/source/modules/zita-resampler/resampler-table.h @@ -1,6 +1,6 @@ // ---------------------------------------------------------------------------- // -// Copyright (C) 2006-2012 Fons Adriaensen +// Copyright (C) 2006-2023 Fons Adriaensen // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by diff --git a/source/modules/zita-resampler/resampler.cc b/source/modules/zita-resampler/resampler.cc index 5bbd19660..d966d2609 100644 --- a/source/modules/zita-resampler/resampler.cc +++ b/source/modules/zita-resampler/resampler.cc @@ -1,7 +1,7 @@ // ---------------------------------------------------------------------------- // -// Copyright (C) 2006-2012 Fons Adriaensen -// +// Copyright (C) 2006-2023 Fons Adriaensen +// // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 3 of the License, or @@ -22,6 +22,16 @@ #include #include #include + +#undef ENABLE_VEC4 +#if defined(__SSE2_MATH__) +# define ENABLE_VEC4 +# include <xmmintrin.h> +#elif defined(__ARM_NEON) || defined(__ARM_NEON__) +# define ENABLE_VEC4 +# include <arm_neon.h> +#endif + #include "resampler.h" @@ -31,20 +41,20 @@ static unsigned int gcd (unsigned int a, unsigned int b) if (b == 0) return a; while (1) { - if (a > b) - { - a = a % b; - if (a == 0) return b; - if (a == 1) return 1; - } - else 
- { - b = b % a; - if (b == 0) return a; - if (b == 1) return 1; - } - } - return 1; + if (a > b) + { + a = a % b; + if (a == 0) return b; + if (a == 1) return 1; + } + else + { + b = b % a; + if (b == 0) return a; + if (b == 1) return 1; + } + } + return 1; } @@ -63,66 +73,82 @@ Resampler::~Resampler (void) } -int Resampler::setup (unsigned int fs_inp, - unsigned int fs_out, - unsigned int nchan, - unsigned int hlen) +bool Resampler::setup (unsigned int fs_inp, + unsigned int fs_out, + unsigned int nchan, + unsigned int hlen) { - if ((hlen < 8) || (hlen > 96)) return 1; return setup (fs_inp, fs_out, nchan, hlen, 1.0 - 2.6 / hlen); } -int Resampler::setup (unsigned int fs_inp, - unsigned int fs_out, - unsigned int nchan, - unsigned int hlen, - double frel) +bool Resampler::setup (unsigned int fs_inp, + unsigned int fs_out, + unsigned int nchan, + unsigned int hlen, + double frel) { - unsigned int g, h, k, n, s; + unsigned int np, dp, mi, hl, n; double r; - float *B = 0; Resampler_table *T = 0; - k = s = 0; - if (fs_inp && fs_out && nchan) + if (!nchan || (hlen < 8) || (hlen > 96)) { - r = (double) fs_out / (double) fs_inp; - g = gcd (fs_out, fs_inp); - n = fs_out / g; - s = fs_inp / g; - if ((16 * r >= 1) && (n <= 1000)) - { - h = hlen; - k = 250; - if (r < 1) - { - frel *= r; - h = (unsigned int)(ceil (h / r)); - k = (unsigned int)(ceil (k / r)); - } - T = Resampler_table::create (frel, h, n); - B = new float [nchan * (2 * h - 1 + k)]; - } + clear (); + return false; + } + + r = (double) fs_out / (double) fs_inp; + n = gcd (fs_out, fs_inp); + np = fs_out / n; + dp = fs_inp / n; + if ((64 * r < 1.0) || (np > 1000)) + { + clear (); + return false; } + + hl = hlen; + mi = 32; + if (r < 1.0) + { + frel *= r; + hl = (unsigned int)(ceil (hl / r)); + mi = (unsigned int)(ceil (mi / r)); + } +#ifdef ENABLE_VEC4 + hl = (hl + 3) & ~3; +#endif + T = Resampler_table::create (frel, hl, np); + clear (); if (T) { _table = T; - _buff = B; + n = nchan * (2 * hl + mi); +#ifdef 
ENABLE_VEC4 + posix_memalign ((void **)(&_buff), 16, n * sizeof (float)); + memset (_buff, 0, n * sizeof (float)); +#else + _buff = new float [n]; +#endif _nchan = nchan; - _inmax = k; - _pstep = s; + _inmax = mi; + _pstep = dp; return reset (); } - return 1; + else return false; } void Resampler::clear (void) { Resampler_table::destroy (_table); +#ifdef ENABLE_VEC4 + free (_buff); +#else delete[] _buff; +#endif _buff = 0; _table = 0; _nchan = 0; @@ -139,7 +165,7 @@ double Resampler::inpdist (void) const noexcept } -unsigned int Resampler::inpsize (void) const noexcept +int Resampler::inpsize (void) const noexcept { if (!_table) return 0; return 2 * _table->_hl; @@ -152,102 +178,157 @@ bool Resampler::reset (void) noexcept inp_count = 0; out_count = 0; - inp_data = nullptr; - out_data = nullptr; + inp_data = 0; + out_data = 0; _index = 0; _nread = 0; _nzero = 0; _phase = 0; - _nread = 2 * _table->_hl; - return true; + if (_table) + { + _nread = 2 * _table->_hl; + return true; + } + return false; } bool Resampler::process (void) { - unsigned int hl, ph, np, dp, in, nr, nz, i, n, c; - float *p1, *p2; + unsigned int hl, np, ph, dp, in, nr, nz, di, i, j, n; + float *c1, *c2, *p1, *p2, *q1, *q2; if (!_table) return false; - hl = _table->_hl; np = _table->_np; dp = _pstep; in = _index; nr = _nread; - ph = _phase; nz = _nzero; - n = (2 * hl - nr) * _nchan; - p1 = _buff + in * _nchan; - p2 = p1 + n; + ph = _phase; + + p1 = _buff + in; + p2 = p1 + 2 * hl - nr; + di = 2 * hl + _inmax; while (out_count) { - if (nr) + while (nr && inp_count) { - if (inp_count == 0) break; if (inp_data) { - for (c = 0; c < _nchan; c++) p2 [c] = inp_data [c]; + for (j = 0; j < _nchan; j++) p2 [j * di] = inp_data [j]; inp_data += _nchan; - nz = 0; + nz = 0; } else { - for (c = 0; c < _nchan; c++) p2 [c] = 0; + for (j = 0; j < _nchan; j++) p2 [j * di] = 0; if (nz < 2 * hl) nz++; } + p2++; nr--; - p2 += _nchan; inp_count--; } - else + if (nr) break; + + if (out_data) { - if (out_data) + if (nz < 2 
* hl) { - if (nz < 2 * hl) + c1 = _table->_ctab + hl * ph; + c2 = _table->_ctab + hl * (np - ph); + +#if defined(__SSE2_MATH__) + __m128 C1, C2, Q1, Q2, S; + for (j = 0; j < _nchan; j++) { - float *c1 = _table->_ctab + hl * ph; - float *c2 = _table->_ctab + hl * (np - ph); - for (c = 0; c < _nchan; c++) + q1 = p1 + j * di; + q2 = p2 + j * di; + S = _mm_setzero_ps (); + for (i = 0; i < hl; i += 4) { - float *q1 = p1 + c; - float *q2 = p2 + c; - float s = 1e-20f; - for (i = 0; i < hl; i++) - { - q2 -= _nchan; - s += *q1 * c1 [i] + *q2 * c2 [i]; - q1 += _nchan; - } - *out_data++ = s - 1e-20f; + C1 = _mm_load_ps (c1 + i); + Q1 = _mm_loadu_ps (q1); + q2 -= 4; + S = _mm_add_ps (S, _mm_mul_ps (C1, Q1)); + C2 = _mm_loadr_ps (c2 + i); + Q2 = _mm_loadu_ps (q2); + q1 += 4; + S = _mm_add_ps (S, _mm_mul_ps (C2, Q2)); } + *out_data++ = S [0] + S [1] + S [2] + S [3]; } - else + +#elif defined(__ARM_NEON) || defined(__ARM_NEON__) + // ARM64 version by Nicolas Belin + float32x4_t *C1 = (float32x4_t *)c1; + float32x4_t *C2 = (float32x4_t *)c2; + float32x4_t S, T; + for (j = 0; j < _nchan; j++) { - for (c = 0; c < _nchan; c++) *out_data++ = 0; + q1 = p1 + j * di; + q2 = p2 + j * di - 4; + T = vrev64q_f32 (vld1q_f32 (q2)); + S = vmulq_f32 (vextq_f32 (T, T, 2), C2 [0]); + S = vmlaq_f32 (S, vld1q_f32(q1), C1 [0]); + for (i = 1; i < (hl>>2); i++) + { + q2 -= 4; + q1 += 4; + T = vrev64q_f32 (vld1q_f32 (q2)); + S = vmlaq_f32 (S, vextq_f32 (T, T, 2), C2 [i]); + S = vmlaq_f32 (S, vld1q_f32 (q1), C1 [i]); + } + *out_data++ = vaddvq_f32(S); } + +#else + float s; + for (j = 0; j < _nchan; j++) + { + q1 = p1 + j * di; + q2 = p2 + j * di; + s = 1e-30f; + for (i = 0; i < hl; i++) + { + q2--; + s += *q1 * c1 [i] + *q2 * c2 [i]; + q1++; + } + *out_data++ = s - 1e-30f; + } +#endif } - out_count--; + else + { + for (j = 0; j < _nchan; j++) *out_data++ = 0; + } + } + out_count--; - ph += dp; - if (ph >= np) + ph += dp; + if (ph >= np) + { + nr = ph / np; + ph -= nr * np; + in += nr; + p1 += nr; + if 
(in >= _inmax) { - nr = ph / np; - ph -= nr * np; - in += nr; - p1 += nr * _nchan;; - if (in >= _inmax) + n = 2 * hl - nr; + p2 = _buff; + for (j = 0; j < _nchan; j++) { - n = (2 * hl - nr) * _nchan; - memcpy (_buff, p1, n * sizeof (float)); - in = 0; - p1 = _buff; - p2 = p1 + n; + memmove (p2 + j * di, p1 + j * di, n * sizeof (float)); } + in = 0; + p1 = _buff; + p2 = p1 + n; } } } + _index = in; _nread = nr; _phase = ph; diff --git a/source/modules/zita-resampler/resampler.h b/source/modules/zita-resampler/resampler.h index ace45d551..38b1fe477 100644 --- a/source/modules/zita-resampler/resampler.h +++ b/source/modules/zita-resampler/resampler.h @@ -1,6 +1,6 @@ // ---------------------------------------------------------------------------- // -// Copyright (C) 2006-2012 Fons Adriaensen +// Copyright (C) 2006-2023 Fons Adriaensen // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -32,31 +32,31 @@ public: Resampler (void) noexcept; ~Resampler (void); - int setup (unsigned int fs_inp, + bool setup (unsigned int fs_inp, unsigned int fs_out, unsigned int nchan, unsigned int hlen); - int setup (unsigned int fs_inp, + bool setup (unsigned int fs_inp, unsigned int fs_out, unsigned int nchan, unsigned int hlen, double frel); - void clear (void); - bool reset (void) noexcept; - unsigned int nchan (void) const noexcept { return _nchan; } - unsigned int filtlen (void) const noexcept { return inpsize (); } // Deprecated - unsigned int inpsize (void) const noexcept; - double inpdist (void) const noexcept; - bool process (void); + void clear (void); + bool reset (void) noexcept; + int nchan (void) const noexcept { return _nchan; } + int filtlen (void) const noexcept { return inpsize (); } // Deprecated + int inpsize (void) const noexcept; + double inpdist (void) const noexcept; + bool process (void); unsigned int inp_count; unsigned int out_count; float *inp_data; float *out_data; 
- void *inp_list; - void *out_list; + float **inp_list; + float **out_list; private: @@ -69,6 +69,7 @@ private: unsigned int _phase; unsigned int _pstep; float *_buff; + void *_dummy [8]; }; diff --git a/source/modules/zita-resampler/vresampler.cc b/source/modules/zita-resampler/vresampler.cc index b87c6dd4c..b39887da5 100644 --- a/source/modules/zita-resampler/vresampler.cc +++ b/source/modules/zita-resampler/vresampler.cc @@ -1,7 +1,7 @@ // ---------------------------------------------------------------------------- // -// Copyright (C) 2006-2013 Fons Adriaensen -// +// Copyright (C) 2006-2023 Fons Adriaensen +// // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 3 of the License, or @@ -22,10 +22,20 @@ #include #include #include + +#undef ENABLE_VEC4 +#if defined(__SSE2_MATH__) +# define ENABLE_VEC4 +# include <xmmintrin.h> +#elif defined(__ARM_NEON) || defined(__ARM_NEON__) +# define ENABLE_VEC4 +# include <arm_neon.h> +#endif + #include "vresampler.h" -VResampler::VResampler (void) : +VResampler::VResampler (void) noexcept : _table (0), _nchan (0), _buff (0), @@ -42,62 +52,81 @@ VResampler::~VResampler (void) } -int VResampler::setup (double ratio, - unsigned int nchan, - unsigned int hlen) +bool VResampler::setup (double ratio, + unsigned int nchan, + unsigned int hlen) { - if ((hlen < 8) || (hlen > 96) || (16 * ratio < 1) || (ratio > 256)) return 1; return setup (ratio, nchan, hlen, 1.0 - 2.6 / hlen); } -int VResampler::setup (double ratio, - unsigned int nchan, - unsigned int hlen, - double frel) +bool VResampler::setup (double ratio, + unsigned int nchan, + unsigned int hlen, + double frel) { - unsigned int h, k, n; - double s; + unsigned int hl, mi, n; + double dp; Resampler_table *T = 0; - if (! 
nchan) return 1; - n = NPHASE; - s = n / ratio; - h = hlen; - k = 250; - if (ratio < 1) + if (!nchan || (hlen < 8) || (hlen > 96) || (64 * ratio < 1) || (ratio > 256)) + { + clear (); + return false; + } + + dp = NPHASE / ratio; + hl = hlen; + mi = 32; + if (ratio < 1.0) { frel *= ratio; - h = (unsigned int)(ceil (h / ratio)); - k = (unsigned int)(ceil (k / ratio)); + hl = (unsigned int)(ceil (hl / ratio)); + mi = (unsigned int)(ceil (mi / ratio)); } - T = Resampler_table::create (frel, h, n); +#ifdef ENABLE_VEC4 + hl = (hl + 3) & ~3; +#endif + T = Resampler_table::create (frel, hl, NPHASE); clear (); if (T) { - _table = T; - _buff = new float [nchan * (2 * h - 1 + k)]; - _c1 = new float [2 * h]; - _c2 = new float [2 * h]; - _nchan = nchan; - _inmax = k; - _ratio = ratio; - _pstep = s; - _qstep = s; - _wstep = 1; - return reset (); + _table = T; + n = nchan * (2 * hl + mi); +#ifdef ENABLE_VEC4 + posix_memalign ((void **)(&_buff), 16, n * sizeof (float)); + posix_memalign ((void **)(&_c1), 16, hl * sizeof (float)); + posix_memalign ((void **)(&_c2), 16, hl * sizeof (float)); +#else + _buff = new float [n]; + _c1 = new float [hl]; + _c2 = new float [hl]; +#endif + _nchan = nchan; + _ratio = ratio; + _inmax = mi; + _pstep = dp; + _qstep = dp; + _wstep = 1; + return reset (); } - else return 1; + else return false; } void VResampler::clear (void) { Resampler_table::destroy (_table); +#ifdef ENABLE_VEC4 + free (_buff); + free (_c1); + free (_c2); +#else delete[] _buff; delete[] _c1; delete[] _c2; - _buff = 0; +#endif + _buff = 0; _c1 = 0; _c2 = 0; _table = 0; @@ -133,44 +162,49 @@ void VResampler::set_rratio (double r) } -double VResampler::inpdist (void) const +double VResampler::inpdist (void) const noexcept { if (!_table) return 0; return (int)(_table->_hl + 1 - _nread) - _phase / _table->_np; } -int VResampler::inpsize (void) const +int VResampler::inpsize (void) const noexcept { if (!_table) return 0; return 2 * _table->_hl; } -int VResampler::reset (void) +bool 
VResampler::reset (void) noexcept { - if (!_table) return 1; + if (!_table) return false; inp_count = 0; out_count = 0; inp_data = 0; out_data = 0; _index = 0; - _phase = 0; - _nread = 2 * _table->_hl; + _nread = 0; _nzero = 0; - return 0; + _phase = 0; + if (_table) + { + _nread = 2 * _table->_hl; + return true; + } + return false; } -int VResampler::process (void) +bool VResampler::process (void) { - unsigned int k, np, in, nr, n, c; - int i, hl, nz; - double ph, dp, dd; + int nr, np, hl, nz, di, i, n; + unsigned int in, j; + double ph, dp, dd; float a, b, *p1, *p2, *q1, *q2; - if (!_table) return 1; + if (!_table) return false; hl = _table->_hl; np = _table->_np; @@ -179,94 +213,169 @@ int VResampler::process (void) nz = _nzero; ph = _phase; dp = _pstep; - n = (2 * hl - nr) * _nchan; - p1 = _buff + in * _nchan; - p2 = p1 + n; + + p1 = _buff + in; + p2 = p1 + 2 * hl - nr; + di = 2 * hl + _inmax; while (out_count) { - if (nr) - { - if (inp_count == 0) break; - if (inp_data) - { - for (c = 0; c < _nchan; c++) p2 [c] = inp_data [c]; - inp_data += _nchan; - nz = 0; - } - else - { - for (c = 0; c < _nchan; c++) p2 [c] = 0; - if (nz < 2 * hl) nz++; - } - nr--; - p2 += _nchan; - inp_count--; - } - else - { - if (out_data) - { - if (nz < 2 * hl) - { - k = (unsigned int) ph; - b = (float)(ph - k); - a = 1.0f - b; - q1 = _table->_ctab + hl * k; - q2 = _table->_ctab + hl * (np - k); - for (i = 0; i < hl; i++) - { - _c1 [i] = a * q1 [i] + b * q1 [i + hl]; - _c2 [i] = a * q2 [i] + b * q2 [i - hl]; - } - for (c = 0; c < _nchan; c++) - { - q1 = p1 + c; - q2 = p2 + c; - a = 1e-25f; - for (i = 0; i < hl; i++) - { - q2 -= _nchan; - a += *q1 * _c1 [i] + *q2 * _c2 [i]; - q1 += _nchan; - } - *out_data++ = a - 1e-25f; - } - } - else - { - for (c = 0; c < _nchan; c++) *out_data++ = 0; - } - } - out_count--; - - dd = _qstep - dp; - if (fabs (dd) < 1e-30) dp = _qstep; - else dp += _wstep * dd; - ph += dp; - if (ph >= np) - { - nr = (unsigned int) floor( ph / np); - ph -= nr * np;; - in 
+= nr; - p1 += nr * _nchan;; - if (in >= _inmax) - { - n = (2 * hl - nr) * _nchan; - memcpy (_buff, p1, n * sizeof (float)); - in = 0; - p1 = _buff; - p2 = p1 + n; - } - } - } + while (nr && inp_count) + { + if (inp_data) + { + for (j = 0; j < _nchan; j++) p2 [j * di] = inp_data [j]; + inp_data += _nchan; + nz = 0; + } + else + { + for (j = 0; j < _nchan; j++) p2 [j * di] = 0; + if (nz < 2 * hl) nz++; + } + p2++; + nr--; + inp_count--; + } + if (nr) break; + + if (out_data) + { + if (nz < 2 * hl) + { + n = (unsigned int) ph; + b = (float)(ph - n); + a = 1.0f - b; + q1 = _table->_ctab + hl * n; + q2 = _table->_ctab + hl * (np - n); + +#if defined(__SSE2_MATH__) + __m128 C1, C2, Q1, Q2, S; + C1 = _mm_load1_ps (&a); + C2 = _mm_load1_ps (&b); + for (i = 0; i < hl; i += 4) + { + Q1 = _mm_load_ps (q1 + i); + Q2 = _mm_load_ps (q1 + i + hl); + S = _mm_add_ps (_mm_mul_ps (Q1, C1), _mm_mul_ps (Q2, C2)); + _mm_store_ps (_c1 + i, S); + Q1 = _mm_load_ps (q2 + i); + Q2 = _mm_load_ps (q2 + i - hl); + S = _mm_add_ps (_mm_mul_ps (Q1, C1), _mm_mul_ps (Q2, C2)); + _mm_store_ps (_c2 + i, S); + } + for (j = 0; j < _nchan; j++) + { + q1 = p1 + j * di; + q2 = p2 + j * di; + S = _mm_setzero_ps (); + for (i = 0; i < hl; i += 4) + { + C1 = _mm_load_ps (_c1 + i); + Q1 = _mm_loadu_ps (q1); + q2 -= 4; + S = _mm_add_ps (S, _mm_mul_ps (C1, Q1)); + C2 = _mm_loadr_ps (_c2 + i); + Q2 = _mm_loadu_ps (q2); + q1 += 4; + S = _mm_add_ps (S, _mm_mul_ps (C2, Q2)); + } + *out_data++ = S [0] + S [1] + S [2] + S [3]; + } + +#elif defined(__ARM_NEON) || defined(__ARM_NEON__) + // ARM64 version by Nicolas Belin + float32x4_t *C1 = (float32x4_t *)_c1; + float32x4_t *C2 = (float32x4_t *)_c2; + float32x4_t S, T; + for (i = 0; i < (hl>>2); i++) + { + T = vmulq_n_f32 (vld1q_f32 (q1 + hl), b); + C1 [i] = vmlaq_n_f32 (T, vld1q_f32 (q1), a); + T = vmulq_n_f32 (vld1q_f32 (q2 - hl), b); + C2 [i] = vmlaq_n_f32 (T, vld1q_f32 (q2), a); + q2 += 4; + q1 += 4; + } + for (j = 0; j < _nchan; j++) + { + q1 = p1 + j * di; + q2 = 
p2 + j * di - 4; + T = vrev64q_f32 (vld1q_f32 (q2)); + S = vmulq_f32 (vextq_f32 (T, T, 2), C2 [0]); + S = vmlaq_f32 (S, vld1q_f32 (q1), C1 [0]); + for (i = 1; i < (hl>>2); i++) + { + q2 -= 4; + q1 += 4; + T = vrev64q_f32 (vld1q_f32 (q2)); + S = vmlaq_f32 (S, vextq_f32 (T, T, 2), C2 [i]); + S = vmlaq_f32 (S, vld1q_f32 (q1), C1 [i]); + } + *out_data++ = vaddvq_f32 (S); + } + +#else + float s; + for (i = 0; i < hl; i++) + { + _c1 [i] = a * q1 [i] + b * q1 [i + hl]; + _c2 [i] = a * q2 [i] + b * q2 [i - hl]; + } + for (j = 0; j < _nchan; j++) + { + q1 = p1 + j * di; + q2 = p2 + j * di; + s = 1e-30f; + for (i = 0; i < hl; i++) + { + q2--; + s += *q1 * _c1 [i] + *q2 * _c2 [i]; + q1++; + } + *out_data++ = s - 1e-30f; + } +#endif + } + else + { + for (j = 0; j < _nchan; j++) *out_data++ = 0; + } + } + out_count--; + + dd = _qstep - dp; + if (fabs (dd) < 1e-20) dp = _qstep; + else dp += _wstep * dd; + ph += dp; + if (ph >= np) + { + nr = (unsigned int) floor (ph / np); + ph -= nr * np;; + in += nr; + p1 += nr; + + if (in >= _inmax) + { + n = 2 * hl - nr; + p2 = _buff; + for (j = 0; j < _nchan; j++) + { + memmove (p2 + j * di, p1 + j * di, n * sizeof (float)); + } + in = 0; + p1 = _buff; + p2 = p1 + n; + } + } } + _index = in; _nread = nr; _phase = ph; _pstep = dp; _nzero = nz; - return 0; + return true; } diff --git a/source/modules/zita-resampler/vresampler.h b/source/modules/zita-resampler/vresampler.h index 41e1111f5..5ae87dbaf 100644 --- a/source/modules/zita-resampler/vresampler.h +++ b/source/modules/zita-resampler/vresampler.h @@ -1,6 +1,6 @@ // ---------------------------------------------------------------------------- // -// Copyright (C) 2006-2012 Fons Adriaensen +// Copyright (C) 2006-2023 Fons Adriaensen // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -29,28 +29,28 @@ class VResampler { public: - VResampler (void); + VResampler (void) noexcept; ~VResampler 
(void); - int setup (double ratio, + bool setup (double ratio, unsigned int nchan, unsigned int hlen); - int setup (double ratio, + bool setup (double ratio, unsigned int nchan, unsigned int hlen, double frel); void clear (void); - int reset (void); - int nchan (void) const { return _nchan; } - int inpsize (void) const; - double inpdist (void) const; - int process (void); - + bool reset (void) noexcept; + int nchan (void) const noexcept { return _nchan; } + int inpsize (void) const noexcept; + double inpdist (void) const noexcept; + bool process (void); + void set_phase (double p); void set_rrfilt (double t); - void set_rratio (double r); + void set_rratio (double r); unsigned int inp_count; unsigned int out_count; @@ -61,7 +61,7 @@ public: private: - enum { NPHASE = 256 }; + enum { NPHASE = 120 }; Resampler_table *_table; unsigned int _nchan; @@ -77,6 +77,7 @@ private: float *_buff; float *_c1; float *_c2; + void *_dummy [8]; };