// ---------------------------------------------------------------------------- // // Copyright (C) 2006-2023 Fons Adriaensen // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . // // ---------------------------------------------------------------------------- #include #include #include #include #include "vresampler.h" #undef ENABLE_VEC4 #ifndef CARLA_OS_WIN # if defined(__SSE2_MATH__) # define ENABLE_VEC4 # include # elif defined(__ARM_NEON) || defined(__ARM_NEON__) # define ENABLE_VEC4 # include # endif #endif VResampler::VResampler (void) noexcept : _table (0), _nchan (0), _buff (0), _c1 (0), _c2 (0) { reset (); } VResampler::~VResampler (void) { clear (); } bool VResampler::setup (double ratio, unsigned int nchan, unsigned int hlen) { return setup (ratio, nchan, hlen, 1.0 - 2.6 / hlen); } bool VResampler::setup (double ratio, unsigned int nchan, unsigned int hlen, double frel) { unsigned int hl, mi, n; double dp; Resampler_table *T = 0; if (!nchan || (hlen < 8) || (hlen > 96) || (64 * ratio < 1) || (ratio > 256)) { clear (); return false; } dp = NPHASE / ratio; hl = hlen; mi = 32; if (ratio < 1.0) { frel *= ratio; hl = (unsigned int)(ceil (hl / ratio)); mi = (unsigned int)(ceil (mi / ratio)); } #ifdef ENABLE_VEC4 hl = (hl + 3) & ~3; #endif T = Resampler_table::create (frel, hl, NPHASE); clear (); if (T) { _table = T; n = nchan * (2 * hl + mi); #ifdef ENABLE_VEC4 posix_memalign ((void **)(&_buff), 16, n * sizeof (float)); posix_memalign ((void **)(&_c1), 16, hl * sizeof (float)); posix_memalign ((void **)(&_c2), 16, hl * sizeof (float)); #else _buff = new float [n]; _c1 = new float [hl]; _c2 = new float [hl]; #endif _nchan = nchan; _ratio = ratio; _inmax = mi; _pstep = dp; _qstep = dp; _wstep = 1; return reset (); } else return false; } void VResampler::clear (void) { Resampler_table::destroy (_table); #ifdef ENABLE_VEC4 free (_buff); free (_c1); free (_c2); #else delete[] _buff; delete[] _c1; delete[] _c2; #endif _buff = 0; _c1 = 0; _c2 = 0; _table = 0; _nchan = 0; _inmax = 0; _pstep = 0; _qstep = 0; _wstep = 1; reset (); } void VResampler::set_phase (double p) { if (!_table) return; _phase = (p - floor (p)) * _table->_np; } void VResampler::set_rrfilt (double t) { if (!_table) return; _wstep = (t < 1) ? 1 : 1 - exp (-1 / t); } void VResampler::set_rratio (double r) { if (!_table) return; if (r > 16.0) r = 16.0; if (r < 0.95) r = 0.95; _qstep = _table->_np / (_ratio * r); } double VResampler::inpdist (void) const noexcept { if (!_table) return 0; return (int)(_table->_hl + 1 - _nread) - _phase / _table->_np; } int VResampler::inpsize (void) const noexcept { if (!_table) return 0; return 2 * _table->_hl; } bool VResampler::reset (void) noexcept { if (!_table) return false; inp_count = 0; out_count = 0; inp_data = 0; out_data = 0; _index = 0; _nread = 0; _nzero = 0; _phase = 0; if (_table) { _nread = 2 * _table->_hl; return true; } return false; } bool VResampler::process (void) { int nr, np, hl, nz, di, i, n; unsigned int in, j; double ph, dp, dd; float a, b, *p1, *p2, *q1, *q2; if (!_table) return false; hl = _table->_hl; np = _table->_np; in = _index; nr = _nread; nz = _nzero; ph = _phase; dp = _pstep; p1 = _buff + in; p2 = p1 + 2 * hl - nr; di = 2 * hl + _inmax; while (out_count) { while (nr && inp_count) { if (inp_data) { for (j = 0; j < _nchan; j++) p2 [j * di] = inp_data [j]; inp_data += _nchan; nz = 0; } else { for (j = 0; j < _nchan; j++) p2 [j * di] = 0; if (nz < 2 * hl) nz++; } p2++; nr--; inp_count--; } if (nr) break; if (out_data) { if (nz < 2 * hl) { n = (unsigned int) ph; b = (float)(ph - n); a = 1.0f - b; q1 = _table->_ctab + hl * n; q2 = _table->_ctab + hl * (np - n); #if defined(__SSE2_MATH__) && !defined(CARLA_OS_WIN) __m128 C1, C2, Q1, Q2, S; C1 = _mm_load1_ps (&a); C2 = _mm_load1_ps (&b); for (i = 0; i < hl; i += 4) { Q1 = _mm_load_ps (q1 + i); Q2 = _mm_load_ps (q1 + i + hl); S = _mm_add_ps (_mm_mul_ps (Q1, C1), _mm_mul_ps (Q2, C2)); _mm_store_ps (_c1 + i, S); Q1 = _mm_load_ps (q2 + i); Q2 = _mm_load_ps (q2 + i - hl); S = _mm_add_ps (_mm_mul_ps (Q1, C1), _mm_mul_ps (Q2, C2)); _mm_store_ps (_c2 + i, S); } for (j = 0; j < _nchan; j++) { q1 = p1 + j * di; q2 = p2 + j * di; S = _mm_setzero_ps (); for (i = 0; i < hl; i += 4) { C1 = _mm_load_ps (_c1 + i); Q1 = _mm_loadu_ps (q1); q2 -= 4; S = _mm_add_ps (S, _mm_mul_ps (C1, Q1)); C2 = _mm_loadr_ps (_c2 + i); Q2 = _mm_loadu_ps (q2); q1 += 4; S = _mm_add_ps (S, _mm_mul_ps (C2, Q2)); } *out_data++ = S [0] + S [1] + S [2] + S [3]; } #elif (defined(__ARM_NEON) || defined(__ARM_NEON__)) && !defined(CARLA_OS_WIN) // ARM64 version by Nicolas Belin float32x4_t *C1 = (float32x4_t *)_c1; float32x4_t *C2 = (float32x4_t *)_c2; float32x4_t S, T; for (i = 0; i < (hl>>2); i++) { T = vmulq_n_f32 (vld1q_f32 (q1 + hl), b); C1 [i] = vmlaq_n_f32 (T, vld1q_f32 (q1), a); T = vmulq_n_f32 (vld1q_f32 (q2 - hl), b); C2 [i] = vmlaq_n_f32 (T, vld1q_f32 (q2), a); q2 += 4; q1 += 4; } for (j = 0; j < _nchan; j++) { q1 = p1 + j * di; q2 = p2 + j * di - 4; T = vrev64q_f32 (vld1q_f32 (q2)); S = vmulq_f32 (vextq_f32 (T, T, 2), C2 [0]); S = vmlaq_f32 (S, vld1q_f32 (q1), C1 [0]); for (i = 1; i < (hl>>2); i++) { q2 -= 4; q1 += 4; T = vrev64q_f32 (vld1q_f32 (q2)); S = vmlaq_f32 (S, vextq_f32 (T, T, 2), C2 [i]); S = vmlaq_f32 (S, vld1q_f32 (q1), C1 [i]); } *out_data++ = vaddvq_f32 (S); } #else float s; for (i = 0; i < hl; i++) { _c1 [i] = a * q1 [i] + b * q1 [i + hl]; _c2 [i] = a * q2 [i] + b * q2 [i - hl]; } for (j = 0; j < _nchan; j++) { q1 = p1 + j * di; q2 = p2 + j * di; s = 1e-30f; for (i = 0; i < hl; i++) { q2--; s += *q1 * _c1 [i] + *q2 * _c2 [i]; q1++; } *out_data++ = s - 1e-30f; } #endif } else { for (j = 0; j < _nchan; j++) *out_data++ = 0; } } out_count--; dd = _qstep - dp; if (fabs (dd) < 1e-20) dp = _qstep; else dp += _wstep * dd; ph += dp; if (ph >= np) { nr = (unsigned int) floor (ph / np); ph -= nr * np;; in += nr; p1 += nr; if (in >= _inmax) { n = 2 * hl - nr; p2 = _buff; for (j = 0; j < _nchan; j++) { memmove (p2 + j * di, p1 + j * di, n * sizeof (float)); } in = 0; p1 = _buff; p2 = p1 + n; } } } _index = in; _nread = nr; _phase = ph; _pstep = dp; _nzero = nz; return true; }