Signed-off-by: falkTX <falktx@falktx.com>pull/1807/head
@@ -116,70 +116,70 @@ int CResampler::process (void) | |||||
while (out_count) | while (out_count) | ||||
{ | { | ||||
if (nr) | |||||
{ | |||||
if (inp_count == 0) break; | |||||
if (nr) | |||||
{ | |||||
if (inp_count == 0) break; | |||||
n = (4 - nr) * _nchan; | n = (4 - nr) * _nchan; | ||||
if (inp_data) | |||||
{ | |||||
if (inp_data) | |||||
{ | |||||
for (c = 0; c < _nchan; c++) pb [n + c] = inp_data [c]; | for (c = 0; c < _nchan; c++) pb [n + c] = inp_data [c]; | ||||
inp_data += _nchan; | |||||
nz = 0; | |||||
} | |||||
else | |||||
{ | |||||
inp_data += _nchan; | |||||
nz = 0; | |||||
} | |||||
else | |||||
{ | |||||
for (c = 0; c < _nchan; c++) pb [n + c] = 0; | for (c = 0; c < _nchan; c++) pb [n + c] = 0; | ||||
if (nz < 4) nz++; | |||||
} | |||||
nr--; | |||||
inp_count--; | |||||
} | |||||
else | |||||
{ | |||||
n = _nchan; | |||||
if (out_data) | |||||
{ | |||||
if (nz < 4) | |||||
{ | |||||
a = ph; | |||||
b = 1 - a; | |||||
d = a * b / 2; | |||||
m0 = -d * b; | |||||
m1 = b + (3 * b - 1) * d; | |||||
m2 = a + (3 * a - 1) * d; | |||||
m3 = -d * a; | |||||
for (c = 0; c < n; c++) | |||||
{ | |||||
*out_data++ = m0 * pb [0] | |||||
if (nz < 4) nz++; | |||||
} | |||||
nr--; | |||||
inp_count--; | |||||
} | |||||
else | |||||
{ | |||||
n = _nchan; | |||||
if (out_data) | |||||
{ | |||||
if (nz < 4) | |||||
{ | |||||
a = ph; | |||||
b = 1 - a; | |||||
d = a * b / 2; | |||||
m0 = -d * b; | |||||
m1 = b + (3 * b - 1) * d; | |||||
m2 = a + (3 * a - 1) * d; | |||||
m3 = -d * a; | |||||
for (c = 0; c < n; c++) | |||||
{ | |||||
*out_data++ = m0 * pb [0] | |||||
+ m1 * pb [n] | + m1 * pb [n] | ||||
+ m2 * pb [2 * n] | + m2 * pb [2 * n] | ||||
+ m3 * pb [3 * n]; | |||||
pb++; | |||||
} | |||||
pb -= n; | |||||
} | |||||
else | |||||
{ | |||||
for (c = 0; c < n; c++) *out_data++ = 0; | |||||
} | |||||
} | |||||
out_count--; | |||||
ph += _pstep; | |||||
if (ph >= 1.0) | |||||
{ | |||||
nr = (unsigned int) floor (ph); | |||||
ph -= nr; | |||||
in += nr; | |||||
pb += nr * _nchan; | |||||
if (in >= _inmax) | |||||
{ | |||||
memcpy (_buff, pb, (4 - nr) * _nchan * sizeof (float)); | |||||
in = 0; | |||||
pb = _buff; | |||||
} | |||||
} | |||||
} | |||||
+ m3 * pb [3 * n]; | |||||
pb++; | |||||
} | |||||
pb -= n; | |||||
} | |||||
else | |||||
{ | |||||
for (c = 0; c < n; c++) *out_data++ = 0; | |||||
} | |||||
} | |||||
out_count--; | |||||
ph += _pstep; | |||||
if (ph >= 1.0) | |||||
{ | |||||
nr = (unsigned int) floor (ph); | |||||
ph -= nr; | |||||
in += nr; | |||||
pb += nr * _nchan; | |||||
if (in >= _inmax) | |||||
{ | |||||
memcpy (_buff, pb, (4 - nr) * _nchan * sizeof (float)); | |||||
in = 0; | |||||
pb = _buff; | |||||
} | |||||
} | |||||
} | |||||
} | } | ||||
_index = in; | _index = in; | ||||
@@ -1,7 +1,7 @@ | |||||
// ---------------------------------------------------------------------------- | // ---------------------------------------------------------------------------- | ||||
// | // | ||||
// Copyright (C) 2006-2012 Fons Adriaensen <fons@linuxaudio.org> | |||||
// | |||||
// Copyright (C) 2006-2023 Fons Adriaensen <fons@linuxaudio.org> | |||||
// | |||||
// This program is free software; you can redistribute it and/or modify | // This program is free software; you can redistribute it and/or modify | ||||
// it under the terms of the GNU General Public License as published by | // it under the terms of the GNU General Public License as published by | ||||
// the Free Software Foundation; either version 3 of the License, or | // the Free Software Foundation; either version 3 of the License, or | ||||
@@ -24,6 +24,13 @@ | |||||
#include <math.h> | #include <math.h> | ||||
#include "resampler-table.h" | #include "resampler-table.h" | ||||
#undef ENABLE_VEC4 | |||||
#if defined(__SSE2_MATH__) || defined(__ARM_NEON) || defined(__ARM_NEON__) | |||||
# define ENABLE_VEC4 | |||||
#endif | |||||
static double sinc (double x) | static double sinc (double x) | ||||
{ | { | ||||
x = fabs (x); | x = fabs (x); | ||||
@@ -42,7 +49,6 @@ static double wind (double x) | |||||
} | } | ||||
Resampler_table *Resampler_table::_list = 0; | Resampler_table *Resampler_table::_list = 0; | ||||
Resampler_mutex Resampler_table::_mutex; | Resampler_mutex Resampler_table::_mutex; | ||||
@@ -54,11 +60,16 @@ Resampler_table::Resampler_table (double fr, unsigned int hl, unsigned int np) : | |||||
_hl (hl), | _hl (hl), | ||||
_np (np) | _np (np) | ||||
{ | { | ||||
unsigned int i, j; | |||||
unsigned int i, j, n; | |||||
double t; | double t; | ||||
float *p; | float *p; | ||||
_ctab = new float [hl * (np + 1)]; | |||||
n = hl * (np + 1); | |||||
#ifdef ENABLE_VEC4 | |||||
posix_memalign ((void **) &_ctab, 16, n * sizeof (float)); | |||||
#else | |||||
_ctab = new float [n]; | |||||
#endif | |||||
p = _ctab; | p = _ctab; | ||||
for (j = 0; j <= np; j++) | for (j = 0; j <= np; j++) | ||||
{ | { | ||||
@@ -75,7 +86,11 @@ Resampler_table::Resampler_table (double fr, unsigned int hl, unsigned int np) : | |||||
Resampler_table::~Resampler_table (void) | Resampler_table::~Resampler_table (void) | ||||
{ | { | ||||
#ifdef ENABLE_VEC4 | |||||
free (_ctab); | |||||
#else | |||||
delete[] _ctab; | delete[] _ctab; | ||||
#endif | |||||
} | } | ||||
@@ -1,6 +1,6 @@ | |||||
// ---------------------------------------------------------------------------- | // ---------------------------------------------------------------------------- | ||||
// | // | ||||
// Copyright (C) 2006-2012 Fons Adriaensen <fons@linuxaudio.org> | |||||
// Copyright (C) 2006-2023 Fons Adriaensen <fons@linuxaudio.org> | |||||
// | // | ||||
// This program is free software; you can redistribute it and/or modify | // This program is free software; you can redistribute it and/or modify | ||||
// it under the terms of the GNU General Public License as published by | // it under the terms of the GNU General Public License as published by | ||||
@@ -1,7 +1,7 @@ | |||||
// ---------------------------------------------------------------------------- | // ---------------------------------------------------------------------------- | ||||
// | // | ||||
// Copyright (C) 2006-2012 Fons Adriaensen <fons@linuxaudio.org> | |||||
// | |||||
// Copyright (C) 2006-2023 Fons Adriaensen <fons@linuxaudio.org> | |||||
// | |||||
// This program is free software; you can redistribute it and/or modify | // This program is free software; you can redistribute it and/or modify | ||||
// it under the terms of the GNU General Public License as published by | // it under the terms of the GNU General Public License as published by | ||||
// the Free Software Foundation; either version 3 of the License, or | // the Free Software Foundation; either version 3 of the License, or | ||||
@@ -22,6 +22,16 @@ | |||||
#include <stdio.h> | #include <stdio.h> | ||||
#include <string.h> | #include <string.h> | ||||
#include <math.h> | #include <math.h> | ||||
#undef ENABLE_VEC4 | |||||
#if defined(__SSE2_MATH__) | |||||
# define ENABLE_VEC4 | |||||
# include <xmmintrin.h> | |||||
#elif defined(__ARM_NEON) || defined(__ARM_NEON__) | |||||
# define ENABLE_VEC4 | |||||
# include <arm_neon.h> | |||||
#endif | |||||
#include "resampler.h" | #include "resampler.h" | ||||
@@ -31,20 +41,20 @@ static unsigned int gcd (unsigned int a, unsigned int b) | |||||
if (b == 0) return a; | if (b == 0) return a; | ||||
while (1) | while (1) | ||||
{ | { | ||||
if (a > b) | |||||
{ | |||||
a = a % b; | |||||
if (a == 0) return b; | |||||
if (a == 1) return 1; | |||||
} | |||||
else | |||||
{ | |||||
b = b % a; | |||||
if (b == 0) return a; | |||||
if (b == 1) return 1; | |||||
} | |||||
} | |||||
return 1; | |||||
if (a > b) | |||||
{ | |||||
a = a % b; | |||||
if (a == 0) return b; | |||||
if (a == 1) return 1; | |||||
} | |||||
else | |||||
{ | |||||
b = b % a; | |||||
if (b == 0) return a; | |||||
if (b == 1) return 1; | |||||
} | |||||
} | |||||
return 1; | |||||
} | } | ||||
@@ -63,66 +73,82 @@ Resampler::~Resampler (void) | |||||
} | } | ||||
int Resampler::setup (unsigned int fs_inp, | |||||
unsigned int fs_out, | |||||
unsigned int nchan, | |||||
unsigned int hlen) | |||||
bool Resampler::setup (unsigned int fs_inp, | |||||
unsigned int fs_out, | |||||
unsigned int nchan, | |||||
unsigned int hlen) | |||||
{ | { | ||||
if ((hlen < 8) || (hlen > 96)) return 1; | |||||
return setup (fs_inp, fs_out, nchan, hlen, 1.0 - 2.6 / hlen); | return setup (fs_inp, fs_out, nchan, hlen, 1.0 - 2.6 / hlen); | ||||
} | } | ||||
int Resampler::setup (unsigned int fs_inp, | |||||
unsigned int fs_out, | |||||
unsigned int nchan, | |||||
unsigned int hlen, | |||||
double frel) | |||||
bool Resampler::setup (unsigned int fs_inp, | |||||
unsigned int fs_out, | |||||
unsigned int nchan, | |||||
unsigned int hlen, | |||||
double frel) | |||||
{ | { | ||||
unsigned int g, h, k, n, s; | |||||
unsigned int np, dp, mi, hl, n; | |||||
double r; | double r; | ||||
float *B = 0; | |||||
Resampler_table *T = 0; | Resampler_table *T = 0; | ||||
k = s = 0; | |||||
if (fs_inp && fs_out && nchan) | |||||
if (!nchan || (hlen < 8) || (hlen > 96)) | |||||
{ | { | ||||
r = (double) fs_out / (double) fs_inp; | |||||
g = gcd (fs_out, fs_inp); | |||||
n = fs_out / g; | |||||
s = fs_inp / g; | |||||
if ((16 * r >= 1) && (n <= 1000)) | |||||
{ | |||||
h = hlen; | |||||
k = 250; | |||||
if (r < 1) | |||||
{ | |||||
frel *= r; | |||||
h = (unsigned int)(ceil (h / r)); | |||||
k = (unsigned int)(ceil (k / r)); | |||||
} | |||||
T = Resampler_table::create (frel, h, n); | |||||
B = new float [nchan * (2 * h - 1 + k)]; | |||||
} | |||||
clear (); | |||||
return false; | |||||
} | |||||
r = (double) fs_out / (double) fs_inp; | |||||
n = gcd (fs_out, fs_inp); | |||||
np = fs_out / n; | |||||
dp = fs_inp / n; | |||||
if ((64 * r < 1.0) || (np > 1000)) | |||||
{ | |||||
clear (); | |||||
return false; | |||||
} | } | ||||
hl = hlen; | |||||
mi = 32; | |||||
if (r < 1.0) | |||||
{ | |||||
frel *= r; | |||||
hl = (unsigned int)(ceil (hl / r)); | |||||
mi = (unsigned int)(ceil (mi / r)); | |||||
} | |||||
#ifdef ENABLE_VEC4 | |||||
hl = (hl + 3) & ~3; | |||||
#endif | |||||
T = Resampler_table::create (frel, hl, np); | |||||
clear (); | clear (); | ||||
if (T) | if (T) | ||||
{ | { | ||||
_table = T; | _table = T; | ||||
_buff = B; | |||||
n = nchan * (2 * hl + mi); | |||||
#ifdef ENABLE_VEC4 | |||||
posix_memalign ((void **)(&_buff), 16, n * sizeof (float)); | |||||
memset (_buff, 0, n * sizeof (float)); | |||||
#else | |||||
_buff = new float [n]; | |||||
#endif | |||||
_nchan = nchan; | _nchan = nchan; | ||||
_inmax = k; | |||||
_pstep = s; | |||||
_inmax = mi; | |||||
_pstep = dp; | |||||
return reset (); | return reset (); | ||||
} | } | ||||
return 1; | |||||
else return false; | |||||
} | } | ||||
void Resampler::clear (void) | void Resampler::clear (void) | ||||
{ | { | ||||
Resampler_table::destroy (_table); | Resampler_table::destroy (_table); | ||||
#ifdef ENABLE_VEC4 | |||||
free (_buff); | |||||
#else | |||||
delete[] _buff; | delete[] _buff; | ||||
#endif | |||||
_buff = 0; | _buff = 0; | ||||
_table = 0; | _table = 0; | ||||
_nchan = 0; | _nchan = 0; | ||||
@@ -139,7 +165,7 @@ double Resampler::inpdist (void) const noexcept | |||||
} | } | ||||
unsigned int Resampler::inpsize (void) const noexcept | |||||
int Resampler::inpsize (void) const noexcept | |||||
{ | { | ||||
if (!_table) return 0; | if (!_table) return 0; | ||||
return 2 * _table->_hl; | return 2 * _table->_hl; | ||||
@@ -152,102 +178,157 @@ bool Resampler::reset (void) noexcept | |||||
inp_count = 0; | inp_count = 0; | ||||
out_count = 0; | out_count = 0; | ||||
inp_data = nullptr; | |||||
out_data = nullptr; | |||||
inp_data = 0; | |||||
out_data = 0; | |||||
_index = 0; | _index = 0; | ||||
_nread = 0; | _nread = 0; | ||||
_nzero = 0; | _nzero = 0; | ||||
_phase = 0; | _phase = 0; | ||||
_nread = 2 * _table->_hl; | |||||
return true; | |||||
if (_table) | |||||
{ | |||||
_nread = 2 * _table->_hl; | |||||
return true; | |||||
} | |||||
return false; | |||||
} | } | ||||
bool Resampler::process (void) | bool Resampler::process (void) | ||||
{ | { | ||||
unsigned int hl, ph, np, dp, in, nr, nz, i, n, c; | |||||
float *p1, *p2; | |||||
unsigned int hl, np, ph, dp, in, nr, nz, di, i, j, n; | |||||
float *c1, *c2, *p1, *p2, *q1, *q2; | |||||
if (!_table) return false; | if (!_table) return false; | ||||
hl = _table->_hl; | hl = _table->_hl; | ||||
np = _table->_np; | np = _table->_np; | ||||
dp = _pstep; | dp = _pstep; | ||||
in = _index; | in = _index; | ||||
nr = _nread; | nr = _nread; | ||||
ph = _phase; | |||||
nz = _nzero; | nz = _nzero; | ||||
n = (2 * hl - nr) * _nchan; | |||||
p1 = _buff + in * _nchan; | |||||
p2 = p1 + n; | |||||
ph = _phase; | |||||
p1 = _buff + in; | |||||
p2 = p1 + 2 * hl - nr; | |||||
di = 2 * hl + _inmax; | |||||
while (out_count) | while (out_count) | ||||
{ | { | ||||
if (nr) | |||||
while (nr && inp_count) | |||||
{ | { | ||||
if (inp_count == 0) break; | |||||
if (inp_data) | if (inp_data) | ||||
{ | { | ||||
for (c = 0; c < _nchan; c++) p2 [c] = inp_data [c]; | |||||
for (j = 0; j < _nchan; j++) p2 [j * di] = inp_data [j]; | |||||
inp_data += _nchan; | inp_data += _nchan; | ||||
nz = 0; | |||||
nz = 0; | |||||
} | } | ||||
else | else | ||||
{ | { | ||||
for (c = 0; c < _nchan; c++) p2 [c] = 0; | |||||
for (j = 0; j < _nchan; j++) p2 [j * di] = 0; | |||||
if (nz < 2 * hl) nz++; | if (nz < 2 * hl) nz++; | ||||
} | } | ||||
p2++; | |||||
nr--; | nr--; | ||||
p2 += _nchan; | |||||
inp_count--; | inp_count--; | ||||
} | } | ||||
else | |||||
if (nr) break; | |||||
if (out_data) | |||||
{ | { | ||||
if (out_data) | |||||
if (nz < 2 * hl) | |||||
{ | { | ||||
if (nz < 2 * hl) | |||||
c1 = _table->_ctab + hl * ph; | |||||
c2 = _table->_ctab + hl * (np - ph); | |||||
#if defined(__SSE2_MATH__) | |||||
__m128 C1, C2, Q1, Q2, S; | |||||
for (j = 0; j < _nchan; j++) | |||||
{ | { | ||||
float *c1 = _table->_ctab + hl * ph; | |||||
float *c2 = _table->_ctab + hl * (np - ph); | |||||
for (c = 0; c < _nchan; c++) | |||||
q1 = p1 + j * di; | |||||
q2 = p2 + j * di; | |||||
S = _mm_setzero_ps (); | |||||
for (i = 0; i < hl; i += 4) | |||||
{ | { | ||||
float *q1 = p1 + c; | |||||
float *q2 = p2 + c; | |||||
float s = 1e-20f; | |||||
for (i = 0; i < hl; i++) | |||||
{ | |||||
q2 -= _nchan; | |||||
s += *q1 * c1 [i] + *q2 * c2 [i]; | |||||
q1 += _nchan; | |||||
} | |||||
*out_data++ = s - 1e-20f; | |||||
C1 = _mm_load_ps (c1 + i); | |||||
Q1 = _mm_loadu_ps (q1); | |||||
q2 -= 4; | |||||
S = _mm_add_ps (S, _mm_mul_ps (C1, Q1)); | |||||
C2 = _mm_loadr_ps (c2 + i); | |||||
Q2 = _mm_loadu_ps (q2); | |||||
q1 += 4; | |||||
S = _mm_add_ps (S, _mm_mul_ps (C2, Q2)); | |||||
} | } | ||||
*out_data++ = S [0] + S [1] + S [2] + S [3]; | |||||
} | } | ||||
else | |||||
#elif defined(__ARM_NEON) || defined(__ARM_NEON__) | |||||
// ARM64 version by Nicolas Belin <nbelin@baylibre.com> | |||||
float32x4_t *C1 = (float32x4_t *)c1; | |||||
float32x4_t *C2 = (float32x4_t *)c2; | |||||
float32x4_t S, T; | |||||
for (j = 0; j < _nchan; j++) | |||||
{ | { | ||||
for (c = 0; c < _nchan; c++) *out_data++ = 0; | |||||
q1 = p1 + j * di; | |||||
q2 = p2 + j * di - 4; | |||||
T = vrev64q_f32 (vld1q_f32 (q2)); | |||||
S = vmulq_f32 (vextq_f32 (T, T, 2), C2 [0]); | |||||
S = vmlaq_f32 (S, vld1q_f32(q1), C1 [0]); | |||||
for (i = 1; i < (hl>>2); i++) | |||||
{ | |||||
q2 -= 4; | |||||
q1 += 4; | |||||
T = vrev64q_f32 (vld1q_f32 (q2)); | |||||
S = vmlaq_f32 (S, vextq_f32 (T, T, 2), C2 [i]); | |||||
S = vmlaq_f32 (S, vld1q_f32 (q1), C1 [i]); | |||||
} | |||||
*out_data++ = vaddvq_f32(S); | |||||
} | } | ||||
#else | |||||
float s; | |||||
for (j = 0; j < _nchan; j++) | |||||
{ | |||||
q1 = p1 + j * di; | |||||
q2 = p2 + j * di; | |||||
s = 1e-30f; | |||||
for (i = 0; i < hl; i++) | |||||
{ | |||||
q2--; | |||||
s += *q1 * c1 [i] + *q2 * c2 [i]; | |||||
q1++; | |||||
} | |||||
*out_data++ = s - 1e-30f; | |||||
} | |||||
#endif | |||||
} | } | ||||
out_count--; | |||||
else | |||||
{ | |||||
for (j = 0; j < _nchan; j++) *out_data++ = 0; | |||||
} | |||||
} | |||||
out_count--; | |||||
ph += dp; | |||||
if (ph >= np) | |||||
ph += dp; | |||||
if (ph >= np) | |||||
{ | |||||
nr = ph / np; | |||||
ph -= nr * np; | |||||
in += nr; | |||||
p1 += nr; | |||||
if (in >= _inmax) | |||||
{ | { | ||||
nr = ph / np; | |||||
ph -= nr * np; | |||||
in += nr; | |||||
p1 += nr * _nchan;; | |||||
if (in >= _inmax) | |||||
n = 2 * hl - nr; | |||||
p2 = _buff; | |||||
for (j = 0; j < _nchan; j++) | |||||
{ | { | ||||
n = (2 * hl - nr) * _nchan; | |||||
memcpy (_buff, p1, n * sizeof (float)); | |||||
in = 0; | |||||
p1 = _buff; | |||||
p2 = p1 + n; | |||||
memmove (p2 + j * di, p1 + j * di, n * sizeof (float)); | |||||
} | } | ||||
in = 0; | |||||
p1 = _buff; | |||||
p2 = p1 + n; | |||||
} | } | ||||
} | } | ||||
} | } | ||||
_index = in; | _index = in; | ||||
_nread = nr; | _nread = nr; | ||||
_phase = ph; | _phase = ph; | ||||
@@ -1,6 +1,6 @@ | |||||
// ---------------------------------------------------------------------------- | // ---------------------------------------------------------------------------- | ||||
// | // | ||||
// Copyright (C) 2006-2012 Fons Adriaensen <fons@linuxaudio.org> | |||||
// Copyright (C) 2006-2023 Fons Adriaensen <fons@linuxaudio.org> | |||||
// | // | ||||
// This program is free software; you can redistribute it and/or modify | // This program is free software; you can redistribute it and/or modify | ||||
// it under the terms of the GNU General Public License as published by | // it under the terms of the GNU General Public License as published by | ||||
@@ -32,31 +32,31 @@ public: | |||||
Resampler (void) noexcept; | Resampler (void) noexcept; | ||||
~Resampler (void); | ~Resampler (void); | ||||
int setup (unsigned int fs_inp, | |||||
bool setup (unsigned int fs_inp, | |||||
unsigned int fs_out, | unsigned int fs_out, | ||||
unsigned int nchan, | unsigned int nchan, | ||||
unsigned int hlen); | unsigned int hlen); | ||||
int setup (unsigned int fs_inp, | |||||
bool setup (unsigned int fs_inp, | |||||
unsigned int fs_out, | unsigned int fs_out, | ||||
unsigned int nchan, | unsigned int nchan, | ||||
unsigned int hlen, | unsigned int hlen, | ||||
double frel); | double frel); | ||||
void clear (void); | |||||
bool reset (void) noexcept; | |||||
unsigned int nchan (void) const noexcept { return _nchan; } | |||||
unsigned int filtlen (void) const noexcept { return inpsize (); } // Deprecated | |||||
unsigned int inpsize (void) const noexcept; | |||||
double inpdist (void) const noexcept; | |||||
bool process (void); | |||||
void clear (void); | |||||
bool reset (void) noexcept; | |||||
int nchan (void) const noexcept { return _nchan; } | |||||
int filtlen (void) const noexcept { return inpsize (); } // Deprecated | |||||
int inpsize (void) const noexcept; | |||||
double inpdist (void) const noexcept; | |||||
bool process (void); | |||||
unsigned int inp_count; | unsigned int inp_count; | ||||
unsigned int out_count; | unsigned int out_count; | ||||
float *inp_data; | float *inp_data; | ||||
float *out_data; | float *out_data; | ||||
void *inp_list; | |||||
void *out_list; | |||||
float **inp_list; | |||||
float **out_list; | |||||
private: | private: | ||||
@@ -69,6 +69,7 @@ private: | |||||
unsigned int _phase; | unsigned int _phase; | ||||
unsigned int _pstep; | unsigned int _pstep; | ||||
float *_buff; | float *_buff; | ||||
void *_dummy [8]; | |||||
}; | }; | ||||
@@ -1,7 +1,7 @@ | |||||
// ---------------------------------------------------------------------------- | // ---------------------------------------------------------------------------- | ||||
// | // | ||||
// Copyright (C) 2006-2013 Fons Adriaensen <fons@linuxaudio.org> | |||||
// | |||||
// Copyright (C) 2006-2023 Fons Adriaensen <fons@linuxaudio.org> | |||||
// | |||||
// This program is free software; you can redistribute it and/or modify | // This program is free software; you can redistribute it and/or modify | ||||
// it under the terms of the GNU General Public License as published by | // it under the terms of the GNU General Public License as published by | ||||
// the Free Software Foundation; either version 3 of the License, or | // the Free Software Foundation; either version 3 of the License, or | ||||
@@ -22,10 +22,20 @@ | |||||
#include <stdio.h> | #include <stdio.h> | ||||
#include <string.h> | #include <string.h> | ||||
#include <math.h> | #include <math.h> | ||||
#undef ENABLE_VEC4 | |||||
#if defined(__SSE2_MATH__) | |||||
# define ENABLE_VEC4 | |||||
# include <xmmintrin.h> | |||||
#elif defined(__ARM_NEON) || defined(__ARM_NEON__) | |||||
# define ENABLE_VEC4 | |||||
# include <arm_neon.h> | |||||
#endif | |||||
#include "vresampler.h" | #include "vresampler.h" | ||||
VResampler::VResampler (void) : | |||||
VResampler::VResampler (void) noexcept : | |||||
_table (0), | _table (0), | ||||
_nchan (0), | _nchan (0), | ||||
_buff (0), | _buff (0), | ||||
@@ -42,62 +52,81 @@ VResampler::~VResampler (void) | |||||
} | } | ||||
int VResampler::setup (double ratio, | |||||
unsigned int nchan, | |||||
unsigned int hlen) | |||||
bool VResampler::setup (double ratio, | |||||
unsigned int nchan, | |||||
unsigned int hlen) | |||||
{ | { | ||||
if ((hlen < 8) || (hlen > 96) || (16 * ratio < 1) || (ratio > 256)) return 1; | |||||
return setup (ratio, nchan, hlen, 1.0 - 2.6 / hlen); | return setup (ratio, nchan, hlen, 1.0 - 2.6 / hlen); | ||||
} | } | ||||
int VResampler::setup (double ratio, | |||||
unsigned int nchan, | |||||
unsigned int hlen, | |||||
double frel) | |||||
bool VResampler::setup (double ratio, | |||||
unsigned int nchan, | |||||
unsigned int hlen, | |||||
double frel) | |||||
{ | { | ||||
unsigned int h, k, n; | |||||
double s; | |||||
unsigned int hl, mi, n; | |||||
double dp; | |||||
Resampler_table *T = 0; | Resampler_table *T = 0; | ||||
if (! nchan) return 1; | |||||
n = NPHASE; | |||||
s = n / ratio; | |||||
h = hlen; | |||||
k = 250; | |||||
if (ratio < 1) | |||||
if (!nchan || (hlen < 8) || (hlen > 96) || (64 * ratio < 1) || (ratio > 256)) | |||||
{ | |||||
clear (); | |||||
return false; | |||||
} | |||||
dp = NPHASE / ratio; | |||||
hl = hlen; | |||||
mi = 32; | |||||
if (ratio < 1.0) | |||||
{ | { | ||||
frel *= ratio; | frel *= ratio; | ||||
h = (unsigned int)(ceil (h / ratio)); | |||||
k = (unsigned int)(ceil (k / ratio)); | |||||
hl = (unsigned int)(ceil (hl / ratio)); | |||||
mi = (unsigned int)(ceil (mi / ratio)); | |||||
} | } | ||||
T = Resampler_table::create (frel, h, n); | |||||
#ifdef ENABLE_VEC4 | |||||
hl = (hl + 3) & ~3; | |||||
#endif | |||||
T = Resampler_table::create (frel, hl, NPHASE); | |||||
clear (); | clear (); | ||||
if (T) | if (T) | ||||
{ | { | ||||
_table = T; | |||||
_buff = new float [nchan * (2 * h - 1 + k)]; | |||||
_c1 = new float [2 * h]; | |||||
_c2 = new float [2 * h]; | |||||
_nchan = nchan; | |||||
_inmax = k; | |||||
_ratio = ratio; | |||||
_pstep = s; | |||||
_qstep = s; | |||||
_wstep = 1; | |||||
return reset (); | |||||
_table = T; | |||||
n = nchan * (2 * hl + mi); | |||||
#ifdef ENABLE_VEC4 | |||||
posix_memalign ((void **)(&_buff), 16, n * sizeof (float)); | |||||
posix_memalign ((void **)(&_c1), 16, hl * sizeof (float)); | |||||
posix_memalign ((void **)(&_c2), 16, hl * sizeof (float)); | |||||
#else | |||||
_buff = new float [n]; | |||||
_c1 = new float [hl]; | |||||
_c2 = new float [hl]; | |||||
#endif | |||||
_nchan = nchan; | |||||
_ratio = ratio; | |||||
_inmax = mi; | |||||
_pstep = dp; | |||||
_qstep = dp; | |||||
_wstep = 1; | |||||
return reset (); | |||||
} | } | ||||
else return 1; | |||||
else return false; | |||||
} | } | ||||
void VResampler::clear (void) | void VResampler::clear (void) | ||||
{ | { | ||||
Resampler_table::destroy (_table); | Resampler_table::destroy (_table); | ||||
#ifdef ENABLE_VEC4 | |||||
free (_buff); | |||||
free (_c1); | |||||
free (_c2); | |||||
#else | |||||
delete[] _buff; | delete[] _buff; | ||||
delete[] _c1; | delete[] _c1; | ||||
delete[] _c2; | delete[] _c2; | ||||
_buff = 0; | |||||
#endif | |||||
_buff = 0; | |||||
_c1 = 0; | _c1 = 0; | ||||
_c2 = 0; | _c2 = 0; | ||||
_table = 0; | _table = 0; | ||||
@@ -133,44 +162,49 @@ void VResampler::set_rratio (double r) | |||||
} | } | ||||
double VResampler::inpdist (void) const | |||||
double VResampler::inpdist (void) const noexcept | |||||
{ | { | ||||
if (!_table) return 0; | if (!_table) return 0; | ||||
return (int)(_table->_hl + 1 - _nread) - _phase / _table->_np; | return (int)(_table->_hl + 1 - _nread) - _phase / _table->_np; | ||||
} | } | ||||
int VResampler::inpsize (void) const | |||||
int VResampler::inpsize (void) const noexcept | |||||
{ | { | ||||
if (!_table) return 0; | if (!_table) return 0; | ||||
return 2 * _table->_hl; | return 2 * _table->_hl; | ||||
} | } | ||||
int VResampler::reset (void) | |||||
bool VResampler::reset (void) noexcept | |||||
{ | { | ||||
if (!_table) return 1; | |||||
if (!_table) return false; | |||||
inp_count = 0; | inp_count = 0; | ||||
out_count = 0; | out_count = 0; | ||||
inp_data = 0; | inp_data = 0; | ||||
out_data = 0; | out_data = 0; | ||||
_index = 0; | _index = 0; | ||||
_phase = 0; | |||||
_nread = 2 * _table->_hl; | |||||
_nread = 0; | |||||
_nzero = 0; | _nzero = 0; | ||||
return 0; | |||||
_phase = 0; | |||||
if (_table) | |||||
{ | |||||
_nread = 2 * _table->_hl; | |||||
return true; | |||||
} | |||||
return false; | |||||
} | } | ||||
int VResampler::process (void) | |||||
bool VResampler::process (void) | |||||
{ | { | ||||
unsigned int k, np, in, nr, n, c; | |||||
int i, hl, nz; | |||||
double ph, dp, dd; | |||||
int nr, np, hl, nz, di, i, n; | |||||
unsigned int in, j; | |||||
double ph, dp, dd; | |||||
float a, b, *p1, *p2, *q1, *q2; | float a, b, *p1, *p2, *q1, *q2; | ||||
if (!_table) return 1; | |||||
if (!_table) return false; | |||||
hl = _table->_hl; | hl = _table->_hl; | ||||
np = _table->_np; | np = _table->_np; | ||||
@@ -179,94 +213,169 @@ int VResampler::process (void) | |||||
nz = _nzero; | nz = _nzero; | ||||
ph = _phase; | ph = _phase; | ||||
dp = _pstep; | dp = _pstep; | ||||
n = (2 * hl - nr) * _nchan; | |||||
p1 = _buff + in * _nchan; | |||||
p2 = p1 + n; | |||||
p1 = _buff + in; | |||||
p2 = p1 + 2 * hl - nr; | |||||
di = 2 * hl + _inmax; | |||||
while (out_count) | while (out_count) | ||||
{ | { | ||||
if (nr) | |||||
{ | |||||
if (inp_count == 0) break; | |||||
if (inp_data) | |||||
{ | |||||
for (c = 0; c < _nchan; c++) p2 [c] = inp_data [c]; | |||||
inp_data += _nchan; | |||||
nz = 0; | |||||
} | |||||
else | |||||
{ | |||||
for (c = 0; c < _nchan; c++) p2 [c] = 0; | |||||
if (nz < 2 * hl) nz++; | |||||
} | |||||
nr--; | |||||
p2 += _nchan; | |||||
inp_count--; | |||||
} | |||||
else | |||||
{ | |||||
if (out_data) | |||||
{ | |||||
if (nz < 2 * hl) | |||||
{ | |||||
k = (unsigned int) ph; | |||||
b = (float)(ph - k); | |||||
a = 1.0f - b; | |||||
q1 = _table->_ctab + hl * k; | |||||
q2 = _table->_ctab + hl * (np - k); | |||||
for (i = 0; i < hl; i++) | |||||
{ | |||||
_c1 [i] = a * q1 [i] + b * q1 [i + hl]; | |||||
_c2 [i] = a * q2 [i] + b * q2 [i - hl]; | |||||
} | |||||
for (c = 0; c < _nchan; c++) | |||||
{ | |||||
q1 = p1 + c; | |||||
q2 = p2 + c; | |||||
a = 1e-25f; | |||||
for (i = 0; i < hl; i++) | |||||
{ | |||||
q2 -= _nchan; | |||||
a += *q1 * _c1 [i] + *q2 * _c2 [i]; | |||||
q1 += _nchan; | |||||
} | |||||
*out_data++ = a - 1e-25f; | |||||
} | |||||
} | |||||
else | |||||
{ | |||||
for (c = 0; c < _nchan; c++) *out_data++ = 0; | |||||
} | |||||
} | |||||
out_count--; | |||||
dd = _qstep - dp; | |||||
if (fabs (dd) < 1e-30) dp = _qstep; | |||||
else dp += _wstep * dd; | |||||
ph += dp; | |||||
if (ph >= np) | |||||
{ | |||||
nr = (unsigned int) floor( ph / np); | |||||
ph -= nr * np;; | |||||
in += nr; | |||||
p1 += nr * _nchan;; | |||||
if (in >= _inmax) | |||||
{ | |||||
n = (2 * hl - nr) * _nchan; | |||||
memcpy (_buff, p1, n * sizeof (float)); | |||||
in = 0; | |||||
p1 = _buff; | |||||
p2 = p1 + n; | |||||
} | |||||
} | |||||
} | |||||
while (nr && inp_count) | |||||
{ | |||||
if (inp_data) | |||||
{ | |||||
for (j = 0; j < _nchan; j++) p2 [j * di] = inp_data [j]; | |||||
inp_data += _nchan; | |||||
nz = 0; | |||||
} | |||||
else | |||||
{ | |||||
for (j = 0; j < _nchan; j++) p2 [j * di] = 0; | |||||
if (nz < 2 * hl) nz++; | |||||
} | |||||
p2++; | |||||
nr--; | |||||
inp_count--; | |||||
} | |||||
if (nr) break; | |||||
if (out_data) | |||||
{ | |||||
if (nz < 2 * hl) | |||||
{ | |||||
n = (unsigned int) ph; | |||||
b = (float)(ph - n); | |||||
a = 1.0f - b; | |||||
q1 = _table->_ctab + hl * n; | |||||
q2 = _table->_ctab + hl * (np - n); | |||||
#if defined(__SSE2_MATH__) | |||||
__m128 C1, C2, Q1, Q2, S; | |||||
C1 = _mm_load1_ps (&a); | |||||
C2 = _mm_load1_ps (&b); | |||||
for (i = 0; i < hl; i += 4) | |||||
{ | |||||
Q1 = _mm_load_ps (q1 + i); | |||||
Q2 = _mm_load_ps (q1 + i + hl); | |||||
S = _mm_add_ps (_mm_mul_ps (Q1, C1), _mm_mul_ps (Q2, C2)); | |||||
_mm_store_ps (_c1 + i, S); | |||||
Q1 = _mm_load_ps (q2 + i); | |||||
Q2 = _mm_load_ps (q2 + i - hl); | |||||
S = _mm_add_ps (_mm_mul_ps (Q1, C1), _mm_mul_ps (Q2, C2)); | |||||
_mm_store_ps (_c2 + i, S); | |||||
} | |||||
for (j = 0; j < _nchan; j++) | |||||
{ | |||||
q1 = p1 + j * di; | |||||
q2 = p2 + j * di; | |||||
S = _mm_setzero_ps (); | |||||
for (i = 0; i < hl; i += 4) | |||||
{ | |||||
C1 = _mm_load_ps (_c1 + i); | |||||
Q1 = _mm_loadu_ps (q1); | |||||
q2 -= 4; | |||||
S = _mm_add_ps (S, _mm_mul_ps (C1, Q1)); | |||||
C2 = _mm_loadr_ps (_c2 + i); | |||||
Q2 = _mm_loadu_ps (q2); | |||||
q1 += 4; | |||||
S = _mm_add_ps (S, _mm_mul_ps (C2, Q2)); | |||||
} | |||||
*out_data++ = S [0] + S [1] + S [2] + S [3]; | |||||
} | |||||
#elif defined(__ARM_NEON) || defined(__ARM_NEON__) | |||||
// ARM64 version by Nicolas Belin <nbelin@baylibre.com> | |||||
float32x4_t *C1 = (float32x4_t *)_c1; | |||||
float32x4_t *C2 = (float32x4_t *)_c2; | |||||
float32x4_t S, T; | |||||
for (i = 0; i < (hl>>2); i++) | |||||
{ | |||||
T = vmulq_n_f32 (vld1q_f32 (q1 + hl), b); | |||||
C1 [i] = vmlaq_n_f32 (T, vld1q_f32 (q1), a); | |||||
T = vmulq_n_f32 (vld1q_f32 (q2 - hl), b); | |||||
C2 [i] = vmlaq_n_f32 (T, vld1q_f32 (q2), a); | |||||
q2 += 4; | |||||
q1 += 4; | |||||
} | |||||
for (j = 0; j < _nchan; j++) | |||||
{ | |||||
q1 = p1 + j * di; | |||||
q2 = p2 + j * di - 4; | |||||
T = vrev64q_f32 (vld1q_f32 (q2)); | |||||
S = vmulq_f32 (vextq_f32 (T, T, 2), C2 [0]); | |||||
S = vmlaq_f32 (S, vld1q_f32 (q1), C1 [0]); | |||||
for (i = 1; i < (hl>>2); i++) | |||||
{ | |||||
q2 -= 4; | |||||
q1 += 4; | |||||
T = vrev64q_f32 (vld1q_f32 (q2)); | |||||
S = vmlaq_f32 (S, vextq_f32 (T, T, 2), C2 [i]); | |||||
S = vmlaq_f32 (S, vld1q_f32 (q1), C1 [i]); | |||||
} | |||||
*out_data++ = vaddvq_f32 (S); | |||||
} | |||||
#else | |||||
float s; | |||||
for (i = 0; i < hl; i++) | |||||
{ | |||||
_c1 [i] = a * q1 [i] + b * q1 [i + hl]; | |||||
_c2 [i] = a * q2 [i] + b * q2 [i - hl]; | |||||
} | |||||
for (j = 0; j < _nchan; j++) | |||||
{ | |||||
q1 = p1 + j * di; | |||||
q2 = p2 + j * di; | |||||
s = 1e-30f; | |||||
for (i = 0; i < hl; i++) | |||||
{ | |||||
q2--; | |||||
s += *q1 * _c1 [i] + *q2 * _c2 [i]; | |||||
q1++; | |||||
} | |||||
*out_data++ = s - 1e-30f; | |||||
} | |||||
#endif | |||||
} | |||||
else | |||||
{ | |||||
for (j = 0; j < _nchan; j++) *out_data++ = 0; | |||||
} | |||||
} | |||||
out_count--; | |||||
dd = _qstep - dp; | |||||
if (fabs (dd) < 1e-20) dp = _qstep; | |||||
else dp += _wstep * dd; | |||||
ph += dp; | |||||
if (ph >= np) | |||||
{ | |||||
nr = (unsigned int) floor (ph / np); | |||||
ph -= nr * np;; | |||||
in += nr; | |||||
p1 += nr; | |||||
if (in >= _inmax) | |||||
{ | |||||
n = 2 * hl - nr; | |||||
p2 = _buff; | |||||
for (j = 0; j < _nchan; j++) | |||||
{ | |||||
memmove (p2 + j * di, p1 + j * di, n * sizeof (float)); | |||||
} | |||||
in = 0; | |||||
p1 = _buff; | |||||
p2 = p1 + n; | |||||
} | |||||
} | |||||
} | } | ||||
_index = in; | _index = in; | ||||
_nread = nr; | _nread = nr; | ||||
_phase = ph; | _phase = ph; | ||||
_pstep = dp; | _pstep = dp; | ||||
_nzero = nz; | _nzero = nz; | ||||
return 0; | |||||
return true; | |||||
} | } | ||||
@@ -1,6 +1,6 @@ | |||||
// ---------------------------------------------------------------------------- | // ---------------------------------------------------------------------------- | ||||
// | // | ||||
// Copyright (C) 2006-2012 Fons Adriaensen <fons@linuxaudio.org> | |||||
// Copyright (C) 2006-2023 Fons Adriaensen <fons@linuxaudio.org> | |||||
// | // | ||||
// This program is free software; you can redistribute it and/or modify | // This program is free software; you can redistribute it and/or modify | ||||
// it under the terms of the GNU General Public License as published by | // it under the terms of the GNU General Public License as published by | ||||
@@ -29,28 +29,28 @@ class VResampler | |||||
{ | { | ||||
public: | public: | ||||
VResampler (void); | |||||
VResampler (void) noexcept; | |||||
~VResampler (void); | ~VResampler (void); | ||||
int setup (double ratio, | |||||
bool setup (double ratio, | |||||
unsigned int nchan, | unsigned int nchan, | ||||
unsigned int hlen); | unsigned int hlen); | ||||
int setup (double ratio, | |||||
bool setup (double ratio, | |||||
unsigned int nchan, | unsigned int nchan, | ||||
unsigned int hlen, | unsigned int hlen, | ||||
double frel); | double frel); | ||||
void clear (void); | void clear (void); | ||||
int reset (void); | |||||
int nchan (void) const { return _nchan; } | |||||
int inpsize (void) const; | |||||
double inpdist (void) const; | |||||
int process (void); | |||||
bool reset (void) noexcept; | |||||
int nchan (void) const noexcept { return _nchan; } | |||||
int inpsize (void) const noexcept; | |||||
double inpdist (void) const noexcept; | |||||
bool process (void); | |||||
void set_phase (double p); | void set_phase (double p); | ||||
void set_rrfilt (double t); | void set_rrfilt (double t); | ||||
void set_rratio (double r); | |||||
void set_rratio (double r); | |||||
unsigned int inp_count; | unsigned int inp_count; | ||||
unsigned int out_count; | unsigned int out_count; | ||||
@@ -61,7 +61,7 @@ public: | |||||
private: | private: | ||||
enum { NPHASE = 256 }; | |||||
enum { NPHASE = 120 }; | |||||
Resampler_table *_table; | Resampler_table *_table; | ||||
unsigned int _nchan; | unsigned int _nchan; | ||||
@@ -77,6 +77,7 @@ private: | |||||
float *_buff; | float *_buff; | ||||
float *_c1; | float *_c1; | ||||
float *_c2; | float *_c2; | ||||
void *_dummy [8]; | |||||
}; | }; | ||||