Update internal zita-resampler

Signed-off-by: falkTX <falktx@falktx.com>
2 years ago · 67cc40dcfd
--- a/source/modules/zita-resampler/cresampler.cc
+++ b/source/modules/zita-resampler/cresampler.cc
@@ -116,70 +116,70 @@ int CResampler::process (void)

    while (out_count)
    {
 	if (nr)
 	{
 	    if (inp_count == 0) break;
        if (nr)
        {
            if (inp_count == 0) break;
            n = (4 - nr) * _nchan;
  	    if (inp_data)
 	    {
            if (inp_data)
            {
                for (c = 0; c < _nchan; c++) pb [n + c] = inp_data [c];
 		inp_data += _nchan;
 		nz = 0;
 	    }
 	    else
 	    {
                inp_data += _nchan;
                nz = 0;
            }
            else
            {
                for (c = 0; c < _nchan; c++) pb [n + c] = 0;
 		if (nz < 4) nz++;
 	    }
 	    nr--;
 	    inp_count--;
 	}
 	else
 	{
 	    n = _nchan;
 	    if (out_data)
 	    {
 		if (nz < 4)
 		{
 		    a = ph;
 		    b = 1 - a;
 		    d = a * b / 2;
 		    m0 = -d * b;
 		    m1 = b + (3 * b - 1) * d;
 		    m2 = a + (3 * a - 1) * d;
 		    m3 = -d * a;
 		    for (c = 0; c < n; c++)
 		    {
 			*out_data++ = m0 * pb [0] 
                if (nz < 4) nz++;
            }
            nr--;
            inp_count--;
        }
        else
        {
            n = _nchan;
            if (out_data)
            {
                if (nz < 4)
                {
                    a = ph;
                    b = 1 - a;
                    d = a * b / 2;
                    m0 = -d * b;
                    m1 = b + (3 * b - 1) * d;
                    m2 = a + (3 * a - 1) * d;
                    m3 = -d * a;
                    for (c = 0; c < n; c++)
                    {
                        *out_data++ = m0 * pb [0]
                                    + m1 * pb [n]
                                    + m2 * pb [2 * n]
 			            + m3 * pb [3 * n];
 			pb++;
 		    }
 		    pb -= n;
 		}
 		else
 		{
 		    for (c = 0; c < n; c++) *out_data++ = 0;
 		}
 	    }
 	    out_count--;

 	    ph += _pstep;
 	    if (ph >= 1.0)
 	    {
 		nr = (unsigned int) floor (ph);
 		ph -= nr;
 		in += nr;
 		pb += nr * _nchan;
 		if (in >= _inmax)
 		{
 		    memcpy (_buff, pb, (4 - nr) * _nchan * sizeof (float));
 		    in = 0;
 		    pb = _buff;
 		}
 	    }
 	}
                                    + m3 * pb [3 * n];
                        pb++;
                    }
                    pb -= n;
                }
                else
                {
                    for (c = 0; c < n; c++) *out_data++ = 0;
                }
            }
            out_count--;

            ph += _pstep;
            if (ph >= 1.0)
            {
                nr = (unsigned int) floor (ph);
                ph -= nr;
                in += nr;
                pb += nr * _nchan;
                if (in >= _inmax)
                {
                    memcpy (_buff, pb, (4 - nr) * _nchan * sizeof (float));
                    in = 0;
                    pb = _buff;
                }
            }
        }
    }

    _index = in;
--- a/source/modules/zita-resampler/resampler-table.cc
+++ b/source/modules/zita-resampler/resampler-table.cc
@@ -1,7 +1,7 @@
 // ----------------------------------------------------------------------------
 //
 //  Copyright (C) 2006-2012 Fons Adriaensen <fons@linuxaudio.org>
 //    
 //  Copyright (C) 2006-2023 Fons Adriaensen <fons@linuxaudio.org>
 //
 //  This program is free software; you can redistribute it and/or modify
 //  it under the terms of the GNU General Public License as published by
 //  the Free Software Foundation; either version 3 of the License, or
@@ -24,6 +24,13 @@
 #include <math.h>
 #include "resampler-table.h"


 #undef ENABLE_VEC4
 #if defined(__SSE2_MATH__) || defined(__ARM_NEON) || defined(__ARM_NEON__)
 # define ENABLE_VEC4
 #endif


 static double sinc (double x)
 {
    x = fabs (x);
@@ -42,7 +49,6 @@ static double wind (double x)
 }



 Resampler_table  *Resampler_table::_list = 0;
 Resampler_mutex   Resampler_table::_mutex;

@@ -54,11 +60,16 @@ Resampler_table::Resampler_table (double fr, unsigned int hl, unsigned int np) :
    _hl (hl),
    _np (np)
 {
    unsigned int  i, j;
    unsigned int  i, j, n;
    double        t;
    float         *p;

    _ctab = new float [hl * (np + 1)];
    n = hl * (np + 1);
 #ifdef ENABLE_VEC4
    posix_memalign ((void **) &_ctab, 16, n * sizeof (float));
 #else
    _ctab = new float [n];
 #endif
    p = _ctab;
    for (j = 0; j <= np; j++)
    {
@@ -75,7 +86,11 @@ Resampler_table::Resampler_table (double fr, unsigned int hl, unsigned int np) :

 // Releases the coefficient table _ctab. The deallocator mirrors the
 // allocation path the constructor selects with ENABLE_VEC4: storage obtained
 // from posix_memalign() is returned with free(), storage obtained from
 // new[] is returned with delete[].
 Resampler_table::~Resampler_table (void)
 {
 #ifdef ENABLE_VEC4
    free (_ctab);
 #else
    delete[] _ctab;
 #endif
 }


--- a/source/modules/zita-resampler/resampler-table.h
+++ b/source/modules/zita-resampler/resampler-table.h
@@ -1,6 +1,6 @@
 // ----------------------------------------------------------------------------
 //
 //  Copyright (C) 2006-2012 Fons Adriaensen <fons@linuxaudio.org>
 //  Copyright (C) 2006-2023 Fons Adriaensen <fons@linuxaudio.org>
 //    
 //  This program is free software; you can redistribute it and/or modify
 //  it under the terms of the GNU General Public License as published by
--- a/source/modules/zita-resampler/resampler.cc
+++ b/source/modules/zita-resampler/resampler.cc
@@ -1,7 +1,7 @@
 // ----------------------------------------------------------------------------
 //
 //  Copyright (C) 2006-2012 Fons Adriaensen <fons@linuxaudio.org>
 //    
 //  Copyright (C) 2006-2023 Fons Adriaensen <fons@linuxaudio.org>
 //
 //  This program is free software; you can redistribute it and/or modify
 //  it under the terms of the GNU General Public License as published by
 //  the Free Software Foundation; either version 3 of the License, or
@@ -22,6 +22,16 @@
 #include <stdio.h>
 #include <string.h>
 #include <math.h>

 #undef ENABLE_VEC4
 #if defined(__SSE2_MATH__)
 # define ENABLE_VEC4
 # include <xmmintrin.h>
 #elif defined(__ARM_NEON) || defined(__ARM_NEON__)
 # define ENABLE_VEC4
 # include <arm_neon.h>
 #endif

 #include "resampler.h"


@@ -31,20 +41,20 @@ static unsigned int gcd (unsigned int a, unsigned int b)
    if (b == 0) return a;
    while (1)
    {
 	if (a > b)
 	{
 	    a = a % b;
 	    if (a == 0) return b;
 	    if (a == 1) return 1;
 	}
 	else
 	{
 	    b = b % a;
 	    if (b == 0) return a;
 	    if (b == 1) return 1;
 	}
    }    
    return 1; 
        if (a > b)
        {
            a = a % b;
            if (a == 0) return b;
            if (a == 1) return 1;
        }
        else
        {
            b = b % a;
            if (b == 0) return a;
            if (b == 1) return 1;
        }
    }
    return 1;
 }


@@ -63,66 +73,82 @@ Resampler::~Resampler (void)
 }


 // NOTE(review): this span comes from a diff hunk whose +/- markers are
 // missing, so two signatures (returning int and returning bool) appear back
 // to back. The `return 1` range test uses the int convention and is
 // presumably the removed line; the five-argument overload carries the same
 // hlen range test on its bool path — confirm against the patched file.
 //
 // Convenience overload: forwards to the five-argument setup() with frel
 // computed as 1.0 - 2.6 / hlen, and returns that call's result.
 int Resampler::setup (unsigned int fs_inp,
                      unsigned int fs_out,
                      unsigned int nchan,
                      unsigned int hlen)
 bool Resampler::setup (unsigned int fs_inp,
                       unsigned int fs_out,
                       unsigned int nchan,
                       unsigned int hlen)
 {
    if ((hlen < 8) || (hlen > 96)) return 1;
    return setup (fs_inp, fs_out, nchan, hlen, 1.0 - 2.6 / hlen);
 }


 // NOTE(review): this span comes from a diff hunk whose +/- markers are
 // missing, so lines of two variants of this function are interleaved (two
 // signatures, two declarations of `n`, paired `_inmax` / `_pstep`
 // assignments, two final returns). The summary below follows the lines that
 // use np/dp/mi/hl and return bool, which appear to be the post-change
 // variant — confirm against the patched file before relying on it.
 //
 // Configures the resampler for the rate pair fs_inp -> fs_out.
 //   fs_inp, fs_out : sample rates; r = fs_out / fs_inp, and each rate is
 //                    divided by gcd (fs_out, fs_inp) to give np and dp.
 //   nchan          : channel count, must be non-zero.
 //   hlen           : must lie in [8, 96]; copied to hl, which is scaled by
 //                    1 / r when r < 1 and, with ENABLE_VEC4, rounded up to a
 //                    multiple of 4.
 //   frel           : multiplied by r when r < 1, then passed to
 //                    Resampler_table::create together with hl and np.
 // Returns false after calling clear() when nchan is zero, hlen is out of
 // range, 64 * r < 1 or np > 1000. Returns false when create() yields no
 // table. Otherwise allocates nchan * (2 * hl + mi) floats for _buff, stores
 // mi in _inmax and dp in _pstep, and returns the result of reset().
 //
 // NOTE(review): on the np/dp path fs_inp and fs_out are not tested for zero
 // (the `fs_inp && fs_out && nchan` test belongs to the other variant), so
 // `fs_out / n` divides by whatever gcd() yields for zero inputs — verify.
 int Resampler::setup (unsigned int fs_inp,
                      unsigned int fs_out,
                      unsigned int nchan,
                      unsigned int hlen,
                      double       frel)
 bool Resampler::setup (unsigned int fs_inp,
                       unsigned int fs_out,
                       unsigned int nchan,
                       unsigned int hlen,
                       double       frel)
 {
    unsigned int       g, h, k, n, s;
    unsigned int       np, dp, mi, hl, n;
    double             r;
    float              *B = 0;
    Resampler_table    *T = 0;

    k = s = 0;
    if (fs_inp && fs_out && nchan)
    if (!nchan || (hlen < 8) || (hlen > 96))
    {
        r = (double) fs_out / (double) fs_inp;
        g = gcd (fs_out, fs_inp);
        n = fs_out / g;
        s = fs_inp / g;
        if ((16 * r >= 1) && (n <= 1000))
        {
            h = hlen;
            k = 250;
            if (r < 1)
            {
                frel *= r;
                h = (unsigned int)(ceil (h / r));
                k = (unsigned int)(ceil (k / r));
            }
            T = Resampler_table::create (frel, h, n);
            B = new float [nchan * (2 * h - 1 + k)];
        }
        clear ();
        return false;
    }

    r = (double) fs_out / (double) fs_inp;
    n = gcd (fs_out, fs_inp);
    np = fs_out / n;
    dp = fs_inp / n;
    if ((64 * r < 1.0) || (np > 1000))
    {
        clear ();
        return false;
    }

    hl = hlen;
    mi = 32;
    if (r < 1.0)
    {
        frel *= r;
        hl = (unsigned int)(ceil (hl / r));
        mi = (unsigned int)(ceil (mi / r));
    }
 #ifdef ENABLE_VEC4
    hl = (hl + 3) & ~3;
 #endif
    T = Resampler_table::create (frel, hl, np);

    clear ();
    if (T)
    {
        _table = T;
        _buff  = B;
         n = nchan * (2 * hl + mi);
 #ifdef ENABLE_VEC4
        // NOTE(review): the posix_memalign return value is not checked before
        // the memset below writes through _buff.
        posix_memalign ((void **)(&_buff), 16, n * sizeof (float));
        memset (_buff, 0, n * sizeof (float));
 #else
        _buff = new float [n];
 #endif
        _nchan = nchan;
        _inmax = k;
        _pstep = s;
        _inmax = mi;
        _pstep = dp;
        return reset ();
    }
    return 1;
    else return false;
 }


 void Resampler::clear (void)
 {
    Resampler_table::destroy (_table);
 #ifdef ENABLE_VEC4
    free (_buff);
 #else
    delete[] _buff;
 #endif
    _buff  = 0;
    _table = 0;
    _nchan = 0;
@@ -139,7 +165,7 @@ double Resampler::inpdist (void) const noexcept
 }


 unsigned int Resampler::inpsize (void) const noexcept
 int Resampler::inpsize (void) const noexcept
 {
    if (!_table) return 0;
    return 2 * _table->_hl;
@@ -152,102 +178,157 @@ bool Resampler::reset (void) noexcept

    inp_count = 0;
    out_count = 0;
    inp_data = nullptr;
    out_data = nullptr;
    inp_data = 0;
    out_data = 0;
    _index = 0;
    _nread = 0;
    _nzero = 0;
    _phase = 0;
    _nread = 2 * _table->_hl;
    return true;
    if (_table)
    {
        _nread = 2 * _table->_hl;
        return true;
    }
    return false;
 }


 bool Resampler::process (void)
 {
    unsigned int   hl, ph, np, dp, in, nr, nz, i, n, c;
    float          *p1, *p2;
    unsigned int   hl, np, ph, dp, in, nr, nz, di, i, j, n;
    float          *c1, *c2, *p1, *p2, *q1, *q2;

    if (!_table) return false;

    hl = _table->_hl;
    np = _table->_np;
    dp = _pstep;
    in = _index;
    nr = _nread;
    ph = _phase;
    nz = _nzero;
    n = (2 * hl - nr) * _nchan;
    p1 = _buff + in * _nchan;
    p2 = p1 + n;
    ph = _phase;

    p1 = _buff + in;
    p2 = p1 + 2 * hl - nr;
    di = 2 * hl + _inmax;

    while (out_count)
    {
        if (nr)
        while (nr && inp_count)
        {
            if (inp_count == 0) break;
            if (inp_data)
            {
                for (c = 0; c < _nchan; c++) p2 [c] = inp_data [c];
                for (j = 0; j < _nchan; j++) p2 [j * di] = inp_data [j];
                inp_data += _nchan;
              nz = 0;
                nz = 0;
            }
            else
            {
                for (c = 0; c < _nchan; c++) p2 [c] = 0;
                for (j = 0; j < _nchan; j++) p2 [j * di] = 0;
                if (nz < 2 * hl) nz++;
            }
            p2++;
            nr--;
            p2 += _nchan;
            inp_count--;
        }
        else
        if (nr) break;

        if (out_data)
        {
            if (out_data)
            if (nz < 2 * hl)
            {
                if (nz < 2 * hl)
                c1 = _table->_ctab + hl * ph;
                c2 = _table->_ctab + hl * (np - ph);

 #if defined(__SSE2_MATH__)
                __m128 C1, C2, Q1, Q2, S;
                for (j = 0; j < _nchan; j++)
                {
                    float *c1 = _table->_ctab + hl * ph;
                    float *c2 = _table->_ctab + hl * (np - ph);
                    for (c = 0; c < _nchan; c++)
                    q1 = p1 + j * di;
                    q2 = p2 + j * di;
                    S = _mm_setzero_ps ();
                    for (i = 0; i < hl; i += 4)
                    {
                        float *q1 = p1 + c;
                        float *q2 = p2 + c;
                        float s = 1e-20f;
                        for (i = 0; i < hl; i++)
                        {
                            q2 -= _nchan;
                            s += *q1 * c1 [i] + *q2 * c2 [i];
                            q1 += _nchan;
                        }
                        *out_data++ = s - 1e-20f;
                        C1 = _mm_load_ps (c1 + i);
                        Q1 = _mm_loadu_ps (q1);
                        q2 -= 4;
                        S = _mm_add_ps (S, _mm_mul_ps (C1, Q1));
                        C2 = _mm_loadr_ps (c2 + i);
                        Q2 = _mm_loadu_ps (q2);
                        q1 += 4;
                        S = _mm_add_ps (S, _mm_mul_ps (C2, Q2));
                    }
                    *out_data++ = S [0] + S [1] + S [2] + S [3];
                }
                else

 #elif defined(__ARM_NEON) || defined(__ARM_NEON__)
                // ARM64 version by Nicolas Belin <nbelin@baylibre.com>
                float32x4_t *C1 = (float32x4_t *)c1;
                float32x4_t *C2 = (float32x4_t *)c2;
                float32x4_t S, T;
                for (j = 0; j < _nchan; j++)
                {
                    for (c = 0; c < _nchan; c++) *out_data++ = 0;
                    q1 = p1 + j * di;
                    q2 = p2 + j * di - 4;
                    T = vrev64q_f32 (vld1q_f32 (q2));
                    S = vmulq_f32 (vextq_f32 (T, T, 2), C2 [0]);
                    S = vmlaq_f32 (S, vld1q_f32(q1), C1 [0]);
                    for (i = 1; i < (hl>>2); i++)
                    {
                        q2 -= 4;
                        q1 += 4;
                        T = vrev64q_f32 (vld1q_f32 (q2));
                        S = vmlaq_f32 (S, vextq_f32 (T, T, 2), C2 [i]);
                        S = vmlaq_f32 (S, vld1q_f32 (q1), C1 [i]);
                    }
                    *out_data++ = vaddvq_f32(S);
                }

 #else
                float s;
                for (j = 0; j < _nchan; j++)
                {
                    q1 = p1 + j * di;
                    q2 = p2 + j * di;
                    s = 1e-30f;
                    for (i = 0; i < hl; i++)
                    {
                        q2--;
                        s += *q1 * c1 [i] + *q2 * c2 [i];
                        q1++;
                    }
                    *out_data++ = s - 1e-30f;
                }
 #endif
            }
            out_count--;
            else
            {
                for (j = 0; j < _nchan; j++) *out_data++ = 0;
            }
        }
        out_count--;

            ph += dp;
            if (ph >= np)
        ph += dp;
        if (ph >= np)
        {
            nr = ph / np;
            ph -= nr * np;
            in += nr;
            p1 += nr;
            if (in >= _inmax)
            {
                nr = ph / np;
                ph -= nr * np;
                in += nr;
                p1 += nr * _nchan;;
                if (in >= _inmax)
                n = 2 * hl - nr;
                p2 = _buff;
                for (j = 0; j < _nchan; j++)
                {
                    n = (2 * hl - nr) * _nchan;
                    memcpy (_buff, p1, n * sizeof (float));
                    in = 0;
                    p1 = _buff;
                    p2 = p1 + n;
                    memmove (p2 + j * di, p1 + j * di, n * sizeof (float));
                }
                in = 0;
                p1 = _buff;
                p2 = p1 + n;
            }
        }
    }

    _index = in;
    _nread = nr;
    _phase = ph;
--- a/source/modules/zita-resampler/resampler.h
+++ b/source/modules/zita-resampler/resampler.h
@@ -1,6 +1,6 @@
 // ----------------------------------------------------------------------------
 //
 //  Copyright (C) 2006-2012 Fons Adriaensen <fons@linuxaudio.org>
 //  Copyright (C) 2006-2023 Fons Adriaensen <fons@linuxaudio.org>
 //    
 //  This program is free software; you can redistribute it and/or modify
 //  it under the terms of the GNU General Public License as published by
@@ -32,31 +32,31 @@ public:
    Resampler (void) noexcept;
    ~Resampler (void);

    int  setup (unsigned int fs_inp,
    bool setup (unsigned int fs_inp,
                unsigned int fs_out,
                unsigned int nchan,
                unsigned int hlen);

    int  setup (unsigned int fs_inp,
    bool setup (unsigned int fs_inp,
                unsigned int fs_out,
                unsigned int nchan,
                unsigned int hlen,
                double       frel);

    void         clear (void);
    bool         reset (void) noexcept;
    unsigned int nchan (void) const noexcept { return _nchan; }
    unsigned int filtlen (void) const noexcept { return inpsize (); } // Deprecated
    unsigned int inpsize (void) const noexcept;
    double       inpdist (void) const noexcept;
    bool         process (void);
    void   clear (void);
    bool   reset (void) noexcept;
    int    nchan (void) const noexcept { return _nchan; }
    int    filtlen (void) const noexcept { return inpsize (); } // Deprecated
    int    inpsize (void) const noexcept;
    double inpdist (void) const noexcept;
    bool    process (void);

    unsigned int         inp_count;
    unsigned int         out_count;
    float               *inp_data;
    float               *out_data;
    void                *inp_list;
    void                *out_list;
    float              **inp_list;
    float              **out_list;

 private:

@@ -69,6 +69,7 @@ private:
    unsigned int         _phase;
    unsigned int         _pstep;
    float               *_buff;
    void                *_dummy [8];
 };


--- a/source/modules/zita-resampler/vresampler.cc
+++ b/source/modules/zita-resampler/vresampler.cc
@@ -1,7 +1,7 @@
 // ----------------------------------------------------------------------------
 //
 //  Copyright (C) 2006-2013 Fons Adriaensen <fons@linuxaudio.org>
 //    
 //  Copyright (C) 2006-2023 Fons Adriaensen <fons@linuxaudio.org>
 //
 //  This program is free software; you can redistribute it and/or modify
 //  it under the terms of the GNU General Public License as published by
 //  the Free Software Foundation; either version 3 of the License, or
@@ -22,10 +22,20 @@
 #include <stdio.h>
 #include <string.h>
 #include <math.h>

 #undef ENABLE_VEC4
 #if defined(__SSE2_MATH__)
 # define ENABLE_VEC4
 # include <xmmintrin.h>
 #elif defined(__ARM_NEON) || defined(__ARM_NEON__)
 # define ENABLE_VEC4
 # include <arm_neon.h>
 #endif

 #include "vresampler.h"


 VResampler::VResampler (void) :
 VResampler::VResampler (void) noexcept :
    _table (0),
    _nchan (0),
    _buff  (0),
@@ -42,62 +52,81 @@ VResampler::~VResampler (void)
 }


 // NOTE(review): this span comes from a diff hunk whose +/- markers are
 // missing, so two signatures (returning int and returning bool) appear back
 // to back. The `return 1` range test uses the int convention and is
 // presumably the removed line; the four-argument overload tests hlen and
 // ratio on its bool path — confirm against the patched file.
 //
 // Convenience overload: forwards to the four-argument setup() with frel
 // computed as 1.0 - 2.6 / hlen, and returns that call's result.
 int VResampler::setup (double       ratio,
                       unsigned int nchan,
                       unsigned int hlen)
 bool VResampler::setup (double       ratio,
                        unsigned int nchan,
                        unsigned int hlen)
 {
    if ((hlen < 8) || (hlen > 96) || (16 * ratio < 1) || (ratio > 256)) return 1;
    return setup (ratio, nchan, hlen, 1.0 - 2.6 / hlen);
 }


 // NOTE(review): this span comes from a diff hunk whose +/- markers are
 // missing, so lines of two variants of this function are interleaved (two
 // signatures, two declarations of `n`, two create() calls, two assignment
 // runs inside `if (T)`, two final returns). The summary below follows the
 // lines that use hl/mi/dp and return bool, which appear to be the
 // post-change variant — confirm against the patched file.
 //
 // Configures the variable-ratio resampler.
 //   ratio : must satisfy 64 * ratio >= 1 and ratio <= 256; dp is
 //           NPHASE / ratio.
 //   nchan : channel count, must be non-zero.
 //   hlen  : must lie in [8, 96]; copied to hl, which is scaled by 1 / ratio
 //           when ratio < 1 and, with ENABLE_VEC4, rounded up to a multiple
 //           of 4.
 //   frel  : multiplied by ratio when ratio < 1, then passed to
 //           Resampler_table::create together with hl and NPHASE.
 // Returns false after calling clear() when any argument check fails, and
 // false when create() yields no table. Otherwise allocates
 // nchan * (2 * hl + mi) floats for _buff and hl floats each for _c1 and _c2,
 // stores ratio, mi and dp (as both _pstep and _qstep), sets _wstep to 1 and
 // returns the result of reset().
 int VResampler::setup (double       ratio,
                       unsigned int nchan,
                       unsigned int hlen,
                       double       frel)
 bool VResampler::setup (double       ratio,
                        unsigned int nchan,
                        unsigned int hlen,
                        double       frel)
 {
    unsigned int       h, k, n;
    double             s;
    unsigned int       hl, mi, n;
    double             dp;
    Resampler_table    *T = 0;

    if (! nchan) return 1; 
    n = NPHASE;
    s = n / ratio;
    h = hlen;
    k = 250;
    if (ratio < 1) 
    if (!nchan || (hlen < 8) || (hlen > 96) || (64 * ratio < 1) || (ratio > 256))
    {
        clear ();
        return false;
    }

    dp = NPHASE / ratio;
    hl = hlen;
    mi = 32;
    if (ratio < 1.0)
    {
        frel *= ratio;
        h = (unsigned int)(ceil (h / ratio));
        k = (unsigned int)(ceil (k / ratio));
        hl = (unsigned int)(ceil (hl / ratio));
        mi = (unsigned int)(ceil (mi / ratio));
    }
    T = Resampler_table::create (frel, h, n);
 #ifdef ENABLE_VEC4
    hl = (hl + 3) & ~3;
 #endif
    T = Resampler_table::create (frel, hl, NPHASE);
    clear ();
    if (T)
    {
 	_table = T;
 	_buff  = new float [nchan * (2 * h - 1 + k)];
 	_c1 = new float [2 * h];
 	_c2 = new float [2 * h];
 	_nchan = nchan;
 	_inmax = k;
 	_ratio = ratio;
 	_pstep = s;
 	_qstep = s;
 	_wstep = 1;
 	return reset ();
        _table = T;
        n = nchan * (2 * hl + mi);
 #ifdef ENABLE_VEC4
        // NOTE(review): none of the three posix_memalign return values is
        // checked, and no memset follows the _buff allocation here.
        posix_memalign ((void **)(&_buff), 16, n * sizeof (float));
        posix_memalign ((void **)(&_c1), 16, hl * sizeof (float));
        posix_memalign ((void **)(&_c2), 16, hl * sizeof (float));
 #else
        _buff  = new float [n];
        _c1 = new float [hl];
        _c2 = new float [hl];
 #endif
        _nchan = nchan;
        _ratio = ratio;
        _inmax = mi;
        _pstep = dp;
        _qstep = dp;
        _wstep = 1;
        return reset ();
    }
    else return 1;
    else return false;
 }


 void VResampler::clear (void)
 {
    Resampler_table::destroy (_table);
 #ifdef ENABLE_VEC4
    free (_buff);
    free (_c1);
    free (_c2);
 #else
    delete[] _buff;
    delete[] _c1;
    delete[] _c2;
    _buff  = 0;
 #endif
    _buff = 0;
    _c1 = 0;
    _c2 = 0;
    _table = 0;
@@ -133,44 +162,49 @@ void VResampler::set_rratio (double r)
 }


 // NOTE(review): two signature lines appear because the +/- markers of this
 // diff hunk are missing; they differ only in the trailing noexcept.
 //
 // Returns 0 when no table has been set up; otherwise
 // (int)(_hl + 1 - _nread) - _phase / _np, using the table's _hl and _np.
 double VResampler::inpdist (void) const
 double VResampler::inpdist (void) const noexcept
 {
    if (!_table) return 0;
    return (int)(_table->_hl + 1 - _nread) - _phase / _table->_np;
 }


 // NOTE(review): two signature lines appear because the +/- markers of this
 // diff hunk are missing; they differ only in the trailing noexcept.
 //
 // Returns 0 when no table has been set up; otherwise twice the table's _hl.
 int VResampler::inpsize (void) const
 int VResampler::inpsize (void) const noexcept
 {
    if (!_table) return 0;
    return 2 * _table->_hl;
 }


 // NOTE(review): this span comes from a diff hunk whose +/- markers are
 // missing, so lines of two variants are interleaved (two signatures, two
 // early guards, duplicated `_phase` / `_nread` assignments, `return 0`
 // alongside `return true`). The summary follows the lines that return bool,
 // which appear to be the post-change variant — confirm against the patched
 // file.
 //
 // Resets the streaming state: zeroes inp_count, out_count, inp_data,
 // out_data, _index, _nread, _nzero and _phase. With a table present, _nread
 // is then set to 2 * _hl and true is returned; without a table the function
 // returns false.
 // NOTE(review): on the bool path the trailing `if (_table)` repeats the
 // condition already handled by the early `if (!_table) return false;`.
 int VResampler::reset (void)
 bool VResampler::reset (void) noexcept
 {
    if (!_table) return 1;
    if (!_table) return false;

    inp_count = 0;
    out_count = 0;
    inp_data = 0;
    out_data = 0;
    _index = 0;
    _phase = 0; 
    _nread = 2 * _table->_hl;
    _nread = 0;
    _nzero = 0;
    return 0;
    _phase = 0;
    if (_table)
    {
        _nread = 2 * _table->_hl;
        return true;
    }
    return false;
 }


 int VResampler::process (void)
 bool VResampler::process (void)
 {
    unsigned int   k, np, in, nr, n, c;
    int            i, hl, nz;
    double         ph, dp, dd; 
    int            nr, np, hl, nz, di, i, n;
    unsigned int   in, j;
    double         ph, dp, dd;
    float          a, b, *p1, *p2, *q1, *q2;

    if (!_table) return 1;
    if (!_table) return false;

    hl = _table->_hl;
    np = _table->_np;
@@ -179,94 +213,169 @@ int VResampler::process (void)
    nz = _nzero;
    ph = _phase;
    dp = _pstep;
    n = (2 * hl - nr) * _nchan;
    p1 = _buff + in * _nchan;
    p2 = p1 + n;

    p1 = _buff + in;
    p2 = p1 + 2 * hl - nr;
    di = 2 * hl + _inmax;

    while (out_count)
    {
 	if (nr)
 	{
 	    if (inp_count == 0) break;
  	    if (inp_data)
 	    {
                for (c = 0; c < _nchan; c++) p2 [c] = inp_data [c];
 		inp_data += _nchan;
 		nz = 0;
 	    }
 	    else
 	    {
                for (c = 0; c < _nchan; c++) p2 [c] = 0;
 		if (nz < 2 * hl) nz++;
 	    }
 	    nr--;
 	    p2 += _nchan;
 	    inp_count--;
 	}
 	else
 	{
 	    if (out_data)
 	    {
 		if (nz < 2 * hl)
 		{
 		    k = (unsigned int) ph;
 		    b = (float)(ph - k);
 		    a = 1.0f - b;
 		    q1 = _table->_ctab + hl * k;
 		    q2 = _table->_ctab + hl * (np - k);
     		    for (i = 0; i < hl; i++)
 		    {
                        _c1 [i] = a * q1 [i] + b * q1 [i + hl];
    		        _c2 [i] = a * q2 [i] + b * q2 [i - hl];
 		    }
 		    for (c = 0; c < _nchan; c++)
 		    {
 			q1 = p1 + c;
 			q2 = p2 + c;
 			a = 1e-25f;
 			for (i = 0; i < hl; i++)
 			{
 			    q2 -= _nchan;
 			    a += *q1 * _c1 [i] + *q2 * _c2 [i];
 			    q1 += _nchan;
 			}
 			*out_data++ = a - 1e-25f;
 		    }
 		}
 		else
 		{
 		    for (c = 0; c < _nchan; c++) *out_data++ = 0;
 		}
 	    }
 	    out_count--;

 	    dd =  _qstep - dp;
 	    if (fabs (dd) < 1e-30) dp = _qstep;
   	    else dp += _wstep * dd;
 	    ph += dp;
 	    if (ph >= np)
 	    {
 		nr = (unsigned int) floor( ph / np);
 		ph -= nr * np;;
 		in += nr;
 		p1 += nr * _nchan;;
 		if (in >= _inmax)
 		{
 		    n = (2 * hl - nr) * _nchan;
 		    memcpy (_buff, p1, n * sizeof (float));
 		    in = 0;
 		    p1 = _buff;
 		    p2 = p1 + n;
 		}
 	    }
 	}
        while (nr && inp_count)
        {
            if (inp_data)
            {
                for (j = 0; j < _nchan; j++) p2 [j * di] = inp_data [j];
                inp_data += _nchan;
                nz = 0;
            }
            else
            {
                for (j = 0; j < _nchan; j++) p2 [j * di] = 0;
                if (nz < 2 * hl) nz++;
            }
            p2++;
            nr--;
            inp_count--;
        }
        if (nr) break;

        if (out_data)
        {
            if (nz < 2 * hl)
            {
                n = (unsigned int) ph;
                b = (float)(ph - n);
                a = 1.0f - b;
                q1 = _table->_ctab + hl * n;
                q2 = _table->_ctab + hl * (np - n);

 #if defined(__SSE2_MATH__)
                __m128 C1, C2, Q1, Q2, S;
                C1 = _mm_load1_ps (&a);
                C2 = _mm_load1_ps (&b);
                for (i = 0; i < hl; i += 4)
                {
                    Q1 = _mm_load_ps (q1 + i);
                    Q2 = _mm_load_ps (q1 + i + hl);
                    S = _mm_add_ps (_mm_mul_ps (Q1, C1), _mm_mul_ps (Q2, C2));
                    _mm_store_ps (_c1 + i, S);
                    Q1 = _mm_load_ps (q2 + i);
                    Q2 = _mm_load_ps (q2 + i - hl);
                    S = _mm_add_ps (_mm_mul_ps (Q1, C1), _mm_mul_ps (Q2, C2));
                    _mm_store_ps (_c2 + i, S);
                }
                for (j = 0; j < _nchan; j++)
                {
                    q1 = p1 + j * di;
                    q2 = p2 + j * di;
                    S = _mm_setzero_ps ();
                    for (i = 0; i < hl; i += 4)
                    {
                        C1 = _mm_load_ps (_c1 + i);
                        Q1 = _mm_loadu_ps (q1);
                        q2 -= 4;
                        S = _mm_add_ps (S, _mm_mul_ps (C1, Q1));
                        C2 = _mm_loadr_ps (_c2 + i);
                        Q2 = _mm_loadu_ps (q2);
                        q1 += 4;
                        S = _mm_add_ps (S, _mm_mul_ps (C2, Q2));
                    }
                    *out_data++ = S [0] + S [1] + S [2] + S [3];
                }

 #elif defined(__ARM_NEON) || defined(__ARM_NEON__)
                // ARM64 version by Nicolas Belin <nbelin@baylibre.com>
                float32x4_t *C1 = (float32x4_t *)_c1;
                float32x4_t *C2 = (float32x4_t *)_c2;
                float32x4_t S, T;
                for (i = 0; i < (hl>>2); i++)
                {
                    T = vmulq_n_f32 (vld1q_f32 (q1 + hl), b);
                    C1 [i] = vmlaq_n_f32 (T, vld1q_f32 (q1), a);
                    T = vmulq_n_f32 (vld1q_f32 (q2 - hl), b);
                    C2 [i] = vmlaq_n_f32 (T, vld1q_f32 (q2), a);
                    q2 += 4;
                    q1 += 4;
                }
                for (j = 0; j < _nchan; j++)
                {
                    q1 = p1 + j * di;
                    q2 = p2 + j * di - 4;
                    T = vrev64q_f32 (vld1q_f32 (q2));
                    S = vmulq_f32 (vextq_f32 (T, T, 2), C2 [0]);
                    S = vmlaq_f32 (S, vld1q_f32 (q1), C1 [0]);
                    for (i = 1; i < (hl>>2); i++)
                    {
                        q2 -= 4;
                        q1 += 4;
                        T = vrev64q_f32 (vld1q_f32 (q2));
                        S = vmlaq_f32 (S, vextq_f32 (T, T, 2), C2 [i]);
                        S = vmlaq_f32 (S, vld1q_f32 (q1), C1 [i]);
                    }
                    *out_data++ = vaddvq_f32 (S);
                }

 #else
                float s;
                for (i = 0; i < hl; i++)
                {
                    _c1 [i] = a * q1 [i] + b * q1 [i + hl];
                    _c2 [i] = a * q2 [i] + b * q2 [i - hl];
                }
                for (j = 0; j < _nchan; j++)
                {
                    q1 = p1 + j * di;
                    q2 = p2 + j * di;
                    s = 1e-30f;
                    for (i = 0; i < hl; i++)
                    {
                        q2--;
                        s += *q1 * _c1 [i] + *q2 * _c2 [i];
                        q1++;
                    }
                    *out_data++ = s - 1e-30f;
                }
 #endif
            }
            else
            {
                for (j = 0; j < _nchan; j++) *out_data++ = 0;
            }
        }
        out_count--;

        dd =  _qstep - dp;
        if (fabs (dd) < 1e-20) dp = _qstep;
        else dp += _wstep * dd;
        ph += dp;
        if (ph >= np)
        {
            nr = (unsigned int) floor (ph / np);
            ph -= nr * np;;
            in += nr;
            p1 += nr;

            if (in >= _inmax)
            {
                n = 2 * hl - nr;
                p2 = _buff;
                for (j = 0; j < _nchan; j++)
                {
                    memmove (p2 + j * di, p1 + j * di, n * sizeof (float));
                }
                in = 0;
                p1 = _buff;
                p2 = p1 + n;
            }
        }
    }

    _index = in;
    _nread = nr;
    _phase = ph;
    _pstep = dp;
    _nzero = nz;

    return 0;
    return true;
 }

--- a/source/modules/zita-resampler/vresampler.h
+++ b/source/modules/zita-resampler/vresampler.h
@@ -1,6 +1,6 @@
 // ----------------------------------------------------------------------------
 //
 //  Copyright (C) 2006-2012 Fons Adriaensen <fons@linuxaudio.org>
 //  Copyright (C) 2006-2023 Fons Adriaensen <fons@linuxaudio.org>
 //    
 //  This program is free software; you can redistribute it and/or modify
 //  it under the terms of the GNU General Public License as published by
@@ -29,28 +29,28 @@ class VResampler
 {
 public:

    VResampler (void);
    VResampler (void) noexcept;
    ~VResampler (void);

    int  setup (double       ratio,
    bool setup (double       ratio,
                unsigned int nchan,
                unsigned int hlen);

    int  setup (double       ratio,
    bool setup (double       ratio,
                unsigned int nchan,
                unsigned int hlen,
                double       frel);

    void   clear (void);
    int    reset (void);
    int    nchan (void) const { return _nchan; }
    int    inpsize (void) const;
    double inpdist (void) const;
    int    process (void);
    
    bool   reset (void) noexcept;
    int    nchan (void) const noexcept { return _nchan; }
    int    inpsize (void) const noexcept;
    double inpdist (void) const noexcept;
    bool   process (void);

    void set_phase (double p);
    void set_rrfilt (double t);
    void set_rratio (double r);    
    void set_rratio (double r);

    unsigned int         inp_count;
    unsigned int         out_count;
@@ -61,7 +61,7 @@ public:

 private:

    enum { NPHASE = 256 };
    enum { NPHASE = 120 };

    Resampler_table     *_table;
    unsigned int         _nchan;
@@ -77,6 +77,7 @@ private:
    float               *_buff;
    float               *_c1;
    float               *_c2;
    void                *_dummy [8];
 };