vector_fmul_reverse requires padding the window at the front.

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>

tags/n3.4
| @@ -75,8 +75,8 @@ typedef struct CeltBlock { | |||||
| DECLARE_ALIGNED(32, float, coeffs)[CELT_MAX_FRAME_SIZE]; | DECLARE_ALIGNED(32, float, coeffs)[CELT_MAX_FRAME_SIZE]; | ||||
| /* Used by the encoder */ | /* Used by the encoder */ | ||||
| DECLARE_ALIGNED(32, float, overlap)[120]; | |||||
| DECLARE_ALIGNED(32, float, samples)[CELT_MAX_FRAME_SIZE]; | |||||
| DECLARE_ALIGNED(32, float, overlap)[FFALIGN(CELT_OVERLAP, 16)]; | |||||
| DECLARE_ALIGNED(32, float, samples)[FFALIGN(CELT_MAX_FRAME_SIZE, 16)]; | |||||
| /* postfilter parameters */ | /* postfilter parameters */ | ||||
| int pf_period_new; | int pf_period_new; | ||||
| @@ -210,17 +210,15 @@ static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f) | |||||
| int i, t, ch; | int i, t, ch; | ||||
| float *win = s->scratch; | float *win = s->scratch; | ||||
| /* I think I can use s->dsp->vector_fmul_window for transients at least */ | |||||
| if (f->transient) { | if (f->transient) { | ||||
| for (ch = 0; ch < f->channels; ch++) { | for (ch = 0; ch < f->channels; ch++) { | ||||
| CeltBlock *b = &f->block[ch]; | CeltBlock *b = &f->block[ch]; | ||||
| float *src1 = b->overlap; | float *src1 = b->overlap; | ||||
| for (t = 0; t < f->blocks; t++) { | for (t = 0; t < f->blocks; t++) { | ||||
| float *src2 = &b->samples[CELT_OVERLAP*t]; | float *src2 = &b->samples[CELT_OVERLAP*t]; | ||||
| for (i = 0; i < CELT_OVERLAP; i++) { | |||||
| win[ i] = src1[i]*ff_celt_window[i]; | |||||
| win[CELT_OVERLAP + i] = src2[i]*ff_celt_window[CELT_OVERLAP - i - 1]; | |||||
| } | |||||
| s->dsp->vector_fmul(win, src1, ff_celt_window, CELT_OVERLAP); | |||||
| s->dsp->vector_fmul_reverse(&win[CELT_OVERLAP], src2, | |||||
| ff_celt_window - 8, CELT_OVERLAP + 8); | |||||
| src1 = src2; | src1 = src2; | ||||
| s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win, f->blocks); | s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win, f->blocks); | ||||
| } | } | ||||
| @@ -1096,7 +1096,9 @@ const float ff_celt_postfilter_taps[3][3] = { | |||||
| { 0.7998046875f, 0.1000976562f, 0.0 } | { 0.7998046875f, 0.1000976562f, 0.0 } | ||||
| }; | }; | ||||
| DECLARE_ALIGNED(32, const float, ff_celt_window)[120] = { | |||||
| DECLARE_ALIGNED(32, static const float, ff_celt_window_padded)[136] = { | |||||
| 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, | |||||
| 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, | |||||
| 6.7286966e-05f, 0.00060551348f, 0.0016815970f, 0.0032947962f, 0.0054439943f, | 6.7286966e-05f, 0.00060551348f, 0.0016815970f, 0.0032947962f, 0.0054439943f, | ||||
| 0.0081276923f, 0.011344001f, 0.015090633f, 0.019364886f, 0.024163635f, | 0.0081276923f, 0.011344001f, 0.015090633f, 0.019364886f, 0.024163635f, | ||||
| 0.029483315f, 0.035319905f, 0.041668911f, 0.048525347f, 0.055883718f, | 0.029483315f, 0.035319905f, 0.041668911f, 0.048525347f, 0.055883718f, | ||||
| @@ -1120,9 +1122,13 @@ DECLARE_ALIGNED(32, const float, ff_celt_window)[120] = { | |||||
| 0.99499004f, 0.99592297f, 0.99672162f, 0.99739874f, 0.99796667f, | 0.99499004f, 0.99592297f, 0.99672162f, 0.99739874f, 0.99796667f, | ||||
| 0.99843728f, 0.99882195f, 0.99913147f, 0.99937606f, 0.99956527f, | 0.99843728f, 0.99882195f, 0.99913147f, 0.99937606f, 0.99956527f, | ||||
| 0.99970802f, 0.99981248f, 0.99988613f, 0.99993565f, 0.99996697f, | 0.99970802f, 0.99981248f, 0.99988613f, 0.99993565f, 0.99996697f, | ||||
| 0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.0000000f, | |||||
| 0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.00000000f, | |||||
| 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, | |||||
| 1.00000000f, 1.00000000f, 1.00000000f, | |||||
| }; | }; | ||||
| const float *ff_celt_window = &ff_celt_window_padded[8]; | |||||
| /* square of the window, used for the postfilter */ | /* square of the window, used for the postfilter */ | ||||
| const float ff_celt_window2[120] = { | const float ff_celt_window2[120] = { | ||||
| 4.5275357e-09f, 3.66647e-07f, 2.82777e-06f, 1.08557e-05f, 2.96371e-05f, 6.60594e-05f, | 4.5275357e-09f, 3.66647e-07f, 2.82777e-06f, 1.08557e-05f, 2.96371e-05f, 6.60594e-05f, | ||||
| @@ -154,8 +154,7 @@ extern const uint32_t ff_celt_pvq_u[1272]; | |||||
| extern const float ff_celt_postfilter_taps[3][3]; | extern const float ff_celt_postfilter_taps[3][3]; | ||||
| extern const float ff_celt_window2[120]; | extern const float ff_celt_window2[120]; | ||||
| DECLARE_ALIGNED(32, extern const float, ff_celt_window)[120]; | |||||
| extern const float *ff_celt_window; | |||||
| extern const uint32_t * const ff_celt_pvq_u_row[15]; | extern const uint32_t * const ff_celt_pvq_u_row[15]; | ||||