The buffer holding the coefficients must be padded with zeros so that it can be passed to DSP functions that may overread. The SSE2/3 versions are a current example, as they process data in batches of 16 bytes. Signed-off-by: Michael Niedermayer <michaelni@gmx.at> tags/n2.2-rc1
| @@ -1681,12 +1681,9 @@ unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy) | |||||
| } | } | ||||
| /** inverse root mean square */ | /** inverse root mean square */ | ||||
| int ff_irms(const int16_t *data) | |||||
| int ff_irms(DSPContext *dsp, const int16_t *data) | |||||
| { | { | ||||
| unsigned int i, sum = 0; | |||||
| for (i=0; i < BLOCKSIZE; i++) | |||||
| sum += data[i] * data[i]; | |||||
| unsigned int sum = dsp->scalarproduct_int16(data, data, BLOCKSIZE); | |||||
| if (sum == 0) | if (sum == 0) | ||||
| return 0; /* OOPS - division by zero */ | return 0; /* OOPS - division by zero */ | ||||
| @@ -1698,14 +1695,13 @@ void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs, | |||||
| int cba_idx, int cb1_idx, int cb2_idx, | int cba_idx, int cb1_idx, int cb2_idx, | ||||
| int gval, int gain) | int gval, int gain) | ||||
| { | { | ||||
| int16_t buffer_a[BLOCKSIZE]; | |||||
| int16_t *block; | int16_t *block; | ||||
| int m[3]; | int m[3]; | ||||
| if (cba_idx) { | if (cba_idx) { | ||||
| cba_idx += BLOCKSIZE/2 - 1; | cba_idx += BLOCKSIZE/2 - 1; | ||||
| ff_copy_and_dup(buffer_a, ractx->adapt_cb, cba_idx); | |||||
| m[0] = (ff_irms(buffer_a) * gval) >> 12; | |||||
| ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx); | |||||
| m[0] = (ff_irms(&ractx->dsp, ractx->buffer_a) * gval) >> 12; | |||||
| } else { | } else { | ||||
| m[0] = 0; | m[0] = 0; | ||||
| } | } | ||||
| @@ -1716,7 +1712,7 @@ void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs, | |||||
| block = ractx->adapt_cb + BUFFERSIZE - BLOCKSIZE; | block = ractx->adapt_cb + BUFFERSIZE - BLOCKSIZE; | ||||
| add_wav(block, gain, cba_idx, m, cba_idx? buffer_a: NULL, | |||||
| add_wav(block, gain, cba_idx, m, cba_idx? ractx->buffer_a: NULL, | |||||
| ff_cb1_vects[cb1_idx], ff_cb2_vects[cb2_idx]); | ff_cb1_vects[cb1_idx], ff_cb2_vects[cb2_idx]); | ||||
| memcpy(ractx->curr_sblock, ractx->curr_sblock + BLOCKSIZE, | memcpy(ractx->curr_sblock, ractx->curr_sblock + BLOCKSIZE, | ||||
| @@ -25,6 +25,7 @@ | |||||
| #include <stdint.h> | #include <stdint.h> | ||||
| #include "lpc.h" | #include "lpc.h" | ||||
| #include "audio_frame_queue.h" | #include "audio_frame_queue.h" | ||||
| #include "dsputil.h" | |||||
| #define NBLOCKS 4 ///< number of subblocks within a block | #define NBLOCKS 4 ///< number of subblocks within a block | ||||
| #define BLOCKSIZE 40 ///< subblock size in 16-bit words | #define BLOCKSIZE 40 ///< subblock size in 16-bit words | ||||
| @@ -35,6 +36,7 @@ | |||||
| typedef struct RA144Context { | typedef struct RA144Context { | ||||
| AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
| DSPContext dsp; | |||||
| LPCContext lpc_ctx; | LPCContext lpc_ctx; | ||||
| AudioFrameQueue afq; | AudioFrameQueue afq; | ||||
| int last_frame; | int last_frame; | ||||
| @@ -57,6 +59,8 @@ typedef struct RA144Context { | |||||
| /** Adaptive codebook, its size is two units bigger to avoid a | /** Adaptive codebook, its size is two units bigger to avoid a | ||||
| * buffer overflow. */ | * buffer overflow. */ | ||||
| int16_t adapt_cb[146+2]; | int16_t adapt_cb[146+2]; | ||||
| DECLARE_ALIGNED(16, int16_t, buffer_a)[FFALIGN(BLOCKSIZE,16)]; | |||||
| } RA144Context; | } RA144Context; | ||||
| void ff_copy_and_dup(int16_t *target, const int16_t *source, int offset); | void ff_copy_and_dup(int16_t *target, const int16_t *source, int offset); | ||||
| @@ -68,7 +72,7 @@ unsigned int ff_rms(const int *data); | |||||
| int ff_interp(RA144Context *ractx, int16_t *out, int a, int copyold, | int ff_interp(RA144Context *ractx, int16_t *out, int a, int copyold, | ||||
| int energy); | int energy); | ||||
| unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy); | unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy); | ||||
| int ff_irms(const int16_t *data); | |||||
| int ff_irms(DSPContext *dsp, const int16_t *data/*align 16*/); | |||||
| void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs, | void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs, | ||||
| int cba_idx, int cb1_idx, int cb2_idx, | int cba_idx, int cb1_idx, int cb2_idx, | ||||
| int gval, int gain); | int gval, int gain); | ||||
| @@ -34,10 +34,13 @@ static av_cold int ra144_decode_init(AVCodecContext * avctx) | |||||
| RA144Context *ractx = avctx->priv_data; | RA144Context *ractx = avctx->priv_data; | ||||
| ractx->avctx = avctx; | ractx->avctx = avctx; | ||||
| ff_dsputil_init(&ractx->dsp, avctx); | |||||
| ractx->lpc_coef[0] = ractx->lpc_tables[0]; | ractx->lpc_coef[0] = ractx->lpc_tables[0]; | ||||
| ractx->lpc_coef[1] = ractx->lpc_tables[1]; | ractx->lpc_coef[1] = ractx->lpc_tables[1]; | ||||
| AV_ZERO128(ractx->buffer_a+BLOCKSIZE); | |||||
| avctx->channels = 1; | avctx->channels = 1; | ||||
| avctx->channel_layout = AV_CH_LAYOUT_MONO; | avctx->channel_layout = AV_CH_LAYOUT_MONO; | ||||
| avctx->sample_fmt = AV_SAMPLE_FMT_S16; | avctx->sample_fmt = AV_SAMPLE_FMT_S16; | ||||
| @@ -60,7 +60,9 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx) | |||||
| ractx = avctx->priv_data; | ractx = avctx->priv_data; | ||||
| ractx->lpc_coef[0] = ractx->lpc_tables[0]; | ractx->lpc_coef[0] = ractx->lpc_tables[0]; | ||||
| ractx->lpc_coef[1] = ractx->lpc_tables[1]; | ractx->lpc_coef[1] = ractx->lpc_tables[1]; | ||||
| AV_ZERO128(ractx->buffer_a+BLOCKSIZE); | |||||
| ractx->avctx = avctx; | ractx->avctx = avctx; | ||||
| ff_dsputil_init(&ractx->dsp, avctx); | |||||
| ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER, | ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER, | ||||
| FF_LPC_TYPE_LEVINSON); | FF_LPC_TYPE_LEVINSON); | ||||
| if (ret < 0) | if (ret < 0) | ||||
| @@ -334,7 +336,6 @@ static void ra144_encode_subblock(RA144Context *ractx, | |||||
| float data[BLOCKSIZE] = { 0 }, work[LPC_ORDER + BLOCKSIZE]; | float data[BLOCKSIZE] = { 0 }, work[LPC_ORDER + BLOCKSIZE]; | ||||
| float coefs[LPC_ORDER]; | float coefs[LPC_ORDER]; | ||||
| float zero[BLOCKSIZE], cba[BLOCKSIZE], cb1[BLOCKSIZE], cb2[BLOCKSIZE]; | float zero[BLOCKSIZE], cba[BLOCKSIZE], cb1[BLOCKSIZE], cb2[BLOCKSIZE]; | ||||
| int16_t cba_vect[BLOCKSIZE]; | |||||
| int cba_idx, cb1_idx, cb2_idx, gain; | int cba_idx, cb1_idx, cb2_idx, gain; | ||||
| int i, n; | int i, n; | ||||
| unsigned m[3]; | unsigned m[3]; | ||||
| @@ -373,8 +374,8 @@ static void ra144_encode_subblock(RA144Context *ractx, | |||||
| */ | */ | ||||
| memcpy(cba, work + LPC_ORDER, sizeof(cba)); | memcpy(cba, work + LPC_ORDER, sizeof(cba)); | ||||
| ff_copy_and_dup(cba_vect, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1); | |||||
| m[0] = (ff_irms(cba_vect) * rms) >> 12; | |||||
| ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1); | |||||
| m[0] = (ff_irms(&ractx->dsp, ractx->buffer_a) * rms) >> 12; | |||||
| } | } | ||||
| fixed_cb_search(work + LPC_ORDER, coefs, data, cba_idx, &cb1_idx, &cb2_idx); | fixed_cb_search(work + LPC_ORDER, coefs, data, cba_idx, &cb1_idx, &cb2_idx); | ||||
| for (i = 0; i < BLOCKSIZE; i++) { | for (i = 0; i < BLOCKSIZE; i++) { | ||||