This enables SIMD optimisations of this function (the LFE interpolation FIR, which the decoder now calls through DCADSPContext.lfe_fir). Originally committed as revision 22861 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.6)
| @@ -87,7 +87,7 @@ OBJS-$(CONFIG_CLJR_ENCODER) += cljr.o | |||||
| OBJS-$(CONFIG_COOK_DECODER) += cook.o | OBJS-$(CONFIG_COOK_DECODER) += cook.o | ||||
| OBJS-$(CONFIG_CSCD_DECODER) += cscd.o | OBJS-$(CONFIG_CSCD_DECODER) += cscd.o | ||||
| OBJS-$(CONFIG_CYUV_DECODER) += cyuv.o | OBJS-$(CONFIG_CYUV_DECODER) += cyuv.o | ||||
| OBJS-$(CONFIG_DCA_DECODER) += dca.o synth_filter.o | |||||
| OBJS-$(CONFIG_DCA_DECODER) += dca.o synth_filter.o dcadsp.o | |||||
| OBJS-$(CONFIG_DNXHD_DECODER) += dnxhddec.o dnxhddata.o | OBJS-$(CONFIG_DNXHD_DECODER) += dnxhddec.o dnxhddata.o | ||||
| OBJS-$(CONFIG_DNXHD_ENCODER) += dnxhdenc.o dnxhddata.o \ | OBJS-$(CONFIG_DNXHD_ENCODER) += dnxhdenc.o dnxhddata.o \ | ||||
| mpegvideo_enc.o motion_est.o \ | mpegvideo_enc.o motion_est.o \ | ||||
| @@ -41,6 +41,7 @@ | |||||
| #include "dcahuff.h" | #include "dcahuff.h" | ||||
| #include "dca.h" | #include "dca.h" | ||||
| #include "synth_filter.h" | #include "synth_filter.h" | ||||
| #include "dcadsp.h" | |||||
| //#define TRACE | //#define TRACE | ||||
| @@ -256,6 +257,7 @@ typedef struct { | |||||
| DSPContext dsp; | DSPContext dsp; | ||||
| FFTContext imdct; | FFTContext imdct; | ||||
| SynthFilterContext synth; | SynthFilterContext synth; | ||||
| DCADSPContext dcadsp; | |||||
| } DCAContext; | } DCAContext; | ||||
| static const uint16_t dca_vlc_offs[] = { | static const uint16_t dca_vlc_offs[] = { | ||||
| @@ -788,7 +790,7 @@ static void qmf_32_subbands(DCAContext * s, int chans, | |||||
| } | } | ||||
| } | } | ||||
| static void lfe_interpolation_fir(int decimation_select, | |||||
| static void lfe_interpolation_fir(DCAContext *s, int decimation_select, | |||||
| int num_deci_sample, float *samples_in, | int num_deci_sample, float *samples_in, | ||||
| float *samples_out, float scale, | float *samples_out, float scale, | ||||
| float bias) | float bias) | ||||
| @@ -801,7 +803,7 @@ static void lfe_interpolation_fir(int decimation_select, | |||||
| * samples_out: An array holding interpolated samples | * samples_out: An array holding interpolated samples | ||||
| */ | */ | ||||
| int decifactor, k, j; | |||||
| int decifactor; | |||||
| const float *prCoeff; | const float *prCoeff; | ||||
| int deciindex; | int deciindex; | ||||
| @@ -815,25 +817,10 @@ static void lfe_interpolation_fir(int decimation_select, | |||||
| } | } | ||||
| /* Interpolation */ | /* Interpolation */ | ||||
| for (deciindex = 0; deciindex < num_deci_sample; deciindex++) { | for (deciindex = 0; deciindex < num_deci_sample; deciindex++) { | ||||
| float *samples_out2 = samples_out + decifactor; | |||||
| const float *cf0 = prCoeff; | |||||
| const float *cf1 = prCoeff + 256; | |||||
| /* One decimated sample generates 2*decifactor interpolated ones */ | |||||
| for (k = 0; k < decifactor; k++) { | |||||
| float v0 = 0.0; | |||||
| float v1 = 0.0; | |||||
| for (j = 0; j < 256 / decifactor; j++) { | |||||
| float s = samples_in[-j]; | |||||
| v0 += s * *cf0++; | |||||
| v1 += s * *--cf1; | |||||
| } | |||||
| *samples_out++ = (v0 * scale) + bias; | |||||
| *samples_out2++ = (v1 * scale) + bias; | |||||
| } | |||||
| s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor, | |||||
| scale, bias); | |||||
| samples_in++; | samples_in++; | ||||
| samples_out += decifactor; | |||||
| samples_out += 2 * decifactor; | |||||
| } | } | ||||
| } | } | ||||
| @@ -1083,7 +1070,7 @@ static int dca_subsubframe(DCAContext * s) | |||||
| if (s->output & DCA_LFE) { | if (s->output & DCA_LFE) { | ||||
| int lfe_samples = 2 * s->lfe * s->subsubframes; | int lfe_samples = 2 * s->lfe * s->subsubframes; | ||||
| lfe_interpolation_fir(s->lfe, 2 * s->lfe, | |||||
| lfe_interpolation_fir(s, s->lfe, 2 * s->lfe, | |||||
| s->lfe_data + lfe_samples + | s->lfe_data + lfe_samples + | ||||
| 2 * s->lfe * subsubframe, | 2 * s->lfe * subsubframe, | ||||
| &s->samples[256 * dca_lfe_index[s->amode]], | &s->samples[256 * dca_lfe_index[s->amode]], | ||||
| @@ -1313,6 +1300,7 @@ static av_cold int dca_decode_init(AVCodecContext * avctx) | |||||
| dsputil_init(&s->dsp, avctx); | dsputil_init(&s->dsp, avctx); | ||||
| ff_mdct_init(&s->imdct, 6, 1, 1.0); | ff_mdct_init(&s->imdct, 6, 1, 1.0); | ||||
| ff_synth_filter_init(&s->synth); | ff_synth_filter_init(&s->synth); | ||||
| ff_dcadsp_init(&s->dcadsp); | |||||
| for(i = 0; i < 6; i++) | for(i = 0; i < 6; i++) | ||||
| s->samples_chanptr[i] = s->samples + i * 256; | s->samples_chanptr[i] = s->samples + i * 256; | ||||
| @@ -0,0 +1,49 @@ | |||||
| /* | |||||
| * Copyright (c) 2004 Gildas Bazin | |||||
| * Copyright (c) 2010 Mans Rullgard <mans@mansr.com> | |||||
| * | |||||
| * This file is part of FFmpeg. | |||||
| * | |||||
| * FFmpeg is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * FFmpeg is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with FFmpeg; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "dcadsp.h" | |||||
/*
 * Portable C version of the LFE interpolation FIR.
 *
 * One decimated input sample is expanded into 2 * decifactor output samples.
 *
 * out        - receives 2 * decifactor floats: out[0 .. decifactor-1] is
 *              filtered with the coefficient table read forwards from
 *              coefs[0], out[decifactor .. 2*decifactor-1] with the same
 *              table read backwards from coefs[255].
 * in         - points at the newest input sample; the filter also reads the
 *              preceding samples in[-1] .. in[-(256 / decifactor - 1)].
 * coefs      - coefficient table; entries 0..255 are accessed.
 * decifactor - number of output samples per half; 256 / decifactor taps are
 *              applied per output sample.
 * scale/bias - each filtered value is stored as value * scale + bias.
 */
static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
                          int decifactor, float scale, float bias)
{
    int phase;

    for (phase = 0; phase < decifactor; phase++) {
        /* Evaluated only once decifactor is known to be >= 1, so a zero
         * decifactor never reaches the division. */
        const int taps = 256 / decifactor;
        /* Each phase consumes its own run of 'taps' coefficients: one run
         * walking up from the start of the table, one walking down from
         * its end. */
        const float *fwd = coefs + phase * taps;
        const float *rev = coefs + 255 - phase * taps;
        float acc_fwd = 0.0;
        float acc_rev = 0.0;
        int tap;

        for (tap = 0; tap < taps; tap++) {
            const float x = in[-tap];

            acc_fwd += x * fwd[tap];
            acc_rev += x * rev[-tap];
        }

        out[phase]              = (acc_fwd * scale) + bias;
        out[decifactor + phase] = (acc_rev * scale) + bias;
    }
}
| void ff_dcadsp_init(DCADSPContext *s) | |||||
| { | |||||
| s->lfe_fir = dca_lfe_fir_c; | |||||
| } | |||||
| @@ -0,0 +1,29 @@ | |||||
| /* | |||||
| * This file is part of FFmpeg. | |||||
| * | |||||
| * FFmpeg is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * FFmpeg is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with FFmpeg; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #ifndef AVCODEC_DCADSP_H | |||||
| #define AVCODEC_DCADSP_H | |||||
/*
 * Table of DSP routines used by the DCA decoder, reached through function
 * pointers so that an implementation can be selected at init time.
 */
typedef struct DCADSPContext {
    /*
     * LFE interpolation FIR: expands one decimated input sample into
     * 2 * decifactor output samples, each stored as value * scale + bias.
     * 'in' points at the newest input sample; earlier samples are read at
     * negative offsets from it.
     */
    void (*lfe_fir)(float *out,
                    const float *in,
                    const float *coefs,
                    int decifactor,
                    float scale,
                    float bias);
} DCADSPContext;

/* Set every function pointer in *s to its C implementation. */
void ff_dcadsp_init(DCADSPContext *s);
| #endif /* AVCODEC_DCADSP_H */ | |||||