The x86 runs short on registers because numerous elements are not static. In addition, splitting them allows more optimized code, at least for x86. Signed-off-by: Michael Niedermayer <michaelni@gmx.at> tags/n2.2-rc1
| @@ -47,16 +47,43 @@ void ff_synth_filter_float_neon(FFTContext *imdct, | |||||
| float out[32], const float in[32], | float out[32], const float in[32], | ||||
| float scale); | float scale); | ||||
| static void lfe_fir0_vfp(float *out, const float *in, const float *coefs, | |||||
| float scale) | |||||
| { | |||||
| ff_dca_lfe_fir_vfp(out, in, coefs, 32, scale); | |||||
| } | |||||
| static void lfe_fir1_vfp(float *out, const float *in, const float *coefs, | |||||
| float scale) | |||||
| { | |||||
| ff_dca_lfe_fir_vfp(out, in, coefs, 64, scale); | |||||
| } | |||||
| static void lfe_fir0_neon(float *out, const float *in, const float *coefs, | |||||
| float scale) | |||||
| { | |||||
| ff_dca_lfe_fir_neon(out, in, coefs, 32, scale); | |||||
| } | |||||
| static void lfe_fir1_neon(float *out, const float *in, const float *coefs, | |||||
| float scale) | |||||
| { | |||||
| ff_dca_lfe_fir_neon(out, in, coefs, 64, scale); | |||||
| } | |||||
| av_cold void ff_dcadsp_init_arm(DCADSPContext *s) | av_cold void ff_dcadsp_init_arm(DCADSPContext *s) | ||||
| { | { | ||||
| int cpu_flags = av_get_cpu_flags(); | int cpu_flags = av_get_cpu_flags(); | ||||
| if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) { | if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) { | ||||
| s->lfe_fir = ff_dca_lfe_fir_vfp; | |||||
| s->lfe_fir[0] = lfe_fir0_vfp; | |||||
| s->lfe_fir[1] = lfe_fir1_vfp; | |||||
| s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp; | s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp; | ||||
| } | } | ||||
| if (have_neon(cpu_flags)) | |||||
| s->lfe_fir = ff_dca_lfe_fir_neon; | |||||
| if (have_neon(cpu_flags)) { | |||||
| s->lfe_fir[0] = lfe_fir0_neon; | |||||
| s->lfe_fir[1] = lfe_fir1_neon; | |||||
| } | |||||
| } | } | ||||
| av_cold void ff_synth_filter_init_arm(SynthFilterContext *s) | av_cold void ff_synth_filter_init_arm(SynthFilterContext *s) | ||||
| @@ -1118,23 +1118,23 @@ static void lfe_interpolation_fir(DCAContext *s, int decimation_select, | |||||
| * samples_out: An array holding interpolated samples | * samples_out: An array holding interpolated samples | ||||
| */ | */ | ||||
| int decifactor; | |||||
| int idx; | |||||
| const float *prCoeff; | const float *prCoeff; | ||||
| int deciindex; | int deciindex; | ||||
| /* Select decimation filter */ | /* Select decimation filter */ | ||||
| if (decimation_select == 1) { | if (decimation_select == 1) { | ||||
| decifactor = 64; | |||||
| idx = 1; | |||||
| prCoeff = lfe_fir_128; | prCoeff = lfe_fir_128; | ||||
| } else { | } else { | ||||
| decifactor = 32; | |||||
| idx = 0; | |||||
| prCoeff = lfe_fir_64; | prCoeff = lfe_fir_64; | ||||
| } | } | ||||
| /* Interpolation */ | /* Interpolation */ | ||||
| for (deciindex = 0; deciindex < num_deci_sample; deciindex++) { | for (deciindex = 0; deciindex < num_deci_sample; deciindex++) { | ||||
| s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor, scale); | |||||
| s->dcadsp.lfe_fir[idx](samples_out, samples_in, prCoeff, scale); | |||||
| samples_in++; | samples_in++; | ||||
| samples_out += 2 * decifactor; | |||||
| samples_out += 2 * 32 * (1 + idx); | |||||
| } | } | ||||
| } | } | ||||
| @@ -32,8 +32,9 @@ static void int8x8_fmul_int32_c(float *dst, const int8_t *src, int scale) | |||||
| dst[i] = src[i] * fscale; | dst[i] = src[i] * fscale; | ||||
| } | } | ||||
| static void dca_lfe_fir_c(float *out, const float *in, const float *coefs, | |||||
| int decifactor, float scale) | |||||
| static inline void | |||||
| dca_lfe_fir(float *out, const float *in, const float *coefs, | |||||
| int decifactor, float scale) | |||||
| { | { | ||||
| float *out2 = out + decifactor; | float *out2 = out + decifactor; | ||||
| const float *cf0 = coefs; | const float *cf0 = coefs; | ||||
| @@ -82,9 +83,22 @@ static void dca_qmf_32_subbands(float samples_in[32][8], int sb_act, | |||||
| } | } | ||||
| } | } | ||||
| static void dca_lfe_fir0_c(float *out, const float *in, const float *coefs, | |||||
| float scale) | |||||
| { | |||||
| dca_lfe_fir(out, in, coefs, 32, scale); | |||||
| } | |||||
| static void dca_lfe_fir1_c(float *out, const float *in, const float *coefs, | |||||
| float scale) | |||||
| { | |||||
| dca_lfe_fir(out, in, coefs, 64, scale); | |||||
| } | |||||
| av_cold void ff_dcadsp_init(DCADSPContext *s) | av_cold void ff_dcadsp_init(DCADSPContext *s) | ||||
| { | { | ||||
| s->lfe_fir = dca_lfe_fir_c; | |||||
| s->lfe_fir[0] = dca_lfe_fir0_c; | |||||
| s->lfe_fir[1] = dca_lfe_fir1_c; | |||||
| s->qmf_32_subbands = dca_qmf_32_subbands; | s->qmf_32_subbands = dca_qmf_32_subbands; | ||||
| s->int8x8_fmul_int32 = int8x8_fmul_int32_c; | s->int8x8_fmul_int32 = int8x8_fmul_int32_c; | ||||
| if (ARCH_ARM) ff_dcadsp_init_arm(s); | if (ARCH_ARM) ff_dcadsp_init_arm(s); | ||||
| @@ -23,8 +23,8 @@ | |||||
| #include "synth_filter.h" | #include "synth_filter.h" | ||||
| typedef struct DCADSPContext { | typedef struct DCADSPContext { | ||||
| void (*lfe_fir)(float *out, const float *in, const float *coefs, | |||||
| int decifactor, float scale); | |||||
| void (*lfe_fir[2])(float *out, const float *in, const float *coefs, | |||||
| float scale); | |||||
| void (*qmf_32_subbands)(float samples_in[32][8], int sb_act, | void (*qmf_32_subbands)(float samples_in[32][8], int sb_act, | ||||
| SynthFilterContext *synth, FFTContext *imdct, | SynthFilterContext *synth, FFTContext *imdct, | ||||
| float synth_buf_ptr[512], | float synth_buf_ptr[512], | ||||