This makes the aac decoder and all voice codecs independent of dsputil. (tags/n1.2)
| @@ -291,7 +291,6 @@ typedef struct AACContext { | |||||
| FFTContext mdct; | FFTContext mdct; | ||||
| FFTContext mdct_small; | FFTContext mdct_small; | ||||
| FFTContext mdct_ltp; | FFTContext mdct_ltp; | ||||
| DSPContext dsp; | |||||
| FmtConvertContext fmt_conv; | FmtConvertContext fmt_conv; | ||||
| AVFloatDSPContext fdsp; | AVFloatDSPContext fdsp; | ||||
| int random_state; | int random_state; | ||||
| @@ -895,7 +895,6 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) | |||||
| ff_aac_sbr_init(); | ff_aac_sbr_init(); | ||||
| ff_dsputil_init(&ac->dsp, avctx); | |||||
| ff_fmt_convert_init(&ac->fmt_conv, avctx); | ff_fmt_convert_init(&ac->fmt_conv, avctx); | ||||
| avpriv_float_dsp_init(&ac->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); | avpriv_float_dsp_init(&ac->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); | ||||
| @@ -1358,7 +1357,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], | |||||
| cfo[k] = ac->random_state; | cfo[k] = ac->random_state; | ||||
| } | } | ||||
| band_energy = ac->dsp.scalarproduct_float(cfo, cfo, off_len); | |||||
| band_energy = ac->fdsp.scalarproduct_float(cfo, cfo, off_len); | |||||
| scale = sf[idx] / sqrtf(band_energy); | scale = sf[idx] / sqrtf(band_energy); | ||||
| ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len); | ac->fdsp.vector_fmul_scalar(cfo, cfo, scale, off_len); | ||||
| } | } | ||||
| @@ -21,9 +21,9 @@ | |||||
| */ | */ | ||||
| #include "libavutil/common.h" | #include "libavutil/common.h" | ||||
| #include "libavutil/float_dsp.h" | |||||
| #include "libavutil/mathematics.h" | #include "libavutil/mathematics.h" | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "dsputil.h" | |||||
| #include "acelp_pitch_delay.h" | #include "acelp_pitch_delay.h" | ||||
| #include "celp_math.h" | #include "celp_math.h" | ||||
| @@ -120,7 +120,7 @@ float ff_amr_set_fixed_gain(float fixed_gain_factor, float fixed_mean_energy, | |||||
| // Note 10^(0.05 * -10log(average x2)) = 1/sqrt((average x2)). | // Note 10^(0.05 * -10log(average x2)) = 1/sqrt((average x2)). | ||||
| float val = fixed_gain_factor * | float val = fixed_gain_factor * | ||||
| exp2f(M_LOG2_10 * 0.05 * | exp2f(M_LOG2_10 * 0.05 * | ||||
| (ff_scalarproduct_float_c(pred_table, prediction_error, 4) + | |||||
| (avpriv_scalarproduct_float_c(pred_table, prediction_error, 4) + | |||||
| energy_mean)) / | energy_mean)) / | ||||
| sqrtf(fixed_mean_energy); | sqrtf(fixed_mean_energy); | ||||
| @@ -23,8 +23,8 @@ | |||||
| #include <inttypes.h> | #include <inttypes.h> | ||||
| #include "libavutil/common.h" | #include "libavutil/common.h" | ||||
| #include "libavutil/float_dsp.h" | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "dsputil.h" | |||||
| #include "acelp_vectors.h" | #include "acelp_vectors.h" | ||||
| const uint8_t ff_fc_2pulses_9bits_track1[16] = | const uint8_t ff_fc_2pulses_9bits_track1[16] = | ||||
| @@ -183,7 +183,7 @@ void ff_adaptive_gain_control(float *out, const float *in, float speech_energ, | |||||
| int size, float alpha, float *gain_mem) | int size, float alpha, float *gain_mem) | ||||
| { | { | ||||
| int i; | int i; | ||||
| float postfilter_energ = ff_scalarproduct_float_c(in, in, size); | |||||
| float postfilter_energ = avpriv_scalarproduct_float_c(in, in, size); | |||||
| float gain_scale_factor = 1.0; | float gain_scale_factor = 1.0; | ||||
| float mem = *gain_mem; | float mem = *gain_mem; | ||||
| @@ -204,7 +204,7 @@ void ff_scale_vector_to_given_sum_of_squares(float *out, const float *in, | |||||
| float sum_of_squares, const int n) | float sum_of_squares, const int n) | ||||
| { | { | ||||
| int i; | int i; | ||||
| float scalefactor = ff_scalarproduct_float_c(in, in, n); | |||||
| float scalefactor = avpriv_scalarproduct_float_c(in, in, n); | |||||
| if (scalefactor) | if (scalefactor) | ||||
| scalefactor = sqrt(sum_of_squares / scalefactor); | scalefactor = sqrt(sum_of_squares / scalefactor); | ||||
| for (i = 0; i < n; i++) | for (i = 0; i < n; i++) | ||||
| @@ -44,8 +44,8 @@ | |||||
| #include <math.h> | #include <math.h> | ||||
| #include "libavutil/channel_layout.h" | #include "libavutil/channel_layout.h" | ||||
| #include "libavutil/float_dsp.h" | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "dsputil.h" | |||||
| #include "libavutil/common.h" | #include "libavutil/common.h" | ||||
| #include "celp_filters.h" | #include "celp_filters.h" | ||||
| #include "acelp_filters.h" | #include "acelp_filters.h" | ||||
| @@ -794,8 +794,8 @@ static int synthesis(AMRContext *p, float *lpc, | |||||
| // emphasize pitch vector contribution | // emphasize pitch vector contribution | ||||
| if (p->pitch_gain[4] > 0.5 && !overflow) { | if (p->pitch_gain[4] > 0.5 && !overflow) { | ||||
| float energy = ff_scalarproduct_float_c(excitation, excitation, | |||||
| AMR_SUBFRAME_SIZE); | |||||
| float energy = avpriv_scalarproduct_float_c(excitation, excitation, | |||||
| AMR_SUBFRAME_SIZE); | |||||
| float pitch_factor = | float pitch_factor = | ||||
| p->pitch_gain[4] * | p->pitch_gain[4] * | ||||
| (p->cur_frame_mode == MODE_12k2 ? | (p->cur_frame_mode == MODE_12k2 ? | ||||
| @@ -871,8 +871,8 @@ static float tilt_factor(float *lpc_n, float *lpc_d) | |||||
| ff_celp_lp_synthesis_filterf(hf, lpc_d, hf, AMR_TILT_RESPONSE, | ff_celp_lp_synthesis_filterf(hf, lpc_d, hf, AMR_TILT_RESPONSE, | ||||
| LP_FILTER_ORDER); | LP_FILTER_ORDER); | ||||
| rh0 = ff_scalarproduct_float_c(hf, hf, AMR_TILT_RESPONSE); | |||||
| rh1 = ff_scalarproduct_float_c(hf, hf + 1, AMR_TILT_RESPONSE - 1); | |||||
| rh0 = avpriv_scalarproduct_float_c(hf, hf, AMR_TILT_RESPONSE); | |||||
| rh1 = avpriv_scalarproduct_float_c(hf, hf + 1, AMR_TILT_RESPONSE - 1); | |||||
| // The spec only specifies this check for 12.2 and 10.2 kbit/s | // The spec only specifies this check for 12.2 and 10.2 kbit/s | ||||
| // modes. But in the ref source the tilt is always non-negative. | // modes. But in the ref source the tilt is always non-negative. | ||||
| @@ -892,8 +892,8 @@ static void postfilter(AMRContext *p, float *lpc, float *buf_out) | |||||
| int i; | int i; | ||||
| float *samples = p->samples_in + LP_FILTER_ORDER; // Start of input | float *samples = p->samples_in + LP_FILTER_ORDER; // Start of input | ||||
| float speech_gain = ff_scalarproduct_float_c(samples, samples, | |||||
| AMR_SUBFRAME_SIZE); | |||||
| float speech_gain = avpriv_scalarproduct_float_c(samples, samples, | |||||
| AMR_SUBFRAME_SIZE); | |||||
| float pole_out[AMR_SUBFRAME_SIZE + LP_FILTER_ORDER]; // Output of pole filter | float pole_out[AMR_SUBFRAME_SIZE + LP_FILTER_ORDER]; // Output of pole filter | ||||
| const float *gamma_n, *gamma_d; // Formant filter factor table | const float *gamma_n, *gamma_d; // Formant filter factor table | ||||
| @@ -998,9 +998,9 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data, | |||||
| p->fixed_gain[4] = | p->fixed_gain[4] = | ||||
| ff_amr_set_fixed_gain(fixed_gain_factor, | ff_amr_set_fixed_gain(fixed_gain_factor, | ||||
| ff_scalarproduct_float_c(p->fixed_vector, | |||||
| p->fixed_vector, | |||||
| AMR_SUBFRAME_SIZE) / | |||||
| avpriv_scalarproduct_float_c(p->fixed_vector, | |||||
| p->fixed_vector, | |||||
| AMR_SUBFRAME_SIZE) / | |||||
| AMR_SUBFRAME_SIZE, | AMR_SUBFRAME_SIZE, | ||||
| p->prediction_error, | p->prediction_error, | ||||
| energy_mean[p->cur_frame_mode], energy_pred_fac); | energy_mean[p->cur_frame_mode], energy_pred_fac); | ||||
| @@ -26,10 +26,10 @@ | |||||
| #include "libavutil/channel_layout.h" | #include "libavutil/channel_layout.h" | ||||
| #include "libavutil/common.h" | #include "libavutil/common.h" | ||||
| #include "libavutil/float_dsp.h" | |||||
| #include "libavutil/lfg.h" | #include "libavutil/lfg.h" | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "dsputil.h" | |||||
| #include "lsp.h" | #include "lsp.h" | ||||
| #include "celp_filters.h" | #include "celp_filters.h" | ||||
| #include "acelp_filters.h" | #include "acelp_filters.h" | ||||
| @@ -595,11 +595,11 @@ static void pitch_sharpening(AMRWBContext *ctx, float *fixed_vector) | |||||
| static float voice_factor(float *p_vector, float p_gain, | static float voice_factor(float *p_vector, float p_gain, | ||||
| float *f_vector, float f_gain) | float *f_vector, float f_gain) | ||||
| { | { | ||||
| double p_ener = (double) ff_scalarproduct_float_c(p_vector, p_vector, | |||||
| AMRWB_SFR_SIZE) * | |||||
| double p_ener = (double) avpriv_scalarproduct_float_c(p_vector, p_vector, | |||||
| AMRWB_SFR_SIZE) * | |||||
| p_gain * p_gain; | p_gain * p_gain; | ||||
| double f_ener = (double) ff_scalarproduct_float_c(f_vector, f_vector, | |||||
| AMRWB_SFR_SIZE) * | |||||
| double f_ener = (double) avpriv_scalarproduct_float_c(f_vector, f_vector, | |||||
| AMRWB_SFR_SIZE) * | |||||
| f_gain * f_gain; | f_gain * f_gain; | ||||
| return (p_ener - f_ener) / (p_ener + f_ener); | return (p_ener - f_ener) / (p_ener + f_ener); | ||||
| @@ -768,8 +768,8 @@ static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation, | |||||
| /* emphasize pitch vector contribution in low bitrate modes */ | /* emphasize pitch vector contribution in low bitrate modes */ | ||||
| if (ctx->pitch_gain[0] > 0.5 && ctx->fr_cur_mode <= MODE_8k85) { | if (ctx->pitch_gain[0] > 0.5 && ctx->fr_cur_mode <= MODE_8k85) { | ||||
| int i; | int i; | ||||
| float energy = ff_scalarproduct_float_c(excitation, excitation, | |||||
| AMRWB_SFR_SIZE); | |||||
| float energy = avpriv_scalarproduct_float_c(excitation, excitation, | |||||
| AMRWB_SFR_SIZE); | |||||
| // XXX: Weird part in both ref code and spec. A unknown parameter | // XXX: Weird part in both ref code and spec. A unknown parameter | ||||
| // {beta} seems to be identical to the current pitch gain | // {beta} seems to be identical to the current pitch gain | ||||
| @@ -828,9 +828,9 @@ static void upsample_5_4(float *out, const float *in, int o_size) | |||||
| i++; | i++; | ||||
| for (k = 1; k < 5; k++) { | for (k = 1; k < 5; k++) { | ||||
| out[i] = ff_scalarproduct_float_c(in0 + int_part, | |||||
| upsample_fir[4 - frac_part], | |||||
| UPS_MEM_SIZE); | |||||
| out[i] = avpriv_scalarproduct_float_c(in0 + int_part, | |||||
| upsample_fir[4 - frac_part], | |||||
| UPS_MEM_SIZE); | |||||
| int_part++; | int_part++; | ||||
| frac_part--; | frac_part--; | ||||
| i++; | i++; | ||||
| @@ -856,8 +856,8 @@ static float find_hb_gain(AMRWBContext *ctx, const float *synth, | |||||
| if (ctx->fr_cur_mode == MODE_23k85) | if (ctx->fr_cur_mode == MODE_23k85) | ||||
| return qua_hb_gain[hb_idx] * (1.0f / (1 << 14)); | return qua_hb_gain[hb_idx] * (1.0f / (1 << 14)); | ||||
| tilt = ff_scalarproduct_float_c(synth, synth + 1, AMRWB_SFR_SIZE - 1) / | |||||
| ff_scalarproduct_float_c(synth, synth, AMRWB_SFR_SIZE); | |||||
| tilt = avpriv_scalarproduct_float_c(synth, synth + 1, AMRWB_SFR_SIZE - 1) / | |||||
| avpriv_scalarproduct_float_c(synth, synth, AMRWB_SFR_SIZE); | |||||
| /* return gain bounded by [0.1, 1.0] */ | /* return gain bounded by [0.1, 1.0] */ | ||||
| return av_clipf((1.0 - FFMAX(0.0, tilt)) * (1.25 - 0.25 * wsp), 0.1, 1.0); | return av_clipf((1.0 - FFMAX(0.0, tilt)) * (1.25 - 0.25 * wsp), 0.1, 1.0); | ||||
| @@ -876,7 +876,8 @@ static void scaled_hb_excitation(AMRWBContext *ctx, float *hb_exc, | |||||
| const float *synth_exc, float hb_gain) | const float *synth_exc, float hb_gain) | ||||
| { | { | ||||
| int i; | int i; | ||||
| float energy = ff_scalarproduct_float_c(synth_exc, synth_exc, AMRWB_SFR_SIZE); | |||||
| float energy = avpriv_scalarproduct_float_c(synth_exc, synth_exc, | |||||
| AMRWB_SFR_SIZE); | |||||
| /* Generate a white-noise excitation */ | /* Generate a white-noise excitation */ | ||||
| for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) | for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) | ||||
| @@ -1168,9 +1169,9 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, | |||||
| ctx->fixed_gain[0] = | ctx->fixed_gain[0] = | ||||
| ff_amr_set_fixed_gain(fixed_gain_factor, | ff_amr_set_fixed_gain(fixed_gain_factor, | ||||
| ff_scalarproduct_float_c(ctx->fixed_vector, | |||||
| ctx->fixed_vector, | |||||
| AMRWB_SFR_SIZE) / | |||||
| avpriv_scalarproduct_float_c(ctx->fixed_vector, | |||||
| ctx->fixed_vector, | |||||
| AMRWB_SFR_SIZE) / | |||||
| AMRWB_SFR_SIZE, | AMRWB_SFR_SIZE, | ||||
| ctx->prediction_error, | ctx->prediction_error, | ||||
| ENERGY_MEAN, energy_pred_fac); | ENERGY_MEAN, energy_pred_fac); | ||||
| @@ -142,8 +142,6 @@ void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); | |||||
| void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); | void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); | ||||
| void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); | void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); | ||||
| float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); | |||||
| void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, | void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, | ||||
| int len); | int len); | ||||
| void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, | void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, | ||||
| @@ -293,7 +291,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) | |||||
| c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon; | c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon; | ||||
| } | } | ||||
| c->scalarproduct_float = ff_scalarproduct_float_neon; | |||||
| c->vector_clipf = ff_vector_clipf_neon; | c->vector_clipf = ff_vector_clipf_neon; | ||||
| c->vector_clip_int32 = ff_vector_clip_int32_neon; | c->vector_clip_int32 = ff_vector_clip_int32_neon; | ||||
| @@ -531,19 +531,6 @@ function ff_add_pixels_clamped_neon, export=1 | |||||
| bx lr | bx lr | ||||
| endfunc | endfunc | ||||
| function ff_scalarproduct_float_neon, export=1 | |||||
| vmov.f32 q2, #0.0 | |||||
| 1: vld1.32 {q0},[r0,:128]! | |||||
| vld1.32 {q1},[r1,:128]! | |||||
| vmla.f32 q2, q0, q1 | |||||
| subs r2, r2, #4 | |||||
| bgt 1b | |||||
| vadd.f32 d0, d4, d5 | |||||
| vpadd.f32 d0, d0, d0 | |||||
| NOVFP vmov.32 r0, d0[0] | |||||
| bx lr | |||||
| endfunc | |||||
| function ff_vector_clipf_neon, export=1 | function ff_vector_clipf_neon, export=1 | ||||
| VFP vdup.32 q1, d0[1] | VFP vdup.32 q1, d0[1] | ||||
| VFP vdup.32 q0, d0[0] | VFP vdup.32 q0, d0[0] | ||||
| @@ -2353,17 +2353,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) | |||||
| WRAPPER8_16_SQ(rd8x8_c, rd16_c) | WRAPPER8_16_SQ(rd8x8_c, rd16_c) | ||||
| WRAPPER8_16_SQ(bit8x8_c, bit16_c) | WRAPPER8_16_SQ(bit8x8_c, bit16_c) | ||||
| float ff_scalarproduct_float_c(const float *v1, const float *v2, int len) | |||||
| { | |||||
| float p = 0.0; | |||||
| int i; | |||||
| for (i = 0; i < len; i++) | |||||
| p += v1[i] * v2[i]; | |||||
| return p; | |||||
| } | |||||
| static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini, | static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini, | ||||
| uint32_t maxi, uint32_t maxisign) | uint32_t maxi, uint32_t maxisign) | ||||
| { | { | ||||
| @@ -2694,7 +2683,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||
| c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c; | c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c; | ||||
| c->apply_window_int16 = apply_window_int16_c; | c->apply_window_int16 = apply_window_int16_c; | ||||
| c->vector_clip_int32 = vector_clip_int32_c; | c->vector_clip_int32 = vector_clip_int32_c; | ||||
| c->scalarproduct_float = ff_scalarproduct_float_c; | |||||
| c->shrink[0]= av_image_copy_plane; | c->shrink[0]= av_image_copy_plane; | ||||
| c->shrink[1]= ff_shrink22; | c->shrink[1]= ff_shrink22; | ||||
| @@ -342,13 +342,6 @@ typedef struct DSPContext { | |||||
| /* assume len is a multiple of 8, and arrays are 16-byte aligned */ | /* assume len is a multiple of 8, and arrays are 16-byte aligned */ | ||||
| void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */); | void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */); | ||||
| /** | |||||
| * Calculate the scalar product of two vectors of floats. | |||||
| * @param v1 first vector, 16-byte aligned | |||||
| * @param v2 second vector, 16-byte aligned | |||||
| * @param len length of vectors, multiple of 4 | |||||
| */ | |||||
| float (*scalarproduct_float)(const float *v1, const float *v2, int len); | |||||
| /* (I)DCT */ | /* (I)DCT */ | ||||
| void (*fdct)(DCTELEM *block/* align 16*/); | void (*fdct)(DCTELEM *block/* align 16*/); | ||||
| @@ -454,17 +447,6 @@ void ff_dsputil_init(DSPContext* p, AVCodecContext *avctx); | |||||
| int ff_check_alignment(void); | int ff_check_alignment(void); | ||||
| /** | |||||
| * Return the scalar product of two vectors. | |||||
| * | |||||
| * @param v1 first input vector | |||||
| * @param v2 first input vector | |||||
| * @param len number of elements | |||||
| * | |||||
| * @return sum of elementwise products | |||||
| */ | |||||
| float ff_scalarproduct_float_c(const float *v1, const float *v2, int len); | |||||
| /** | /** | ||||
| * permute block according to permuatation. | * permute block according to permuatation. | ||||
| * @param last last non zero element in scantable order | * @param last last non zero element in scantable order | ||||
| @@ -30,10 +30,10 @@ | |||||
| #include <stddef.h> | #include <stddef.h> | ||||
| #include "libavutil/channel_layout.h" | #include "libavutil/channel_layout.h" | ||||
| #include "libavutil/float_dsp.h" | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "dsputil.h" | |||||
| #include "qcelpdata.h" | #include "qcelpdata.h" | ||||
| #include "celp_filters.h" | #include "celp_filters.h" | ||||
| #include "acelp_filters.h" | #include "acelp_filters.h" | ||||
| @@ -400,12 +400,10 @@ static void apply_gain_ctrl(float *v_out, const float *v_ref, const float *v_in) | |||||
| { | { | ||||
| int i; | int i; | ||||
| for (i = 0; i < 160; i += 40) | |||||
| ff_scale_vector_to_given_sum_of_squares(v_out + i, v_in + i, | |||||
| ff_scalarproduct_float_c(v_ref + i, | |||||
| v_ref + i, | |||||
| 40), | |||||
| 40); | |||||
| for (i = 0; i < 160; i += 40) { | |||||
| float res = avpriv_scalarproduct_float_c(v_ref + i, v_ref + i, 40); | |||||
| ff_scale_vector_to_given_sum_of_squares(v_out + i, v_in + i, res, 40); | |||||
| } | |||||
| } | } | ||||
| /** | /** | ||||
| @@ -680,8 +678,9 @@ static void postfilter(QCELPContext *q, float *samples, float *lpc) | |||||
| ff_tilt_compensation(&q->postfilter_tilt_mem, 0.3, pole_out + 10, 160); | ff_tilt_compensation(&q->postfilter_tilt_mem, 0.3, pole_out + 10, 160); | ||||
| ff_adaptive_gain_control(samples, pole_out + 10, | ff_adaptive_gain_control(samples, pole_out + 10, | ||||
| ff_scalarproduct_float_c(q->formant_mem + 10, | |||||
| q->formant_mem + 10, 160), | |||||
| avpriv_scalarproduct_float_c(q->formant_mem + 10, | |||||
| q->formant_mem + 10, | |||||
| 160), | |||||
| 160, 0.9375, &q->postfilter_agc_mem); | 160, 0.9375, &q->postfilter_agc_mem); | ||||
| } | } | ||||
| @@ -79,7 +79,7 @@ static av_cold int ra288_decode_init(AVCodecContext *avctx) | |||||
| static void convolve(float *tgt, const float *src, int len, int n) | static void convolve(float *tgt, const float *src, int len, int n) | ||||
| { | { | ||||
| for (; n >= 0; n--) | for (; n >= 0; n--) | ||||
| tgt[n] = ff_scalarproduct_float_c(src, src - n, len); | |||||
| tgt[n] = avpriv_scalarproduct_float_c(src, src - n, len); | |||||
| } | } | ||||
| @@ -108,7 +108,7 @@ static void decode(RA288Context *ractx, float gain, int cb_coef) | |||||
| for (i=0; i < 5; i++) | for (i=0; i < 5; i++) | ||||
| buffer[i] = codetable[cb_coef][i] * sumsum; | buffer[i] = codetable[cb_coef][i] * sumsum; | ||||
| sum = ff_scalarproduct_float_c(buffer, buffer, 5) * ((1 << 24) / 5.); | |||||
| sum = avpriv_scalarproduct_float_c(buffer, buffer, 5) * ((1 << 24) / 5.); | |||||
| sum = FFMAX(sum, 1); | sum = FFMAX(sum, 1); | ||||
| @@ -26,11 +26,11 @@ | |||||
| #include <string.h> | #include <string.h> | ||||
| #include "libavutil/channel_layout.h" | #include "libavutil/channel_layout.h" | ||||
| #include "libavutil/float_dsp.h" | |||||
| #include "libavutil/mathematics.h" | #include "libavutil/mathematics.h" | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #define BITSTREAM_READER_LE | #define BITSTREAM_READER_LE | ||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "dsputil.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "lsp.h" | #include "lsp.h" | ||||
| @@ -411,9 +411,10 @@ static void decode_frame(SiprContext *ctx, SiprParameters *params, | |||||
| convolute_with_sparse(fixed_vector, &fixed_cb, impulse_response, | convolute_with_sparse(fixed_vector, &fixed_cb, impulse_response, | ||||
| SUBFR_SIZE); | SUBFR_SIZE); | ||||
| avg_energy = | |||||
| (0.01 + ff_scalarproduct_float_c(fixed_vector, fixed_vector, SUBFR_SIZE)) / | |||||
| SUBFR_SIZE; | |||||
| avg_energy = (0.01 + avpriv_scalarproduct_float_c(fixed_vector, | |||||
| fixed_vector, | |||||
| SUBFR_SIZE)) / | |||||
| SUBFR_SIZE; | |||||
| ctx->past_pitch_gain = pitch_gain = gain_cb[params->gc_index[i]][0]; | ctx->past_pitch_gain = pitch_gain = gain_cb[params->gc_index[i]][0]; | ||||
| @@ -454,9 +455,9 @@ static void decode_frame(SiprContext *ctx, SiprParameters *params, | |||||
| if (ctx->mode == MODE_5k0) { | if (ctx->mode == MODE_5k0) { | ||||
| for (i = 0; i < subframe_count; i++) { | for (i = 0; i < subframe_count; i++) { | ||||
| float energy = ff_scalarproduct_float_c(ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE, | |||||
| ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE, | |||||
| SUBFR_SIZE); | |||||
| float energy = avpriv_scalarproduct_float_c(ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE, | |||||
| ctx->postfilter_syn5k0 + LP_FILTER_ORDER + i * SUBFR_SIZE, | |||||
| SUBFR_SIZE); | |||||
| ff_adaptive_gain_control(&synth[i * SUBFR_SIZE], | ff_adaptive_gain_control(&synth[i * SUBFR_SIZE], | ||||
| &synth[i * SUBFR_SIZE], energy, | &synth[i * SUBFR_SIZE], energy, | ||||
| SUBFR_SIZE, 0.9, &ctx->postfilter_agc); | SUBFR_SIZE, 0.9, &ctx->postfilter_agc); | ||||
| @@ -25,8 +25,8 @@ | |||||
| #include "sipr.h" | #include "sipr.h" | ||||
| #include "libavutil/common.h" | #include "libavutil/common.h" | ||||
| #include "libavutil/float_dsp.h" | |||||
| #include "libavutil/mathematics.h" | #include "libavutil/mathematics.h" | ||||
| #include "dsputil.h" | |||||
| #include "lsp.h" | #include "lsp.h" | ||||
| #include "celp_filters.h" | #include "celp_filters.h" | ||||
| #include "acelp_vectors.h" | #include "acelp_vectors.h" | ||||
| @@ -163,11 +163,11 @@ static float acelp_decode_gain_codef(float gain_corr_factor, const float *fc_v, | |||||
| const float *ma_prediction_coeff, | const float *ma_prediction_coeff, | ||||
| int subframe_size, int ma_pred_order) | int subframe_size, int ma_pred_order) | ||||
| { | { | ||||
| mr_energy += | |||||
| ff_scalarproduct_float_c(quant_energy, ma_prediction_coeff, ma_pred_order); | |||||
| mr_energy += avpriv_scalarproduct_float_c(quant_energy, ma_prediction_coeff, | |||||
| ma_pred_order); | |||||
| mr_energy = gain_corr_factor * exp(M_LN10 / 20. * mr_energy) / | mr_energy = gain_corr_factor * exp(M_LN10 / 20. * mr_energy) / | ||||
| sqrt((0.01 + ff_scalarproduct_float_c(fc_v, fc_v, subframe_size))); | |||||
| sqrt((0.01 + avpriv_scalarproduct_float_c(fc_v, fc_v, subframe_size))); | |||||
| return mr_energy; | return mr_energy; | ||||
| } | } | ||||
| @@ -30,8 +30,8 @@ | |||||
| #include <math.h> | #include <math.h> | ||||
| #include "libavutil/channel_layout.h" | #include "libavutil/channel_layout.h" | ||||
| #include "libavutil/float_dsp.h" | |||||
| #include "libavutil/mem.h" | #include "libavutil/mem.h" | ||||
| #include "dsputil.h" | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| @@ -523,7 +523,7 @@ static int kalman_smoothen(WMAVoiceContext *s, int pitch, | |||||
| /* find best fitting point in history */ | /* find best fitting point in history */ | ||||
| do { | do { | ||||
| dot = ff_scalarproduct_float_c(in, ptr, size); | |||||
| dot = avpriv_scalarproduct_float_c(in, ptr, size); | |||||
| if (dot > optimal_gain) { | if (dot > optimal_gain) { | ||||
| optimal_gain = dot; | optimal_gain = dot; | ||||
| best_hist_ptr = ptr; | best_hist_ptr = ptr; | ||||
| @@ -532,7 +532,7 @@ static int kalman_smoothen(WMAVoiceContext *s, int pitch, | |||||
| if (optimal_gain <= 0) | if (optimal_gain <= 0) | ||||
| return -1; | return -1; | ||||
| dot = ff_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size); | |||||
| dot = avpriv_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size); | |||||
| if (dot <= 0) // would be 1.0 | if (dot <= 0) // would be 1.0 | ||||
| return -1; | return -1; | ||||
| @@ -562,8 +562,8 @@ static float tilt_factor(const float *lpcs, int n_lpcs) | |||||
| { | { | ||||
| float rh0, rh1; | float rh0, rh1; | ||||
| rh0 = 1.0 + ff_scalarproduct_float_c(lpcs, lpcs, n_lpcs); | |||||
| rh1 = lpcs[0] + ff_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1); | |||||
| rh0 = 1.0 + avpriv_scalarproduct_float_c(lpcs, lpcs, n_lpcs); | |||||
| rh1 = lpcs[0] + avpriv_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1); | |||||
| return rh1 / rh0; | return rh1 / rh0; | ||||
| } | } | ||||
| @@ -656,7 +656,8 @@ static void calc_input_response(WMAVoiceContext *s, float *lpcs, | |||||
| -1.8 * tilt_factor(coeffs, remainder - 1), | -1.8 * tilt_factor(coeffs, remainder - 1), | ||||
| coeffs, remainder); | coeffs, remainder); | ||||
| } | } | ||||
| sq = (1.0 / 64.0) * sqrtf(1 / ff_scalarproduct_float_c(coeffs, coeffs, remainder)); | |||||
| sq = (1.0 / 64.0) * sqrtf(1 / avpriv_scalarproduct_float_c(coeffs, coeffs, | |||||
| remainder)); | |||||
| for (n = 0; n < remainder; n++) | for (n = 0; n < remainder; n++) | ||||
| coeffs[n] *= sq; | coeffs[n] *= sq; | ||||
| } | } | ||||
| @@ -1320,7 +1321,8 @@ static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb, | |||||
| /* Calculate gain for adaptive & fixed codebook signal. | /* Calculate gain for adaptive & fixed codebook signal. | ||||
| * see ff_amr_set_fixed_gain(). */ | * see ff_amr_set_fixed_gain(). */ | ||||
| idx = get_bits(gb, 7); | idx = get_bits(gb, 7); | ||||
| fcb_gain = expf(ff_scalarproduct_float_c(s->gain_pred_err, gain_coeff, 6) - | |||||
| fcb_gain = expf(avpriv_scalarproduct_float_c(s->gain_pred_err, | |||||
| gain_coeff, 6) - | |||||
| 5.2409161640 + wmavoice_gain_codebook_fcb[idx]); | 5.2409161640 + wmavoice_gain_codebook_fcb[idx]); | ||||
| acb_gain = wmavoice_gain_codebook_acb[idx]; | acb_gain = wmavoice_gain_codebook_acb[idx]; | ||||
| pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx], | pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx], | ||||
| @@ -463,32 +463,6 @@ cglobal add_hfyu_left_prediction, 3,3,7, dst, src, w, left | |||||
| .src_unaligned: | .src_unaligned: | ||||
| ADD_HFYU_LEFT_LOOP 0, 0 | ADD_HFYU_LEFT_LOOP 0, 0 | ||||
| ; float scalarproduct_float_sse(const float *v1, const float *v2, int len) | |||||
| INIT_XMM sse | |||||
| cglobal scalarproduct_float, 3,3,2, v1, v2, offset | |||||
| neg offsetq | |||||
| shl offsetq, 2 | |||||
| sub v1q, offsetq | |||||
| sub v2q, offsetq | |||||
| xorps xmm0, xmm0 | |||||
| .loop: | |||||
| movaps xmm1, [v1q+offsetq] | |||||
| mulps xmm1, [v2q+offsetq] | |||||
| addps xmm0, xmm1 | |||||
| add offsetq, 16 | |||||
| js .loop | |||||
| movhlps xmm1, xmm0 | |||||
| addps xmm0, xmm1 | |||||
| movss xmm1, xmm0 | |||||
| shufps xmm0, xmm0, 1 | |||||
| addss xmm0, xmm1 | |||||
| %if ARCH_X86_64 == 0 | |||||
| movss r0m, xmm0 | |||||
| fld dword r0m | |||||
| %endif | |||||
| RET | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, | ; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, | ||||
| ; int32_t max, unsigned int len) | ; int32_t max, unsigned int len) | ||||
| @@ -1846,8 +1846,6 @@ int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, | |||||
| int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, | int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, | ||||
| int w, int left); | int w, int left); | ||||
| float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); | |||||
| void ff_vector_clip_int32_mmx (int32_t *dst, const int32_t *src, | void ff_vector_clip_int32_mmx (int32_t *dst, const int32_t *src, | ||||
| int32_t min, int32_t max, unsigned int len); | int32_t min, int32_t max, unsigned int len); | ||||
| void ff_vector_clip_int32_sse2 (int32_t *dst, const int32_t *src, | void ff_vector_clip_int32_sse2 (int32_t *dst, const int32_t *src, | ||||
| @@ -2128,10 +2126,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags) | |||||
| c->vector_clipf = vector_clipf_sse; | c->vector_clipf = vector_clipf_sse; | ||||
| #endif /* HAVE_INLINE_ASM */ | #endif /* HAVE_INLINE_ASM */ | ||||
| #if HAVE_YASM | |||||
| c->scalarproduct_float = ff_scalarproduct_float_sse; | |||||
| #endif /* HAVE_YASM */ | |||||
| } | } | ||||
| static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, | static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, | ||||
| @@ -43,6 +43,8 @@ void ff_vector_fmul_reverse_neon(float *dst, const float *src0, | |||||
| void ff_butterflies_float_neon(float *v1, float *v2, int len); | void ff_butterflies_float_neon(float *v1, float *v2, int len); | ||||
| float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); | |||||
| void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) | void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) | ||||
| { | { | ||||
| fdsp->vector_fmul = ff_vector_fmul_neon; | fdsp->vector_fmul = ff_vector_fmul_neon; | ||||
| @@ -52,4 +54,5 @@ void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) | |||||
| fdsp->vector_fmul_add = ff_vector_fmul_add_neon; | fdsp->vector_fmul_add = ff_vector_fmul_add_neon; | ||||
| fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon; | fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon; | ||||
| fdsp->butterflies_float = ff_butterflies_float_neon; | fdsp->butterflies_float = ff_butterflies_float_neon; | ||||
| fdsp->scalarproduct_float = ff_scalarproduct_float_neon; | |||||
| } | } | ||||
| @@ -256,3 +256,16 @@ function ff_butterflies_float_neon, export=1 | |||||
| bgt 1b | bgt 1b | ||||
| bx lr | bx lr | ||||
| endfunc | endfunc | ||||
| function ff_scalarproduct_float_neon, export=1 | |||||
| vmov.f32 q2, #0.0 | |||||
| 1: vld1.32 {q0},[r0,:128]! | |||||
| vld1.32 {q1},[r1,:128]! | |||||
| vmla.f32 q2, q0, q1 | |||||
| subs r2, r2, #4 | |||||
| bgt 1b | |||||
| vadd.f32 d0, d4, d5 | |||||
| vpadd.f32 d0, d0, d0 | |||||
| NOVFP vmov.32 r0, d0[0] | |||||
| bx lr | |||||
| endfunc | |||||
| @@ -101,6 +101,17 @@ static void butterflies_float_c(float *restrict v1, float *restrict v2, | |||||
| } | } | ||||
| } | } | ||||
| float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len) | |||||
| { | |||||
| float p = 0.0; | |||||
| int i; | |||||
| for (i = 0; i < len; i++) | |||||
| p += v1[i] * v2[i]; | |||||
| return p; | |||||
| } | |||||
| void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) | void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) | ||||
| { | { | ||||
| fdsp->vector_fmul = vector_fmul_c; | fdsp->vector_fmul = vector_fmul_c; | ||||
| @@ -111,6 +122,7 @@ void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) | |||||
| fdsp->vector_fmul_add = vector_fmul_add_c; | fdsp->vector_fmul_add = vector_fmul_add_c; | ||||
| fdsp->vector_fmul_reverse = vector_fmul_reverse_c; | fdsp->vector_fmul_reverse = vector_fmul_reverse_c; | ||||
| fdsp->butterflies_float = butterflies_float_c; | fdsp->butterflies_float = butterflies_float_c; | ||||
| fdsp->scalarproduct_float = avpriv_scalarproduct_float_c; | |||||
| #if ARCH_ARM | #if ARCH_ARM | ||||
| ff_float_dsp_init_arm(fdsp); | ff_float_dsp_init_arm(fdsp); | ||||
| @@ -146,8 +146,30 @@ typedef struct AVFloatDSPContext { | |||||
| * @param len length of vectors, multiple of 4 | * @param len length of vectors, multiple of 4 | ||||
| */ | */ | ||||
| void (*butterflies_float)(float *restrict v1, float *restrict v2, int len); | void (*butterflies_float)(float *restrict v1, float *restrict v2, int len); | ||||
| /** | |||||
| * Calculate the scalar product of two vectors of floats. | |||||
| * | |||||
| * @param v1 first vector, 16-byte aligned | |||||
| * @param v2 second vector, 16-byte aligned | |||||
| * @param len length of vectors, multiple of 4 | |||||
| * | |||||
| * @return sum of elementwise products | |||||
| */ | |||||
| float (*scalarproduct_float)(const float *v1, const float *v2, int len); | |||||
| } AVFloatDSPContext; | } AVFloatDSPContext; | ||||
| /** | |||||
| * Return the scalar product of two vectors. | |||||
| * | |||||
| * @param v1 first input vector | |||||
| * @param v2 second input vector | |||||
| * @param len number of elements | |||||
| * | |||||
| * @return sum of elementwise products | |||||
| */ | |||||
| float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len); | |||||
| /** | /** | ||||
| * Initialize a float DSP context. | * Initialize a float DSP context. | ||||
| * | * | ||||
| @@ -227,3 +227,28 @@ INIT_XMM sse | |||||
| VECTOR_FMUL_REVERSE | VECTOR_FMUL_REVERSE | ||||
| INIT_YMM avx | INIT_YMM avx | ||||
| VECTOR_FMUL_REVERSE | VECTOR_FMUL_REVERSE | ||||
; float scalarproduct_float_sse(const float *v1, const float *v2, int len)
;
; Returns the sum of v1[i] * v2[i] over len floats.
; Four floats are consumed per iteration using aligned accesses (movaps and
; the mulps memory operand), and the loop body runs once before the counter
; is tested, so both vectors must be 16-byte aligned and len must be a
; positive multiple of 4.
INIT_XMM sse
cglobal scalarproduct_float, 3,3,2, v1, v2, offset
    ; len arrives as a 32-bit int. Scaling it through the dword register
    ; turns it into a byte count and, on x86-64, clears the upper half of the
    ; register, so the 64-bit address arithmetic below never depends on
    ; whatever the caller left in the high bits.
    shl     offsetd, 2
    add     v1q, offsetq            ; v1 -> one past the last element
    add     v2q, offsetq            ; v2 -> one past the last element
    neg     offsetq                 ; offset = -len * 4, counts up towards zero
    xorps   xmm0, xmm0              ; four partial sums, all zero
.loop:
    movaps  xmm1, [v1q+offsetq]
    mulps   xmm1, [v2q+offsetq]
    addps   xmm0, xmm1
    add     offsetq, 16
    js      .loop                   ; keep going while offset is still negative
    ; Horizontal add of the four partial sums into lane 0.
    movhlps xmm1, xmm0              ; xmm1[0..1] = xmm0[2..3]
    addps   xmm0, xmm1              ; lanes 0 and 1 now hold the two pair sums
    movss   xmm1, xmm0              ; xmm1[0] = first pair sum
    shufps  xmm0, xmm0, 1           ; xmm0[0] = second pair sum
    addss   xmm0, xmm1              ; xmm0[0] = total
%if ARCH_X86_64 == 0
    ; 32-bit calling conventions return a float on the x87 stack, so bounce
    ; the result through the first argument's stack slot.
    movss   r0m, xmm0
    fld     dword r0m
%endif
    RET
| @@ -51,6 +51,8 @@ void ff_vector_fmul_reverse_sse(float *dst, const float *src0, | |||||
| void ff_vector_fmul_reverse_avx(float *dst, const float *src0, | void ff_vector_fmul_reverse_avx(float *dst, const float *src0, | ||||
| const float *src1, int len); | const float *src1, int len); | ||||
| float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); | |||||
| #if HAVE_6REGS && HAVE_INLINE_ASM | #if HAVE_6REGS && HAVE_INLINE_ASM | ||||
| static void vector_fmul_window_3dnowext(float *dst, const float *src0, | static void vector_fmul_window_3dnowext(float *dst, const float *src0, | ||||
| const float *src1, const float *win, | const float *src1, const float *win, | ||||
| @@ -135,6 +137,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) | |||||
| fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse; | fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse; | ||||
| fdsp->vector_fmul_add = ff_vector_fmul_add_sse; | fdsp->vector_fmul_add = ff_vector_fmul_add_sse; | ||||
| fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse; | fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse; | ||||
| fdsp->scalarproduct_float = ff_scalarproduct_float_sse; | |||||
| } | } | ||||
| if (EXTERNAL_SSE2(mm_flags)) { | if (EXTERNAL_SSE2(mm_flags)) { | ||||
| fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2; | fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2; | ||||