| @@ -154,8 +154,6 @@ void ff_vector_fmul_window_neon(float *dst, const float *src0, | |||
| const float *src1, const float *win, int len); | |||
| void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, | |||
| int len); | |||
| void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul, | |||
| int len); | |||
| void ff_butterflies_float_neon(float *v1, float *v2, int len); | |||
| float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); | |||
| void ff_vector_fmul_reverse_neon(float *dst, const float *src0, | |||
| @@ -329,7 +327,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) | |||
| c->vector_fmul_window = ff_vector_fmul_window_neon; | |||
| c->vector_fmul_scalar = ff_vector_fmul_scalar_neon; | |||
| c->vector_fmac_scalar = ff_vector_fmac_scalar_neon; | |||
| c->butterflies_float = ff_butterflies_float_neon; | |||
| c->scalarproduct_float = ff_scalarproduct_float_neon; | |||
| c->vector_fmul_reverse = ff_vector_fmul_reverse_neon; | |||
| @@ -682,54 +682,6 @@ NOVFP vdup.32 q8, r2 | |||
| .unreq len | |||
| endfunc | |||
/*
 * ff_vector_fmac_scalar_neon: NEON multiply-accumulate of a float vector
 * by a scalar, i.e. dst[i] += src[i] * mul.
 *
 * Register use as seen in this routine:
 *   r0  - store pointer into dst (post-incremented by every vst1)
 *   r1  - load pointer into src
 *   acc - second pointer into dst, used only for loads; it is set to r0
 *         on entry and runs ahead of the store pointer
 *   len - element count; r12 holds the part handled by the 16-wide loop
 *   q15 - mul replicated into all four lanes
 *
 * The VFP/NOVFP prefixes pick one of two register assignments: with VFP
 * the scalar is taken from d0[0] and len from r2; with NOVFP the scalar
 * is taken from r2 and len from r3.
 * NOTE(review): presumably these macros select hard-float vs. soft-float
 * argument passing; they are defined outside this view - confirm.
 *
 * Every load and store carries the :128 qualifier, so src and dst must be
 * 16-byte aligned. The tail consumes 4 floats per pass, so len is expected
 * to be a multiple of 4.
 * NOTE(review): when len < 16 control goes straight to the tail loop, whose
 * body runs before its counter is tested, so len == 0 would still process
 * one quad - callers apparently must pass len > 0; confirm.
 */
| function ff_vector_fmac_scalar_neon, export=1 | |||
| VFP len .req r2 | |||
| VFP acc .req r3 | |||
| NOVFP len .req r3 | |||
| NOVFP acc .req r2 | |||
/* Broadcast the scalar into q15. In the NOVFP case this must happen
 * before acc (also r2) is overwritten by the mov below. */
| VFP vdup.32 q15, d0[0] | |||
| NOVFP vdup.32 q15, r2 | |||
/* r12 = len with the low four bits cleared; Z is set when there is no
 * full group of 16. The mov does not touch the flags, so the beq still
 * tests the result of bics. */
| bics r12, len, #15 | |||
| mov acc, r0 | |||
| beq 3f | |||
/* Prologue: preload the first two quads of src (q0, q1) and dst (q8, q9). */
| vld1.32 {q0}, [r1,:128]! | |||
| vld1.32 {q8}, [acc,:128]! | |||
| vld1.32 {q1}, [r1,:128]! | |||
| vld1.32 {q9}, [acc,:128]! | |||
/* Main loop: 16 floats per iteration. Loads of the second pair of quads
 * are interleaved with the multiply-accumulates of the first pair. */
| 1: vmla.f32 q8, q0, q15 | |||
| vld1.32 {q2}, [r1,:128]! | |||
| vld1.32 {q10}, [acc,:128]! | |||
| vmla.f32 q9, q1, q15 | |||
| vld1.32 {q3}, [r1,:128]! | |||
| vld1.32 {q11}, [acc,:128]! | |||
| vmla.f32 q10, q2, q15 | |||
| vst1.32 {q8}, [r0,:128]! | |||
| vmla.f32 q11, q3, q15 | |||
| vst1.32 {q9}, [r0,:128]! | |||
| subs r12, r12, #16 | |||
/* Last group of 16: skip the preload and just flush q10/q11 at label 2. */
| beq 2f | |||
/* More groups follow: preload q0/q8 and q1/q9 for the next iteration
 * while storing the pending q10/q11 results. */
| vld1.32 {q0}, [r1,:128]! | |||
| vld1.32 {q8}, [acc,:128]! | |||
| vst1.32 {q10}, [r0,:128]! | |||
| vld1.32 {q1}, [r1,:128]! | |||
| vld1.32 {q9}, [acc,:128]! | |||
| vst1.32 {q11}, [r0,:128]! | |||
| b 1b | |||
/* Epilogue of the main loop: store the last two results. */
| 2: vst1.32 {q10}, [r0,:128]! | |||
| vst1.32 {q11}, [r0,:128]! | |||
/* len = len % 16; return right away when nothing is left. The IT
 * instruction covers the conditional bx when assembled as Thumb-2. */
| ands len, len, #15 | |||
| it eq | |||
| bxeq lr | |||
/* Tail loop: one quad (4 floats) per pass until len drops to 0 or below. */
| 3: vld1.32 {q0}, [r1,:128]! | |||
| vld1.32 {q8}, [acc,:128]! | |||
| vmla.f32 q8, q0, q15 | |||
| vst1.32 {q8}, [r0,:128]! | |||
| subs len, len, #4 | |||
| bgt 3b | |||
| bx lr | |||
/* Only the len alias is released here; acc stays defined. */
| .unreq len | |||
| endfunc | |||
| function ff_butterflies_float_neon, export=1 | |||
| 1: vld1.32 {q0},[r0,:128] | |||
| vld1.32 {q1},[r1,:128] | |||
| @@ -27,6 +27,7 @@ | |||
| #include <stdio.h> | |||
| #include "libavutil/common.h" | |||
| #include "libavutil/float_dsp.h" | |||
| #include "libavutil/intmath.h" | |||
| #include "libavutil/intreadwrite.h" | |||
| #include "libavutil/mathematics.h" | |||
| @@ -383,7 +384,7 @@ typedef struct { | |||
| int profile; | |||
| int debug_flag; ///< used for suppressing repeated error messages output | |||
| DSPContext dsp; | |||
| AVFloatDSPContext fdsp; | |||
| FFTContext imdct; | |||
| SynthFilterContext synth; | |||
| DCADSPContext dcadsp; | |||
| @@ -1865,8 +1866,8 @@ static int dca_decode_frame(AVCodecContext *avctx, void *data, | |||
| float *back_chan = s->samples + s->channel_order_tab[s->xch_base_channel] * 256; | |||
| float *lt_chan = s->samples + s->channel_order_tab[s->xch_base_channel - 2] * 256; | |||
| float *rt_chan = s->samples + s->channel_order_tab[s->xch_base_channel - 1] * 256; | |||
| s->dsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256); | |||
| s->dsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256); | |||
| s->fdsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256); | |||
| s->fdsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256); | |||
| } | |||
| if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) { | |||
| @@ -1908,7 +1909,7 @@ static av_cold int dca_decode_init(AVCodecContext *avctx) | |||
| s->avctx = avctx; | |||
| dca_init_vlcs(); | |||
| ff_dsputil_init(&s->dsp, avctx); | |||
| avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); | |||
| ff_mdct_init(&s->imdct, 6, 1, 1.0); | |||
| ff_synth_filter_init(&s->synth); | |||
| ff_dcadsp_init(&s->dcadsp); | |||
| @@ -2401,14 +2401,6 @@ static void vector_fmul_scalar_c(float *dst, const float *src, float mul, | |||
| dst[i] = src[i] * mul; | |||
| } | |||
/**
 * Portable C multiply-accumulate of a float vector by a scalar:
 * dst[i] += src[i] * mul for every i in [0, len).
 *
 * @param dst vector that is read and updated in place
 * @param src vector that is scaled and added; only read
 * @param mul scalar applied to each element of src
 * @param len number of elements to process; nothing is done when len <= 0
 */
| static void vector_fmac_scalar_c(float *dst, const float *src, float mul, | |||
| int len) | |||
| { | |||
| int i; | |||
| for (i = 0; i < len; i++) | |||
| dst[i] += src[i] * mul; | |||
| } | |||
| static void butterflies_float_c(float *restrict v1, float *restrict v2, | |||
| int len) | |||
| { | |||
| @@ -2904,7 +2896,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||
| c->butterflies_float = butterflies_float_c; | |||
| c->butterflies_float_interleave = butterflies_float_interleave_c; | |||
| c->vector_fmul_scalar = vector_fmul_scalar_c; | |||
| c->vector_fmac_scalar = vector_fmac_scalar_c; | |||
| c->shrink[0]= av_image_copy_plane; | |||
| c->shrink[1]= ff_shrink22; | |||
| @@ -416,17 +416,6 @@ typedef struct DSPContext { | |||
| */ | |||
| void (*vector_fmul_scalar)(float *dst, const float *src, float mul, | |||
| int len); | |||
| /** | |||
| * Multiply a vector of floats by a scalar float and add to | |||
| * destination vector. Source and destination vectors must | |||
| * overlap exactly or not at all. | |||
| * @param dst result vector, 16-byte aligned | |||
| * @param src input vector, 16-byte aligned | |||
| * @param mul scalar value | |||
| * @param len length of vector, multiple of 4 | |||
| */ | |||
| void (*vector_fmac_scalar)(float *dst, const float *src, float mul, | |||
| int len); | |||
| /** | |||
| * Calculate the scalar product of two vectors of floats. | |||
| * @param v1 first vector, 16-byte aligned | |||
| @@ -26,7 +26,11 @@ | |||
| void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len); | |||
| void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul, | |||
| int len); | |||
/**
 * Point the vector_fmul and vector_fmac_scalar entries of an
 * AVFloatDSPContext at their NEON implementations.
 *
 * Both entries are overwritten unconditionally; this function performs no
 * CPU feature test itself.
 * NOTE(review): presumably the caller only invokes this when NEON is
 * available - confirm against the ARM init code.
 *
 * @param fdsp context whose function pointers are replaced
 */
| void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) | |||
| { | |||
| fdsp->vector_fmul = ff_vector_fmul_neon; | |||
| fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon; | |||
| } | |||
| @@ -62,3 +62,51 @@ function ff_vector_fmul_neon, export=1 | |||
| 3: vst1.32 {d16-d19},[r0,:128]! | |||
| bx lr | |||
| endfunc | |||
/*
 * ff_vector_fmac_scalar_neon: NEON multiply-accumulate of a float vector
 * by a scalar, i.e. dst[i] += src[i] * mul.
 *
 * Register use as seen in this routine:
 *   r0  - store pointer into dst (post-incremented by every vst1)
 *   r1  - load pointer into src
 *   acc - second pointer into dst, used only for loads; it is set to r0
 *         on entry and runs ahead of the store pointer
 *   len - element count; r12 holds the part handled by the 16-wide loop
 *   q15 - mul replicated into all four lanes
 *
 * The VFP/NOVFP prefixes pick one of two register assignments: with VFP
 * the scalar is taken from d0[0] and len from r2; with NOVFP the scalar
 * is taken from r2 and len from r3.
 * NOTE(review): presumably these macros select hard-float vs. soft-float
 * argument passing; they are defined outside this view - confirm.
 *
 * Every load and store carries the :128 qualifier, so src and dst must be
 * 16-byte aligned. The tail consumes 4 floats per pass, so len is expected
 * to be a multiple of 4.
 * NOTE(review): when len < 16 control goes straight to the tail loop, whose
 * body runs before its counter is tested, so len == 0 would still process
 * one quad - callers apparently must pass len > 0; confirm.
 */
| function ff_vector_fmac_scalar_neon, export=1 | |||
| VFP len .req r2 | |||
| VFP acc .req r3 | |||
| NOVFP len .req r3 | |||
| NOVFP acc .req r2 | |||
/* Broadcast the scalar into q15. In the NOVFP case this must happen
 * before acc (also r2) is overwritten by the mov below. */
| VFP vdup.32 q15, d0[0] | |||
| NOVFP vdup.32 q15, r2 | |||
/* r12 = len with the low four bits cleared; Z is set when there is no
 * full group of 16. The mov does not touch the flags, so the beq still
 * tests the result of bics. */
| bics r12, len, #15 | |||
| mov acc, r0 | |||
| beq 3f | |||
/* Prologue: preload the first two quads of src (q0, q1) and dst (q8, q9). */
| vld1.32 {q0}, [r1,:128]! | |||
| vld1.32 {q8}, [acc,:128]! | |||
| vld1.32 {q1}, [r1,:128]! | |||
| vld1.32 {q9}, [acc,:128]! | |||
/* Main loop: 16 floats per iteration. Loads of the second pair of quads
 * are interleaved with the multiply-accumulates of the first pair. */
| 1: vmla.f32 q8, q0, q15 | |||
| vld1.32 {q2}, [r1,:128]! | |||
| vld1.32 {q10}, [acc,:128]! | |||
| vmla.f32 q9, q1, q15 | |||
| vld1.32 {q3}, [r1,:128]! | |||
| vld1.32 {q11}, [acc,:128]! | |||
| vmla.f32 q10, q2, q15 | |||
| vst1.32 {q8}, [r0,:128]! | |||
| vmla.f32 q11, q3, q15 | |||
| vst1.32 {q9}, [r0,:128]! | |||
| subs r12, r12, #16 | |||
/* Last group of 16: skip the preload and just flush q10/q11 at label 2. */
| beq 2f | |||
/* More groups follow: preload q0/q8 and q1/q9 for the next iteration
 * while storing the pending q10/q11 results. */
| vld1.32 {q0}, [r1,:128]! | |||
| vld1.32 {q8}, [acc,:128]! | |||
| vst1.32 {q10}, [r0,:128]! | |||
| vld1.32 {q1}, [r1,:128]! | |||
| vld1.32 {q9}, [acc,:128]! | |||
| vst1.32 {q11}, [r0,:128]! | |||
| b 1b | |||
/* Epilogue of the main loop: store the last two results. */
| 2: vst1.32 {q10}, [r0,:128]! | |||
| vst1.32 {q11}, [r0,:128]! | |||
/* len = len % 16; return right away when nothing is left. The IT
 * instruction covers the conditional bx when assembled as Thumb-2. */
| ands len, len, #15 | |||
| it eq | |||
| bxeq lr | |||
/* Tail loop: one quad (4 floats) per pass until len drops to 0 or below. */
| 3: vld1.32 {q0}, [r1,:128]! | |||
| vld1.32 {q8}, [acc,:128]! | |||
| vmla.f32 q8, q0, q15 | |||
| vst1.32 {q8}, [r0,:128]! | |||
| subs len, len, #4 | |||
| bgt 3b | |||
| bx lr | |||
/* Only the len alias is released here; acc stays defined. */
| .unreq len | |||
| endfunc | |||
| @@ -28,9 +28,18 @@ static void vector_fmul_c(float *dst, const float *src0, const float *src1, | |||
| dst[i] = src0[i] * src1[i]; | |||
| } | |||
/**
 * Portable C multiply-accumulate of a float vector by a scalar:
 * dst[i] += src[i] * mul for every i in [0, len).
 *
 * @param dst vector that is read and updated in place
 * @param src vector that is scaled and added; only read
 * @param mul scalar applied to each element of src
 * @param len number of elements to process; nothing is done when len <= 0
 */
| static void vector_fmac_scalar_c(float *dst, const float *src, float mul, | |||
| int len) | |||
| { | |||
| int i; | |||
| for (i = 0; i < len; i++) | |||
| dst[i] += src[i] * mul; | |||
| } | |||
| void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) | |||
| { | |||
| fdsp->vector_fmul = vector_fmul_c; | |||
| fdsp->vector_fmac_scalar = vector_fmac_scalar_c; | |||
| #if ARCH_ARM | |||
| ff_float_dsp_init_arm(fdsp); | |||
| @@ -35,6 +35,22 @@ typedef struct AVFloatDSPContext { | |||
| */ | |||
| void (*vector_fmul)(float *dst, const float *src0, const float *src1, | |||
| int len); | |||
| /** | |||
| * Multiply a vector of floats by a scalar float and add to | |||
| * destination vector. Source and destination vectors must | |||
| * overlap exactly or not at all. | |||
| * | |||
| * @param dst result vector | |||
| * constraints: 16-byte aligned | |||
| * @param src input vector | |||
| * constraints: 16-byte aligned | |||
| * @param mul scalar value | |||
| * @param len length of vector | |||
| * constraints: multiple of 4 | |||
| */ | |||
| void (*vector_fmac_scalar)(float *dst, const float *src, float mul, | |||
| int len); | |||
| } AVFloatDSPContext; | |||
| /** | |||