They were superseded by their integer equivalents. Rename the integer variant, decode_hf_int, to decode_hf.

tags/n3.0
| @@ -41,12 +41,6 @@ void ff_synth_filter_float_neon(FFTContext *imdct, | |||
| float out[32], const float in[32], | |||
| float scale); | |||
| void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8], | |||
| const int32_t vq_num[DCA_SUBBANDS], | |||
| const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
| int32_t scale[DCA_SUBBANDS][2], | |||
| intptr_t start, intptr_t end); | |||
| av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s) | |||
| { | |||
| int cpu_flags = av_get_cpu_flags(); | |||
| @@ -54,7 +48,6 @@ av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s) | |||
| if (have_neon(cpu_flags)) { | |||
| s->lfe_fir[0] = ff_dca_lfe_fir0_neon; | |||
| s->lfe_fir[1] = ff_dca_lfe_fir1_neon; | |||
| s->decode_hf = ff_decode_hf_neon; | |||
| } | |||
| } | |||
| @@ -21,66 +21,6 @@ | |||
| #include "libavutil/aarch64/asm.S" | |||
| function ff_decode_hf_neon, export=1 | |||
| add x2, x2, x3 | |||
| add x0, x0, x5, lsl #5 | |||
| add x1, x1, x5, lsl #2 | |||
| add x4, x4, x5, lsl #3 | |||
| sub x6, x6, x5 | |||
| ldr w7, [x1], #4 | |||
| add x7, x2, x7, lsl #5 | |||
| subs x6, x6, #1 | |||
| b.eq 1f | |||
| b.gt 2f | |||
| ret | |||
| 2: | |||
| ldr w8, [x1], #4 | |||
| subs x6, x6, #2 | |||
| add x8, x2, x8, lsl #5 | |||
| ld1 {v2.4s}, [x4], #16 | |||
| ld1 {v0.8b}, [x7] | |||
| ld1 {v4.8b}, [x8] | |||
| sxtl v3.8h, v0.8b | |||
| sxtl v7.8h, v4.8b | |||
| scvtf v2.4s, v2.4s, #4 | |||
| sxtl v0.4s, v3.4h | |||
| sxtl2 v1.4s, v3.8h | |||
| sxtl v4.4s, v7.4h | |||
| sxtl2 v5.4s, v7.8h | |||
| scvtf v0.4s, v0.4s | |||
| scvtf v1.4s, v1.4s | |||
| scvtf v4.4s, v4.4s | |||
| scvtf v5.4s, v5.4s | |||
| fmul v0.4s, v0.4s, v2.s[0] | |||
| fmul v1.4s, v1.4s, v2.s[0] | |||
| fmul v4.4s, v4.4s, v2.s[2] | |||
| fmul v5.4s, v5.4s, v2.s[2] | |||
| b.lt 10f | |||
| ldr w7, [x1], #4 | |||
| add x7, x2, x7, lsl #5 | |||
| st1 {v0.4s,v1.4s}, [x0], #32 | |||
| st1 {v4.4s,v5.4s}, [x0], #32 | |||
| b.gt 2b | |||
| 1: | |||
| ldr w9, [x4] | |||
| ld1 {v0.8b}, [x7] | |||
| scvtf s2, w9, #4 | |||
| sxtl v3.8h, v0.8b | |||
| sxtl v0.4s, v3.4h | |||
| sxtl2 v1.4s, v3.8h | |||
| scvtf v0.4s, v0.4s | |||
| scvtf v1.4s, v1.4s | |||
| fmul v0.4s, v0.4s, v2.s[0] | |||
| fmul v1.4s, v1.4s, v2.s[0] | |||
| st1 {v0.4s,v1.4s}, [x0] | |||
| ret | |||
| 10: | |||
| st1 {v0.4s,v1.4s}, [x0], #32 | |||
| st1 {v4.4s,v5.4s}, [x0] | |||
| ret | |||
| endfunc | |||
| function ff_dca_lfe_fir0_neon, export=1 | |||
| mov x3, #32 // decifactor | |||
| sub x1, x1, #7*4 | |||
| @@ -49,12 +49,6 @@ void ff_synth_filter_float_neon(FFTContext *imdct, | |||
| float out[32], const float in[32], | |||
| float scale); | |||
| void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8], | |||
| const int32_t vq_num[DCA_SUBBANDS], | |||
| const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
| int32_t scale[DCA_SUBBANDS][2], | |||
| intptr_t start, intptr_t end); | |||
| av_cold void ff_dcadsp_init_arm(DCADSPContext *s) | |||
| { | |||
| int cpu_flags = av_get_cpu_flags(); | |||
| @@ -67,7 +61,6 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s) | |||
| if (have_neon(cpu_flags)) { | |||
| s->lfe_fir[0] = ff_dca_lfe_fir0_neon; | |||
| s->lfe_fir[1] = ff_dca_lfe_fir1_neon; | |||
| s->decode_hf = ff_decode_hf_neon; | |||
| } | |||
| } | |||
| @@ -20,35 +20,6 @@ | |||
| #include "libavutil/arm/asm.S" | |||
| function ff_decode_hf_neon, export=1 | |||
| push {r4-r5,lr} | |||
| add r2, r2, r3 | |||
| ldr r3, [sp, #12] | |||
| ldrd r4, r5, [sp, #16] | |||
| add r3, r3, r4, lsl #3 | |||
| add r1, r1, r4, lsl #2 | |||
| add r0, r0, r4, lsl #5 | |||
| 1: ldr_post lr, r1, #4 | |||
| add r4, r4, #1 | |||
| add lr, r2, lr, lsl #5 | |||
| cmp r4, r5 | |||
| vld1.32 {d7}, [r3]! | |||
| vld1.8 {d0}, [lr,:64] | |||
| vcvt.f32.s32 d7, d7, #4 | |||
| vmovl.s8 q1, d0 | |||
| vmovl.s16 q0, d2 | |||
| vmovl.s16 q1, d3 | |||
| vcvt.f32.s32 q0, q0 | |||
| vcvt.f32.s32 q1, q1 | |||
| vmul.f32 q0, q0, d7[0] | |||
| vmul.f32 q1, q1, d7[0] | |||
| vst1.32 {q0-q1}, [r0,:128]! | |||
| bne 1b | |||
| pop {r4-r5,pc} | |||
| endfunc | |||
| function ff_dca_lfe_fir0_neon, export=1 | |||
| push {r4-r6,lr} | |||
| mov r3, #32 @ decifactor | |||
| @@ -4187,13 +4187,6 @@ const uint32_t ff_dca_lossy_quant[32] = { | |||
| 84, 42, 21, 0, 0, 0, 0, 0 | |||
| }; | |||
| const float ff_dca_lossy_quant_d[32] = { | |||
| 0, 1.6, 1.0, 0.8, 0.59, 0.50, 0.42, 0.34, | |||
| 0.19, 0.11, 0.06, 0.035, 0.019, 0.011, 0.0065, 0.0040, | |||
| 0.0025, 0.0014, 0.0008, 0.00045, 0.00030, 0.00017, 0.00008, 0.00004, | |||
| 0.00002, 0.00001, 0.000005, 0, 0, 0, 0, 0 | |||
| }; | |||
| /* 20bits unsigned fractional binary codes */ | |||
| const uint32_t ff_dca_lossless_quant[32] = { | |||
| 0, 4194304, 2097152, 1384120, 1048576, 696254, 524288, 348127, | |||
| @@ -4202,14 +4195,6 @@ const uint32_t ff_dca_lossless_quant[32] = { | |||
| 4, 2, 1, 0, 0, 0, 0, 0 | |||
| }; | |||
| const float ff_dca_lossless_quant_d[32] = { | |||
| 0, 1.0, 0.5, 0.33, 0.25, 0.166, 0.125, | |||
| 0.083, 0.0625, 0.03125, 0.0156, 7.874E-3, 3.922E-3, 1.957E-3, | |||
| 9.775E-4, 4.885E-4, 2.442E-4, 1.221E-4, 6.104E-5, 3.052E-5, 1.526E-5, | |||
| 7.629E-6, 3.815E-6, 1.907E-6, 9.537E-7, 4.768E-7, 2.384E-7, 0, | |||
| 0, 0, 0, 0 | |||
| }; | |||
| /* Vector quantization tables */ | |||
| DECLARE_ALIGNED(8, const int8_t, ff_dca_high_freq_vq)[1024][32] = { | |||
| @@ -35,10 +35,8 @@ extern const uint32_t ff_dca_scale_factor_quant6[64]; | |||
| extern const uint32_t ff_dca_scale_factor_quant7[128]; | |||
| extern const uint32_t ff_dca_lossy_quant[32]; | |||
| extern const float ff_dca_lossy_quant_d[32]; | |||
| extern const uint32_t ff_dca_lossless_quant[32]; | |||
| extern const float ff_dca_lossless_quant_d[32]; | |||
| extern const int8_t ff_dca_high_freq_vq[1024][32]; | |||
| @@ -913,12 +913,12 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index) | |||
| s->debug_flag |= 0x01; | |||
| } | |||
| s->dcadsp.decode_hf_int(subband_samples, s->dca_chan[k].high_freq_vq, | |||
| ff_dca_high_freq_vq, subsubframe * SAMPLES_PER_SUBBAND, | |||
| s->dca_chan[k].scale_factor, | |||
| s->audio_header.vq_start_subband[k], | |||
| s->audio_header.subband_activity[k]); | |||
| s->dcadsp.decode_hf(subband_samples, s->dca_chan[k].high_freq_vq, | |||
| ff_dca_high_freq_vq, | |||
| subsubframe * SAMPLES_PER_SUBBAND, | |||
| s->dca_chan[k].scale_factor, | |||
| s->audio_header.vq_start_subband[k], | |||
| s->audio_header.subband_activity[k]); | |||
| } | |||
| } | |||
| @@ -27,29 +27,11 @@ | |||
| #include "dcadsp.h" | |||
| #include "dcamath.h" | |||
| static void decode_hf_c(float dst[DCA_SUBBANDS][8], | |||
| static void decode_hf_c(int32_t dst[DCA_SUBBANDS][8], | |||
| const int32_t vq_num[DCA_SUBBANDS], | |||
| const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
| int32_t scale[DCA_SUBBANDS][2], | |||
| intptr_t start, intptr_t end) | |||
| { | |||
| int i, l; | |||
| for (l = start; l < end; l++) { | |||
| /* 1 vector -> 32 samples but we only need the 8 samples | |||
| * for this subsubframe. */ | |||
| const int8_t *ptr = &hf_vq[vq_num[l]][vq_offset]; | |||
| float fscale = scale[l][0] * (1 / 16.0); | |||
| for (i = 0; i < 8; i++) | |||
| dst[l][i] = ptr[i] * fscale; | |||
| } | |||
| } | |||
| static void decode_hf_int_c(int32_t dst[DCA_SUBBANDS][8], | |||
| const int32_t vq_num[DCA_SUBBANDS], | |||
| const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
| int32_t scale[DCA_SUBBANDS][2], | |||
| intptr_t start, intptr_t end) | |||
| { | |||
| int i, j; | |||
| @@ -141,7 +123,6 @@ av_cold void ff_dcadsp_init(DCADSPContext *s) | |||
| s->lfe_fir[1] = dca_lfe_fir1_c; | |||
| s->qmf_32_subbands = dca_qmf_32_subbands; | |||
| s->decode_hf = decode_hf_c; | |||
| s->decode_hf_int = decode_hf_int_c; | |||
| s->dequantize = dequantize_c; | |||
| if (ARCH_AARCH64) | |||
| @@ -32,17 +32,12 @@ typedef struct DCADSPContext { | |||
| int *synth_buf_offset, float synth_buf2[32], | |||
| const float window[512], float *samples_out, | |||
| float raXin[32], float scale); | |||
| void (*decode_hf)(float dst[DCA_SUBBANDS][8], | |||
| void (*decode_hf)(int32_t dst[DCA_SUBBANDS][8], | |||
| const int32_t vq_num[DCA_SUBBANDS], | |||
| const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
| int32_t scale[DCA_SUBBANDS][2], | |||
| intptr_t start, intptr_t end); | |||
| void (*decode_hf_int)(int32_t dst[DCA_SUBBANDS][8], | |||
| const int32_t vq_num[DCA_SUBBANDS], | |||
| const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
| int32_t scale[DCA_SUBBANDS][2], | |||
| intptr_t start, intptr_t end); | |||
| void (*dequantize)(int32_t *samples, uint32_t step_size, uint64_t scale); | |||
| void (*dequantize)(int32_t *samples, uint32_t step_size, uint32_t scale); | |||
| } DCADSPContext; | |||
| void ff_dcadsp_init(DCADSPContext *s); | |||
| @@ -26,92 +26,6 @@ pf_inv16: times 4 dd 0x3D800000 ; 1/16 | |||
| SECTION .text | |||
| ; void decode_hf(float dst[DCA_SUBBANDS][8], const int32_t vq_num[DCA_SUBBANDS], | |||
| ; const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
| ; int32_t scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end) | |||
| %macro DECODE_HF 0 | |||
| cglobal decode_hf, 6,6,5, dst, num, src, offset, scale, start, end | |||
| lea srcq, [srcq + offsetq] | |||
| shl startq, 2 | |||
| mov offsetd, endm | |||
| %define DICT offsetq | |||
| shl offsetq, 2 | |||
| mov endm, offsetq | |||
| .loop: | |||
| %if ARCH_X86_64 | |||
| mov offsetd, [scaleq + 2 * startq] | |||
| cvtsi2ss m0, offsetd | |||
| %else | |||
| cvtsi2ss m0, [scaleq + 2 * startq] | |||
| %endif | |||
| mov offsetd, [numq + startq] | |||
| mulss m0, [pf_inv16] | |||
| shl DICT, 5 | |||
| shufps m0, m0, 0 | |||
| %if cpuflag(sse2) | |||
| %if cpuflag(sse4) | |||
| pmovsxbd m1, [srcq + DICT + 0] | |||
| pmovsxbd m2, [srcq + DICT + 4] | |||
| %else | |||
| movq m1, [srcq + DICT] | |||
| punpcklbw m1, m1 | |||
| mova m2, m1 | |||
| punpcklwd m1, m1 | |||
| punpckhwd m2, m2 | |||
| psrad m1, 24 | |||
| psrad m2, 24 | |||
| %endif | |||
| cvtdq2ps m1, m1 | |||
| cvtdq2ps m2, m2 | |||
| %else | |||
| movd mm0, [srcq + DICT + 0] | |||
| movd mm1, [srcq + DICT + 4] | |||
| punpcklbw mm0, mm0 | |||
| punpcklbw mm1, mm1 | |||
| movq mm2, mm0 | |||
| movq mm3, mm1 | |||
| punpcklwd mm0, mm0 | |||
| punpcklwd mm1, mm1 | |||
| punpckhwd mm2, mm2 | |||
| punpckhwd mm3, mm3 | |||
| psrad mm0, 24 | |||
| psrad mm1, 24 | |||
| psrad mm2, 24 | |||
| psrad mm3, 24 | |||
| cvtpi2ps m1, mm0 | |||
| cvtpi2ps m2, mm1 | |||
| cvtpi2ps m3, mm2 | |||
| cvtpi2ps m4, mm3 | |||
| shufps m0, m0, 0 | |||
| shufps m1, m3, q1010 | |||
| shufps m2, m4, q1010 | |||
| %endif | |||
| mulps m1, m0 | |||
| mulps m2, m0 | |||
| mova [dstq + 8 * startq + 0], m1 | |||
| mova [dstq + 8 * startq + 16], m2 | |||
| add startq, 4 | |||
| cmp startq, endm | |||
| jl .loop | |||
| .end: | |||
| %if notcpuflag(sse2) | |||
| emms | |||
| %endif | |||
| REP_RET | |||
| %endmacro | |||
| %if ARCH_X86_32 | |||
| INIT_XMM sse | |||
| DECODE_HF | |||
| %endif | |||
| INIT_XMM sse2 | |||
| DECODE_HF | |||
| INIT_XMM sse4 | |||
| DECODE_HF | |||
| ; %1=v0/v1 %2=in1 %3=in2 | |||
| %macro FIR_LOOP 2-3 | |||
| .loop%1: | |||
| @@ -23,15 +23,6 @@ | |||
| #include "libavutil/x86/cpu.h" | |||
| #include "libavcodec/dcadsp.h" | |||
| void ff_decode_hf_sse(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS], | |||
| const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
| int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end); | |||
| void ff_decode_hf_sse2(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS], | |||
| const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
| int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end); | |||
| void ff_decode_hf_sse4(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS], | |||
| const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
| int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end); | |||
| void ff_dca_lfe_fir0_sse(float *out, const float *in, const float *coefs); | |||
| void ff_dca_lfe_fir1_sse(float *out, const float *in, const float *coefs); | |||
| @@ -40,20 +31,9 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s) | |||
| int cpu_flags = av_get_cpu_flags(); | |||
| if (EXTERNAL_SSE(cpu_flags)) { | |||
| #if ARCH_X86_32 | |||
| s->decode_hf = ff_decode_hf_sse; | |||
| #endif | |||
| s->lfe_fir[0] = ff_dca_lfe_fir0_sse; | |||
| s->lfe_fir[1] = ff_dca_lfe_fir1_sse; | |||
| } | |||
| if (EXTERNAL_SSE2(cpu_flags)) { | |||
| s->decode_hf = ff_decode_hf_sse2; | |||
| } | |||
| if (EXTERNAL_SSE4(cpu_flags)) { | |||
| s->decode_hf = ff_decode_hf_sse4; | |||
| } | |||
| } | |||
| @@ -75,16 +75,6 @@ | |||
| } \ | |||
| } while (0) | |||
| #define randomize_decode_hf() \ | |||
| do { \ | |||
| int i; \ | |||
| for (i = 0; i < DCA_SUBBANDS; i++) { \ | |||
| vq_num[i] = rnd() >> 22; \ | |||
| scale[i][0] = rnd() >> 26; \ | |||
| scale[i][1] = INT32_MIN; \ | |||
| } \ | |||
| } while (0) | |||
| void checkasm_check_dcadsp(void) | |||
| { | |||
| DCADSPContext c; | |||
| @@ -98,40 +88,5 @@ void checkasm_check_dcadsp(void) | |||
| if (check_func(c.lfe_fir[1], "dca_lfe_fir1")) | |||
| check_lfe_fir(64, 1.0e-6f); | |||
| if (check_func(c.decode_hf, "dca_decode_hf")) { | |||
| LOCAL_ALIGNED_16(float, dst0, [DCA_SUBBANDS], [8]); | |||
| LOCAL_ALIGNED_16(float, dst1, [DCA_SUBBANDS], [8]); | |||
| LOCAL_ALIGNED_16(int32_t, scale, [DCA_SUBBANDS], [2]); | |||
| LOCAL_ALIGNED_16(int32_t, vq_num, [DCA_SUBBANDS]); | |||
| intptr_t start, end = 32, offset; | |||
| declare_func(void, float[DCA_SUBBANDS][8], const int32_t[DCA_SUBBANDS], | |||
| const int8_t[1024][DCA_SUBBANDS], intptr_t, int32_t[DCA_SUBBANDS][2], | |||
| intptr_t, intptr_t); | |||
| for (start = 0; start < 32; start++) { | |||
| for (offset = 0; offset < 32; offset += 8) { | |||
| int j; | |||
| for (j = 0; j < DCA_SUBBANDS; j++) { | |||
| memset(dst0[j], 0, sizeof(*(dst0[j])) * 8); | |||
| memset(dst1[j], 0, sizeof(*(dst1[j])) * 8); | |||
| } | |||
| randomize_decode_hf(); | |||
| call_ref(dst0, vq_num, ff_dca_high_freq_vq, offset, scale, start, end); | |||
| call_new(dst1, vq_num, ff_dca_high_freq_vq, offset, scale, start, end); | |||
| for (j = 0; j < 8 * DCA_SUBBANDS; j++) { | |||
| if (!float_near_ulp(dst0[j>>3][j&7], dst1[j>>3][j&7], 1)) { | |||
| fail(); | |||
| break; | |||
| } | |||
| } | |||
| bench_new(dst1, vq_num, ff_dca_high_freq_vq, offset, scale, start, end); | |||
| } | |||
| } | |||
| } | |||
| report("dcadsp"); | |||
| } | |||