They were superseded by their integer equivalents. Rename the integer decode_hf_int to decode_hf. (tags/n3.0)
@@ -41,12 +41,6 @@ void ff_synth_filter_float_neon(FFTContext *imdct, | |||
float out[32], const float in[32], | |||
float scale); | |||
void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8], | |||
const int32_t vq_num[DCA_SUBBANDS], | |||
const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
int32_t scale[DCA_SUBBANDS][2], | |||
intptr_t start, intptr_t end); | |||
av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s) | |||
{ | |||
int cpu_flags = av_get_cpu_flags(); | |||
@@ -54,7 +48,6 @@ av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s) | |||
if (have_neon(cpu_flags)) { | |||
s->lfe_fir[0] = ff_dca_lfe_fir0_neon; | |||
s->lfe_fir[1] = ff_dca_lfe_fir1_neon; | |||
s->decode_hf = ff_decode_hf_neon; | |||
} | |||
} | |||
@@ -21,66 +21,6 @@ | |||
#include "libavutil/aarch64/asm.S" | |||
// Float high-frequency VQ decode, AArch64 NEON version.
//
// Registers on entry, following the argument order of the C prototype
// (dst, vq_num, hf_vq, vq_offset, scale, start, end):
//   x0 = dst, x1 = vq_num, x2 = hf_vq, x3 = vq_offset,
//   x4 = scale, x5 = start, x6 = end
//
// For each row l in [start, end): the 8 signed bytes at
// hf_vq[vq_num[l]] + vq_offset are converted to float, multiplied by
// scale[l][0] / 16 and written to dst[l][0..7].
// Rows are processed two at a time (label 2) with a one-row tail (label 1).
function ff_decode_hf_neon, export=1 | |||
// x2 = hf_vq + vq_offset: base of the 8-byte window inside every 32-byte vector
add x2, x2, x3 | |||
// Advance the three per-row arrays to row `start`:
// dst rows are 32 bytes (8 floats), vq_num entries 4 bytes, scale rows 8 bytes.
add x0, x0, x5, lsl #5 | |||
add x1, x1, x5, lsl #2 | |||
add x4, x4, x5, lsl #3 | |||
// x6 = end - start = number of rows to process
sub x6, x6, x5 | |||
// Fetch the first vector index (read before the row count is checked) and
// turn it into a source pointer: x7 = x2 + index * 32.
ldr w7, [x1], #4 | |||
add x7, x2, x7, lsl #5 | |||
// From here on x6 holds "rows remaining - 1".
subs x6, x6, #1 | |||
// exactly one row -> single-row tail
b.eq 1f | |||
// two or more rows -> paired loop
b.gt 2f | |||
// zero (or negative) row count: nothing to do
ret | |||
2: | |||
// Second row of the pair: load its index and build its source pointer in x8.
ldr w8, [x1], #4 | |||
// Account for the two rows handled in this iteration; the flags set here
// are consumed by the b.lt and b.gt below (nothing in between alters them).
subs x6, x6, #2 | |||
add x8, x2, x8, lsl #5 | |||
// v2 = { scale[l][0], scale[l][1], scale[l+1][0], scale[l+1][1] }
ld1 {v2.4s}, [x4], #16 | |||
// 8 signed bytes for each of the two rows
ld1 {v0.8b}, [x7] | |||
ld1 {v4.8b}, [x8] | |||
// sign-extend bytes to 16 bit
sxtl v3.8h, v0.8b | |||
sxtl v7.8h, v4.8b | |||
// int32 -> float with 4 fractional bits, i.e. scale / 16
scvtf v2.4s, v2.4s, #4 | |||
// sign-extend to 32 bit: v0/v1 = row l, v4/v5 = row l+1
sxtl v0.4s, v3.4h | |||
sxtl2 v1.4s, v3.8h | |||
sxtl v4.4s, v7.4h | |||
sxtl2 v5.4s, v7.8h | |||
scvtf v0.4s, v0.4s | |||
scvtf v1.4s, v1.4s | |||
scvtf v4.4s, v4.4s | |||
scvtf v5.4s, v5.4s | |||
// Lane 0 is scale[l][0], lane 2 is scale[l+1][0]; lanes 1 and 3 are unused.
fmul v0.4s, v0.4s, v2.s[0] | |||
fmul v1.4s, v1.4s, v2.s[0] | |||
fmul v4.4s, v4.4s, v2.s[2] | |||
fmul v5.4s, v5.4s, v2.s[2] | |||
// x6 < 0: this pair was the last one -> store it at label 10 and return
b.lt 10f | |||
// At least one more row follows: prefetch its index/pointer into x7.
ldr w7, [x1], #4 | |||
add x7, x2, x7, lsl #5 | |||
st1 {v0.4s,v1.4s}, [x0], #32 | |||
st1 {v4.4s,v5.4s}, [x0], #32 | |||
// x6 > 0: two or more rows remain -> another paired iteration.
// x6 == 0: exactly one row remains -> fall through to the tail.
b.gt 2b | |||
1: | |||
// Single-row tail: x7 already points at the source bytes, x4 at scale[l].
ldr w9, [x4] | |||
ld1 {v0.8b}, [x7] | |||
// s2 = scale[l][0] / 16 (fixed-point convert with 4 fractional bits)
scvtf s2, w9, #4 | |||
sxtl v3.8h, v0.8b | |||
sxtl v0.4s, v3.4h | |||
sxtl2 v1.4s, v3.8h | |||
scvtf v0.4s, v0.4s | |||
scvtf v1.4s, v1.4s | |||
fmul v0.4s, v0.4s, v2.s[0] | |||
fmul v1.4s, v1.4s, v2.s[0] | |||
st1 {v0.4s,v1.4s}, [x0] | |||
ret | |||
10: | |||
// Exit path for an even row count: store the final pair without prefetching.
st1 {v0.4s,v1.4s}, [x0], #32 | |||
st1 {v4.4s,v5.4s}, [x0] | |||
ret | |||
endfunc | |||
function ff_dca_lfe_fir0_neon, export=1 | |||
mov x3, #32 // decifactor | |||
sub x1, x1, #7*4 | |||
@@ -49,12 +49,6 @@ void ff_synth_filter_float_neon(FFTContext *imdct, | |||
float out[32], const float in[32], | |||
float scale); | |||
void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8], | |||
const int32_t vq_num[DCA_SUBBANDS], | |||
const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
int32_t scale[DCA_SUBBANDS][2], | |||
intptr_t start, intptr_t end); | |||
av_cold void ff_dcadsp_init_arm(DCADSPContext *s) | |||
{ | |||
int cpu_flags = av_get_cpu_flags(); | |||
@@ -67,7 +61,6 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s) | |||
if (have_neon(cpu_flags)) { | |||
s->lfe_fir[0] = ff_dca_lfe_fir0_neon; | |||
s->lfe_fir[1] = ff_dca_lfe_fir1_neon; | |||
s->decode_hf = ff_decode_hf_neon; | |||
} | |||
} | |||
@@ -20,35 +20,6 @@ | |||
#include "libavutil/arm/asm.S" | |||
@ Float high-frequency VQ decode, 32-bit ARM NEON version.
@
@ Arguments, following the order of the C prototype
@ (dst, vq_num, hf_vq, vq_offset, scale, start, end):
@   r0 = dst, r1 = vq_num, r2 = hf_vq, r3 = vq_offset,
@   scale, start and end are read from the stack below.
@
@ For each row l from start up to end: the 8 signed bytes at
@ hf_vq[vq_num[l]] + vq_offset are converted to float, multiplied by
@ scale[l][0] / 16 and written to dst[l][0..7].
@ The loop is bottom-tested with bne, so one row is always processed and the
@ loop only exits when the row counter equals end.
function ff_decode_hf_neon, export=1 | |||
@ Three registers pushed = 12 bytes, so the stacked arguments start at sp+12.
push {r4-r5,lr} | |||
@ r2 = hf_vq + vq_offset: base of the 8-byte window inside each 32-byte vector
add r2, r2, r3 | |||
@ r3 = scale (r3 is free again now that vq_offset has been folded into r2)
ldr r3, [sp, #12] | |||
@ r4 = start, r5 = end
ldrd r4, r5, [sp, #16] | |||
@ Advance the per-row arrays to row `start`:
@ scale rows are 8 bytes, vq_num entries 4 bytes, dst rows 32 bytes.
add r3, r3, r4, lsl #3 | |||
add r1, r1, r4, lsl #2 | |||
add r0, r0, r4, lsl #5 | |||
@ lr = next vector index; ldr_post is a macro from the included asm.S
@ (presumably a load with post-increment of r1 by 4 -- confirm there).
1: ldr_post lr, r1, #4 | |||
@ r4 = index of the row after this one
add r4, r4, #1 | |||
@ lr = r2 + index * 32: source bytes for this row
add lr, r2, lr, lsl #5 | |||
@ Flags set here are consumed by the bne at the bottom of the loop.
cmp r4, r5 | |||
@ d7 = { scale[l][0], scale[l][1] }, r3 advances to the next row
vld1.32 {d7}, [r3]! | |||
@ 8 signed source bytes; :64 asserts an 8-byte aligned address
vld1.8 {d0}, [lr,:64] | |||
@ int32 -> float with 4 fractional bits, i.e. scale / 16
vcvt.f32.s32 d7, d7, #4 | |||
@ sign-extend s8 -> s16 -> s32: q0 = samples 0..3, q1 = samples 4..7
vmovl.s8 q1, d0 | |||
vmovl.s16 q0, d2 | |||
vmovl.s16 q1, d3 | |||
vcvt.f32.s32 q0, q0 | |||
vcvt.f32.s32 q1, q1 | |||
@ Only lane 0 (scale[l][0]) is used; lane 1 is ignored.
vmul.f32 q0, q0, d7[0] | |||
vmul.f32 q1, q1, d7[0] | |||
@ Store one dst row (32 bytes); :128 asserts a 16-byte aligned address.
vst1.32 {q0-q1}, [r0,:128]! | |||
bne 1b | |||
pop {r4-r5,pc} | |||
endfunc | |||
function ff_dca_lfe_fir0_neon, export=1 | |||
push {r4-r6,lr} | |||
mov r3, #32 @ decifactor | |||
@@ -4187,13 +4187,6 @@ const uint32_t ff_dca_lossy_quant[32] = { | |||
84, 42, 21, 0, 0, 0, 0, 0 | |||
}; | |||
/* Floating-point companion of ff_dca_lossy_quant (32 entries).
 * The visible tail of the integer table (84, 42, 21) divided by 2^22
 * gives approximately the matching entries here (0.00002, 0.00001,
 * 0.000005); the remaining entries presumably follow the same relation.
 * The last five entries are zero in both tables. */
const float ff_dca_lossy_quant_d[32] = { | |||
0, 1.6, 1.0, 0.8, 0.59, 0.50, 0.42, 0.34, | |||
0.19, 0.11, 0.06, 0.035, 0.019, 0.011, 0.0065, 0.0040, | |||
0.0025, 0.0014, 0.0008, 0.00045, 0.00030, 0.00017, 0.00008, 0.00004, | |||
0.00002, 0.00001, 0.000005, 0, 0, 0, 0, 0 | |||
}; | |||
/* 20bits unsigned fractional binary codes */ | |||
const uint32_t ff_dca_lossless_quant[32] = { | |||
0, 4194304, 2097152, 1384120, 1048576, 696254, 524288, 348127, | |||
@@ -4202,14 +4195,6 @@ const uint32_t ff_dca_lossless_quant[32] = { | |||
4, 2, 1, 0, 0, 0, 0, 0 | |||
}; | |||
/* Floating-point companion of ff_dca_lossless_quant (32 entries).
 * Each value is approximately the corresponding integer entry divided
 * by 2^22, e.g. 4194304 -> 1.0, 2097152 -> 0.5, 1384120 -> 0.33 and,
 * at the tail, 4 -> 9.537E-7, 2 -> 4.768E-7, 1 -> 2.384E-7.
 * Note the initializer is laid out seven values per row, unlike the
 * integer table's eight. */
const float ff_dca_lossless_quant_d[32] = { | |||
0, 1.0, 0.5, 0.33, 0.25, 0.166, 0.125, | |||
0.083, 0.0625, 0.03125, 0.0156, 7.874E-3, 3.922E-3, 1.957E-3, | |||
9.775E-4, 4.885E-4, 2.442E-4, 1.221E-4, 6.104E-5, 3.052E-5, 1.526E-5, | |||
7.629E-6, 3.815E-6, 1.907E-6, 9.537E-7, 4.768E-7, 2.384E-7, 0, | |||
0, 0, 0, 0 | |||
}; | |||
/* Vector quantization tables */ | |||
DECLARE_ALIGNED(8, const int8_t, ff_dca_high_freq_vq)[1024][32] = { | |||
@@ -35,10 +35,8 @@ extern const uint32_t ff_dca_scale_factor_quant6[64]; | |||
extern const uint32_t ff_dca_scale_factor_quant7[128]; | |||
extern const uint32_t ff_dca_lossy_quant[32]; | |||
extern const float ff_dca_lossy_quant_d[32]; | |||
extern const uint32_t ff_dca_lossless_quant[32]; | |||
extern const float ff_dca_lossless_quant_d[32]; | |||
extern const int8_t ff_dca_high_freq_vq[1024][32]; | |||
@@ -913,12 +913,12 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index) | |||
s->debug_flag |= 0x01; | |||
} | |||
s->dcadsp.decode_hf_int(subband_samples, s->dca_chan[k].high_freq_vq, | |||
ff_dca_high_freq_vq, subsubframe * SAMPLES_PER_SUBBAND, | |||
s->dca_chan[k].scale_factor, | |||
s->audio_header.vq_start_subband[k], | |||
s->audio_header.subband_activity[k]); | |||
s->dcadsp.decode_hf(subband_samples, s->dca_chan[k].high_freq_vq, | |||
ff_dca_high_freq_vq, | |||
subsubframe * SAMPLES_PER_SUBBAND, | |||
s->dca_chan[k].scale_factor, | |||
s->audio_header.vq_start_subband[k], | |||
s->audio_header.subband_activity[k]); | |||
} | |||
} | |||
@@ -27,29 +27,11 @@ | |||
#include "dcadsp.h" | |||
#include "dcamath.h" | |||
static void decode_hf_c(float dst[DCA_SUBBANDS][8], | |||
static void decode_hf_c(int32_t dst[DCA_SUBBANDS][8], | |||
const int32_t vq_num[DCA_SUBBANDS], | |||
const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
int32_t scale[DCA_SUBBANDS][2], | |||
intptr_t start, intptr_t end) | |||
{ | |||
int i, l; | |||
for (l = start; l < end; l++) { | |||
/* 1 vector -> 32 samples but we only need the 8 samples | |||
* for this subsubframe. */ | |||
const int8_t *ptr = &hf_vq[vq_num[l]][vq_offset]; | |||
float fscale = scale[l][0] * (1 / 16.0); | |||
for (i = 0; i < 8; i++) | |||
dst[l][i] = ptr[i] * fscale; | |||
} | |||
} | |||
static void decode_hf_int_c(int32_t dst[DCA_SUBBANDS][8], | |||
const int32_t vq_num[DCA_SUBBANDS], | |||
const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
int32_t scale[DCA_SUBBANDS][2], | |||
intptr_t start, intptr_t end) | |||
{ | |||
int i, j; | |||
@@ -141,7 +123,6 @@ av_cold void ff_dcadsp_init(DCADSPContext *s) | |||
s->lfe_fir[1] = dca_lfe_fir1_c; | |||
s->qmf_32_subbands = dca_qmf_32_subbands; | |||
s->decode_hf = decode_hf_c; | |||
s->decode_hf_int = decode_hf_int_c; | |||
s->dequantize = dequantize_c; | |||
if (ARCH_AARCH64) | |||
@@ -32,17 +32,12 @@ typedef struct DCADSPContext { | |||
int *synth_buf_offset, float synth_buf2[32], | |||
const float window[512], float *samples_out, | |||
float raXin[32], float scale); | |||
void (*decode_hf)(float dst[DCA_SUBBANDS][8], | |||
void (*decode_hf)(int32_t dst[DCA_SUBBANDS][8], | |||
const int32_t vq_num[DCA_SUBBANDS], | |||
const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
int32_t scale[DCA_SUBBANDS][2], | |||
intptr_t start, intptr_t end); | |||
void (*decode_hf_int)(int32_t dst[DCA_SUBBANDS][8], | |||
const int32_t vq_num[DCA_SUBBANDS], | |||
const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
int32_t scale[DCA_SUBBANDS][2], | |||
intptr_t start, intptr_t end); | |||
void (*dequantize)(int32_t *samples, uint32_t step_size, uint64_t scale); | |||
void (*dequantize)(int32_t *samples, uint32_t step_size, uint32_t scale); | |||
} DCADSPContext; | |||
void ff_dcadsp_init(DCADSPContext *s); | |||
@@ -26,92 +26,6 @@ pf_inv16: times 4 dd 0x3D800000 ; 1/16 | |||
SECTION .text | |||
; void decode_hf(float dst[DCA_SUBBANDS][8], const int32_t vq_num[DCA_SUBBANDS], | |||
; const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
; int32_t scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end) | |||
; DECODE_HF: emits decode_hf for the instruction set selected by the preceding
; INIT_XMM (sse / sse2 / sse4).
;
; For each row l from start up to end: the 8 signed bytes at
; hf_vq[vq_num[l]] + vq_offset are converted to float, multiplied by
; scale[l][0] * (1/16) and stored to dst[l][0..7].
; The loop is bottom-tested, so one row is always processed.
%macro DECODE_HF 0 | |||
; 6 arguments loaded into registers, 6 GPRs, 5 vector registers;
; the 7th argument (end) is only reachable through endm.
cglobal decode_hf, 6,6,5, dst, num, src, offset, scale, start, end | |||
; srcq = hf_vq + vq_offset; offsetq is free for reuse after this
lea srcq, [srcq + offsetq] | |||
; startq becomes a byte offset into vq_num (4 bytes per entry)
shl startq, 2 | |||
; Scale `end` the same way and write it back to its argument slot so the
; loop can compare startq against endm directly.
mov offsetd, endm | |||
; DICT aliases offsetq: inside the loop it holds the current vector index
%define DICT offsetq | |||
shl offsetq, 2 | |||
mov endm, offsetq | |||
.loop: | |||
; m0 = (float) scale[l][0]; scale rows are 8 bytes, hence 2 * startq
%if ARCH_X86_64 | |||
mov offsetd, [scaleq + 2 * startq] | |||
cvtsi2ss m0, offsetd | |||
%else | |||
cvtsi2ss m0, [scaleq + 2 * startq] | |||
%endif | |||
; DICT = vq_num[l]
mov offsetd, [numq + startq] | |||
; m0 *= 1/16 (pf_inv16)
mulss m0, [pf_inv16] | |||
; DICT *= 32: byte offset of the selected 32-byte vector
shl DICT, 5 | |||
; broadcast the scale factor to all four lanes
shufps m0, m0, 0 | |||
%if cpuflag(sse2) | |||
%if cpuflag(sse4) | |||
; SSE4: sign-extend 4 bytes straight to 4 dwords, twice
pmovsxbd m1, [srcq + DICT + 0] | |||
pmovsxbd m2, [srcq + DICT + 4] | |||
%else | |||
; SSE2: duplicate each byte up to a dword (value ends up in the top byte),
; then arithmetic-shift right by 24 to get the sign-extended value.
movq m1, [srcq + DICT] | |||
punpcklbw m1, m1 | |||
mova m2, m1 | |||
punpcklwd m1, m1 | |||
punpckhwd m2, m2 | |||
psrad m1, 24 | |||
psrad m2, 24 | |||
%endif | |||
cvtdq2ps m1, m1 | |||
cvtdq2ps m2, m2 | |||
%else | |||
; SSE without SSE2: same duplicate-and-shift sign extension in MMX registers.
; mm0/mm2 = samples 0-1 / 2-3, mm1/mm3 = samples 4-5 / 6-7.
movd mm0, [srcq + DICT + 0] | |||
movd mm1, [srcq + DICT + 4] | |||
punpcklbw mm0, mm0 | |||
punpcklbw mm1, mm1 | |||
movq mm2, mm0 | |||
movq mm3, mm1 | |||
punpcklwd mm0, mm0 | |||
punpcklwd mm1, mm1 | |||
punpckhwd mm2, mm2 | |||
punpckhwd mm3, mm3 | |||
psrad mm0, 24 | |||
psrad mm1, 24 | |||
psrad mm2, 24 | |||
psrad mm3, 24 | |||
; cvtpi2ps fills only the low two floats of each destination
cvtpi2ps m1, mm0 | |||
cvtpi2ps m2, mm1 | |||
cvtpi2ps m3, mm2 | |||
cvtpi2ps m4, mm3 | |||
; NOTE(review): m0 was already broadcast above; this second shufps looks redundant.
shufps m0, m0, 0 | |||
; merge the low halves: m1 = samples 0..3, m2 = samples 4..7
shufps m1, m3, q1010 | |||
shufps m2, m4, q1010 | |||
%endif | |||
mulps m1, m0 | |||
mulps m2, m0 | |||
; dst rows are 32 bytes and startq is index * 4, hence 8 * startq;
; mova requires the destination to be 16-byte aligned.
mova [dstq + 8 * startq + 0], m1 | |||
mova [dstq + 8 * startq + 16], m2 | |||
; next row (startq counts in units of 4 bytes)
add startq, 4 | |||
cmp startq, endm | |||
jl .loop | |||
.end: | |||
; the MMX path must clear the MMX state before returning
%if notcpuflag(sse2) | |||
emms | |||
%endif | |||
REP_RET | |||
%endmacro | |||
%if ARCH_X86_32 | |||
INIT_XMM sse | |||
DECODE_HF | |||
%endif | |||
INIT_XMM sse2 | |||
DECODE_HF | |||
INIT_XMM sse4 | |||
DECODE_HF | |||
; %1=v0/v1 %2=in1 %3=in2 | |||
%macro FIR_LOOP 2-3 | |||
.loop%1: | |||
@@ -23,15 +23,6 @@ | |||
#include "libavutil/x86/cpu.h" | |||
#include "libavcodec/dcadsp.h" | |||
void ff_decode_hf_sse(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS], | |||
const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end); | |||
void ff_decode_hf_sse2(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS], | |||
const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end); | |||
void ff_decode_hf_sse4(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS], | |||
const int8_t hf_vq[1024][32], intptr_t vq_offset, | |||
int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end); | |||
void ff_dca_lfe_fir0_sse(float *out, const float *in, const float *coefs); | |||
void ff_dca_lfe_fir1_sse(float *out, const float *in, const float *coefs); | |||
@@ -40,20 +31,9 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s) | |||
int cpu_flags = av_get_cpu_flags(); | |||
if (EXTERNAL_SSE(cpu_flags)) { | |||
#if ARCH_X86_32 | |||
s->decode_hf = ff_decode_hf_sse; | |||
#endif | |||
s->lfe_fir[0] = ff_dca_lfe_fir0_sse; | |||
s->lfe_fir[1] = ff_dca_lfe_fir1_sse; | |||
} | |||
if (EXTERNAL_SSE2(cpu_flags)) { | |||
s->decode_hf = ff_decode_hf_sse2; | |||
} | |||
if (EXTERNAL_SSE4(cpu_flags)) { | |||
s->decode_hf = ff_decode_hf_sse4; | |||
} | |||
} | |||
@@ -75,16 +75,6 @@ | |||
} \ | |||
} while (0) | |||
/* Fill the decode_hf test inputs for all DCA_SUBBANDS rows.
 * Expects `vq_num` and `scale` arrays to be in scope at the expansion site.
 *  - vq_num[i]   = rnd() >> 22; presumably rnd() yields 32 bits, giving a
 *                  10-bit index that fits the 1024-entry VQ table -- confirm.
 *  - scale[i][0] = rnd() >> 26; presumably a 6-bit scale factor -- confirm.
 *  - scale[i][1] = INT32_MIN; presumably a poison value for the element the
 *                  float decode_hf does not use -- confirm. */
#define randomize_decode_hf() \ | |||
do { \ | |||
int i; \ | |||
for (i = 0; i < DCA_SUBBANDS; i++) { \ | |||
vq_num[i] = rnd() >> 22; \ | |||
scale[i][0] = rnd() >> 26; \ | |||
scale[i][1] = INT32_MIN; \ | |||
} \ | |||
} while (0) | |||
void checkasm_check_dcadsp(void) | |||
{ | |||
DCADSPContext c; | |||
@@ -98,40 +88,5 @@ void checkasm_check_dcadsp(void) | |||
if (check_func(c.lfe_fir[1], "dca_lfe_fir1")) | |||
check_lfe_fir(64, 1.0e-6f); | |||
if (check_func(c.decode_hf, "dca_decode_hf")) { | |||
LOCAL_ALIGNED_16(float, dst0, [DCA_SUBBANDS], [8]); | |||
LOCAL_ALIGNED_16(float, dst1, [DCA_SUBBANDS], [8]); | |||
LOCAL_ALIGNED_16(int32_t, scale, [DCA_SUBBANDS], [2]); | |||
LOCAL_ALIGNED_16(int32_t, vq_num, [DCA_SUBBANDS]); | |||
intptr_t start, end = 32, offset; | |||
declare_func(void, float[DCA_SUBBANDS][8], const int32_t[DCA_SUBBANDS], | |||
const int8_t[1024][DCA_SUBBANDS], intptr_t, int32_t[DCA_SUBBANDS][2], | |||
intptr_t, intptr_t); | |||
for (start = 0; start < 32; start++) { | |||
for (offset = 0; offset < 32; offset += 8) { | |||
int j; | |||
for (j = 0; j < DCA_SUBBANDS; j++) { | |||
memset(dst0[j], 0, sizeof(*(dst0[j])) * 8); | |||
memset(dst1[j], 0, sizeof(*(dst1[j])) * 8); | |||
} | |||
randomize_decode_hf(); | |||
call_ref(dst0, vq_num, ff_dca_high_freq_vq, offset, scale, start, end); | |||
call_new(dst1, vq_num, ff_dca_high_freq_vq, offset, scale, start, end); | |||
for (j = 0; j < 8 * DCA_SUBBANDS; j++) { | |||
if (!float_near_ulp(dst0[j>>3][j&7], dst1[j>>3][j&7], 1)) { | |||
fail(); | |||
break; | |||
} | |||
} | |||
bench_new(dst1, vq_num, ff_dca_high_freq_vq, offset, scale, start, end); | |||
} | |||
} | |||
} | |||
report("dcadsp"); | |||
} |