Browse Source

lavr: x86: optimized 2-channel flt to s16p conversion

tags/n1.0
Justin Ruggles 13 years ago
parent
commit
31d0d7181d
2 changed files with 55 additions and 0 deletions
  1. +46
    -0
      libavresample/x86/audio_convert.asm
  2. +9
    -0
      libavresample/x86/audio_convert_init.c

+ 46
- 0
libavresample/x86/audio_convert.asm View File

@@ -1045,3 +1045,49 @@ CONV_S16_TO_FLTP_6CH
INIT_XMM avx
CONV_S16_TO_FLTP_6CH
%endif

;------------------------------------------------------------------------------
; void ff_conv_flt_to_s16p_2ch(int16_t *const *dst, float *src, int len,
; int channels);
;
; Converts interleaved 2-channel float samples in src into two separate int16
; planes, dst[0] and dst[1].
;
; dst      - array holding the two output plane pointers
; src      - interleaved float input (8 bytes per sample pair)
; len      - number of samples per channel
; channels - never read here; only the first 3 arguments are loaded
;
; Every loop iteration reads 4*mmsize bytes of src and writes mmsize bytes to
; each plane using aligned moves (mova), and the loop body always executes at
; least once.
; NOTE(review): this implies src and both planes are mmsize-aligned and len is
; a positive multiple of mmsize/2 -- confirm against the caller.
;------------------------------------------------------------------------------

%macro CONV_FLT_TO_S16P_2CH 0
; x86inc cglobal: 3 arguments loaded, 4 GPRs and 6 vector registers used.
; dst1 names the 4th GPR, used as scratch for the second plane pointer.
cglobal conv_flt_to_s16p_2ch, 3,4,6, dst0, src, len, dst1
; len = 2*len: size in bytes of one int16 output plane
lea lenq, [2*lend]
; fetch both plane pointers from the dst array; dst1 is loaded first because
; dst0q is still needed as the array base until the second load
mov dst1q, [dst0q+gprsize]
mov dst0q, [dst0q ]
; move every pointer to the end of its buffer; src spans 4*lenq bytes
; (two 4-byte floats for each 2-byte sample written to a plane)
lea srcq, [srcq+4*lenq]
add dst0q, lenq
add dst1q, lenq
; index runs from -size up to 0, so the add at the loop tail sets the flags
; used by the branch and no separate compare is needed
neg lenq
; multiplier applied to every sample before the float->int conversion
; (pf_s16_scale is defined outside this macro; presumably 2^15 -- confirm)
mova m5, [pf_s16_scale]
.loop:
; load 4 vectors of interleaved input
mova m0, [srcq+4*lenq ]
mova m1, [srcq+4*lenq+ mmsize]
mova m2, [srcq+4*lenq+2*mmsize]
mova m3, [srcq+4*lenq+3*mmsize]
; DEINT2_PS is defined elsewhere; presumably it deinterleaves each register
; pair so that m0/m2 hold channel 0 and m1/m3 hold channel 1, using m4 as
; scratch -- confirm against the macro definition
DEINT2_PS 0, 1, 4
DEINT2_PS 2, 3, 4
; scale all four vectors by m5
mulps m0, m0, m5
mulps m1, m1, m5
mulps m2, m2, m5
mulps m3, m3, m5
; convert packed floats to packed 32-bit integers
cvtps2dq m0, m0
cvtps2dq m1, m1
cvtps2dq m2, m2
cvtps2dq m3, m3
; narrow to int16 with signed saturation: m0 <- m0:m2, m1 <- m1:m3
packssdw m0, m2
packssdw m1, m3
; store one vector of int16 samples to each plane
mova [dst0q+lenq], m0
mova [dst1q+lenq], m1
; advance by one output vector; keep looping while the index is negative
add lenq, mmsize
jl .loop
REP_RET
%endmacro

; Expand the conversion macro once for SSE2 and, when HAVE_AVX is set, once
; more for AVX. Both instances are set up with INIT_XMM, so the same macro
; body is assembled for each target.
INIT_XMM sse2
CONV_FLT_TO_S16P_2CH
%if HAVE_AVX
INIT_XMM avx
CONV_FLT_TO_S16P_2CH
%endif

+ 9
- 0
libavresample/x86/audio_convert_init.c View File

@@ -120,6 +120,11 @@ extern void ff_conv_s16_to_fltp_6ch_sse4 (float *const *dst, int16_t *src,
extern void ff_conv_s16_to_fltp_6ch_avx (float *const *dst, int16_t *src,
int len, int channels);

/* 2-channel interleaved float -> planar s16 converters (SSE2 and AVX variants);
 * presumably provided by the x86 assembly -- definitions are not in this file. */
extern void ff_conv_flt_to_s16p_2ch_sse2(int16_t *const *dst, float *src,
int len, int channels);
extern void ff_conv_flt_to_s16p_2ch_avx (int16_t *const *dst, float *src,
int len, int channels);

av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
{
#if HAVE_YASM
@@ -175,6 +180,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
2, 16, 8, "SSE2", ff_conv_s16_to_fltp_2ch_sse2);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
6, 16, 4, "SSE2", ff_conv_s16_to_fltp_6ch_sse2);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
2, 16, 8, "SSE2", ff_conv_flt_to_s16p_2ch_sse2);
}
if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) {
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P,
@@ -219,6 +226,8 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac)
2, 16, 8, "AVX", ff_conv_s16_to_fltp_2ch_avx);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16,
6, 16, 4, "AVX", ff_conv_s16_to_fltp_6ch_avx);
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT,
2, 16, 8, "AVX", ff_conv_flt_to_s16p_2ch_avx);
}
#endif
}

Loading…
Cancel
Save