Browse Source

x86: sbrdsp: implement SSE qmf_deint_neg

From 133 (unrolled av_intfloat32 C) to 59 cycles on Arrandale/Win64.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
tags/n2.3
Christophe Gisquet Michael Niedermayer 13 years ago
parent
commit
d1310c591e
2 changed files with 25 additions and 0 deletions
  1. +22
    -0
      libavcodec/x86/sbrdsp.asm
  2. +3
    -0
      libavcodec/x86/sbrdsp_init.c

+ 22
- 0
libavcodec/x86/sbrdsp.asm View File

@@ -423,3 +423,25 @@ apply_noise_main:
add count, mmsize
jl .loop
RET

;-----------------------------------------------------------------------
; sbr_qmf_deint_neg(v, src), SSE version.
; Declared on the C side as:
;   void ff_sbr_qmf_deint_neg_sse(float *v, const float *src);
;
; Deinterleaves 64 floats from src into 64 floats at v:
;   v[i]      =  src[63 - 2*i]
;   v[63 - i] = -src[62 - 2*i]          for i = 0..31
; i.e. the odd-indexed inputs land, in reversed order, in the lower
; half of v, and the negated even-indexed inputs land, in forward
; order, in the upper half of v.
;
; Registers: v/src = arguments, vrev = write pointer for the upper
; half, c = negative byte offset that counts up to zero.
; m0/m1 = loaded input, m2 = even samples, m3 = sign mask.
;
; All loads and stores use mova (the aligned-move macro), so v and src
; are presumably required to be mmsize-aligned -- confirm with callers.
;-----------------------------------------------------------------------
INIT_XMM sse
cglobal sbr_qmf_deint_neg, 2,4,4,v,src,vrev,c
; COUNT  = bytes written to each half of v (32 floats).
; OFFSET = byte distance from v to the start of its upper half.
%define COUNT 32*4
%define OFFSET 32*4
; c runs from -COUNT up to 0 in steps of mmsize.
mov cq, -COUNT
; vrev is biased by +COUNT so that [vrevq + cq] starts at v + OFFSET
; and advances forward as c climbs towards zero.
lea vrevq, [vq + OFFSET + COUNT]
; v now points at the last vector of the lower half; it walks backwards.
add vq, OFFSET-mmsize
; src is biased by 2*COUNT so that [srcq + 2*cq] starts at the original
; src; the input is consumed twice as fast as either output half.
add srcq, 2*COUNT
; ps_neg is defined elsewhere in this file; presumably a vector of
; float sign-bit masks, since the xorps below is what negates -- confirm.
mova m3, [ps_neg]
.loop:
; Load 8 consecutive input floats s0..s7 (m0 = s0..s3, m1 = s4..s7).
mova m0, [srcq + 2*cq + 0*mmsize]
mova m1, [srcq + 2*cq + 1*mmsize]
; m2 = { s0, s2, s4, s6 }: even-indexed samples, forward order.
; (The 3-operand form is handled by the x86inc macro layer.)
shufps m2, m0, m1, q2020
; m1 = { s7, s5, s3, s1 }: odd-indexed samples, reversed order.
shufps m1, m0, q1313
; Flip the sign of the even samples.
xorps m2, m3
; Reversed odd samples go to the descending lower-half pointer.
mova [vq], m1
; Negated even samples go to the ascending upper-half position.
mova [vrevq + cq], m2
sub vq, mmsize
; The add sets the flags tested by jl: keep looping while c < 0.
add cq, mmsize
jl .loop
REP_RET

+ 3
- 0
libavcodec/x86/sbrdsp_init.c View File

@@ -51,6 +51,8 @@ void ff_sbr_hf_apply_noise_3_sse2(float (*Y)[2], const float *s_m,
const float *q_filt, int noise,
int kx, int m_max);

/* SSE version of the qmf_deint_neg DSP function; presumably the symbol
 * emitted by the sbr_qmf_deint_neg cglobal in libavcodec/x86/sbrdsp.asm. */
void ff_sbr_qmf_deint_neg_sse(float *v, const float *src);

av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
{
int cpu_flags = av_get_cpu_flags();
@@ -63,6 +65,7 @@ av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
s->hf_gen = ff_sbr_hf_gen_sse;
s->qmf_post_shuffle = ff_sbr_qmf_post_shuffle_sse;
s->qmf_deint_bfly = ff_sbr_qmf_deint_bfly_sse;
s->qmf_deint_neg = ff_sbr_qmf_deint_neg_sse;
}

if (EXTERNAL_SSE2(cpu_flags)) {


Loading…
Cancel
Save