Browse Source

avcodec/x86/lossless_audiodsp: Move order&8 fallback into C code

This is simpler and more robust, and fixes XMM save/restore
mismatches.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
tags/n2.6
Michael Niedermayer 10 years ago
parent
commit
f1214763af
2 changed files with 26 additions and 10 deletions
  1. +0
    -8
      libavcodec/x86/lossless_audiodsp.asm
  2. +26
    -2
      libavcodec/x86/lossless_audiodsp_init.c

+ 0
- 8
libavcodec/x86/lossless_audiodsp.asm View File

@@ -26,12 +26,6 @@ SECTION_TEXT
; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3,
; int order, int mul)
cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul
%if mmsize == 16
test orderq, 8
jnz scalarproduct_and_madd_int16_fallback
%else
scalarproduct_and_madd_int16_fallback
%endif
shl orderq, 1
movd m7, mulm
%if mmsize == 16
@@ -123,8 +117,6 @@ align 16
; int order, int mul)
INIT_XMM ssse3
cglobal scalarproduct_and_madd_int16, 4,4,10, v1, v2, v3, order, mul
test orderq, 8
jnz scalarproduct_and_madd_int16_fallback
shl orderq, 1
movd m7, mulm
pshuflw m7, m7, 0


+ 26
- 2
libavcodec/x86/lossless_audiodsp_init.c View File

@@ -31,6 +31,30 @@ int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2,
const int16_t *v3,
int order, int mul);

/**
 * SSE2 entry point for scalarproduct_and_madd_int16 that dispatches on order.
 *
 * When bit 3 of order is set (order & 8) the call is forwarded to the MMXEXT
 * routine, otherwise to the SSE2 routine. All arguments are passed through
 * unchanged.
 *
 * @param v1    forwarded as-is to the selected routine
 * @param v2    forwarded as-is to the selected routine
 * @param v3    forwarded as-is to the selected routine
 * @param order element count; only bit 3 is inspected here
 * @param mul   forwarded as-is to the selected routine
 * @return the value returned by the selected routine, or 0 in builds where
 *         HAVE_SSE2_EXTERNAL is false
 */
static int32_t scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2,
                                                 const int16_t *v3,
                                                 int order, int mul)
{
#if HAVE_SSE2_EXTERNAL
    if (order & 8)
        return ff_scalarproduct_and_madd_int16_mmxext(v1, v2, v3, order, mul);
    return ff_scalarproduct_and_madd_int16_sse2(v1, v2, v3, order, mul);
#else
    /* No external SSE2 assembly in this build. ff_llauddsp_init_x86() installs
     * this wrapper only under EXTERNAL_SSE2(cpu_flags), so this path is not
     * expected to run (NOTE(review): confirm that macro is false in such
     * builds). Return a defined value instead of falling off the end of a
     * non-void function. */
    return 0;
#endif
}

/**
 * SSSE3 entry point for scalarproduct_and_madd_int16 that dispatches on order.
 *
 * When bit 3 of order is set (order & 8) the call is forwarded to the MMXEXT
 * routine, otherwise to the SSSE3 routine. All arguments are passed through
 * unchanged.
 *
 * @param v1    forwarded as-is to the selected routine
 * @param v2    forwarded as-is to the selected routine
 * @param v3    forwarded as-is to the selected routine
 * @param order element count; only bit 3 is inspected here
 * @param mul   forwarded as-is to the selected routine
 * @return the value returned by the selected routine, or 0 in builds where
 *         HAVE_SSSE3_EXTERNAL is false
 */
static int32_t scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2,
                                                  const int16_t *v3,
                                                  int order, int mul)
{
#if HAVE_SSSE3_EXTERNAL
    if (order & 8)
        return ff_scalarproduct_and_madd_int16_mmxext(v1, v2, v3, order, mul);
    return ff_scalarproduct_and_madd_int16_ssse3(v1, v2, v3, order, mul);
#else
    /* No external SSSE3 assembly in this build. ff_llauddsp_init_x86() installs
     * this wrapper only under EXTERNAL_SSSE3(cpu_flags), so this path is not
     * expected to run (NOTE(review): confirm that macro is false in such
     * builds). Return a defined value instead of falling off the end of a
     * non-void function. */
    return 0;
#endif
}

av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
{
int cpu_flags = av_get_cpu_flags();
@@ -39,9 +63,9 @@ av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext;

if (EXTERNAL_SSE2(cpu_flags))
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_sse2;

if (EXTERNAL_SSSE3(cpu_flags) &&
!(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) // cachesplit
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_ssse3;
}

Loading…
Cancel
Save