Browse Source

x86/vp9lpf: add ff_vp9_loop_filter_[vh]_88_16_sse2()

Similar gains to the ssse3 version, once again.

Signed-off-by: James Almer <jamrial@gmail.com>
tags/n2.2-rc1
James Almer Clément Bœsch 12 years ago
parent
commit
644c32ea4b
2 changed files with 20 additions and 3 deletions
  1. +3
    -0
      libavcodec/x86/vp9dsp_init.c
  2. +17
    -3
      libavcodec/x86/vp9lpf.asm

+ 3
- 0
libavcodec/x86/vp9dsp_init.c View File

@@ -187,6 +187,7 @@ void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stri
lpf_funcs(16, 16, sse2);
lpf_funcs(16, 16, ssse3);
lpf_funcs(16, 16, avx);
lpf_funcs(88, 16, sse2);
lpf_funcs(88, 16, ssse3);
lpf_funcs(88, 16, avx);

@@ -248,6 +249,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
init_fpel(1, 1, 32, avg, sse2);
init_fpel(0, 1, 64, avg, sse2);
if (ARCH_X86_64) {
dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_sse2;
dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_sse2;
dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_sse2;
dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_sse2;
}


+ 17
- 3
libavcodec/x86/vp9lpf.asm View File

@@ -304,6 +304,17 @@ SECTION .text
%define Q7 dst2q + strideq
%endmacro

; SPLATB_MASK reg, mask
;   %1 = vector register, shuffled in place
;   %2 = byte-shuffle control operand; only read on the ssse3 path
;
; With ssse3 this is a single pshufb of %1 by %2.
; Without ssse3, %2 is ignored and a fixed sequence is used instead that
; leaves byte 0 of %1 in byte lanes 0-7 and byte 1 of %1 in byte lanes 8-15.
; NOTE(review): the two paths only produce the same result if %2 selects
; byte 0 for lanes 0-7 and byte 1 for lanes 8-15 -- confirm against the
; definition of the mask passed by callers (mask_mix).
%macro SPLATB_MASK 2
%if cpuflag(ssse3)
pshufb %1, %2 ; one-instruction byte shuffle controlled by %2
%else
punpcklbw %1, %1 ; double each low byte: words become b0b0, b1b1, b2b2, ...
punpcklqdq %1, %1 ; copy the low qword into the high qword
pshuflw %1, %1, 0 ; low 4 words  <- word 0 (b0b0)
pshufhw %1, %1, 0x55 ; high 4 words <- word 1 of the high qword (b1b1)
%endif
%endmacro

%macro LOOPFILTER 2 ; %1=v/h %2=size1
lea mstrideq, [strideq]
neg mstrideq
@@ -394,11 +405,13 @@ SECTION .text
SPLATB_REG m2, I, m0 ; I I I I ...
SPLATB_REG m3, E, m0 ; E E E E ...
%elif %2 == 88
%if cpuflag(ssse3)
mova m0, [mask_mix]
%endif
movd m2, Id
movd m3, Ed
pshufb m2, m0
pshufb m3, m0
SPLATB_MASK m2, m0
SPLATB_MASK m3, m0
%endif
mova m0, [pb_80]
pxor m2, m0
@@ -456,7 +469,7 @@ SECTION .text
SPLATB_REG m7, H, m0 ; H H H H ...
%else
movd m7, Hd
pshufb m7, [mask_mix]
SPLATB_MASK m7, [mask_mix]
%endif
pxor m7, m8
pxor m4, m8
@@ -760,6 +773,7 @@ LPF_16_16_VH sse2
LPF_16_16_VH ssse3
LPF_16_16_VH avx

LPF_88_16_VH sse2
LPF_88_16_VH ssse3
LPF_88_16_VH avx



Loading…
Cancel
Save