Browse Source

x86/hevc_deblock: use psignw instead of pmullw where possible

psignw with an all -1 sign operand negates each word, giving the same result as pmullw by -1, and it's slightly faster.

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
tags/n2.4
James Almer Michael Niedermayer 11 years ago
parent
commit
4f91bb0ff0
1 changed file with 8 additions and 0 deletions
  1. +8
    -0
      libavcodec/x86/hevc_deblock.asm

+ 8
- 0
libavcodec/x86/hevc_deblock.asm View File

@@ -324,7 +324,11 @@ ALIGN 16
movd m4, [tcq+4]; tc1
punpcklwd m4, m4
shufps m6, m4, 0; tc0, tc1
%if cpuflag(ssse3)
psignw m4, m6, [pw_m1]; -tc0, -tc1
%else
pmullw m4, m6, [pw_m1]; -tc0, -tc1
%endif
;end tc calculations


paddw m5, [pw_4]; +4
@@ -609,7 +613,11 @@ ALIGN 16
pminsw m12, m9; av_clip(delta0, -tc, tc)


psraw m9, 1; tc -> tc / 2
%if cpuflag(ssse3)
psignw m14, m9, [pw_m1]; -tc / 2
%else
pmullw m14, m9, [pw_m1]; -tc / 2
%endif


pavgw m15, m1, m3; (p2 + p0 + 1) >> 1
psubw m15, m2; ((p2 + p0 + 1) >> 1) - p1


Loading…
Cancel
Save