Browse Source

x86/dsputilenc: make the SUM_ABS_DCTELEM macro more readable

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
tags/n2.3
James Almer Michael Niedermayer 11 years ago
parent
commit
d94e255dd1
1 changed files with 18 additions and 24 deletions
  1. +18
    -24
      libavcodec/x86/dsputilenc.asm

+ 18
- 24
libavcodec/x86/dsputilenc.asm View File

@@ -487,34 +487,28 @@ cglobal pix_norm1, 2, 4
movd eax, m1
RET

; DCT_SAD4 offset
;   %1 = constant byte offset added to blockq for this group of loads
;
; Loads four registers (m2-m5) from blockq+%1+{0,16,32,48} and feeds each
; one to ABS1_SUM, alternating between m0 and m1 as the third operand so
; that two independent accumulators are used. m6 is passed as the second
; operand every time.
; NOTE(review): ABS1_SUM is defined outside this view; presumably it adds
; the absolute values of its first operand into its third, using the second
; as scratch -- confirm against its definition.
; The visible caller (SUM_ABS_DCTELEM) zeroes m0 and m1 with pxor before
; the first DCT_SAD4.
%macro DCT_SAD4 1
mova m2, [blockq+%1+0 ]
mova m3, [blockq+%1+16]
mova m4, [blockq+%1+32]
mova m5, [blockq+%1+48]
; even loads accumulate via m0, odd loads via m1
ABS1_SUM m2, m6, m0
ABS1_SUM m3, m6, m1
ABS1_SUM m4, m6, m0
ABS1_SUM m5, m6, m1
%endmacro

;-----------------------------------------------
;int ff_sum_abs_dctelem(int16_t *block)
;-----------------------------------------------
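; %1 = number of xmm registers used (passed through to cglobal)
; %2 = number of unrolled loop iterations (the %rep count); each iteration
;      loads and accumulates four mmsize-byte vectors from block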

%macro SUM_ABS_DCTELEM 1
%macro SUM_ABS_DCTELEM 2
cglobal sum_abs_dctelem, 1, 1, %1, block
pxor m0, m0
pxor m1, m1
DCT_SAD4 0
%if mmsize == 8
DCT_SAD4 8
%endif
DCT_SAD4 64
%if mmsize == 8
DCT_SAD4 72
%endif
%assign %%i 0
%rep %2
mova m2, [blockq+mmsize*(0+%%i)]
mova m3, [blockq+mmsize*(1+%%i)]
mova m4, [blockq+mmsize*(2+%%i)]
mova m5, [blockq+mmsize*(3+%%i)]
ABS1_SUM m2, m6, m0
ABS1_SUM m3, m6, m1
ABS1_SUM m4, m6, m0
ABS1_SUM m5, m6, m1
%assign %%i %%i+4
%endrep
paddusw m0, m1
HSUM m0, m1, eax
and eax, 0xFFFF
@@ -522,10 +516,10 @@ cglobal sum_abs_dctelem, 1, 1, %1, block
%endmacro

; Instantiate sum_abs_dctelem once per instruction set.
; Each SUM_ABS_DCTELEM line appears twice below: first in the one-argument
; form (xmm register count only), then in the two-argument form that adds
; the %rep iteration count.
; NOTE(review): assuming mmsize is 8 under INIT_MMX and 16 under INIT_XMM,
; the counts 4 and 2 both cover 4 * mmsize * count = 128 bytes of block --
; confirm against the INIT_* macro definitions.
INIT_MMX mmx
SUM_ABS_DCTELEM 0
SUM_ABS_DCTELEM 0, 4
INIT_MMX mmxext
SUM_ABS_DCTELEM 0
SUM_ABS_DCTELEM 0, 4
INIT_XMM sse2
SUM_ABS_DCTELEM 7
SUM_ABS_DCTELEM 7, 2
INIT_XMM ssse3
SUM_ABS_DCTELEM 6
SUM_ABS_DCTELEM 6, 2

Loading…
Cancel
Save