|
|
|
@@ -487,34 +487,28 @@ cglobal pix_norm1, 2, 4 |
|
|
|
movd eax, m1 |
|
|
|
RET |
|
|
|
|
|
|
|
; DCT_SAD4 <offset>
;   %1 = byte offset added to blockq for the four loads below.
; Loads four vectors from blockq+%1+{0,16,32,48} into m2..m5, then passes each
; one to ABS1_SUM with m6 as the second operand, alternating m0 and m1 as the
; third operand.
; NOTE(review): ABS1_SUM is defined outside this view; presumably it adds the
; absolute values of its first operand into its third, using the second as
; scratch -- confirm against its definition.
%macro DCT_SAD4 1 |
|
|
|
mova m2, [blockq+%1+0 ] |
|
|
|
mova m3, [blockq+%1+16] |
|
|
|
mova m4, [blockq+%1+32] |
|
|
|
mova m5, [blockq+%1+48] |
|
|
|
; m2/m4 are paired with m0 and m3/m5 with m1, so two accumulators are in use.
ABS1_SUM m2, m6, m0 |
|
|
|
ABS1_SUM m3, m6, m1 |
|
|
|
ABS1_SUM m4, m6, m0 |
|
|
|
ABS1_SUM m5, m6, m1 |
|
|
|
%endmacro |
|
|
|
|
|
|
|
;----------------------------------------------- |
|
|
|
;int ff_sum_abs_dctelem(int16_t *block) |
|
|
|
;----------------------------------------------- |
|
|
|
; %1 = number of xmm registers used |
|
|
|
; %2 = number of inline loops |
|
|
|
|
|
|
|
; SUM_ABS_DCTELEM: emits the sum_abs_dctelem function for the currently
; selected instruction set.
; NOTE(review): two `%macro SUM_ABS_DCTELEM` headers (1 and 2 parameters)
; appear back to back below, and an "@@" hunk header sits inside the body.
; This listing therefore looks like a patch with its old and new lines
; interleaved, not a single assemblable macro -- confirm against the real file.
%macro SUM_ABS_DCTELEM 1 |
|
|
|
%macro SUM_ABS_DCTELEM 2 |
|
|
|
; %1 is forwarded to cglobal as its fourth argument; the single named
; argument is `block`, addressed below as blockq.
cglobal sum_abs_dctelem, 1, 1, %1, block |
|
|
|
; Clear both accumulators (m0 and m1).
pxor m0, m0 |
|
|
|
pxor m1, m1 |
|
|
|
; DCT_SAD4 form: byte offsets 0 and 64 are always processed; offsets 8 and 72
; are only assembled when mmsize == 8.
DCT_SAD4 0 |
|
|
|
%if mmsize == 8 |
|
|
|
DCT_SAD4 8 |
|
|
|
%endif |
|
|
|
DCT_SAD4 64 |
|
|
|
%if mmsize == 8 |
|
|
|
DCT_SAD4 72 |
|
|
|
%endif |
|
|
|
; %rep form: the body is repeated %2 times at assembly time. %%i is the index
; (in units of mmsize bytes) of the first of the four vectors loaded per pass.
%assign %%i 0 |
|
|
|
%rep %2 |
|
|
|
mova m2, [blockq+mmsize*(0+%%i)] |
|
|
|
mova m3, [blockq+mmsize*(1+%%i)] |
|
|
|
mova m4, [blockq+mmsize*(2+%%i)] |
|
|
|
mova m5, [blockq+mmsize*(3+%%i)] |
|
|
|
; NOTE(review): ABS1_SUM is defined elsewhere; presumably it accumulates
; absolute values into the third operand with m6 as scratch -- confirm.
ABS1_SUM m2, m6, m0 |
|
|
|
ABS1_SUM m3, m6, m1 |
|
|
|
ABS1_SUM m4, m6, m0 |
|
|
|
ABS1_SUM m5, m6, m1 |
|
|
|
; Step past the four vectors just consumed.
%assign %%i %%i+4 |
|
|
|
%endrep |
|
|
|
; Merge the two accumulators with unsigned saturating word addition.
paddusw m0, m1 |
|
|
|
; NOTE(review): HSUM is defined elsewhere; presumably it reduces m0
; horizontally into eax using m1 as a temporary -- confirm.
HSUM m0, m1, eax |
|
|
|
; Keep only the low 16 bits of eax.
and eax, 0xFFFF |
|
|
|
@@ -522,10 +516,10 @@ cglobal sum_abs_dctelem, 1, 1, %1, block |
|
|
|
%endmacro |
|
|
|
|
|
|
|
; Instantiations of SUM_ABS_DCTELEM, one group per INIT_MMX / INIT_XMM
; selection (mmx, mmxext, sse2, ssse3).
; The first argument is what the macro forwards to cglobal as %1; in the
; two-argument form the second argument is the %rep count (4 under INIT_MMX,
; 2 under INIT_XMM).
; NOTE(review): every instruction set lists both a one-argument and a
; two-argument invocation. These look like the old and new lines of a patch;
; only one form can match whichever %macro header is actually in effect.
INIT_MMX mmx |
|
|
|
SUM_ABS_DCTELEM 0 |
|
|
|
SUM_ABS_DCTELEM 0, 4 |
|
|
|
INIT_MMX mmxext |
|
|
|
SUM_ABS_DCTELEM 0 |
|
|
|
SUM_ABS_DCTELEM 0, 4 |
|
|
|
INIT_XMM sse2 |
|
|
|
SUM_ABS_DCTELEM 7 |
|
|
|
SUM_ABS_DCTELEM 7, 2 |
|
|
|
INIT_XMM ssse3 |
|
|
|
SUM_ABS_DCTELEM 6 |
|
|
|
SUM_ABS_DCTELEM 6, 2 |