|
|
|
@@ -28,25 +28,23 @@ cextern pw_1023 |
|
|
|
|
|
|
|
; the add_res macros and functions were largely inspired by h264_idct.asm from the x264 project |
|
|
|
; ADD_RES_MMX_4_8 -- takes no macro parameters.
; Reads 16-bit words at [r1] and [r1+8] and pixel bytes at [r0] and [r0+r2],
; then stores the result back to [r0] and [r0+r2].
; Technique visible below: a zeroed register minus the words (pxor + psubw)
; yields the negated values; packuswb clamps every word to 0..255, so one
; packed register keeps the positive parts and the other keeps the magnitudes
; of the negative parts; paddusb / psubusb then apply them to the pixels with
; unsigned saturation.
; NOTE(review): this span sits under the "@@ -28,25 +28,23 @@" hunk header and
; appears to interleave the removed and the added lines of that hunk with the
; +/- markers lost (two different load pairs from [r1], both movh- and
; movd-based row accesses). Do not read it as one executable sequence --
; confirm against the real file.
; Presumably r0 = destination pixels, r1 = residuals, r2 = row stride --
; TODO confirm against the cglobal declaration that invokes this macro.
%macro ADD_RES_MMX_4_8 0 |
|
|
|
; load variant using m2/m4 for the words at [r1] and [r1+8]
mova m2, [r1] |
|
|
|
mova m4, [r1+8] |
|
|
|
; load variant using m0/m2 for the same two addresses
mova m0, [r1] |
|
|
|
mova m2, [r1+8] |
|
|
|
; zero m1/m3, then subtract the words: m1 = -m0, m3 = -m2
pxor m1, m1 |
|
|
|
pxor m3, m3 |
|
|
|
psubw m1, m0 |
|
|
|
psubw m3, m2 |
|
|
|
; pack words to unsigned-saturated bytes (each register packed with itself)
packuswb m2, m2 |
|
|
|
packuswb m3, m3 |
|
|
|
; same negate-and-pack steps for the m4/m5 pair
pxor m5, m5 |
|
|
|
psubw m5, m4 |
|
|
|
packuswb m4, m4 |
|
|
|
packuswb m5, m5 |
|
|
|
|
|
|
|
; row loads from [r0] and [r0+r2] via movh
movh m0, [r0] |
|
|
|
movh m1, [r0+r2] |
|
|
|
; pack two word registers into one byte register (m0:m2 and m1:m3)
packuswb m0, m2 |
|
|
|
packuswb m1, m3 |
|
|
|
|
|
|
|
; row loads via movd, merged into m2 (low dword from [r0], next from [r0+r2])
movd m2, [r0] |
|
|
|
movd m3, [r0+r2] |
|
|
|
punpckldq m2, m3 |
|
|
|
; unsigned-saturating byte add / subtract
paddusb m0, m2 |
|
|
|
paddusb m1, m4 |
|
|
|
psubusb m0, m3 |
|
|
|
psubusb m1, m5 |
|
|
|
; stores to [r0] and [r0+r2] via movh
movh [r0], m0 |
|
|
|
movh [r0+r2], m1 |
|
|
|
psubusb m0, m1 |
|
|
|
; store the low dword to [r0], shift the next dword down, store it to [r0+r2]
movd [r0], m0 |
|
|
|
psrlq m0, 32 |
|
|
|
movd [r0+r2], m0 |
|
|
|
%endmacro |
|
|
|
|
|
|
|
|
|
|
|
@@ -95,15 +93,8 @@ cglobal hevc_add_residual_4_8, 3, 3, 6 |
|
|
|
vinserti128 m2, m2, [r1+%1+32], 1 |
|
|
|
vinserti128 m6, m6, [r1+%1+48], 1 |
|
|
|
%endif |
|
|
|
%if cpuflag(avx) |
|
|
|
psubw m1, m0, m2 |
|
|
|
psubw m5, m0, m6 |
|
|
|
%else |
|
|
|
mova m1, m0 |
|
|
|
mova m5, m0 |
|
|
|
psubw m1, m2 |
|
|
|
psubw m5, m6 |
|
|
|
%endif |
|
|
|
packuswb m2, m6 |
|
|
|
packuswb m1, m5 |
|
|
|
|
|
|
|
@@ -113,15 +104,8 @@ cglobal hevc_add_residual_4_8, 3, 3, 6 |
|
|
|
vinserti128 m4, m4, [r1+%1+96 ], 1 |
|
|
|
vinserti128 m6, m6, [r1+%1+112], 1 |
|
|
|
%endif |
|
|
|
%if cpuflag(avx) |
|
|
|
psubw m3, m0, m4 |
|
|
|
psubw m5, m0, m6 |
|
|
|
%else |
|
|
|
mova m3, m0 |
|
|
|
mova m5, m0 |
|
|
|
psubw m3, m4 |
|
|
|
psubw m5, m6 |
|
|
|
%endif |
|
|
|
packuswb m4, m6 |
|
|
|
packuswb m3, m5 |
|
|
|
|
|
|
|
@@ -192,7 +176,7 @@ cglobal hevc_add_residual_32_8, 3, 5, 7 |
|
|
|
dec r4d |
|
|
|
jg .loop |
|
|
|
RET |
|
|
|
%endif |
|
|
|
%endif ;HAVE_AVX2_EXTERNAL |
|
|
|
|
|
|
|
%macro ADD_RES_SSE_8_10 4 |
|
|
|
mova m0, [%4] |
|
|
|
|