|
|
@@ -22,7 +22,6 @@ |
|
|
|
|
|
|
|
%include "libavutil/x86/x86util.asm" |
|
|
|
|
|
|
|
%if ARCH_X86_64 |
|
|
|
SECTION_RODATA |
|
|
|
|
|
|
|
pw_128: times 8 dw 128 |
|
|
@@ -34,11 +33,19 @@ pb_255: times 16 db 255 |
|
|
|
SECTION .text |
|
|
|
|
|
|
|
%macro BLEND_INIT 2 |
|
|
|
cglobal blend_%1, 9, 11, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, start, end |
|
|
|
%if ARCH_X86_64 |
|
|
|
cglobal blend_%1, 6, 9, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, end, x |
|
|
|
mov widthd, dword widthm |
|
|
|
%else |
|
|
|
cglobal blend_%1, 5, 7, %2, top, top_linesize, bottom, bottom_linesize, dst, end, x |
|
|
|
%define dst_linesizeq r5mp |
|
|
|
%define widthq r6mp |
|
|
|
%endif |
|
|
|
mov endd, dword r8m |
|
|
|
add topq, widthq |
|
|
|
add bottomq, widthq |
|
|
|
add dstq, widthq |
|
|
|
sub endq, startq |
|
|
|
sub endd, dword r7m ; start |
|
|
|
neg widthq |
|
|
|
%endmacro |
|
|
|
|
|
|
@@ -54,15 +61,14 @@ REP_RET |
|
|
|
%macro BLEND_SIMPLE 2 |
|
|
|
BLEND_INIT %1, 2 |
|
|
|
.nextrow: |
|
|
|
mov r10q, widthq |
|
|
|
%define x r10q |
|
|
|
mov xq, widthq |
|
|
|
|
|
|
|
.loop: |
|
|
|
movu m0, [topq + x] |
|
|
|
movu m1, [bottomq + x] |
|
|
|
movu m0, [topq + xq] |
|
|
|
movu m1, [bottomq + xq] |
|
|
|
p%2 m0, m1 |
|
|
|
mova [dstq + x], m0 |
|
|
|
add r10q, mmsize |
|
|
|
mova [dstq + xq], m0 |
|
|
|
add xq, mmsize |
|
|
|
jl .loop |
|
|
|
BLEND_END |
|
|
|
%endmacro |
|
|
@@ -80,38 +86,36 @@ BLEND_INIT difference128, 4 |
|
|
|
pxor m2, m2 |
|
|
|
mova m3, [pw_128] |
|
|
|
.nextrow: |
|
|
|
mov r10q, widthq |
|
|
|
%define x r10q |
|
|
|
mov xq, widthq |
|
|
|
|
|
|
|
.loop: |
|
|
|
movh m0, [topq + x] |
|
|
|
movh m1, [bottomq + x] |
|
|
|
movh m0, [topq + xq] |
|
|
|
movh m1, [bottomq + xq] |
|
|
|
punpcklbw m0, m2 |
|
|
|
punpcklbw m1, m2 |
|
|
|
paddw m0, m3 |
|
|
|
psubw m0, m1 |
|
|
|
packuswb m0, m0 |
|
|
|
movh [dstq + x], m0 |
|
|
|
add r10q, mmsize / 2 |
|
|
|
movh [dstq + xq], m0 |
|
|
|
add xq, mmsize / 2 |
|
|
|
jl .loop |
|
|
|
BLEND_END |
|
|
|
|
|
|
|
BLEND_INIT average, 3 |
|
|
|
pxor m2, m2 |
|
|
|
.nextrow: |
|
|
|
mov r10q, widthq |
|
|
|
%define x r10q |
|
|
|
mov xq, widthq |
|
|
|
|
|
|
|
.loop: |
|
|
|
movh m0, [topq + x] |
|
|
|
movh m1, [bottomq + x] |
|
|
|
movh m0, [topq + xq] |
|
|
|
movh m1, [bottomq + xq] |
|
|
|
punpcklbw m0, m2 |
|
|
|
punpcklbw m1, m2 |
|
|
|
paddw m0, m1 |
|
|
|
psrlw m0, 1 |
|
|
|
packuswb m0, m0 |
|
|
|
movh [dstq + x], m0 |
|
|
|
add r10q, mmsize / 2 |
|
|
|
movh [dstq + xq], m0 |
|
|
|
add xq, mmsize / 2 |
|
|
|
jl .loop |
|
|
|
BLEND_END |
|
|
|
|
|
|
@@ -119,19 +123,18 @@ BLEND_INIT addition128, 4 |
|
|
|
pxor m2, m2 |
|
|
|
mova m3, [pw_128] |
|
|
|
.nextrow: |
|
|
|
mov r10q, widthq |
|
|
|
%define x r10q |
|
|
|
mov xq, widthq |
|
|
|
|
|
|
|
.loop: |
|
|
|
movh m0, [topq + x] |
|
|
|
movh m1, [bottomq + x] |
|
|
|
movh m0, [topq + xq] |
|
|
|
movh m1, [bottomq + xq] |
|
|
|
punpcklbw m0, m2 |
|
|
|
punpcklbw m1, m2 |
|
|
|
paddw m0, m1 |
|
|
|
psubw m0, m3 |
|
|
|
packuswb m0, m0 |
|
|
|
movh [dstq + x], m0 |
|
|
|
add r10q, mmsize / 2 |
|
|
|
movh [dstq + xq], m0 |
|
|
|
add xq, mmsize / 2 |
|
|
|
jl .loop |
|
|
|
BLEND_END |
|
|
|
|
|
|
@@ -140,38 +143,36 @@ BLEND_INIT hardmix, 5 |
|
|
|
mova m3, [pb_128] |
|
|
|
mova m4, [pb_127] |
|
|
|
.nextrow: |
|
|
|
mov r10q, widthq |
|
|
|
%define x r10q |
|
|
|
mov xq, widthq |
|
|
|
|
|
|
|
.loop: |
|
|
|
movu m0, [topq + x] |
|
|
|
movu m1, [bottomq + x] |
|
|
|
movu m0, [topq + xq] |
|
|
|
movu m1, [bottomq + xq] |
|
|
|
pxor m1, m4 |
|
|
|
pxor m0, m3 |
|
|
|
pcmpgtb m1, m0 |
|
|
|
pxor m1, m2 |
|
|
|
mova [dstq + x], m1 |
|
|
|
add r10q, mmsize |
|
|
|
mova [dstq + xq], m1 |
|
|
|
add xq, mmsize |
|
|
|
jl .loop |
|
|
|
BLEND_END |
|
|
|
|
|
|
|
BLEND_INIT phoenix, 4 |
|
|
|
mova m3, [pb_255] |
|
|
|
.nextrow: |
|
|
|
mov r10q, widthq |
|
|
|
%define x r10q |
|
|
|
mov xq, widthq |
|
|
|
|
|
|
|
.loop: |
|
|
|
movu m0, [topq + x] |
|
|
|
movu m1, [bottomq + x] |
|
|
|
movu m0, [topq + xq] |
|
|
|
movu m1, [bottomq + xq] |
|
|
|
mova m2, m0 |
|
|
|
pminub m0, m1 |
|
|
|
pmaxub m1, m2 |
|
|
|
mova m2, m3 |
|
|
|
psubusb m2, m1 |
|
|
|
paddusb m2, m0 |
|
|
|
mova [dstq + x], m2 |
|
|
|
add r10q, mmsize |
|
|
|
mova [dstq + xq], m2 |
|
|
|
add xq, mmsize |
|
|
|
jl .loop |
|
|
|
BLEND_END |
|
|
|
|
|
|
@@ -179,19 +180,18 @@ INIT_XMM ssse3 |
|
|
|
BLEND_INIT difference, 3 |
|
|
|
pxor m2, m2 |
|
|
|
.nextrow: |
|
|
|
mov r10q, widthq |
|
|
|
%define x r10q |
|
|
|
mov xq, widthq |
|
|
|
|
|
|
|
.loop: |
|
|
|
movh m0, [topq + x] |
|
|
|
movh m1, [bottomq + x] |
|
|
|
movh m0, [topq + xq] |
|
|
|
movh m1, [bottomq + xq] |
|
|
|
punpcklbw m0, m2 |
|
|
|
punpcklbw m1, m2 |
|
|
|
psubw m0, m1 |
|
|
|
pabsw m0, m0 |
|
|
|
packuswb m0, m0 |
|
|
|
movh [dstq + x], m0 |
|
|
|
add r10q, mmsize / 2 |
|
|
|
movh [dstq + xq], m0 |
|
|
|
add xq, mmsize / 2 |
|
|
|
jl .loop |
|
|
|
BLEND_END |
|
|
|
|
|
|
@@ -199,12 +199,11 @@ BLEND_INIT negation, 5 |
|
|
|
pxor m2, m2 |
|
|
|
mova m4, [pw_255] |
|
|
|
.nextrow: |
|
|
|
mov r10q, widthq |
|
|
|
%define x r10q |
|
|
|
mov xq, widthq |
|
|
|
|
|
|
|
.loop: |
|
|
|
movh m0, [topq + x] |
|
|
|
movh m1, [bottomq + x] |
|
|
|
movh m0, [topq + xq] |
|
|
|
movh m1, [bottomq + xq] |
|
|
|
punpcklbw m0, m2 |
|
|
|
punpcklbw m1, m2 |
|
|
|
mova m3, m4 |
|
|
@@ -214,9 +213,7 @@ BLEND_INIT negation, 5 |
|
|
|
mova m0, m4 |
|
|
|
psubw m0, m3 |
|
|
|
packuswb m0, m0 |
|
|
|
movh [dstq + x], m0 |
|
|
|
add r10q, mmsize / 2 |
|
|
|
movh [dstq + xq], m0 |
|
|
|
add xq, mmsize / 2 |
|
|
|
jl .loop |
|
|
|
BLEND_END |
|
|
|
|
|
|
|
%endif |