| @@ -27,7 +27,8 @@ cextern pw_64 | |||||
| SECTION .text | SECTION .text | ||||
| %macro DIAG4_MMX 6 | |||||
| %macro DIAG4 6 | |||||
| %if mmsize == 8 | |||||
| movq m0, [%1+%2] | movq m0, [%1+%2] | ||||
| movq m1, [%1+%3] | movq m1, [%1+%3] | ||||
| movq m3, m0 | movq m3, m0 | ||||
| @@ -64,9 +65,7 @@ SECTION .text | |||||
| psraw m3, 7 | psraw m3, 7 | ||||
| packuswb m0, m3 | packuswb m0, m3 | ||||
| movq [%6], m0 | movq [%6], m0 | ||||
| %endmacro | |||||
| %macro DIAG4_SSE2 6 | |||||
| %else ; mmsize == 16 | |||||
| movq m0, [%1+%2] | movq m0, [%1+%2] | ||||
| movq m1, [%1+%3] | movq m1, [%1+%3] | ||||
| punpcklbw m0, m7 | punpcklbw m0, m7 | ||||
| @@ -86,9 +85,11 @@ SECTION .text | |||||
| psraw m0, 7 | psraw m0, 7 | ||||
| packuswb m0, m0 | packuswb m0, m0 | ||||
| movq [%6], m0 | movq [%6], m0 | ||||
| %endif ; mmsize == 8/16 | |||||
| %endmacro | %endmacro | ||||
| %macro SPLAT4REGS_MMX 0 | |||||
| %macro SPLAT4REGS 0 | |||||
| %if mmsize == 8 | |||||
| movq m5, m3 | movq m5, m3 | ||||
| punpcklwd m3, m3 | punpcklwd m3, m3 | ||||
| movq m4, m3 | movq m4, m3 | ||||
| @@ -102,9 +103,7 @@ SECTION .text | |||||
| movq [rsp+8*12], m4 | movq [rsp+8*12], m4 | ||||
| movq [rsp+8*13], m5 | movq [rsp+8*13], m5 | ||||
| movq [rsp+8*14], m2 | movq [rsp+8*14], m2 | ||||
| %endmacro | |||||
| %macro SPLAT4REGS_SSE2 0 | |||||
| %else ; mmsize == 16 | |||||
| pshuflw m4, m3, 0x0 | pshuflw m4, m3, 0x0 | ||||
| pshuflw m5, m3, 0x55 | pshuflw m5, m3, 0x55 | ||||
| pshuflw m6, m3, 0xAA | pshuflw m6, m3, 0xAA | ||||
| @@ -113,15 +112,16 @@ SECTION .text | |||||
| punpcklqdq m5, m5 | punpcklqdq m5, m5 | ||||
| punpcklqdq m6, m6 | punpcklqdq m6, m6 | ||||
| punpcklqdq m3, m3 | punpcklqdq m3, m3 | ||||
| %endif ; mmsize == 8/16 | |||||
| %endmacro | %endmacro | ||||
| %macro vp6_filter_diag4 2 | |||||
| %macro vp6_filter_diag4 0 | |||||
| ; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, int stride, | ; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, int stride, | ||||
| ; const int16_t h_weight[4], const int16_t v_weights[4]) | ; const int16_t h_weight[4], const int16_t v_weights[4]) | ||||
| cglobal vp6_filter_diag4_%1, 5, 7, %2 | |||||
| cglobal vp6_filter_diag4, 5, 7, 8 | |||||
| mov r5, rsp ; backup stack pointer | mov r5, rsp ; backup stack pointer | ||||
| and rsp, ~(mmsize-1) ; align stack | and rsp, ~(mmsize-1) ; align stack | ||||
| %ifidn %1, sse2 | |||||
| %if mmsize == 16 | |||||
| sub rsp, 8*11 | sub rsp, 8*11 | ||||
| %else | %else | ||||
| sub rsp, 8*15 | sub rsp, 8*15 | ||||
| @@ -162,12 +162,8 @@ cglobal vp6_filter_diag4_%1, 5, 7, %2 | |||||
| RET | RET | ||||
| %endmacro | %endmacro | ||||
| INIT_MMX | |||||
| %define DIAG4 DIAG4_MMX | |||||
| %define SPLAT4REGS SPLAT4REGS_MMX | |||||
| vp6_filter_diag4 mmx, 0 | |||||
| INIT_MMX mmx | |||||
| vp6_filter_diag4 | |||||
| INIT_XMM | |||||
| %define DIAG4 DIAG4_SSE2 | |||||
| %define SPLAT4REGS SPLAT4REGS_SSE2 | |||||
| vp6_filter_diag4 sse2, 8 | |||||
| INIT_XMM sse2 | |||||
| vp6_filter_diag4 | |||||