| @@ -70,8 +70,8 @@ SECTION .text | |||
| packuswb m0, m1 | |||
| %endmacro | |||
| INIT_MMX | |||
| cglobal h264_weight_16_mmxext, 6, 6, 0 | |||
| INIT_MMX mmxext | |||
| cglobal h264_weight_16, 6, 6, 0 | |||
| WEIGHT_SETUP | |||
| .nextrow: | |||
| WEIGHT_OP 0, 4 | |||
| @@ -83,8 +83,8 @@ cglobal h264_weight_16_mmxext, 6, 6, 0 | |||
| jnz .nextrow | |||
| REP_RET | |||
| %macro WEIGHT_FUNC_MM 3 | |||
| cglobal h264_weight_%1_%3, 6, 6, %2 | |||
| %macro WEIGHT_FUNC_MM 2 | |||
| cglobal h264_weight_%1, 6, 6, %2 | |||
| WEIGHT_SETUP | |||
| .nextrow: | |||
| WEIGHT_OP 0, mmsize/2 | |||
| @@ -95,13 +95,13 @@ cglobal h264_weight_%1_%3, 6, 6, %2 | |||
| REP_RET | |||
| %endmacro | |||
| INIT_MMX | |||
| WEIGHT_FUNC_MM 8, 0, mmxext | |||
| INIT_XMM | |||
| WEIGHT_FUNC_MM 16, 8, sse2 | |||
| INIT_MMX mmxext | |||
| WEIGHT_FUNC_MM 8, 0 | |||
| INIT_XMM sse2 | |||
| WEIGHT_FUNC_MM 16, 8 | |||
| %macro WEIGHT_FUNC_HALF_MM 3 | |||
| cglobal h264_weight_%1_%3, 6, 6, %2 | |||
| %macro WEIGHT_FUNC_HALF_MM 2 | |||
| cglobal h264_weight_%1, 6, 6, %2 | |||
| WEIGHT_SETUP | |||
| sar r2d, 1 | |||
| lea r3, [r1*2] | |||
| @@ -120,10 +120,10 @@ cglobal h264_weight_%1_%3, 6, 6, %2 | |||
| REP_RET | |||
| %endmacro | |||
| INIT_MMX | |||
| WEIGHT_FUNC_HALF_MM 4, 0, mmxext | |||
| INIT_XMM | |||
| WEIGHT_FUNC_HALF_MM 8, 8, sse2 | |||
| INIT_MMX mmxext | |||
| WEIGHT_FUNC_HALF_MM 4, 0 | |||
| INIT_XMM sse2 | |||
| WEIGHT_FUNC_HALF_MM 8, 8 | |||
| %macro BIWEIGHT_SETUP 0 | |||
| %if ARCH_X86_64 | |||
| @@ -135,12 +135,25 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2 | |||
| add off_regd, 1 | |||
| or off_regd, 1 | |||
| add r4, 1 | |||
| %if cpuflag(ssse3) | |||
| movd m4, r5d | |||
| movd m0, r6d | |||
| %else | |||
| movd m3, r5d | |||
| movd m4, r6d | |||
| %endif | |||
| movd m5, off_regd | |||
| movd m6, r4d | |||
| pslld m5, m6 | |||
| psrld m5, 1 | |||
| %if cpuflag(ssse3) | |||
| punpcklbw m4, m0 | |||
| pshuflw m4, m4, 0 | |||
| pshuflw m5, m5, 0 | |||
| punpcklqdq m4, m4 | |||
| punpcklqdq m5, m5 | |||
| %else | |||
| %if mmsize == 16 | |||
| pshuflw m3, m3, 0 | |||
| pshuflw m4, m4, 0 | |||
| @@ -154,6 +167,7 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2 | |||
| pshufw m5, m5, 0 | |||
| %endif | |||
| pxor m7, m7 | |||
| %endif | |||
| %endmacro | |||
| %macro BIWEIGHT_STEPA 3 | |||
| @@ -174,8 +188,8 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2 | |||
| packuswb m0, m1 | |||
| %endmacro | |||
| INIT_MMX | |||
| cglobal h264_biweight_16_mmxext, 7, 8, 0 | |||
| INIT_MMX mmxext | |||
| cglobal h264_biweight_16, 7, 8, 0 | |||
| BIWEIGHT_SETUP | |||
| movifnidn r3d, r3m | |||
| .nextrow: | |||
| @@ -193,8 +207,8 @@ cglobal h264_biweight_16_mmxext, 7, 8, 0 | |||
| jnz .nextrow | |||
| REP_RET | |||
| %macro BIWEIGHT_FUNC_MM 3 | |||
| cglobal h264_biweight_%1_%3, 7, 8, %2 | |||
| %macro BIWEIGHT_FUNC_MM 2 | |||
| cglobal h264_biweight_%1, 7, 8, %2 | |||
| BIWEIGHT_SETUP | |||
| movifnidn r3d, r3m | |||
| .nextrow: | |||
| @@ -209,13 +223,13 @@ cglobal h264_biweight_%1_%3, 7, 8, %2 | |||
| REP_RET | |||
| %endmacro | |||
| INIT_MMX | |||
| BIWEIGHT_FUNC_MM 8, 0, mmxext | |||
| INIT_XMM | |||
| BIWEIGHT_FUNC_MM 16, 8, sse2 | |||
| INIT_MMX mmxext | |||
| BIWEIGHT_FUNC_MM 8, 0 | |||
| INIT_XMM sse2 | |||
| BIWEIGHT_FUNC_MM 16, 8 | |||
| %macro BIWEIGHT_FUNC_HALF_MM 3 | |||
| cglobal h264_biweight_%1_%3, 7, 8, %2 | |||
| %macro BIWEIGHT_FUNC_HALF_MM 2 | |||
| cglobal h264_biweight_%1, 7, 8, %2 | |||
| BIWEIGHT_SETUP | |||
| movifnidn r3d, r3m | |||
| sar r3, 1 | |||
| @@ -238,33 +252,10 @@ cglobal h264_biweight_%1_%3, 7, 8, %2 | |||
| REP_RET | |||
| %endmacro | |||
| INIT_MMX | |||
| BIWEIGHT_FUNC_HALF_MM 4, 0, mmxext | |||
| INIT_XMM | |||
| BIWEIGHT_FUNC_HALF_MM 8, 8, sse2 | |||
| %macro BIWEIGHT_SSSE3_SETUP 0 | |||
| %if ARCH_X86_64 | |||
| %define off_regd r7d | |||
| %else | |||
| %define off_regd r3d | |||
| %endif | |||
| mov off_regd, r7m | |||
| add off_regd, 1 | |||
| or off_regd, 1 | |||
| add r4, 1 | |||
| movd m4, r5d | |||
| movd m0, r6d | |||
| movd m5, off_regd | |||
| movd m6, r4d | |||
| pslld m5, m6 | |||
| psrld m5, 1 | |||
| punpcklbw m4, m0 | |||
| pshuflw m4, m4, 0 | |||
| pshuflw m5, m5, 0 | |||
| punpcklqdq m4, m4 | |||
| punpcklqdq m5, m5 | |||
| %endmacro | |||
| INIT_MMX mmxext | |||
| BIWEIGHT_FUNC_HALF_MM 4, 0 | |||
| INIT_XMM sse2 | |||
| BIWEIGHT_FUNC_HALF_MM 8, 8 | |||
| %macro BIWEIGHT_SSSE3_OP 0 | |||
| pmaddubsw m0, m4 | |||
| @@ -276,9 +267,9 @@ BIWEIGHT_FUNC_HALF_MM 8, 8, sse2 | |||
| packuswb m0, m2 | |||
| %endmacro | |||
| INIT_XMM | |||
| cglobal h264_biweight_16_ssse3, 7, 8, 8 | |||
| BIWEIGHT_SSSE3_SETUP | |||
| INIT_XMM ssse3 | |||
| cglobal h264_biweight_16, 7, 8, 8 | |||
| BIWEIGHT_SETUP | |||
| movifnidn r3d, r3m | |||
| .nextrow: | |||
| @@ -295,9 +286,9 @@ cglobal h264_biweight_16_ssse3, 7, 8, 8 | |||
| jnz .nextrow | |||
| REP_RET | |||
| INIT_XMM | |||
| cglobal h264_biweight_8_ssse3, 7, 8, 8 | |||
| BIWEIGHT_SSSE3_SETUP | |||
| INIT_XMM ssse3 | |||
| cglobal h264_biweight_8, 7, 8, 8 | |||
| BIWEIGHT_SETUP | |||
| movifnidn r3d, r3m | |||
| sar r3, 1 | |||
| lea r4, [r2*2] | |||