| @@ -31,7 +31,7 @@ pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7 | |||
| SECTION_TEXT | |||
| %macro ADD_INT16_LOOP 1 ; %1 = is_aligned | |||
| %macro ADD_INT16_LOOP 1 ; %1 = a/u (aligned/unaligned) | |||
| movd m4, maskd | |||
| SPLATW m4, m4 | |||
| add wq, wq | |||
| @@ -51,28 +51,16 @@ SECTION_TEXT | |||
| neg wq | |||
| jz %%.end | |||
| %%.loop: | |||
| %if %1 | |||
| mova m0, [srcq+wq] | |||
| mova m1, [dstq+wq] | |||
| mova m2, [srcq+wq+mmsize] | |||
| mova m3, [dstq+wq+mmsize] | |||
| %else | |||
| movu m0, [srcq+wq] | |||
| movu m1, [dstq+wq] | |||
| movu m2, [srcq+wq+mmsize] | |||
| movu m3, [dstq+wq+mmsize] | |||
| %endif | |||
| mov%1 m0, [srcq+wq] | |||
| mov%1 m1, [dstq+wq] | |||
| mov%1 m2, [srcq+wq+mmsize] | |||
| mov%1 m3, [dstq+wq+mmsize] | |||
| paddw m0, m1 | |||
| paddw m2, m3 | |||
| pand m0, m4 | |||
| pand m2, m4 | |||
| %if %1 | |||
| mova [dstq+wq] , m0 | |||
| mova [dstq+wq+mmsize], m2 | |||
| %else | |||
| movu [dstq+wq] , m0 | |||
| movu [dstq+wq+mmsize], m2 | |||
| %endif | |||
| mov%1 [dstq+wq] , m0 | |||
| mov%1 [dstq+wq+mmsize], m2 | |||
| add wq, 2*mmsize | |||
| jl %%.loop | |||
| %%.end: | |||
| @@ -81,7 +69,7 @@ SECTION_TEXT | |||
; MMX entry point for add_int16 (named arguments: dst, src, mask, w).
; The body visible here consists solely of ADD_INT16_LOOP invocations.
; NOTE(review): this text looks like a patch view whose +/- markers were lost:
; "ADD_INT16_LOOP 1" appears to be the old spelling (%1 = is_aligned, tested
; with %if) and "ADD_INT16_LOOP a" the new one (%1 = a/u, pasted into mov%1)
; of the same single invocation, not two consecutive expansions.
; Confirm against the real file before assembling.
| INIT_MMX mmx | |||
| cglobal add_int16, 4,4,5, dst, src, mask, w | |||
| ADD_INT16_LOOP 1 | |||
| ADD_INT16_LOOP a | |||
| INIT_XMM sse2 | |||
| cglobal add_int16, 4,4,5, dst, src, mask, w | |||
| @@ -89,11 +77,11 @@ cglobal add_int16, 4,4,5, dst, src, mask, w | |||
| jnz .unaligned | |||
| test dstq, mmsize-1 | |||
| jnz .unaligned | |||
| ADD_INT16_LOOP 1 | |||
| ADD_INT16_LOOP a | |||
| .unaligned: | |||
| ADD_INT16_LOOP 0 | |||
| ADD_INT16_LOOP u | |||
| %macro DIFF_INT16_LOOP 1 ; %1 = is_aligned | |||
| %macro DIFF_INT16_LOOP 1 ; %1 = a/u (aligned/unaligned) | |||
| movd m4, maskd | |||
| SPLATW m4, m4 | |||
| add wq, wq | |||
| @@ -114,28 +102,16 @@ cglobal add_int16, 4,4,5, dst, src, mask, w | |||
| neg wq | |||
| jz %%.end | |||
| %%.loop: | |||
| %if %1 | |||
| mova m0, [src1q+wq] | |||
| mova m1, [src2q+wq] | |||
| mova m2, [src1q+wq+mmsize] | |||
| mova m3, [src2q+wq+mmsize] | |||
| %else | |||
| movu m0, [src1q+wq] | |||
| movu m1, [src2q+wq] | |||
| movu m2, [src1q+wq+mmsize] | |||
| movu m3, [src2q+wq+mmsize] | |||
| %endif | |||
| mov%1 m0, [src1q+wq] | |||
| mov%1 m1, [src2q+wq] | |||
| mov%1 m2, [src1q+wq+mmsize] | |||
| mov%1 m3, [src2q+wq+mmsize] | |||
| psubw m0, m1 | |||
| psubw m2, m3 | |||
| pand m0, m4 | |||
| pand m2, m4 | |||
| %if %1 | |||
| mova [dstq+wq] , m0 | |||
| mova [dstq+wq+mmsize], m2 | |||
| %else | |||
| movu [dstq+wq] , m0 | |||
| movu [dstq+wq+mmsize], m2 | |||
| %endif | |||
| mov%1 [dstq+wq] , m0 | |||
| mov%1 [dstq+wq+mmsize], m2 | |||
| add wq, 2*mmsize | |||
| jl %%.loop | |||
| %%.end: | |||
| @@ -144,7 +120,7 @@ cglobal add_int16, 4,4,5, dst, src, mask, w | |||
; MMX entry point for diff_int16 (named arguments: dst, src1, src2, mask, w).
; The body visible here consists solely of DIFF_INT16_LOOP invocations.
; NOTE(review): this text looks like a patch view whose +/- markers were lost:
; "DIFF_INT16_LOOP 1" appears to be the old spelling (%1 = is_aligned, tested
; with %if) and "DIFF_INT16_LOOP a" the new one (%1 = a/u, pasted into mov%1)
; of the same single invocation, not two consecutive expansions.
; Confirm against the real file before assembling.
| INIT_MMX mmx | |||
| cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w | |||
| DIFF_INT16_LOOP 1 | |||
| DIFF_INT16_LOOP a | |||
| INIT_XMM sse2 | |||
| cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w | |||
| @@ -154,22 +130,18 @@ cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w | |||
| jnz .unaligned | |||
| test dstq, mmsize-1 | |||
| jnz .unaligned | |||
| DIFF_INT16_LOOP 1 | |||
| DIFF_INT16_LOOP a | |||
| .unaligned: | |||
| DIFF_INT16_LOOP 0 | |||
| DIFF_INT16_LOOP u | |||
| %macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst_is_aligned, %2 = src_is_aligned | |||
| %macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u) | |||
| add wq, wq | |||
| add srcq, wq | |||
| add dstq, wq | |||
| neg wq | |||
| %%.loop: | |||
| %if %2 | |||
| mova m1, [srcq+wq] | |||
| %else | |||
| movu m1, [srcq+wq] | |||
| %endif | |||
| mov%2 m1, [srcq+wq] | |||
| mova m2, m1 | |||
| pslld m1, 16 | |||
| paddw m1, m2 | |||
| @@ -185,7 +157,7 @@ cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w | |||
| %endif | |||
| paddw m0, m1 | |||
| pand m0, m7 | |||
| %if %1 | |||
| %ifidn %1, a | |||
| mova [dstq+wq], m0 | |||
| %else | |||
| movq [dstq+wq], m0 | |||
| @@ -214,7 +186,7 @@ cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left | |||
| psllq m0, 48 | |||
| movd m7, maskm | |||
| SPLATW m7 ,m7 | |||
| ADD_HFYU_LEFT_LOOP_INT16 1, 1 | |||
| ADD_HFYU_LEFT_LOOP_INT16 a, a | |||
| INIT_XMM sse4 | |||
| cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left | |||
| @@ -229,11 +201,11 @@ cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left | |||
| jnz .src_unaligned | |||
| test dstq, 15 | |||
| jnz .dst_unaligned | |||
| ADD_HFYU_LEFT_LOOP_INT16 1, 1 | |||
| ADD_HFYU_LEFT_LOOP_INT16 a, a | |||
| .dst_unaligned: | |||
| ADD_HFYU_LEFT_LOOP_INT16 0, 1 | |||
| ADD_HFYU_LEFT_LOOP_INT16 u, a | |||
| .src_unaligned: | |||
| ADD_HFYU_LEFT_LOOP_INT16 0, 0 | |||
| ADD_HFYU_LEFT_LOOP_INT16 u, u | |||
| ; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top) | |||
| INIT_MMX mmxext | |||