| @@ -156,7 +156,7 @@ INIT_MMX mmx | |||||
| %define ABS2 ABS2_MMX | %define ABS2 ABS2_MMX | ||||
| AC3_MAX_MSB_ABS_INT16 or_abs | AC3_MAX_MSB_ABS_INT16 or_abs | ||||
| INIT_MMX mmx2 | INIT_MMX mmx2 | ||||
| %define ABS2 ABS2_MMX2 | |||||
| %define ABS2 ABS2_MMXEXT | |||||
| AC3_MAX_MSB_ABS_INT16 min_max | AC3_MAX_MSB_ABS_INT16 min_max | ||||
| INIT_XMM sse2 | INIT_XMM sse2 | ||||
| AC3_MAX_MSB_ABS_INT16 min_max | AC3_MAX_MSB_ABS_INT16 min_max | ||||
| @@ -430,7 +430,7 @@ static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, ui | |||||
| "mov" #size " " #b ", " #temp " \n\t"\ | "mov" #size " " #b ", " #temp " \n\t"\ | ||||
| "pavgusb " #temp ", " #a " \n\t"\ | "pavgusb " #temp ", " #a " \n\t"\ | ||||
| "mov" #size " " #a ", " #b " \n\t" | "mov" #size " " #a ", " #b " \n\t" | ||||
| #define AVG_MMX2_OP(a,b,temp, size) \ | |||||
| #define AVG_MMXEXT_OP(a, b, temp, size) \ | |||||
| "mov" #size " " #b ", " #temp " \n\t"\ | "mov" #size " " #b ", " #temp " \n\t"\ | ||||
| "pavgb " #temp ", " #a " \n\t"\ | "pavgb " #temp ", " #a " \n\t"\ | ||||
| "mov" #size " " #a ", " #b " \n\t" | "mov" #size " " #a ", " #b " \n\t" | ||||
| @@ -439,7 +439,7 @@ static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, ui | |||||
| #if HAVE_MMXEXT_INLINE | #if HAVE_MMXEXT_INLINE | ||||
| QPEL_CAVS(put_, PUT_OP, mmx2) | QPEL_CAVS(put_, PUT_OP, mmx2) | ||||
| QPEL_CAVS(avg_, AVG_MMX2_OP, mmx2) | |||||
| QPEL_CAVS(avg_,AVG_MMXEXT_OP, mmx2) | |||||
| CAVS_MC(put_, 8, mmx2) | CAVS_MC(put_, 8, mmx2) | ||||
| CAVS_MC(put_, 16,mmx2) | CAVS_MC(put_, 16,mmx2) | ||||
| @@ -923,7 +923,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | |||||
| "packuswb %%mm5, %%mm5 \n\t" \ | "packuswb %%mm5, %%mm5 \n\t" \ | ||||
| OP(%%mm5, out, %%mm7, d) | OP(%%mm5, out, %%mm7, d) | ||||
| #define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW) \ | |||||
| #define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMXEXT, OP_3DNOW) \ | |||||
| static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \ | static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \ | ||||
| uint8_t *src, \ | uint8_t *src, \ | ||||
| int dstStride, \ | int dstStride, \ | ||||
| @@ -991,7 +991,7 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \ | |||||
| "psraw $5, %%mm3 \n\t" \ | "psraw $5, %%mm3 \n\t" \ | ||||
| "movq %5, %%mm1 \n\t" \ | "movq %5, %%mm1 \n\t" \ | ||||
| "packuswb %%mm3, %%mm1 \n\t" \ | "packuswb %%mm3, %%mm1 \n\t" \ | ||||
| OP_MMX2(%%mm1, (%1), %%mm4, q) \ | |||||
| OP_MMXEXT(%%mm1, (%1), %%mm4, q) \ | |||||
| /* mm0 = GHIJ, mm2 = FGHI, mm5 = HIJK, mm6 = IJKL, mm7 = 0 */ \ | /* mm0 = GHIJ, mm2 = FGHI, mm5 = HIJK, mm6 = IJKL, mm7 = 0 */ \ | ||||
| \ | \ | ||||
| "movq 9(%0), %%mm1 \n\t" /* JKLMNOPQ */ \ | "movq 9(%0), %%mm1 \n\t" /* JKLMNOPQ */ \ | ||||
| @@ -1038,7 +1038,7 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \ | |||||
| "paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */ \ | "paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */ \ | ||||
| "psraw $5, %%mm4 \n\t" \ | "psraw $5, %%mm4 \n\t" \ | ||||
| "packuswb %%mm4, %%mm0 \n\t" \ | "packuswb %%mm4, %%mm0 \n\t" \ | ||||
| OP_MMX2(%%mm0, 8(%1), %%mm4, q) \ | |||||
| OP_MMXEXT(%%mm0, 8(%1), %%mm4, q) \ | |||||
| \ | \ | ||||
| "add %3, %0 \n\t" \ | "add %3, %0 \n\t" \ | ||||
| "add %4, %1 \n\t" \ | "add %4, %1 \n\t" \ | ||||
| @@ -1175,7 +1175,7 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, \ | |||||
| "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */ \ | "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */ \ | ||||
| "psraw $5, %%mm3 \n\t" \ | "psraw $5, %%mm3 \n\t" \ | ||||
| "packuswb %%mm3, %%mm0 \n\t" \ | "packuswb %%mm3, %%mm0 \n\t" \ | ||||
| OP_MMX2(%%mm0, (%1), %%mm4, q) \ | |||||
| OP_MMXEXT(%%mm0, (%1), %%mm4, q) \ | |||||
| \ | \ | ||||
| "add %3, %0 \n\t" \ | "add %3, %0 \n\t" \ | ||||
| "add %4, %1 \n\t" \ | "add %4, %1 \n\t" \ | ||||
| @@ -1744,19 +1744,19 @@ static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \ | |||||
| "pavgusb "#temp", "#a" \n\t" \ | "pavgusb "#temp", "#a" \n\t" \ | ||||
| "mov"#size" "#a", "#b" \n\t" | "mov"#size" "#a", "#b" \n\t" | ||||
| #define AVG_MMX2_OP(a, b, temp, size) \ | |||||
| #define AVG_MMXEXT_OP(a, b, temp, size) \ | |||||
| "mov"#size" "#b", "#temp" \n\t" \ | "mov"#size" "#b", "#temp" \n\t" \ | ||||
| "pavgb "#temp", "#a" \n\t" \ | "pavgb "#temp", "#a" \n\t" \ | ||||
| "mov"#size" "#a", "#b" \n\t" | "mov"#size" "#a", "#b" \n\t" | ||||
| QPEL_BASE(put_, ff_pw_16, _, PUT_OP, PUT_OP) | QPEL_BASE(put_, ff_pw_16, _, PUT_OP, PUT_OP) | ||||
| QPEL_BASE(avg_, ff_pw_16, _, AVG_MMX2_OP, AVG_3DNOW_OP) | |||||
| QPEL_BASE(avg_, ff_pw_16, _, AVG_MMXEXT_OP, AVG_3DNOW_OP) | |||||
| QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP) | QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP) | ||||
| QPEL_OP(put_, ff_pw_16, _, PUT_OP, 3dnow) | QPEL_OP(put_, ff_pw_16, _, PUT_OP, 3dnow) | ||||
| QPEL_OP(avg_, ff_pw_16, _, AVG_3DNOW_OP, 3dnow) | QPEL_OP(avg_, ff_pw_16, _, AVG_3DNOW_OP, 3dnow) | ||||
| QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow) | QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow) | ||||
| QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmx2) | QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmx2) | ||||
| QPEL_OP(avg_, ff_pw_16, _, AVG_MMX2_OP, mmx2) | |||||
| QPEL_OP(avg_, ff_pw_16, _, AVG_MMXEXT_OP, mmx2) | |||||
| QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2) | QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2) | ||||
| /***********************************/ | /***********************************/ | ||||
| @@ -112,7 +112,7 @@ SECTION .text | |||||
| movd %3, %1 | movd %3, %1 | ||||
| %endmacro | %endmacro | ||||
| %macro HSUM_MMX2 3 | |||||
| %macro HSUM_MMXEXT 3 | |||||
| pshufw %2, %1, 0xE | pshufw %2, %1, 0xE | ||||
| paddusw %1, %2 | paddusw %1, %2 | ||||
| pshufw %2, %1, 0x1 | pshufw %2, %1, 0x1 | ||||
| @@ -263,12 +263,12 @@ INIT_MMX | |||||
| %define HSUM HSUM_MMX | %define HSUM HSUM_MMX | ||||
| HADAMARD8_DIFF_MMX mmx | HADAMARD8_DIFF_MMX mmx | ||||
| %define ABS1 ABS1_MMX2 | |||||
| %define HSUM HSUM_MMX2 | |||||
| %define ABS1 ABS1_MMXEXT | |||||
| %define HSUM HSUM_MMXEXT | |||||
| HADAMARD8_DIFF_MMX mmx2 | HADAMARD8_DIFF_MMX mmx2 | ||||
| INIT_XMM | INIT_XMM | ||||
| %define ABS2 ABS2_MMX2 | |||||
| %define ABS2 ABS2_MMXEXT | |||||
| %if ARCH_X86_64 | %if ARCH_X86_64 | ||||
| %define ABS_SUM_8x8 ABS_SUM_8x8_64 | %define ABS_SUM_8x8 ABS_SUM_8x8_64 | ||||
| %else | %else | ||||
| @@ -888,7 +888,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *src1, c | |||||
| "pxor " #z ", " #a " \n\t"\ | "pxor " #z ", " #a " \n\t"\ | ||||
| "psubw " #z ", " #a " \n\t" | "psubw " #z ", " #a " \n\t" | ||||
| #define MMABS_MMX2(a,z)\ | |||||
| #define MMABS_MMXEXT(a, z) \ | |||||
| "pxor " #z ", " #z " \n\t"\ | "pxor " #z ", " #z " \n\t"\ | ||||
| "psubw " #a ", " #z " \n\t"\ | "psubw " #a ", " #z " \n\t"\ | ||||
| "pmaxsw " #z ", " #a " \n\t" | "pmaxsw " #z ", " #a " \n\t" | ||||
| @@ -912,7 +912,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *src1, c | |||||
| "paddusw "#t", "#a" \n\t"\ | "paddusw "#t", "#a" \n\t"\ | ||||
| "movd "#a", "#dst" \n\t"\ | "movd "#a", "#dst" \n\t"\ | ||||
| #define HSUM_MMX2(a, t, dst)\ | |||||
| #define HSUM_MMXEXT(a, t, dst) \ | |||||
| "pshufw $0x0E, "#a", "#t" \n\t"\ | "pshufw $0x0E, "#a", "#t" \n\t"\ | ||||
| "paddusw "#t", "#a" \n\t"\ | "paddusw "#t", "#a" \n\t"\ | ||||
| "pshufw $0x01, "#a", "#t" \n\t"\ | "pshufw $0x01, "#a", "#t" \n\t"\ | ||||
| @@ -974,8 +974,8 @@ DCT_SAD_FUNC(mmx) | |||||
| #undef MMABS | #undef MMABS | ||||
| #undef HSUM | #undef HSUM | ||||
| #define HSUM(a,t,dst) HSUM_MMX2(a,t,dst) | |||||
| #define MMABS(a,z) MMABS_MMX2(a,z) | |||||
| #define HSUM(a,t,dst) HSUM_MMXEXT(a,t,dst) | |||||
| #define MMABS(a,z) MMABS_MMXEXT(a,z) | |||||
| DCT_SAD_FUNC(mmx2) | DCT_SAD_FUNC(mmx2) | ||||
| #undef HSUM | #undef HSUM | ||||
| #undef DCT_SAD | #undef DCT_SAD | ||||
| @@ -246,7 +246,7 @@ cglobal h264_idct8_add_8_sse2, 3, 4, 10 | |||||
| IDCT8_ADD_SSE r0, r1, r2, r3 | IDCT8_ADD_SSE r0, r1, r2, r3 | ||||
| RET | RET | ||||
| %macro DC_ADD_MMX2_INIT 2-3 | |||||
| %macro DC_ADD_MMXEXT_INIT 2-3 | |||||
| %if %0 == 2 | %if %0 == 2 | ||||
| movsx %1, word [%1] | movsx %1, word [%1] | ||||
| add %1, 32 | add %1, 32 | ||||
| @@ -266,7 +266,7 @@ cglobal h264_idct8_add_8_sse2, 3, 4, 10 | |||||
| packuswb m1, m1 | packuswb m1, m1 | ||||
| %endmacro | %endmacro | ||||
| %macro DC_ADD_MMX2_OP 4 | |||||
| %macro DC_ADD_MMXEXT_OP 4 | |||||
| %1 m2, [%2 ] | %1 m2, [%2 ] | ||||
| %1 m3, [%2+%3 ] | %1 m3, [%2+%3 ] | ||||
| %1 m4, [%2+%3*2] | %1 m4, [%2+%3*2] | ||||
| @@ -288,16 +288,16 @@ cglobal h264_idct8_add_8_sse2, 3, 4, 10 | |||||
| INIT_MMX | INIT_MMX | ||||
| ; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) | ; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) | ||||
| cglobal h264_idct_dc_add_8_mmx2, 3, 3, 0 | cglobal h264_idct_dc_add_8_mmx2, 3, 3, 0 | ||||
| DC_ADD_MMX2_INIT r1, r2 | |||||
| DC_ADD_MMX2_OP movh, r0, r2, r1 | |||||
| DC_ADD_MMXEXT_INIT r1, r2 | |||||
| DC_ADD_MMXEXT_OP movh, r0, r2, r1 | |||||
| RET | RET | ||||
| ; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) | ; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) | ||||
| cglobal h264_idct8_dc_add_8_mmx2, 3, 3, 0 | cglobal h264_idct8_dc_add_8_mmx2, 3, 3, 0 | ||||
| DC_ADD_MMX2_INIT r1, r2 | |||||
| DC_ADD_MMX2_OP mova, r0, r2, r1 | |||||
| DC_ADD_MMXEXT_INIT r1, r2 | |||||
| DC_ADD_MMXEXT_OP mova, r0, r2, r1 | |||||
| lea r0, [r0+r2*4] | lea r0, [r0+r2*4] | ||||
| DC_ADD_MMX2_OP mova, r0, r2, r1 | |||||
| DC_ADD_MMXEXT_OP mova, r0, r2, r1 | |||||
| RET | RET | ||||
| ; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset, | ; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset, | ||||
| @@ -371,14 +371,14 @@ cglobal h264_idct_add16_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, s | |||||
| movsx r6, word [r2] | movsx r6, word [r2] | ||||
| test r6, r6 | test r6, r6 | ||||
| jz .no_dc | jz .no_dc | ||||
| DC_ADD_MMX2_INIT r2, r3, r6 | |||||
| DC_ADD_MMXEXT_INIT r2, r3, r6 | |||||
| %if ARCH_X86_64 == 0 | %if ARCH_X86_64 == 0 | ||||
| %define dst2q r1 | %define dst2q r1 | ||||
| %define dst2d r1d | %define dst2d r1d | ||||
| %endif | %endif | ||||
| mov dst2d, dword [r1+r5*4] | mov dst2d, dword [r1+r5*4] | ||||
| lea dst2q, [r0+dst2q] | lea dst2q, [r0+dst2q] | ||||
| DC_ADD_MMX2_OP movh, dst2q, r3, r6 | |||||
| DC_ADD_MMXEXT_OP movh, dst2q, r3, r6 | |||||
| %if ARCH_X86_64 == 0 | %if ARCH_X86_64 == 0 | ||||
| mov r1, r1m | mov r1, r1m | ||||
| %endif | %endif | ||||
| @@ -445,14 +445,14 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, blo | |||||
| movsx r6, word [r2] | movsx r6, word [r2] | ||||
| test r6, r6 | test r6, r6 | ||||
| jz .skipblock | jz .skipblock | ||||
| DC_ADD_MMX2_INIT r2, r3, r6 | |||||
| DC_ADD_MMXEXT_INIT r2, r3, r6 | |||||
| %if ARCH_X86_64 == 0 | %if ARCH_X86_64 == 0 | ||||
| %define dst2q r1 | %define dst2q r1 | ||||
| %define dst2d r1d | %define dst2d r1d | ||||
| %endif | %endif | ||||
| mov dst2d, dword [r1+r5*4] | mov dst2d, dword [r1+r5*4] | ||||
| add dst2q, r0 | add dst2q, r0 | ||||
| DC_ADD_MMX2_OP movh, dst2q, r3, r6 | |||||
| DC_ADD_MMXEXT_OP movh, dst2q, r3, r6 | |||||
| %if ARCH_X86_64 == 0 | %if ARCH_X86_64 == 0 | ||||
| mov r1, r1m | mov r1, r1m | ||||
| %endif | %endif | ||||
| @@ -483,16 +483,16 @@ cglobal h264_idct8_add4_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, s | |||||
| movsx r6, word [r2] | movsx r6, word [r2] | ||||
| test r6, r6 | test r6, r6 | ||||
| jz .no_dc | jz .no_dc | ||||
| DC_ADD_MMX2_INIT r2, r3, r6 | |||||
| DC_ADD_MMXEXT_INIT r2, r3, r6 | |||||
| %if ARCH_X86_64 == 0 | %if ARCH_X86_64 == 0 | ||||
| %define dst2q r1 | %define dst2q r1 | ||||
| %define dst2d r1d | %define dst2d r1d | ||||
| %endif | %endif | ||||
| mov dst2d, dword [r1+r5*4] | mov dst2d, dword [r1+r5*4] | ||||
| lea dst2q, [r0+dst2q] | lea dst2q, [r0+dst2q] | ||||
| DC_ADD_MMX2_OP mova, dst2q, r3, r6 | |||||
| DC_ADD_MMXEXT_OP mova, dst2q, r3, r6 | |||||
| lea dst2q, [dst2q+r3*4] | lea dst2q, [dst2q+r3*4] | ||||
| DC_ADD_MMX2_OP mova, dst2q, r3, r6 | |||||
| DC_ADD_MMXEXT_OP mova, dst2q, r3, r6 | |||||
| %if ARCH_X86_64 == 0 | %if ARCH_X86_64 == 0 | ||||
| mov r1, r1m | mov r1, r1m | ||||
| %endif | %endif | ||||
| @@ -541,16 +541,16 @@ cglobal h264_idct8_add4_8_sse2, 5, 8 + npicregs, 10, dst1, block_offset, block, | |||||
| test r6, r6 | test r6, r6 | ||||
| jz .no_dc | jz .no_dc | ||||
| INIT_MMX | INIT_MMX | ||||
| DC_ADD_MMX2_INIT r2, r3, r6 | |||||
| DC_ADD_MMXEXT_INIT r2, r3, r6 | |||||
| %if ARCH_X86_64 == 0 | %if ARCH_X86_64 == 0 | ||||
| %define dst2q r1 | %define dst2q r1 | ||||
| %define dst2d r1d | %define dst2d r1d | ||||
| %endif | %endif | ||||
| mov dst2d, dword [r1+r5*4] | mov dst2d, dword [r1+r5*4] | ||||
| add dst2q, r0 | add dst2q, r0 | ||||
| DC_ADD_MMX2_OP mova, dst2q, r3, r6 | |||||
| DC_ADD_MMXEXT_OP mova, dst2q, r3, r6 | |||||
| lea dst2q, [dst2q+r3*4] | lea dst2q, [dst2q+r3*4] | ||||
| DC_ADD_MMX2_OP mova, dst2q, r3, r6 | |||||
| DC_ADD_MMXEXT_OP mova, dst2q, r3, r6 | |||||
| %if ARCH_X86_64 == 0 | %if ARCH_X86_64 == 0 | ||||
| mov r1, r1m | mov r1, r1m | ||||
| %endif | %endif | ||||
| @@ -644,7 +644,7 @@ h264_idct_add8_mmx2_plane: | |||||
| movsx r6, word [r2] | movsx r6, word [r2] | ||||
| test r6, r6 | test r6, r6 | ||||
| jz .skipblock | jz .skipblock | ||||
| DC_ADD_MMX2_INIT r2, r3, r6 | |||||
| DC_ADD_MMXEXT_INIT r2, r3, r6 | |||||
| %if ARCH_X86_64 | %if ARCH_X86_64 | ||||
| mov r0d, dword [r1+r5*4] | mov r0d, dword [r1+r5*4] | ||||
| add r0, [dst2q] | add r0, [dst2q] | ||||
| @@ -653,7 +653,7 @@ h264_idct_add8_mmx2_plane: | |||||
| mov r0, [r0] | mov r0, [r0] | ||||
| add r0, dword [r1+r5*4] | add r0, dword [r1+r5*4] | ||||
| %endif | %endif | ||||
| DC_ADD_MMX2_OP movh, r0, r3, r6 | |||||
| DC_ADD_MMXEXT_OP movh, r0, r3, r6 | |||||
| .skipblock: | .skipblock: | ||||
| inc r5 | inc r5 | ||||
| add r2, 32 | add r2, 32 | ||||
| @@ -697,7 +697,7 @@ h264_idct_dc_add8_mmx2: | |||||
| pshufw m1, m0, 0xFA ; -d-d-d-d-D-D-D-D | pshufw m1, m0, 0xFA ; -d-d-d-d-D-D-D-D | ||||
| punpcklwd m0, m0 ; d d d d D D D D | punpcklwd m0, m0 ; d d d d D D D D | ||||
| lea r6, [r3*3] | lea r6, [r3*3] | ||||
| DC_ADD_MMX2_OP movq, r0, r3, r6 | |||||
| DC_ADD_MMXEXT_OP movq, r0, r3, r6 | |||||
| ret | ret | ||||
| ALIGN 16 | ALIGN 16 | ||||
| @@ -1169,18 +1169,18 @@ QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow) | |||||
| #undef PAVGB | #undef PAVGB | ||||
| #define PAVGB "pavgb" | #define PAVGB "pavgb" | ||||
| QPEL_H264(put_, PUT_OP, mmx2) | QPEL_H264(put_, PUT_OP, mmx2) | ||||
| QPEL_H264(avg_, AVG_MMX2_OP, mmx2) | |||||
| QPEL_H264(avg_,AVG_MMXEXT_OP, mmx2) | |||||
| QPEL_H264_V_XMM(put_, PUT_OP, sse2) | QPEL_H264_V_XMM(put_, PUT_OP, sse2) | ||||
| QPEL_H264_V_XMM(avg_, AVG_MMX2_OP, sse2) | |||||
| QPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2) | |||||
| QPEL_H264_HV_XMM(put_, PUT_OP, sse2) | QPEL_H264_HV_XMM(put_, PUT_OP, sse2) | ||||
| QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, sse2) | |||||
| QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, sse2) | |||||
| #if HAVE_SSSE3_INLINE | #if HAVE_SSSE3_INLINE | ||||
| QPEL_H264_H_XMM(put_, PUT_OP, ssse3) | QPEL_H264_H_XMM(put_, PUT_OP, ssse3) | ||||
| QPEL_H264_H_XMM(avg_, AVG_MMX2_OP, ssse3) | |||||
| QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3) | |||||
| QPEL_H264_HV2_XMM(put_, PUT_OP, ssse3) | QPEL_H264_HV2_XMM(put_, PUT_OP, ssse3) | ||||
| QPEL_H264_HV2_XMM(avg_, AVG_MMX2_OP, ssse3) | |||||
| QPEL_H264_HV2_XMM(avg_,AVG_MMXEXT_OP, ssse3) | |||||
| QPEL_H264_HV_XMM(put_, PUT_OP, ssse3) | QPEL_H264_HV_XMM(put_, PUT_OP, ssse3) | ||||
| QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, ssse3) | |||||
| QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3) | |||||
| #endif | #endif | ||||
| #undef PAVGB | #undef PAVGB | ||||
| @@ -268,7 +268,7 @@ cglobal vc1_h_loop_filter8_%1, 3,5,0 | |||||
| RET | RET | ||||
| %endmacro | %endmacro | ||||
| %define PABSW PABSW_MMX2 | |||||
| %define PABSW PABSW_MMXEXT | |||||
| VC1_LF_MMX mmx2 | VC1_LF_MMX mmx2 | ||||
| INIT_XMM | INIT_XMM | ||||
| @@ -157,7 +157,7 @@ | |||||
| psubw %1, %2 | psubw %1, %2 | ||||
| %endmacro | %endmacro | ||||
| %macro PABSW_MMX2 2 | |||||
| %macro PABSW_MMXEXT 2 | |||||
| pxor %1, %1 | pxor %1, %1 | ||||
| psubw %1, %2 | psubw %1, %2 | ||||
| pmaxsw %1, %2 | pmaxsw %1, %2 | ||||
| @@ -189,13 +189,13 @@ | |||||
| psubw %2, %4 | psubw %2, %4 | ||||
| %endmacro | %endmacro | ||||
| %macro ABS1_MMX2 2 ; a, tmp | |||||
| %macro ABS1_MMXEXT 2 ; a, tmp | |||||
| pxor %2, %2 | pxor %2, %2 | ||||
| psubw %2, %1 | psubw %2, %1 | ||||
| pmaxsw %1, %2 | pmaxsw %1, %2 | ||||
| %endmacro | %endmacro | ||||
| %macro ABS2_MMX2 4 ; a, b, tmp0, tmp1 | |||||
| %macro ABS2_MMXEXT 4 ; a, b, tmp0, tmp1 | |||||
| pxor %3, %3 | pxor %3, %3 | ||||
| pxor %4, %4 | pxor %4, %4 | ||||
| psubw %3, %1 | psubw %3, %1 | ||||
| @@ -519,7 +519,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, | |||||
| "cmp "#dstw", "#index" \n\t"\ | "cmp "#dstw", "#index" \n\t"\ | ||||
| " jb 1b \n\t" | " jb 1b \n\t" | ||||
| #define WRITEBGR24MMX2(dst, dstw, index) \ | |||||
| #define WRITEBGR24MMXEXT(dst, dstw, index) \ | |||||
| /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | ||||
| "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\ | "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\ | ||||
| "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\ | "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\ | ||||
| @@ -569,7 +569,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, | |||||
| #if COMPILE_TEMPLATE_MMXEXT | #if COMPILE_TEMPLATE_MMXEXT | ||||
| #undef WRITEBGR24 | #undef WRITEBGR24 | ||||
| #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX2(dst, dstw, index) | |||||
| #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMXEXT(dst, dstw, index) | |||||
| #else | #else | ||||
| #undef WRITEBGR24 | #undef WRITEBGR24 | ||||
| #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index) | #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index) | ||||
| @@ -1411,7 +1411,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, | |||||
| PREFETCH" 64(%%"REG_c") \n\t" | PREFETCH" 64(%%"REG_c") \n\t" | ||||
| #if ARCH_X86_64 | #if ARCH_X86_64 | ||||
| #define CALL_MMX2_FILTER_CODE \ | |||||
| #define CALL_MMXEXT_FILTER_CODE \ | |||||
| "movl (%%"REG_b"), %%esi \n\t"\ | "movl (%%"REG_b"), %%esi \n\t"\ | ||||
| "call *%4 \n\t"\ | "call *%4 \n\t"\ | ||||
| "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\ | "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\ | ||||
| @@ -1420,7 +1420,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, | |||||
| "xor %%"REG_a", %%"REG_a" \n\t"\ | "xor %%"REG_a", %%"REG_a" \n\t"\ | ||||
| #else | #else | ||||
| #define CALL_MMX2_FILTER_CODE \ | |||||
| #define CALL_MMXEXT_FILTER_CODE \ | |||||
| "movl (%%"REG_b"), %%esi \n\t"\ | "movl (%%"REG_b"), %%esi \n\t"\ | ||||
| "call *%4 \n\t"\ | "call *%4 \n\t"\ | ||||
| "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\ | "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\ | ||||
| @@ -1429,14 +1429,14 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, | |||||
| #endif /* ARCH_X86_64 */ | #endif /* ARCH_X86_64 */ | ||||
| CALL_MMX2_FILTER_CODE | |||||
| CALL_MMX2_FILTER_CODE | |||||
| CALL_MMX2_FILTER_CODE | |||||
| CALL_MMX2_FILTER_CODE | |||||
| CALL_MMX2_FILTER_CODE | |||||
| CALL_MMX2_FILTER_CODE | |||||
| CALL_MMX2_FILTER_CODE | |||||
| CALL_MMX2_FILTER_CODE | |||||
| CALL_MMXEXT_FILTER_CODE | |||||
| CALL_MMXEXT_FILTER_CODE | |||||
| CALL_MMXEXT_FILTER_CODE | |||||
| CALL_MMXEXT_FILTER_CODE | |||||
| CALL_MMXEXT_FILTER_CODE | |||||
| CALL_MMXEXT_FILTER_CODE | |||||
| CALL_MMXEXT_FILTER_CODE | |||||
| CALL_MMXEXT_FILTER_CODE | |||||
| #if defined(PIC) | #if defined(PIC) | ||||
| "mov %5, %%"REG_b" \n\t" | "mov %5, %%"REG_b" \n\t" | ||||
| @@ -1506,10 +1506,10 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, | |||||
| PREFETCH" 32(%%"REG_c") \n\t" | PREFETCH" 32(%%"REG_c") \n\t" | ||||
| PREFETCH" 64(%%"REG_c") \n\t" | PREFETCH" 64(%%"REG_c") \n\t" | ||||
| CALL_MMX2_FILTER_CODE | |||||
| CALL_MMX2_FILTER_CODE | |||||
| CALL_MMX2_FILTER_CODE | |||||
| CALL_MMX2_FILTER_CODE | |||||
| CALL_MMXEXT_FILTER_CODE | |||||
| CALL_MMXEXT_FILTER_CODE | |||||
| CALL_MMXEXT_FILTER_CODE | |||||
| CALL_MMXEXT_FILTER_CODE | |||||
| "xor %%"REG_a", %%"REG_a" \n\t" // i | "xor %%"REG_a", %%"REG_a" \n\t" // i | ||||
| "mov %5, %%"REG_c" \n\t" // src | "mov %5, %%"REG_c" \n\t" // src | ||||
| "mov %6, %%"REG_D" \n\t" // buf2 | "mov %6, %%"REG_D" \n\t" // buf2 | ||||
| @@ -1517,10 +1517,10 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, | |||||
| PREFETCH" 32(%%"REG_c") \n\t" | PREFETCH" 32(%%"REG_c") \n\t" | ||||
| PREFETCH" 64(%%"REG_c") \n\t" | PREFETCH" 64(%%"REG_c") \n\t" | ||||
| CALL_MMX2_FILTER_CODE | |||||
| CALL_MMX2_FILTER_CODE | |||||
| CALL_MMX2_FILTER_CODE | |||||
| CALL_MMX2_FILTER_CODE | |||||
| CALL_MMXEXT_FILTER_CODE | |||||
| CALL_MMXEXT_FILTER_CODE | |||||
| CALL_MMXEXT_FILTER_CODE | |||||
| CALL_MMXEXT_FILTER_CODE | |||||
| #if defined(PIC) | #if defined(PIC) | ||||
| "mov %7, %%"REG_b" \n\t" | "mov %7, %%"REG_b" \n\t" | ||||