This allows combining multiple conditionals in a single statement.

tags/n0.11
| @@ -541,7 +541,8 @@ print_config_mak(){ | |||
| } | |||
| print_config_asm(){ | |||
| enabled $1 && echo "%define $2" | |||
| enabled $1 && v=1 || v=0 | |||
| echo "%define $2 $v" | |||
| } | |||
| print_config(){ | |||
| @@ -69,12 +69,12 @@ cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset | |||
| %define LOOP_ALIGN | |||
| INIT_MMX | |||
| AC3_EXPONENT_MIN mmx | |||
| %ifdef HAVE_MMX2 | |||
| %if HAVE_MMX2 | |||
| %define PMINUB PMINUB_MMXEXT | |||
| %define LOOP_ALIGN ALIGN 16 | |||
| AC3_EXPONENT_MIN mmxext | |||
| %endif | |||
| %ifdef HAVE_SSE | |||
| %if HAVE_SSE | |||
| INIT_XMM | |||
| AC3_EXPONENT_MIN sse2 | |||
| %endif | |||
| @@ -367,7 +367,7 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum | |||
| pabsd %1, %1 | |||
| %endmacro | |||
| %ifdef HAVE_AMD3DNOW | |||
| %if HAVE_AMD3DNOW | |||
| INIT_MMX | |||
| cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len | |||
| add expq, lenq | |||
| @@ -439,11 +439,11 @@ cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len | |||
| REP_RET | |||
| %endmacro | |||
| %ifdef HAVE_SSE | |||
| %if HAVE_SSE | |||
| INIT_XMM | |||
| %define PABSD PABSD_MMX | |||
| AC3_EXTRACT_EXPONENTS sse2 | |||
| %ifdef HAVE_SSSE3 | |||
| %if HAVE_SSSE3 | |||
| %define PABSD PABSD_SSSE3 | |||
| AC3_EXTRACT_EXPONENTS ssse3 | |||
| %endif | |||
| @@ -211,7 +211,7 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000 | |||
| INIT_YMM | |||
| SECTION_TEXT | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| ; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in) | |||
| cglobal dct32_float_avx, 2,3,8, out, in, tmp | |||
| ; pass 1 | |||
| @@ -289,7 +289,7 @@ INIT_XMM | |||
| %define BUTTERFLY BUTTERFLY_SSE | |||
| %define BUTTERFLY0 BUTTERFLY0_SSE | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define SPILL SWAP | |||
| %define UNSPILL SWAP | |||
| @@ -138,7 +138,7 @@ align 16 | |||
| %endif | |||
| %define t0 [v1q + orderq] | |||
| %define t1 [v1q + orderq + mmsize] | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mova m8, t0 | |||
| mova m9, t1 | |||
| %define t0 m8 | |||
| @@ -474,7 +474,7 @@ cglobal scalarproduct_float_sse, 3,3,2, v1, v2, offset | |||
| movss xmm1, xmm0 | |||
| shufps xmm0, xmm0, 1 | |||
| addss xmm0, xmm1 | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| movd r0m, xmm0 | |||
| fld dword r0m | |||
| %endif | |||
| @@ -498,7 +498,7 @@ cglobal scalarproduct_float_sse, 3,3,2, v1, v2, offset | |||
| ; function implementations. Fast are fixed-width, slow is variable-width | |||
| %macro EMU_EDGE_FUNC 0 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define w_reg r10 | |||
| cglobal emu_edge_core, 6, 7, 1 | |||
| mov r11, r5 ; save block_h | |||
| @@ -513,14 +513,14 @@ cglobal emu_edge_core, 2, 7, 0 | |||
| mov w_reg, r7m | |||
| sub w_reg, r6m ; w = start_x - end_x | |||
| sub r5, r4 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| sub r4, r3 | |||
| %else | |||
| sub r4, dword r3m | |||
| %endif | |||
| cmp w_reg, 22 | |||
| jg .slow_v_extend_loop | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| mov r2, r2m ; linesize | |||
| %endif | |||
| sal w_reg, 7 ; w * 128 | |||
| @@ -536,7 +536,7 @@ cglobal emu_edge_core, 2, 7, 0 | |||
| ; horizontal extend (left/right) | |||
| mov w_reg, r6m ; start_x | |||
| sub r0, w_reg | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mov r3, r0 ; backup of buf+block_h*linesize | |||
| mov r5, r11 | |||
| %else | |||
| @@ -564,7 +564,7 @@ cglobal emu_edge_core, 2, 7, 0 | |||
| ; now r3(64)/r0(32)=buf,r2=linesize,r11/r5=block_h,r6/r3=val, r10/r6=end_x, r1=block_w | |||
| .right_extend: | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| mov r0, r0m | |||
| mov r5, r5m | |||
| %endif | |||
| @@ -589,13 +589,13 @@ cglobal emu_edge_core, 2, 7, 0 | |||
| .h_extend_end: | |||
| RET | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define vall al | |||
| %define valh ah | |||
| %define valw ax | |||
| %define valw2 r10w | |||
| %define valw3 r3w | |||
| %ifdef WIN64 | |||
| %if WIN64 | |||
| %define valw4 r4w | |||
| %else ; unix64 | |||
| %define valw4 r3w | |||
| @@ -643,7 +643,7 @@ cglobal emu_edge_core, 2, 7, 0 | |||
| %endrep ; %2/16 | |||
| %endif | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %if (%2-%%src_off) == 8 | |||
| mov rax, [r1+%%src_off] | |||
| %assign %%src_off %%src_off+8 | |||
| @@ -692,7 +692,7 @@ cglobal emu_edge_core, 2, 7, 0 | |||
| %endrep ; %2/16 | |||
| %endif | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %if (%2-%%dst_off) == 8 | |||
| mov [r0+%%dst_off], rax | |||
| %assign %%dst_off %%dst_off+8 | |||
| @@ -740,7 +740,7 @@ cglobal emu_edge_core, 2, 7, 0 | |||
| ALIGN 128 | |||
| .emuedge_v_extend_ %+ %%n: | |||
| ; extend pixels above body | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| test r3 , r3 ; if (!start_y) | |||
| jz .emuedge_copy_body_ %+ %%n %+ _loop ; goto body | |||
| %else ; ARCH_X86_32 | |||
| @@ -751,7 +751,7 @@ ALIGN 128 | |||
| .emuedge_extend_top_ %+ %%n %+ _loop: ; do { | |||
| WRITE_NUM_BYTES top, %%n ; write bytes | |||
| add r0 , r2 ; dst += linesize | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| dec r3d | |||
| %else ; ARCH_X86_32 | |||
| dec dword r3m | |||
| @@ -779,7 +779,7 @@ ALIGN 128 | |||
| jnz .emuedge_extend_bottom_ %+ %%n %+ _loop ; } while (--block_h) | |||
| .emuedge_v_extend_end_ %+ %%n: | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| ret | |||
| %else ; ARCH_X86_32 | |||
| rep ret | |||
| @@ -841,7 +841,7 @@ ALIGN 64 | |||
| WRITE_V_PIXEL %%n, r0 ; write pixels | |||
| dec r5 | |||
| jnz .emuedge_extend_left_ %+ %%n ; } while (--block_h) | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| ret | |||
| %else ; ARCH_X86_32 | |||
| rep ret | |||
| @@ -856,7 +856,7 @@ ALIGN 64 | |||
| %rep 11 | |||
| ALIGN 64 | |||
| .emuedge_extend_right_ %+ %%n: ; do { | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| sub r3, r2 ; dst -= linesize | |||
| READ_V_PIXEL %%n, [r3+w_reg-1] ; read pixels | |||
| WRITE_V_PIXEL %%n, r3+r4-%%n ; write pixels | |||
| @@ -868,7 +868,7 @@ ALIGN 64 | |||
| dec r5 | |||
| %endif ; ARCH_X86_64/32 | |||
| jnz .emuedge_extend_right_ %+ %%n ; } while (--block_h) | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| ret | |||
| %else ; ARCH_X86_32 | |||
| rep ret | |||
| @@ -876,7 +876,7 @@ ALIGN 64 | |||
| %assign %%n %%n+2 | |||
| %endrep | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| %define stack_offset 0x10 | |||
| %endif | |||
| %endmacro ; RIGHT_EXTEND | |||
| @@ -916,7 +916,7 @@ ALIGN 64 | |||
| V_COPY_NPX %1, mm0, movq, 8, 0xFFFFFFF8 | |||
| %else ; sse | |||
| V_COPY_NPX %1, xmm0, movups, 16, 0xFFFFFFF0 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define linesize r2 | |||
| V_COPY_NPX %1, rax , mov, 8 | |||
| %else ; ARCH_X86_32 | |||
| @@ -940,7 +940,7 @@ ALIGN 64 | |||
| .slow_v_extend_loop: | |||
| ; r0=buf,r1=src,r2(64)/r2m(32)=linesize,r3(64)/r3m(32)=start_x,r4=end_y,r5=block_h | |||
| ; r11(64)/r3(later-64)/r2(32)=cnt_reg,r6(64)/r3(32)=val_reg,r10(64)/r6(32)=w=end_x-start_x | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| push r11 ; save old value of block_h | |||
| test r3, r3 | |||
| %define cnt_reg r11 | |||
| @@ -956,18 +956,18 @@ ALIGN 64 | |||
| .do_body_copy: | |||
| V_COPY_ROW body, r4 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| pop r11 ; restore old value of block_h | |||
| %define cnt_reg r3 | |||
| %endif | |||
| test r5, r5 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| jz .v_extend_end | |||
| %else | |||
| jz .skip_bottom_extend | |||
| %endif | |||
| V_COPY_ROW bottom, r5 | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| .skip_bottom_extend: | |||
| mov r2, r2m | |||
| %endif | |||
| @@ -996,7 +996,7 @@ ALIGN 64 | |||
| .left_extend_loop_end: | |||
| dec r5 | |||
| jnz .slow_left_extend_loop | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| mov r2, r2m | |||
| %endif | |||
| jmp .right_extend | |||
| @@ -1006,7 +1006,7 @@ ALIGN 64 | |||
| .slow_right_extend_loop: | |||
| ; r3(64)/r0(32)=buf+block_h*linesize,r2=linesize,r4=block_w,r11(64)/r5(32)=block_h, | |||
| ; r10(64)/r6(32)=end_x,r6/r3=val,r1=cntr | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define buf_reg r3 | |||
| %define bh_reg r11 | |||
| %else | |||
| @@ -1047,7 +1047,7 @@ SLOW_RIGHT_EXTEND | |||
| %endmacro | |||
| emu_edge sse | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| emu_edge mmx | |||
| %endif | |||
| @@ -1138,7 +1138,7 @@ VECTOR_CLIP_INT32 6, 1, 0, 0 | |||
| %macro BUTTERFLIES_FLOAT_INTERLEAVE 0 | |||
| cglobal butterflies_float_interleave, 4,4,3, dst, src0, src1, len | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movsxd lenq, lend | |||
| %endif | |||
| test lenq, lenq | |||
| @@ -245,7 +245,7 @@ hadamard8x8_diff_%1: | |||
| lea r0, [r3*3] | |||
| DIFF_PIXELS_8 r1, r2, 0, r3, r0, rsp+gprsize | |||
| HADAMARD8 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8 | |||
| %else | |||
| TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [rsp+gprsize], [rsp+mmsize+gprsize] | |||
| @@ -270,7 +270,7 @@ HADAMARD8_DIFF_MMX mmx2 | |||
| INIT_XMM | |||
| %define ABS2 ABS2_MMX2 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define ABS_SUM_8x8 ABS_SUM_8x8_64 | |||
| %else | |||
| %define ABS_SUM_8x8 ABS_SUM_8x8_32 | |||
| @@ -30,7 +30,7 @@ | |||
| %include "x86inc.asm" | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define pointer resq | |||
| %else | |||
| %define pointer resd | |||
| @@ -73,7 +73,7 @@ cextern cos_ %+ i | |||
| %assign i i<<1 | |||
| %endrep | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define pointer dq | |||
| %else | |||
| %define pointer dd | |||
| @@ -299,7 +299,7 @@ IF%1 mova Z(1), m5 | |||
| INIT_YMM | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| align 16 | |||
| fft8_avx: | |||
| mova m0, Z(0) | |||
| @@ -534,7 +534,7 @@ DEFINE_ARGS z, w, n, o1, o3 | |||
| INIT_YMM | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| %macro INTERL_AVX 5 | |||
| vunpckhps %3, %2, %1 | |||
| vunpcklps %2, %2, %1 | |||
| @@ -638,7 +638,7 @@ cglobal fft_dispatch%3%2, 2,5,8, z, nbits | |||
| RET | |||
| %endmacro ; DECL_FFT | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_YMM | |||
| DECL_FFT 6, _avx | |||
| DECL_FFT 6, _avx, _interleave | |||
| @@ -750,7 +750,7 @@ INIT_XMM | |||
| %macro DECL_IMDCT 2 | |||
| cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *input | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define rrevtab r10 | |||
| %define rtcos r11 | |||
| %define rtsin r12 | |||
| @@ -769,24 +769,24 @@ cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample * | |||
| mov rtsin, [r0+FFTContext.tsin] | |||
| add rtcos, r3 | |||
| add rtsin, r3 | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| push rtcos | |||
| push rtsin | |||
| %endif | |||
| shr r3, 1 | |||
| mov rrevtab, [r0+FFTContext.revtab] | |||
| add rrevtab, r3 | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| push rrevtab | |||
| %endif | |||
| sub r3, 4 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| xor r4, r4 | |||
| sub r4, r3 | |||
| %endif | |||
| .pre: | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| ;unspill | |||
| xor r4, r4 | |||
| sub r4, r3 | |||
| @@ -795,7 +795,7 @@ cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample * | |||
| %endif | |||
| PREROTATER r4, r3, r2, rtcos, rtsin | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movzx r5, word [rrevtab+r4-4] | |||
| movzx r6, word [rrevtab+r4-2] | |||
| movzx r13, word [rrevtab+r3] | |||
| @@ -829,7 +829,7 @@ cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample * | |||
| mov r0d, [r5+FFTContext.mdctsize] | |||
| add r6, r0 | |||
| shr r0, 1 | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| %define rtcos r2 | |||
| %define rtsin r3 | |||
| mov rtcos, [esp+8] | |||
| @@ -839,7 +839,7 @@ cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample * | |||
| mov r1, -mmsize | |||
| sub r1, r0 | |||
| %2 r0, r1, r6, rtcos, rtsin | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| pop r14 | |||
| pop r13 | |||
| pop r12 | |||
| @@ -856,6 +856,6 @@ DECL_IMDCT _sse, POSROTATESHUF | |||
| INIT_YMM | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| DECL_IMDCT _avx, POSROTATESHUF_AVX | |||
| %endif | |||
| @@ -28,14 +28,14 @@ SECTION_TEXT | |||
| ; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len); | |||
| ;--------------------------------------------------------------------------------- | |||
| %macro INT32_TO_FLOAT_FMUL_SCALAR 2 | |||
| %ifdef UNIX64 | |||
| %if UNIX64 | |||
| cglobal int32_to_float_fmul_scalar_%1, 3,3,%2, dst, src, len | |||
| %else | |||
| cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len | |||
| %endif | |||
| %ifdef WIN64 | |||
| %if WIN64 | |||
| SWAP 0, 2 | |||
| %elifdef ARCH_X86_32 | |||
| %elif ARCH_X86_32 | |||
| movss m0, mulm | |||
| %endif | |||
| SPLATD m0 | |||
| @@ -180,7 +180,7 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2 | |||
| %macro FLOAT_TO_INT16_INTERLEAVE6 1 | |||
| ; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len) | |||
| cglobal float_to_int16_interleave6_%1, 2,7,0, dst, src, src1, src2, src3, src4, src5 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define lend r10d | |||
| mov lend, r2d | |||
| %else | |||
| @@ -241,7 +241,7 @@ FLOAT_TO_INT16_INTERLEAVE6 3dn2 | |||
| %macro FLOAT_INTERLEAVE6 2 | |||
| cglobal float_interleave6_%1, 2,7,%2, dst, src, src1, src2, src3, src4, src5 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define lend r10d | |||
| mov lend, r2d | |||
| %else | |||
| @@ -94,7 +94,7 @@ SECTION .text | |||
| ; put/avg_h264_chroma_mc8_mmx_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/, | |||
| ; int stride, int h, int mx, int my) | |||
| cglobal %1_%2_chroma_mc8_%3, 6, 7, 0 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movsxd r2, r2d | |||
| %endif | |||
| mov r6d, r5d | |||
| @@ -113,7 +113,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0 | |||
| %define rnd_1d_rv40 rnd_rv40_1d_tbl | |||
| %define rnd_2d_rv40 rnd_rv40_2d_tbl | |||
| %endif | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mov r10, r5 | |||
| and r10, 6 ; &~1 for mx/my=[0,7] | |||
| lea r10, [r10*4+r4] | |||
| @@ -147,7 +147,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0 | |||
| %ifdef PIC | |||
| lea r11, [rnd_rv40_1d_tbl] | |||
| %endif | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| mov r5, r0m | |||
| %endif | |||
| %endif | |||
| @@ -198,7 +198,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0 | |||
| %ifdef PIC | |||
| lea r11, [rnd_rv40_2d_tbl] | |||
| %endif | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| mov r5, r0m | |||
| %endif | |||
| %endif | |||
| @@ -279,7 +279,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 0 | |||
| %macro chroma_mc4_mmx_func 3 | |||
| cglobal %1_%2_chroma_mc4_%3, 6, 6, 0 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movsxd r2, r2d | |||
| %endif | |||
| pxor m7, m7 | |||
| @@ -364,7 +364,7 @@ cglobal %1_%2_chroma_mc4_%3, 6, 6, 0 | |||
| %macro chroma_mc2_mmx_func 3 | |||
| cglobal %1_%2_chroma_mc2_%3, 6, 7, 0 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movsxd r2, r2d | |||
| %endif | |||
| @@ -452,7 +452,7 @@ chroma_mc4_mmx_func avg, rv40, 3dnow | |||
| %macro chroma_mc8_ssse3_func 3 | |||
| cglobal %1_%2_chroma_mc8_%3, 6, 7, 8 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movsxd r2, r2d | |||
| %endif | |||
| mov r6d, r5d | |||
| @@ -600,7 +600,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8 | |||
| %macro chroma_mc4_ssse3_func 3 | |||
| cglobal %1_%2_chroma_mc4_%3, 6, 7, 0 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movsxd r2, r2d | |||
| %endif | |||
| mov r6, r4 | |||
| @@ -252,7 +252,7 @@ cglobal %1_h264_chroma_mc2_10_%2, 6,7 | |||
| %define CHROMAMC_AVG NOTHING | |||
| INIT_XMM | |||
| CHROMA_MC8 put, sse2 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| CHROMA_MC8 put, avx | |||
| %endif | |||
| @@ -264,7 +264,7 @@ CHROMA_MC2 put, mmxext | |||
| %define PAVG pavgw | |||
| INIT_XMM | |||
| CHROMA_MC8 avg, sse2 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| CHROMA_MC8 avg, avx | |||
| %endif | |||
| @@ -200,7 +200,7 @@ cextern pb_A1 | |||
| ; out: %4 = |%1-%2|>%3 | |||
| ; clobbers: %5 | |||
| %macro DIFF_GT2 5 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| psubusb %5, %2, %1 | |||
| psubusb %4, %1, %2 | |||
| %else | |||
| @@ -278,7 +278,7 @@ cextern pb_A1 | |||
| mova %4, %2 | |||
| %endmacro | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| ;----------------------------------------------------------------------------- | |||
| ; void deblock_v_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) | |||
| ;----------------------------------------------------------------------------- | |||
| @@ -333,7 +333,7 @@ cglobal deblock_h_luma_8_%1, 5,7 | |||
| lea r11, [r10+r10*2] | |||
| lea r6, [r0-4] | |||
| lea r5, [r0-4+r11] | |||
| %ifdef WIN64 | |||
| %if WIN64 | |||
| sub rsp, 0x98 | |||
| %define pix_tmp rsp+0x30 | |||
| %else | |||
| @@ -352,7 +352,7 @@ cglobal deblock_h_luma_8_%1, 5,7 | |||
| ; don't backup r6, r5, r10, r11 because deblock_v_luma_sse2 doesn't use them | |||
| lea r0, [pix_tmp+0x30] | |||
| mov r1d, 0x10 | |||
| %ifdef WIN64 | |||
| %if WIN64 | |||
| mov [rsp+0x20], r4 | |||
| %endif | |||
| call deblock_v_luma_8_%1 | |||
| @@ -376,7 +376,7 @@ cglobal deblock_h_luma_8_%1, 5,7 | |||
| movq m3, [pix_tmp+0x40] | |||
| TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r10, r11) | |||
| %ifdef WIN64 | |||
| %if WIN64 | |||
| add rsp, 0x98 | |||
| %else | |||
| add rsp, 0x68 | |||
| @@ -513,7 +513,7 @@ DEBLOCK_LUMA avx, v, 16 | |||
| %macro LUMA_INTRA_P012 4 ; p0..p3 in memory | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| pavgb t0, p2, p1 | |||
| pavgb t1, p0, q0 | |||
| %else | |||
| @@ -524,7 +524,7 @@ DEBLOCK_LUMA avx, v, 16 | |||
| %endif | |||
| pavgb t0, t1 ; ((p2+p1+1)/2 + (p0+q0+1)/2 + 1)/2 | |||
| mova t5, t1 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| paddb t2, p2, p1 | |||
| paddb t3, p0, q0 | |||
| %else | |||
| @@ -542,7 +542,7 @@ DEBLOCK_LUMA avx, v, 16 | |||
| pand t2, mpb_1 | |||
| psubb t0, t2 ; p1' = (p2+p1+p0+q0+2)/4; | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| pavgb t1, p2, q1 | |||
| psubb t2, p2, q1 | |||
| %else | |||
| @@ -617,7 +617,7 @@ DEBLOCK_LUMA avx, v, 16 | |||
| %define t1 m5 | |||
| %define t2 m6 | |||
| %define t3 m7 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define p2 m8 | |||
| %define q2 m9 | |||
| %define t4 m10 | |||
| @@ -644,7 +644,7 @@ DEBLOCK_LUMA avx, v, 16 | |||
| ; void deblock_v_luma_intra( uint8_t *pix, int stride, int alpha, int beta ) | |||
| ;----------------------------------------------------------------------------- | |||
| cglobal deblock_%2_luma_intra_8_%1, 4,6,16 | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| sub esp, 0x60 | |||
| %endif | |||
| lea r4, [r1*4] | |||
| @@ -659,7 +659,7 @@ cglobal deblock_%2_luma_intra_8_%1, 4,6,16 | |||
| mova p0, [r4+r5] | |||
| mova q0, [r0] | |||
| mova q1, [r0+r1] | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| pxor mpb_0, mpb_0 | |||
| mova mpb_1, [pb_1] | |||
| LOAD_MASK r2d, r3d, t5 ; m5=beta-1, t5=alpha-1, m7=mask0 | |||
| @@ -695,13 +695,13 @@ cglobal deblock_%2_luma_intra_8_%1, 4,6,16 | |||
| LUMA_INTRA_SWAP_PQ | |||
| LUMA_INTRA_P012 [r0], [r0+r1], [r0+2*r1], [r0+r5] | |||
| .end: | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| add esp, 0x60 | |||
| %endif | |||
| RET | |||
| INIT_MMX | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| ;----------------------------------------------------------------------------- | |||
| ; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta ) | |||
| ;----------------------------------------------------------------------------- | |||
| @@ -779,7 +779,7 @@ INIT_XMM | |||
| DEBLOCK_LUMA_INTRA sse2, v | |||
| INIT_AVX | |||
| DEBLOCK_LUMA_INTRA avx , v | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| INIT_MMX | |||
| DEBLOCK_LUMA_INTRA mmxext, v8 | |||
| %endif | |||
| @@ -824,7 +824,7 @@ cglobal deblock_v_chroma_8_mmxext, 5,6 | |||
| ; void ff_deblock_h_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) | |||
| ;----------------------------------------------------------------------------- | |||
| cglobal deblock_h_chroma_8_mmxext, 5,7 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define buf0 [rsp-24] | |||
| %define buf1 [rsp-16] | |||
| %else | |||
| @@ -302,7 +302,7 @@ cglobal deblock_h_luma_10_%1, 5,6,8*(mmsize/16) | |||
| %endmacro | |||
| INIT_XMM | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| ; in: m0=p1, m1=p0, m2=q0, m3=q1, m8=p2, m9=q2 | |||
| ; m12=alpha, m13=beta | |||
| ; out: m0=p1', m3=q1', m1=p0', m2=q0' | |||
| @@ -435,7 +435,7 @@ DEBLOCK_LUMA_64 avx | |||
| ; %1=p0 %2=p1 %3=p2 %4=p3 %5=q0 %6=q1 %7=mask0 | |||
| ; %8=mask1p %9=2 %10=p0' %11=p1' %12=p2' | |||
| %macro LUMA_INTRA_P012 12 ; p0..p3 in memory | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| paddw t0, %3, %2 | |||
| mova t2, %4 | |||
| paddw t2, %3 | |||
| @@ -501,7 +501,7 @@ DEBLOCK_LUMA_64 avx | |||
| LOAD_AB t0, t1, r2d, r3d | |||
| mova %1, t0 | |||
| LOAD_MASK m0, m1, m2, m3, %1, t1, t0, t2, t3 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mova %2, t0 ; mask0 | |||
| psrlw t3, %1, 2 | |||
| %else | |||
| @@ -598,7 +598,7 @@ DEBLOCK_LUMA_64 avx | |||
| %endif | |||
| %endmacro | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| ;----------------------------------------------------------------------------- | |||
| ; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta ) | |||
| ;----------------------------------------------------------------------------- | |||
| @@ -792,7 +792,7 @@ cglobal deblock_h_luma_intra_10_%1, 4,7,8*(mmsize/16) | |||
| RET | |||
| %endmacro | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| INIT_MMX | |||
| DEBLOCK_LUMA mmxext | |||
| DEBLOCK_LUMA_INTRA mmxext | |||
| @@ -907,7 +907,7 @@ cglobal deblock_v_chroma_intra_10_%1, 4,6-(mmsize/16),8*(mmsize/16) | |||
| %endif | |||
| %endmacro | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| INIT_MMX | |||
| DEBLOCK_CHROMA mmxext | |||
| %endif | |||
| @@ -198,14 +198,14 @@ cglobal h264_idct8_add_8_mmx, 3, 4, 0 | |||
| ; %1=uint8_t *dst, %2=int16_t *block, %3=int stride | |||
| %macro IDCT8_ADD_SSE 4 | |||
| IDCT8_1D_FULL %2 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8 | |||
| %else | |||
| TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [%2], [%2+16] | |||
| %endif | |||
| paddw m0, [pw_32] | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| mova [%2 ], m0 | |||
| mova [%2+16], m4 | |||
| IDCT8_1D [%2], [%2+ 16] | |||
| @@ -225,7 +225,7 @@ cglobal h264_idct8_add_8_mmx, 3, 4, 0 | |||
| STORE_DIFF m1, m6, m7, [%1+%3 ] | |||
| STORE_DIFF m2, m6, m7, [%1+%3*2] | |||
| STORE_DIFF m3, m6, m7, [%1+%4 ] | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| mova m0, [%2 ] | |||
| mova m1, [%2+16] | |||
| %else | |||
| @@ -371,7 +371,7 @@ cglobal h264_idct_add16_8_mmx2, 5, 7, 0 | |||
| test r6, r6 | |||
| jz .no_dc | |||
| DC_ADD_MMX2_INIT r2, r3, r6 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define dst_reg r10 | |||
| %define dst_regd r10d | |||
| %else | |||
| @@ -381,7 +381,7 @@ cglobal h264_idct_add16_8_mmx2, 5, 7, 0 | |||
| mov dst_regd, dword [r1+r5*4] | |||
| lea dst_reg, [r0+dst_reg] | |||
| DC_ADD_MMX2_OP movh, dst_reg, r3, r6 | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| mov r1, r1m | |||
| %endif | |||
| inc r5 | |||
| @@ -448,7 +448,7 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0 | |||
| test r6, r6 | |||
| jz .skipblock | |||
| DC_ADD_MMX2_INIT r2, r3, r6 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define dst_reg r10 | |||
| %define dst_regd r10d | |||
| %else | |||
| @@ -458,7 +458,7 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0 | |||
| mov dst_regd, dword [r1+r5*4] | |||
| add dst_reg, r0 | |||
| DC_ADD_MMX2_OP movh, dst_reg, r3, r6 | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| mov r1, r1m | |||
| %endif | |||
| .skipblock | |||
| @@ -489,7 +489,7 @@ cglobal h264_idct8_add4_8_mmx2, 5, 7, 0 | |||
| test r6, r6 | |||
| jz .no_dc | |||
| DC_ADD_MMX2_INIT r2, r3, r6 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define dst_reg r10 | |||
| %define dst_regd r10d | |||
| %else | |||
| @@ -501,7 +501,7 @@ cglobal h264_idct8_add4_8_mmx2, 5, 7, 0 | |||
| DC_ADD_MMX2_OP mova, dst_reg, r3, r6 | |||
| lea dst_reg, [dst_reg+r3*4] | |||
| DC_ADD_MMX2_OP mova, dst_reg, r3, r6 | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| mov r1, r1m | |||
| %endif | |||
| add r5, 4 | |||
| @@ -550,7 +550,7 @@ cglobal h264_idct8_add4_8_sse2, 5, 7, 10 | |||
| jz .no_dc | |||
| INIT_MMX | |||
| DC_ADD_MMX2_INIT r2, r3, r6 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define dst_reg r10 | |||
| %define dst_regd r10d | |||
| %else | |||
| @@ -562,7 +562,7 @@ INIT_MMX | |||
| DC_ADD_MMX2_OP mova, dst_reg, r3, r6 | |||
| lea dst_reg, [dst_reg+r3*4] | |||
| DC_ADD_MMX2_OP mova, dst_reg, r3, r6 | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| mov r1, r1m | |||
| %endif | |||
| add r5, 4 | |||
| @@ -575,7 +575,7 @@ INIT_XMM | |||
| mov dst_regd, dword [r1+r5*4] | |||
| add dst_reg, r0 | |||
| IDCT8_ADD_SSE dst_reg, r2, r3, r6 | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| mov r1, r1m | |||
| %endif | |||
| .skipblock | |||
| @@ -593,7 +593,7 @@ h264_idct_add8_mmx_plane: | |||
| or r6w, word [r2] | |||
| test r6, r6 | |||
| jz .skipblock | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mov r0d, dword [r1+r5*4] | |||
| add r0, [r10] | |||
| %else | |||
| @@ -617,13 +617,13 @@ cglobal h264_idct_add8_8_mmx, 5, 7, 0 | |||
| %ifdef PIC | |||
| lea r11, [scan8_mem] | |||
| %endif | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mov r10, r0 | |||
| %endif | |||
| call h264_idct_add8_mmx_plane | |||
| mov r5, 32 | |||
| add r2, 384 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| add r10, gprsize | |||
| %else | |||
| add r0mp, gprsize | |||
| @@ -637,7 +637,7 @@ h264_idct_add8_mmx2_plane | |||
| movzx r6, byte [r4+r6] | |||
| test r6, r6 | |||
| jz .try_dc | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mov r0d, dword [r1+r5*4] | |||
| add r0, [r10] | |||
| %else | |||
| @@ -656,7 +656,7 @@ h264_idct_add8_mmx2_plane | |||
| test r6, r6 | |||
| jz .skipblock | |||
| DC_ADD_MMX2_INIT r2, r3, r6 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mov r0d, dword [r1+r5*4] | |||
| add r0, [r10] | |||
| %else | |||
| @@ -677,7 +677,7 @@ h264_idct_add8_mmx2_plane | |||
| cglobal h264_idct_add8_8_mmx2, 5, 7, 0 | |||
| mov r5, 16 | |||
| add r2, 512 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mov r10, r0 | |||
| %endif | |||
| %ifdef PIC | |||
| @@ -686,7 +686,7 @@ cglobal h264_idct_add8_8_mmx2, 5, 7, 0 | |||
| call h264_idct_add8_mmx2_plane | |||
| mov r5, 32 | |||
| add r2, 384 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| add r10, gprsize | |||
| %else | |||
| add r0mp, gprsize | |||
| @@ -738,7 +738,7 @@ x264_add8x4_idct_sse2: | |||
| test r0, r0 | |||
| jz .cycle%1end | |||
| mov r0d, dword [r1+%1*8] | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| add r0, r10 | |||
| %else | |||
| add r0, r0m | |||
| @@ -753,7 +753,7 @@ x264_add8x4_idct_sse2: | |||
| ; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset, | |||
| ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) | |||
| cglobal h264_idct_add16_8_sse2, 5, 5, 8 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mov r10, r0 | |||
| %endif | |||
| ; unrolling of the loop leads to an average performance gain of | |||
| @@ -773,7 +773,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5, 8 | |||
| test r0, r0 | |||
| jz .try%1dc | |||
| mov r0d, dword [r1+%1*8] | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| add r0, r10 | |||
| %else | |||
| add r0, r0m | |||
| @@ -785,7 +785,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5, 8 | |||
| or r0w, word [r2+32] | |||
| jz .cycle%1end | |||
| mov r0d, dword [r1+%1*8] | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| add r0, r10 | |||
| %else | |||
| add r0, r0m | |||
| @@ -800,7 +800,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5, 8 | |||
| ; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset, | |||
| ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) | |||
| cglobal h264_idct_add16intra_8_sse2, 5, 7, 8 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mov r10, r0 | |||
| %endif | |||
| add16intra_sse2_cycle 0, 0xc | |||
| @@ -817,7 +817,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8 | |||
| movzx r0, word [r4+%2] | |||
| test r0, r0 | |||
| jz .try%1dc | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))] | |||
| add r0, [r10] | |||
| %else | |||
| @@ -831,7 +831,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8 | |||
| movsx r0, word [r2 ] | |||
| or r0w, word [r2+32] | |||
| jz .cycle%1end | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))] | |||
| add r0, [r10] | |||
| %else | |||
| @@ -852,12 +852,12 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8 | |||
| ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) | |||
| cglobal h264_idct_add8_8_sse2, 5, 7, 8 | |||
| add r2, 512 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mov r10, r0 | |||
| %endif | |||
| add8_sse2_cycle 0, 0x34 | |||
| add8_sse2_cycle 1, 0x3c | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| add r10, gprsize | |||
| %else | |||
| add r0mp, gprsize | |||
| @@ -977,11 +977,11 @@ cglobal h264_luma_dc_dequant_idct_%1, 3,4,%2 | |||
| WALSH4_1D 0,1,2,3,4 | |||
| ; shift, tmp, output, qmul | |||
| %ifdef WIN64 | |||
| %if WIN64 | |||
| DECLARE_REG_TMP 0,3,1,2 | |||
| ; we can't avoid this, because r0 is the shift register (ecx) on win64 | |||
| xchg r0, t2 | |||
| %elifdef ARCH_X86_64 | |||
| %elif ARCH_X86_64 | |||
| DECLARE_REG_TMP 3,1,0,2 | |||
| %else | |||
| DECLARE_REG_TMP 1,3,0,2 | |||
| @@ -98,7 +98,7 @@ cglobal h264_idct_add_10_%1, 3,3 | |||
| INIT_XMM | |||
| IDCT_ADD_10 sse2 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| IDCT_ADD_10 avx | |||
| %endif | |||
| @@ -128,7 +128,7 @@ add4x4_idct_%1: | |||
| INIT_XMM | |||
| ALIGN 16 | |||
| ADD4x4IDCT sse2 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| ALIGN 16 | |||
| ADD4x4IDCT avx | |||
| @@ -168,7 +168,7 @@ cglobal h264_idct_add16_10_%1, 5,6 | |||
| INIT_XMM | |||
| IDCT_ADD16_10 sse2 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| IDCT_ADD16_10 avx | |||
| %endif | |||
| @@ -234,7 +234,7 @@ cglobal h264_idct8_dc_add_10_%1,3,3,7 | |||
| INIT_XMM | |||
| IDCT8_DC_ADD sse2 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| IDCT8_DC_ADD avx | |||
| %endif | |||
| @@ -305,7 +305,7 @@ cglobal h264_idct_add16intra_10_%1,5,7,8 | |||
| INIT_XMM | |||
| IDCT_ADD16INTRA_10 sse2 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| IDCT_ADD16INTRA_10 avx | |||
| %endif | |||
| @@ -316,7 +316,7 @@ IDCT_ADD16INTRA_10 avx | |||
| ;----------------------------------------------------------------------------- | |||
| %macro IDCT_ADD8 1 | |||
| cglobal h264_idct_add8_10_%1,5,7 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mov r10, r0 | |||
| %endif | |||
| add r2, 1024 | |||
| @@ -324,7 +324,7 @@ cglobal h264_idct_add8_10_%1,5,7 | |||
| ADD16_OP_INTRA %1, 16, 4+ 6*8 | |||
| ADD16_OP_INTRA %1, 18, 4+ 7*8 | |||
| add r2, 1024-128*2 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mov r0, [r10+gprsize] | |||
| %else | |||
| mov r0, r0m | |||
| @@ -342,7 +342,7 @@ cglobal h264_idct_add8_10_%1,5,7 | |||
| INIT_XMM | |||
| IDCT_ADD8 sse2 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| IDCT_ADD8 avx | |||
| %endif | |||
| @@ -411,7 +411,7 @@ IDCT_ADD8 avx | |||
| ; %1=int16_t *block, %2=int16_t *dstblock | |||
| %macro IDCT8_ADD_SSE_START 2 | |||
| IDCT8_1D_FULL %1 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| TRANSPOSE4x4D 0,1,2,3,8 | |||
| mova [%2 ], m0 | |||
| TRANSPOSE4x4D 4,5,6,7,8 | |||
| @@ -452,7 +452,7 @@ IDCT_ADD8 avx | |||
| %macro IDCT8_ADD 1 | |||
| cglobal h264_idct8_add_10_%1, 3,4,16 | |||
| %ifndef UNIX64 | |||
| %if UNIX64 == 0 | |||
| %assign pad 16-gprsize-(stack_offset&15) | |||
| sub rsp, pad | |||
| call h264_idct8_add1_10_%1 | |||
| @@ -467,7 +467,7 @@ h264_idct8_add1_10_%1: | |||
| sub rsp, pad | |||
| add dword [r1], 32 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| IDCT8_ADD_SSE_START r1, rsp | |||
| SWAP 1, 9 | |||
| SWAP 2, 10 | |||
| @@ -519,7 +519,7 @@ h264_idct8_add1_10_%1: | |||
| INIT_XMM | |||
| IDCT8_ADD sse2 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| IDCT8_ADD avx | |||
| %endif | |||
| @@ -559,7 +559,7 @@ cglobal h264_idct8_add4_10_%1, 0,7,16 | |||
| INIT_XMM | |||
| IDCT8_ADD4 sse2 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| IDCT8_ADD4 avx | |||
| %endif | |||
| @@ -348,7 +348,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2 | |||
| lea r3, [r0+r2*4-1] | |||
| add r4, r2 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define e_reg r11 | |||
| %else | |||
| %define e_reg r0 | |||
| @@ -369,7 +369,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2 | |||
| lea r5, [r5+r6*4] | |||
| movzx e_reg, byte [r3 ] | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movzx r10, byte [r4+r2 ] | |||
| sub r10, e_reg | |||
| %else | |||
| @@ -385,7 +385,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2 | |||
| movzx r4, byte [e_reg+r2 ] | |||
| movzx r6, byte [r3 ] | |||
| sub r6, r4 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| lea r6, [r10+r6*2] | |||
| lea r5, [r5+r6*2] | |||
| add r5, r6 | |||
| @@ -395,7 +395,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2 | |||
| %endif | |||
| movzx r4, byte [e_reg ] | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movzx r10, byte [r3 +r2 ] | |||
| sub r10, r4 | |||
| sub r5, r10 | |||
| @@ -409,7 +409,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2 | |||
| movzx r4, byte [e_reg+r1 ] | |||
| movzx r6, byte [r3 +r2*2] | |||
| sub r6, r4 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| add r6, r10 | |||
| %endif | |||
| lea r5, [r5+r6*8] | |||
| @@ -420,7 +420,7 @@ cglobal pred16x16_plane_%3_%1, 2, 7, %2 | |||
| lea r5, [r5+r6*4] | |||
| add r5, r6 ; sum of V coefficients | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| mov r0, r0m | |||
| %endif | |||
| @@ -641,7 +641,7 @@ cglobal pred8x8_plane_%1, 2, 7, %2 | |||
| lea r3, [r0 -1] | |||
| add r4, r2 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define e_reg r11 | |||
| %else | |||
| %define e_reg r0 | |||
| @@ -652,7 +652,7 @@ cglobal pred8x8_plane_%1, 2, 7, %2 | |||
| sub r5, e_reg | |||
| movzx e_reg, byte [r3 ] | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movzx r10, byte [r4+r2 ] | |||
| sub r10, e_reg | |||
| sub r5, r10 | |||
| @@ -666,7 +666,7 @@ cglobal pred8x8_plane_%1, 2, 7, %2 | |||
| movzx e_reg, byte [r3+r1 ] | |||
| movzx r6, byte [r4+r2*2 ] | |||
| sub r6, e_reg | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| add r6, r10 | |||
| %endif | |||
| lea r5, [r5+r6*4] | |||
| @@ -680,7 +680,7 @@ cglobal pred8x8_plane_%1, 2, 7, %2 | |||
| lea r5, [r5+r6*8] | |||
| sar r5, 5 | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| mov r0, r0m | |||
| %endif | |||
| @@ -84,7 +84,7 @@ INIT_XMM | |||
| PRED4x4_DR sse2 | |||
| %define PALIGNR PALIGNR_SSSE3 | |||
| PRED4x4_DR ssse3 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| PRED4x4_DR avx | |||
| %endif | |||
| @@ -124,7 +124,7 @@ INIT_XMM | |||
| PRED4x4_VR sse2 | |||
| %define PALIGNR PALIGNR_SSSE3 | |||
| PRED4x4_VR ssse3 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| PRED4x4_VR avx | |||
| %endif | |||
| @@ -167,7 +167,7 @@ INIT_XMM | |||
| PRED4x4_HD sse2 | |||
| %define PALIGNR PALIGNR_SSSE3 | |||
| PRED4x4_HD ssse3 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| PRED4x4_HD avx | |||
| %endif | |||
| @@ -238,7 +238,7 @@ cglobal pred4x4_down_left_10_%1, 3,3 | |||
| INIT_XMM | |||
| PRED4x4_DL sse2 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| PRED4x4_DL avx | |||
| %endif | |||
| @@ -267,7 +267,7 @@ cglobal pred4x4_vertical_left_10_%1, 3,3 | |||
| INIT_XMM | |||
| PRED4x4_VL sse2 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| PRED4x4_VL avx | |||
| %endif | |||
| @@ -577,7 +577,7 @@ cglobal pred8x8l_top_dc_10_%1, 4,4,6 | |||
| INIT_XMM | |||
| PRED8x8L_TOP_DC sse2 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| PRED8x8L_TOP_DC avx | |||
| %endif | |||
| @@ -636,7 +636,7 @@ cglobal pred8x8l_dc_10_%1, 4,6,6 | |||
| INIT_XMM | |||
| PRED8x8L_DC sse2 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| PRED8x8L_DC avx | |||
| %endif | |||
| @@ -671,7 +671,7 @@ cglobal pred8x8l_vertical_10_%1, 4,4,6 | |||
| INIT_XMM | |||
| PRED8x8L_VERTICAL sse2 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| PRED8x8L_VERTICAL avx | |||
| %endif | |||
| @@ -728,7 +728,7 @@ INIT_XMM | |||
| PRED8x8L_HORIZONTAL sse2 | |||
| %define PALIGNR PALIGNR_SSSE3 | |||
| PRED8x8L_HORIZONTAL ssse3 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| PRED8x8L_HORIZONTAL avx | |||
| %endif | |||
| @@ -797,7 +797,7 @@ INIT_XMM | |||
| PRED8x8L_DOWN_LEFT sse2 | |||
| %define PALIGNR PALIGNR_SSSE3 | |||
| PRED8x8L_DOWN_LEFT ssse3 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| PRED8x8L_DOWN_LEFT avx | |||
| %endif | |||
| @@ -872,7 +872,7 @@ INIT_XMM | |||
| PRED8x8L_DOWN_RIGHT sse2 | |||
| %define PALIGNR PALIGNR_SSSE3 | |||
| PRED8x8L_DOWN_RIGHT ssse3 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| PRED8x8L_DOWN_RIGHT avx | |||
| %endif | |||
| @@ -943,7 +943,7 @@ INIT_XMM | |||
| PRED8x8L_VERTICAL_RIGHT sse2 | |||
| %define PALIGNR PALIGNR_SSSE3 | |||
| PRED8x8L_VERTICAL_RIGHT ssse3 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| PRED8x8L_VERTICAL_RIGHT avx | |||
| %endif | |||
| @@ -1005,7 +1005,7 @@ INIT_XMM | |||
| PRED8x8L_HORIZONTAL_UP sse2 | |||
| %define PALIGNR PALIGNR_SSSE3 | |||
| PRED8x8L_HORIZONTAL_UP ssse3 | |||
| %ifdef HAVE_AVX | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| PRED8x8L_HORIZONTAL_UP avx | |||
| %endif | |||
| @@ -111,7 +111,7 @@ INIT_XMM | |||
| %endmacro | |||
| %macro MCAxA 8 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %ifnidn %1,mmxext | |||
| MCAxA_OP %1,%2,%3,%4,%5,%6,%7,%8 | |||
| %endif | |||
| @@ -122,7 +122,7 @@ MCAxA_OP %1,%2,%3,%4,%5,%6,%7,%8 | |||
| %macro MCAxA_OP 8 | |||
| cglobal %2_h264_qpel%5_%3_10_%1, %6,%7,%8 | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| call stub_%2_h264_qpel%4_%3_10_%1 | |||
| mov r0, r0m | |||
| mov r1, r1m | |||
| @@ -152,7 +152,7 @@ cglobal %2_h264_qpel%5_%3_10_%1, %6,%7,%8 | |||
| call stub_%2_h264_qpel%4_%3_10_%1 | |||
| lea r0, [r10+r2*%4+%4*2] | |||
| lea r1, [r11+r2*%4+%4*2] | |||
| %ifndef UNIX64 ; fall through to function | |||
| %if UNIX64 == 0 ; fall through to function | |||
| call stub_%2_h264_qpel%4_%3_10_%1 | |||
| RET | |||
| %endif | |||
| @@ -165,7 +165,7 @@ cglobal %2_h264_qpel%5_%3_10_%1, %6,%7,%8 | |||
| MCAxA %1, %2, %3, %4, i, %5,%6,%7 | |||
| cglobal %2_h264_qpel%4_%3_10_%1, %5,%6,%7 | |||
| %ifndef UNIX64 ; no prologue or epilogue for UNIX64 | |||
| %if UNIX64 == 0 ; no prologue or epilogue for UNIX64 | |||
| call stub_%2_h264_qpel%4_%3_10_%1 | |||
| RET | |||
| %endif | |||
| @@ -126,7 +126,7 @@ INIT_XMM | |||
| WEIGHT_FUNC_HALF_MM 8, 8, sse2 | |||
| %macro BIWEIGHT_SETUP 0 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define off_regd r11d | |||
| %else | |||
| %define off_regd r3d | |||
| @@ -244,7 +244,7 @@ INIT_XMM | |||
| BIWEIGHT_FUNC_HALF_MM 8, 8, sse2 | |||
| %macro BIWEIGHT_SSSE3_SETUP 0 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define off_regd r11d | |||
| %else | |||
| %define off_regd r3d | |||
| @@ -152,7 +152,7 @@ WEIGHT_FUNC_HALF_MM sse4 | |||
| ; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int height, | |||
| ; int log2_denom, int weightd, int weights, int offset); | |||
| ;----------------------------------------------------------------------------- | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| DECLARE_REG_TMP 3 | |||
| %else | |||
| DECLARE_REG_TMP 10 | |||
| @@ -219,13 +219,13 @@ cglobal imdct36_float, 4,4,9, out, buf, in, win | |||
| subps m5, m0, m3 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| SWAP m5, m8 | |||
| %endif | |||
| mulps m7, m2, [ps_val1] | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mulps m5, m8, [ps_val2] | |||
| %else | |||
| mulps m5, m5, [ps_val2] | |||
| @@ -235,7 +235,7 @@ cglobal imdct36_float, 4,4,9, out, buf, in, win | |||
| mulps m5, m6, [ps_val1] | |||
| subps m7, m7, m5 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| SWAP m5, m8 | |||
| %else | |||
| subps m5, m0, m3 | |||
| @@ -376,7 +376,7 @@ DEFINE_IMDCT | |||
| INIT_XMM sse | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define SPILL SWAP | |||
| %define UNSPILL SWAP | |||
| %define SPILLED(x) m %+ x | |||
| @@ -32,7 +32,7 @@ | |||
| %define W6sh2 8867 ; W6 = 35468 = 8867<<2 | |||
| %define W7sh2 4520 ; W7 = 18081 = 4520<<2 + 1 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| SECTION_RODATA | |||
| @@ -106,7 +106,7 @@ SECTION .text | |||
| INIT_MMX | |||
| cglobal vp3_v_loop_filter_mmx2, 3, 4 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movsxd r1, r1d | |||
| %endif | |||
| mov r3, r1 | |||
| @@ -123,7 +123,7 @@ cglobal vp3_v_loop_filter_mmx2, 3, 4 | |||
| RET | |||
| cglobal vp3_h_loop_filter_mmx2, 3, 4 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movsxd r1, r1d | |||
| %endif | |||
| lea r3, [r1*3] | |||
| @@ -510,7 +510,7 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4 | |||
| %define SHIFT(x) | |||
| %define ADD(x) | |||
| VP3_1D_IDCT_SSE2 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8 | |||
| %else | |||
| TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [%1], [%1+16] | |||
| @@ -530,7 +530,7 @@ cglobal vp3_idct_%1, 1, 1, %2 | |||
| cglobal vp3_idct_put_%1, 3, %3, %2 | |||
| VP3_IDCT_%1 r2 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mov r3, r2 | |||
| mov r2, r1 | |||
| mov r1, r0 | |||
| @@ -540,7 +540,7 @@ cglobal vp3_idct_put_%1, 3, %3, %2 | |||
| mov r1m, r0 | |||
| mov r2m, r1 | |||
| %endif | |||
| %ifdef WIN64 | |||
| %if WIN64 | |||
| call put_signed_pixels_clamped_mmx | |||
| RET | |||
| %else | |||
| @@ -549,7 +549,7 @@ cglobal vp3_idct_put_%1, 3, %3, %2 | |||
| cglobal vp3_idct_add_%1, 3, %3, %2 | |||
| VP3_IDCT_%1 r2 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| mov r3, r2 | |||
| mov r2, r1 | |||
| mov r1, r0 | |||
| @@ -559,7 +559,7 @@ cglobal vp3_idct_add_%1, 3, %3, %2 | |||
| mov r1m, r0 | |||
| mov r2m, r1 | |||
| %endif | |||
| %ifdef WIN64 | |||
| %if WIN64 | |||
| call add_pixels_clamped_mmx | |||
| RET | |||
| %else | |||
| @@ -567,7 +567,7 @@ cglobal vp3_idct_add_%1, 3, %3, %2 | |||
| %endif | |||
| %endmacro | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define REGS 4 | |||
| %else | |||
| %define REGS 3 | |||
| @@ -599,7 +599,7 @@ vp3_idct_funcs sse2, 9, REGS | |||
| INIT_MMX | |||
| cglobal vp3_idct_dc_add_mmx2, 3, 4 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movsxd r1, r1d | |||
| %endif | |||
| lea r3, [r1*3] | |||
| @@ -127,7 +127,7 @@ cglobal vp6_filter_diag4_%1, 5, 7, %2 | |||
| sub rsp, 8*15 | |||
| movq m6, [pw_64] | |||
| %endif | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movsxd r2, r2d | |||
| %endif | |||
| @@ -35,11 +35,13 @@ | |||
| %define program_name ff | |||
| %ifdef ARCH_X86_64 | |||
| %define UNIX64 0 | |||
| %define WIN64 0 | |||
| %if ARCH_X86_64 | |||
| %ifidn __OUTPUT_FORMAT__,win32 | |||
| %define WIN64 | |||
| %define WIN64 1 | |||
| %else | |||
| %define UNIX64 | |||
| %define UNIX64 1 | |||
| %endif | |||
| %endif | |||
| @@ -79,9 +81,9 @@ | |||
| %endif | |||
| %endmacro | |||
| %ifdef WIN64 | |||
| %if WIN64 | |||
| %define PIC | |||
| %elifndef ARCH_X86_64 | |||
| %elif !ARCH_X86_64 | |||
| ; x86_32 doesn't require PIC. | |||
| ; Some distros prefer shared objects to be PIC, but nothing breaks if | |||
| ; the code contains a few textrels, so we'll skip that complexity. | |||
| @@ -132,7 +134,7 @@ | |||
| %define r%1m %6 | |||
| %ifid %6 ; i.e. it's a register | |||
| %define r%1mp %2 | |||
| %elifdef ARCH_X86_64 ; memory | |||
| %elif ARCH_X86_64 ; memory | |||
| %define r%1mp qword %6 | |||
| %else | |||
| %define r%1mp dword %6 | |||
| @@ -149,7 +151,7 @@ | |||
| %define e%1w %1 | |||
| %define r%1b %2 | |||
| %define e%1b %2 | |||
| %ifndef ARCH_X86_64 | |||
| %if ARCH_X86_64 == 0 | |||
| %define r%1 e%1 | |||
| %endif | |||
| %endmacro | |||
| @@ -185,7 +187,7 @@ DECLARE_REG_SIZE bp, bpl | |||
| DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define gprsize 8 | |||
| %else | |||
| %define gprsize 4 | |||
| @@ -261,7 +263,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9 | |||
| %assign n_arg_names %%i | |||
| %endmacro | |||
| %ifdef WIN64 ; Windows x64 ;================================================= | |||
| %if WIN64 ; Windows x64 ;================================================= | |||
| DECLARE_REG 0, rcx, ecx, cx, cl, ecx | |||
| DECLARE_REG 1, rdx, edx, dx, dl, edx | |||
| @@ -346,7 +348,7 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56] | |||
| %endif | |||
| %endmacro | |||
| %elifdef ARCH_X86_64 ; *nix x64 ;============================================= | |||
| %elif ARCH_X86_64 ; *nix x64 ;============================================= | |||
| DECLARE_REG 0, rdi, edi, di, dil, edi | |||
| DECLARE_REG 1, rsi, esi, si, sil, esi | |||
| @@ -447,7 +449,7 @@ DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28] | |||
| %endif ;====================================================================== | |||
| %ifndef WIN64 | |||
| %if WIN64 == 0 | |||
| %macro WIN64_SPILL_XMM 1 | |||
| %endmacro | |||
| %macro WIN64_RESTORE_XMM 1 | |||
| @@ -617,7 +619,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits | |||
| %define RESET_MM_PERMUTATION INIT_XMM %1 | |||
| %define mmsize 16 | |||
| %define num_mmregs 8 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define num_mmregs 16 | |||
| %endif | |||
| %define mova movdqa | |||
| @@ -646,7 +648,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits | |||
| %define RESET_MM_PERMUTATION INIT_YMM %1 | |||
| %define mmsize 32 | |||
| %define num_mmregs 8 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| %define num_mmregs 16 | |||
| %endif | |||
| %define mova vmovaps | |||
| @@ -95,7 +95,7 @@ | |||
| %endmacro | |||
| %macro TRANSPOSE8x8W 9-11 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| SBUTTERFLY wd, %1, %2, %9 | |||
| SBUTTERFLY wd, %3, %4, %9 | |||
| SBUTTERFLY wd, %5, %6, %9 | |||
| @@ -64,7 +64,7 @@ SECTION .text | |||
| ; split the loop in an aligned and unaligned case | |||
| %macro YUYV_TO_Y_FN 2-3 | |||
| cglobal %2ToY, 3, 3, %1, dst, src, w | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movsxd wq, wd | |||
| %endif | |||
| add dstq, wq | |||
| @@ -134,7 +134,7 @@ cglobal %2ToY, 3, 3, %1, dst, src, w | |||
| ; split the loop in an aligned and unaligned case | |||
| %macro YUYV_TO_UV_FN 2-3 | |||
| cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movsxd wq, dword r4m | |||
| %else ; x86-32 | |||
| mov wq, r4m | |||
| @@ -189,7 +189,7 @@ cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w | |||
| ; %2 = nv12 or nv21 | |||
| %macro NVXX_TO_UV_FN 2 | |||
| cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movsxd wq, dword r4m | |||
| %else ; x86-32 | |||
| mov wq, r4m | |||
| @@ -215,7 +215,7 @@ cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w | |||
| %endif ; mmsize == 8/16 | |||
| %endmacro | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| INIT_MMX mmx | |||
| YUYV_TO_Y_FN 0, yuyv | |||
| YUYV_TO_Y_FN 0, uyvy | |||
| @@ -58,7 +58,7 @@ SECTION .text | |||
| %macro yuv2planeX_fn 3 | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| %define cntr_reg r1 | |||
| %define movsx mov | |||
| %else | |||
| @@ -72,7 +72,7 @@ cglobal yuv2planeX_%1, %3, 7, %2 | |||
| %endif ; %1 == 8/9/10 | |||
| %if %1 == 8 | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| %assign pad 0x2c - (stack_offset & 15) | |||
| SUB rsp, pad | |||
| %define m_dith m7 | |||
| @@ -91,7 +91,7 @@ cglobal yuv2planeX_%1, %3, 7, %2 | |||
| .no_rot: | |||
| %if mmsize == 16 | |||
| punpcklbw m_dith, m6 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| punpcklwd m8, m_dith, m6 | |||
| pslld m8, 12 | |||
| %else ; x86-32 | |||
| @@ -100,7 +100,7 @@ cglobal yuv2planeX_%1, %3, 7, %2 | |||
| %endif ; x86-32/64 | |||
| punpckhwd m_dith, m6 | |||
| pslld m_dith, 12 | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| mova [rsp+ 0], m5 | |||
| mova [rsp+16], m_dith | |||
| %endif | |||
| @@ -135,7 +135,7 @@ cglobal yuv2planeX_%1, %3, 7, %2 | |||
| %endif ; %1 == 8 | |||
| %if %1 == 8 | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| mova m2, [rsp+mmsize*(0+%%i)] | |||
| mova m1, [rsp+mmsize*(1+%%i)] | |||
| %else ; x86-64 | |||
| @@ -233,7 +233,7 @@ cglobal yuv2planeX_%1, %3, 7, %2 | |||
| jg .pixelloop | |||
| %if %1 == 8 | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| ADD rsp, pad | |||
| RET | |||
| %else ; x86-64 | |||
| @@ -245,7 +245,7 @@ cglobal yuv2planeX_%1, %3, 7, %2 | |||
| %endmacro | |||
| %define PALIGNR PALIGNR_MMX | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| INIT_MMX mmx2 | |||
| yuv2planeX_fn 8, 0, 7 | |||
| yuv2planeX_fn 9, 0, 5 | |||
| @@ -382,7 +382,7 @@ cglobal yuv2plane1_%1, %3, %3, %2 | |||
| REP_RET | |||
| %endmacro | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| INIT_MMX mmx | |||
| yuv2plane1_fn 8, 0, 5 | |||
| yuv2plane1_fn 16, 0, 3 | |||
| @@ -51,7 +51,7 @@ SECTION .text | |||
| ; SCALE_FUNC source_width, intermediate_nbits, filtersize, filtersuffix, opt, n_args, n_xmm | |||
| %macro SCALE_FUNC 7 | |||
| cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| movsxd r2, r2d | |||
| %endif ; x86-64 | |||
| %if %2 == 19 | |||
| @@ -237,7 +237,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||
| %else ; %4 == X || %4 == X8 | |||
| %define r6sub 0 | |||
| %endif ; %4 ==/!= X4 | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| push r12 | |||
| movsxd r6, r6d ; filterSize | |||
| lea r12, [r3+(r6-r6sub)*srcmul] ; &src[filterSize&~4] | |||
| @@ -384,7 +384,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||
| %ifnidn %3, X | |||
| REP_RET | |||
| %else ; %3 == X | |||
| %ifdef ARCH_X86_64 | |||
| %if ARCH_X86_64 | |||
| pop r12 | |||
| RET | |||
| %else ; x86-32 | |||
| @@ -419,7 +419,7 @@ SCALE_FUNCS 10, 19, %1, %3 | |||
| SCALE_FUNCS 16, 19, %1, %4 | |||
| %endmacro | |||
| %ifdef ARCH_X86_32 | |||
| %if ARCH_X86_32 | |||
| INIT_MMX | |||
| SCALE_FUNCS2 mmx, 0, 0, 0 | |||
| %endif | |||