| @@ -41,8 +41,8 @@ SECTION .text | |||||
| ; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs) | ; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs) | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| %macro AC3_EXPONENT_MIN 1 | |||||
| cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset | |||||
| %macro AC3_EXPONENT_MIN 0 | |||||
| cglobal ac3_exponent_min, 3, 4, 2, exp, reuse_blks, expn, offset | |||||
| shl reuse_blksq, 8 | shl reuse_blksq, 8 | ||||
| jz .end | jz .end | ||||
| LOOP_ALIGN | LOOP_ALIGN | ||||
| @@ -65,16 +65,17 @@ cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset | |||||
| %define PMINUB PMINUB_MMX | %define PMINUB PMINUB_MMX | ||||
| %define LOOP_ALIGN | %define LOOP_ALIGN | ||||
| INIT_MMX | |||||
| AC3_EXPONENT_MIN mmx | |||||
| INIT_MMX mmx | |||||
| AC3_EXPONENT_MIN | |||||
| %if HAVE_MMXEXT | %if HAVE_MMXEXT | ||||
| %define PMINUB PMINUB_MMXEXT | %define PMINUB PMINUB_MMXEXT | ||||
| %define LOOP_ALIGN ALIGN 16 | %define LOOP_ALIGN ALIGN 16 | ||||
| AC3_EXPONENT_MIN mmxext | |||||
| INIT_MMX mmxext | |||||
| AC3_EXPONENT_MIN | |||||
| %endif | %endif | ||||
| %if HAVE_SSE2_EXTERNAL | %if HAVE_SSE2_EXTERNAL | ||||
| INIT_XMM | |||||
| AC3_EXPONENT_MIN sse2 | |||||
| INIT_XMM sse2 | |||||
| AC3_EXPONENT_MIN | |||||
| %endif | %endif | ||||
| %undef PMINUB | %undef PMINUB | ||||
| %undef LOOP_ALIGN | %undef LOOP_ALIGN | ||||
| @@ -168,8 +169,8 @@ AC3_MAX_MSB_ABS_INT16 or_abs | |||||
| ; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32() | ; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32() | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| %macro AC3_SHIFT 4 ; l/r, 16/32, shift instruction, instruction set | |||||
| cglobal ac3_%1shift_int%2_%4, 3,3,5, src, len, shift | |||||
| %macro AC3_SHIFT 3 ; l/r, 16/32, shift instruction | |||||
| cglobal ac3_%1shift_int%2, 3, 3, 5, src, len, shift | |||||
| movd m0, shiftd | movd m0, shiftd | ||||
| .loop: | .loop: | ||||
| mova m1, [srcq ] | mova m1, [srcq ] | ||||
| @@ -195,19 +196,19 @@ cglobal ac3_%1shift_int%2_%4, 3,3,5, src, len, shift | |||||
| ; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift) | ; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift) | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| INIT_MMX | |||||
| AC3_SHIFT l, 16, psllw, mmx | |||||
| INIT_XMM | |||||
| AC3_SHIFT l, 16, psllw, sse2 | |||||
| INIT_MMX mmx | |||||
| AC3_SHIFT l, 16, psllw | |||||
| INIT_XMM sse2 | |||||
| AC3_SHIFT l, 16, psllw | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift) | ; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift) | ||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| INIT_MMX | |||||
| AC3_SHIFT r, 32, psrad, mmx | |||||
| INIT_XMM | |||||
| AC3_SHIFT r, 32, psrad, sse2 | |||||
| INIT_MMX mmx | |||||
| AC3_SHIFT r, 32, psrad | |||||
| INIT_XMM sse2 | |||||
| AC3_SHIFT r, 32, psrad | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len) | ; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len) | ||||
| @@ -215,8 +216,8 @@ AC3_SHIFT r, 32, psrad, sse2 | |||||
| ; The 3DNow! version is not bit-identical because pf2id uses truncation rather | ; The 3DNow! version is not bit-identical because pf2id uses truncation rather | ||||
| ; than round-to-nearest. | ; than round-to-nearest. | ||||
| INIT_MMX | |||||
| cglobal float_to_fixed24_3dnow, 3,3,0, dst, src, len | |||||
| INIT_MMX 3dnow | |||||
| cglobal float_to_fixed24, 3, 3, 0, dst, src, len | |||||
| movq m0, [pf_1_24] | movq m0, [pf_1_24] | ||||
| .loop: | .loop: | ||||
| movq m1, [srcq ] | movq m1, [srcq ] | ||||
| @@ -242,8 +243,8 @@ cglobal float_to_fixed24_3dnow, 3,3,0, dst, src, len | |||||
| femms | femms | ||||
| RET | RET | ||||
| INIT_XMM | |||||
| cglobal float_to_fixed24_sse, 3,3,3, dst, src, len | |||||
| INIT_XMM sse | |||||
| cglobal float_to_fixed24, 3, 3, 3, dst, src, len | |||||
| movaps m0, [pf_1_24] | movaps m0, [pf_1_24] | ||||
| .loop: | .loop: | ||||
| movaps m1, [srcq ] | movaps m1, [srcq ] | ||||
| @@ -267,8 +268,8 @@ cglobal float_to_fixed24_sse, 3,3,3, dst, src, len | |||||
| emms | emms | ||||
| RET | RET | ||||
| INIT_XMM | |||||
| cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len | |||||
| INIT_XMM sse2 | |||||
| cglobal float_to_fixed24, 3, 3, 9, dst, src, len | |||||
| movaps m0, [pf_1_24] | movaps m0, [pf_1_24] | ||||
| .loop: | .loop: | ||||
| movaps m1, [srcq ] | movaps m1, [srcq ] | ||||
| @@ -332,8 +333,8 @@ cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len | |||||
| paddd %1, %2 | paddd %1, %2 | ||||
| %endmacro | %endmacro | ||||
| INIT_XMM | |||||
| cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum | |||||
| INIT_XMM sse2 | |||||
| cglobal ac3_compute_mantissa_size, 1, 2, 4, mant_cnt, sum | |||||
| movdqa m0, [mant_cntq ] | movdqa m0, [mant_cntq ] | ||||
| movdqa m1, [mant_cntq+ 1*16] | movdqa m1, [mant_cntq+ 1*16] | ||||
| paddw m0, [mant_cntq+ 2*16] | paddw m0, [mant_cntq+ 2*16] | ||||
| @@ -373,20 +374,20 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum | |||||
| ; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs) | ; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs) | ||||
| ;------------------------------------------------------------------------------ | ;------------------------------------------------------------------------------ | ||||
| %macro PABSD_MMX 2 ; src/dst, tmp | |||||
| %macro PABSD 1-2 ; src/dst, tmp (tmp is unused when cpuflag(ssse3)) | |||||
| %if cpuflag(ssse3) | |||||
| pabsd %1, %1 | |||||
| %else ; src/dst, tmp | |||||
| pxor %2, %2 | pxor %2, %2 | ||||
| pcmpgtd %2, %1 | pcmpgtd %2, %1 | ||||
| pxor %1, %2 | pxor %1, %2 | ||||
| psubd %1, %2 | psubd %1, %2 | ||||
| %endmacro | |||||
| %macro PABSD_SSSE3 1-2 ; src/dst, unused | |||||
| pabsd %1, %1 | |||||
| %endif | |||||
| %endmacro | %endmacro | ||||
| %if HAVE_AMD3DNOW_EXTERNAL | %if HAVE_AMD3DNOW_EXTERNAL | ||||
| INIT_MMX | |||||
| cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len | |||||
| INIT_MMX 3dnow | |||||
| cglobal ac3_extract_exponents, 3, 3, 0, exp, coef, len | |||||
| add expq, lenq | add expq, lenq | ||||
| lea coefq, [coefq+4*lenq] | lea coefq, [coefq+4*lenq] | ||||
| neg lenq | neg lenq | ||||
| @@ -395,8 +396,8 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len | |||||
| .loop: | .loop: | ||||
| movq m0, [coefq+4*lenq ] | movq m0, [coefq+4*lenq ] | ||||
| movq m1, [coefq+4*lenq+8] | movq m1, [coefq+4*lenq+8] | ||||
| PABSD_MMX m0, m2 | |||||
| PABSD_MMX m1, m2 | |||||
| PABSD m0, m2 | |||||
| PABSD m1, m2 | |||||
| pslld m0, 1 | pslld m0, 1 | ||||
| por m0, m3 | por m0, m3 | ||||
| pi2fd m2, m0 | pi2fd m2, m0 | ||||
| @@ -420,8 +421,8 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len | |||||
| REP_RET | REP_RET | ||||
| %endif | %endif | ||||
| %macro AC3_EXTRACT_EXPONENTS 1 | |||||
| cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len | |||||
| %macro AC3_EXTRACT_EXPONENTS 0 | |||||
| cglobal ac3_extract_exponents, 3, 3, 4, exp, coef, len | |||||
| add expq, lenq | add expq, lenq | ||||
| lea coefq, [coefq+4*lenq] | lea coefq, [coefq+4*lenq] | ||||
| neg lenq | neg lenq | ||||
| @@ -453,11 +454,10 @@ cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len | |||||
| %endmacro | %endmacro | ||||
| %if HAVE_SSE2_EXTERNAL | %if HAVE_SSE2_EXTERNAL | ||||
| INIT_XMM | |||||
| %define PABSD PABSD_MMX | |||||
| AC3_EXTRACT_EXPONENTS sse2 | |||||
| %if HAVE_SSSE3_EXTERNAL | |||||
| %define PABSD PABSD_SSSE3 | |||||
| AC3_EXTRACT_EXPONENTS ssse3 | |||||
| INIT_XMM sse2 | |||||
| AC3_EXTRACT_EXPONENTS | |||||
| %endif | %endif | ||||
| %if HAVE_SSSE3_EXTERNAL | |||||
| INIT_XMM ssse3 | |||||
| AC3_EXTRACT_EXPONENTS | |||||
| %endif | %endif | ||||