Currently there is a wild mix of 3dn2/3dnow2/3dnowext. Switching to "3dnowext", which is a more common name of the CPU flag, as reported e.g. by the Linux kernel, unifies this.

tags/n1.0
| @@ -2358,9 +2358,9 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], | |||
| } | |||
| #if HAVE_6REGS | |||
| static void vector_fmul_window_3dnow2(float *dst, const float *src0, | |||
| const float *src1, const float *win, | |||
| int len) | |||
| static void vector_fmul_window_3dnowext(float *dst, const float *src0, | |||
| const float *src1, const float *win, | |||
| int len) | |||
| { | |||
| x86_reg i = -len * 4; | |||
| x86_reg j = len * 4 - 8; | |||
| @@ -2809,11 +2809,11 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, | |||
| #endif | |||
| } | |||
| static void dsputil_init_3dnow2(DSPContext *c, AVCodecContext *avctx, | |||
| int mm_flags) | |||
| static void dsputil_init_3dnowext(DSPContext *c, AVCodecContext *avctx, | |||
| int mm_flags) | |||
| { | |||
| #if HAVE_6REGS && HAVE_INLINE_ASM | |||
| c->vector_fmul_window = vector_fmul_window_3dnow2; | |||
| c->vector_fmul_window = vector_fmul_window_3dnowext; | |||
| #endif | |||
| } | |||
| @@ -3051,7 +3051,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) | |||
| dsputil_init_3dnow(c, avctx, mm_flags); | |||
| if (mm_flags & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) | |||
| dsputil_init_3dnow2(c, avctx, mm_flags); | |||
| dsputil_init_3dnowext(c, avctx, mm_flags); | |||
| if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) | |||
| dsputil_init_sse(c, avctx, mm_flags); | |||
| @@ -34,9 +34,9 @@ av_cold void ff_fft_init_mmx(FFTContext *s) | |||
| } | |||
| if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) { | |||
| /* 3DNowEx for K7 */ | |||
| s->imdct_calc = ff_imdct_calc_3dnow2; | |||
| s->imdct_half = ff_imdct_half_3dnow2; | |||
| s->fft_calc = ff_fft_calc_3dnow2; | |||
| s->imdct_calc = ff_imdct_calc_3dnowext; | |||
| s->imdct_half = ff_imdct_half_3dnowext; | |||
| s->fft_calc = ff_fft_calc_3dnowext; | |||
| } | |||
| #endif | |||
| if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) { | |||
| @@ -25,12 +25,12 @@ void ff_fft_permute_sse(FFTContext *s, FFTComplex *z); | |||
| void ff_fft_calc_avx(FFTContext *s, FFTComplex *z); | |||
| void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); | |||
| void ff_fft_calc_3dnow(FFTContext *s, FFTComplex *z); | |||
| void ff_fft_calc_3dnow2(FFTContext *s, FFTComplex *z); | |||
| void ff_fft_calc_3dnowext(FFTContext *s, FFTComplex *z); | |||
| void ff_imdct_calc_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_half_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_calc_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_half_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_calc_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_half_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input); | |||
| void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input); | |||
| @@ -93,14 +93,14 @@ cextern cos_ %+ i | |||
| SECTION_TEXT | |||
| %macro T2_3DN 4 ; z0, z1, mem0, mem1 | |||
| %macro T2_3DNOW 4 ; z0, z1, mem0, mem1 | |||
| mova %1, %3 | |||
| mova %2, %1 | |||
| pfadd %1, %4 | |||
| pfsub %2, %4 | |||
| %endmacro | |||
| %macro T4_3DN 6 ; z0, z1, z2, z3, tmp0, tmp1 | |||
| %macro T4_3DNOW 6 ; z0, z1, z2, z3, tmp0, tmp1 | |||
| mova %5, %3 | |||
| pfsub %3, %4 | |||
| pfadd %5, %4 ; {t6,t5} | |||
| @@ -444,13 +444,13 @@ fft16_sse: | |||
| ret | |||
| %macro FFT48_3DN 0 | |||
| %macro FFT48_3DNOW 0 | |||
| align 16 | |||
| fft4 %+ SUFFIX: | |||
| T2_3DN m0, m1, Z(0), Z(1) | |||
| T2_3DNOW m0, m1, Z(0), Z(1) | |||
| mova m2, Z(2) | |||
| mova m3, Z(3) | |||
| T4_3DN m0, m1, m2, m3, m4, m5 | |||
| T4_3DNOW m0, m1, m2, m3, m4, m5 | |||
| PUNPCK m0, m1, m4 | |||
| PUNPCK m2, m3, m5 | |||
| mova Z(0), m0 | |||
| @@ -461,14 +461,14 @@ fft4 %+ SUFFIX: | |||
| align 16 | |||
| fft8 %+ SUFFIX: | |||
| T2_3DN m0, m1, Z(0), Z(1) | |||
| T2_3DNOW m0, m1, Z(0), Z(1) | |||
| mova m2, Z(2) | |||
| mova m3, Z(3) | |||
| T4_3DN m0, m1, m2, m3, m4, m5 | |||
| T4_3DNOW m0, m1, m2, m3, m4, m5 | |||
| mova Z(0), m0 | |||
| mova Z(2), m2 | |||
| T2_3DN m4, m5, Z(4), Z(5) | |||
| T2_3DN m6, m7, Z2(6), Z2(7) | |||
| T2_3DNOW m4, m5, Z(4), Z(5) | |||
| T2_3DNOW m6, m7, Z2(6), Z2(7) | |||
| PSWAPD m0, m5 | |||
| PSWAPD m2, m7 | |||
| pxor m0, [ps_m1p1] | |||
| @@ -477,12 +477,12 @@ fft8 %+ SUFFIX: | |||
| pfadd m7, m2 | |||
| pfmul m5, [ps_root2] | |||
| pfmul m7, [ps_root2] | |||
| T4_3DN m1, m3, m5, m7, m0, m2 | |||
| T4_3DNOW m1, m3, m5, m7, m0, m2 | |||
| mova Z(5), m5 | |||
| mova Z2(7), m7 | |||
| mova m0, Z(0) | |||
| mova m2, Z(2) | |||
| T4_3DN m0, m2, m4, m6, m5, m7 | |||
| T4_3DNOW m0, m2, m4, m6, m5, m7 | |||
| PUNPCK m0, m1, m5 | |||
| PUNPCK m2, m3, m7 | |||
| mova Z(0), m0 | |||
| @@ -500,7 +500,7 @@ fft8 %+ SUFFIX: | |||
| %if ARCH_X86_32 | |||
| %macro PSWAPD 2 | |||
| %if cpuflag(3dnow2) | |||
| %if cpuflag(3dnowext) | |||
| pswapd %1, %2 | |||
| %elifidn %1, %2 | |||
| movd [r0+12], %1 | |||
| @@ -512,11 +512,11 @@ fft8 %+ SUFFIX: | |||
| %endif | |||
| %endmacro | |||
| INIT_MMX 3dnow2 | |||
| FFT48_3DN | |||
| INIT_MMX 3dnowext | |||
| FFT48_3DNOW | |||
| INIT_MMX 3dnow | |||
| FFT48_3DN | |||
| FFT48_3DNOW | |||
| %endif | |||
| %define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)] | |||
| @@ -633,7 +633,7 @@ cglobal fft_calc, 2,5,8 | |||
| %if ARCH_X86_32 | |||
| INIT_MMX 3dnow | |||
| FFT_CALC_FUNC | |||
| INIT_MMX 3dnow2 | |||
| INIT_MMX 3dnowext | |||
| FFT_CALC_FUNC | |||
| %endif | |||
| INIT_XMM sse | |||
| @@ -727,7 +727,7 @@ cglobal imdct_calc, 3,5,3 | |||
| %if ARCH_X86_32 | |||
| INIT_MMX 3dnow | |||
| IMDCT_CALC_FUNC | |||
| INIT_MMX 3dnow2 | |||
| INIT_MMX 3dnowext | |||
| IMDCT_CALC_FUNC | |||
| %endif | |||
| @@ -743,8 +743,8 @@ INIT_MMX 3dnow | |||
| %define unpckhps punpckhdq | |||
| DECL_PASS pass_3dnow, PASS_SMALL 1, [wq], [wq+o1q] | |||
| DECL_PASS pass_interleave_3dnow, PASS_BIG 0 | |||
| %define pass_3dnow2 pass_3dnow | |||
| %define pass_interleave_3dnow2 pass_interleave_3dnow | |||
| %define pass_3dnowext pass_3dnow | |||
| %define pass_interleave_3dnowext pass_interleave_3dnow | |||
| %endif | |||
| %ifdef PIC | |||
| @@ -813,7 +813,7 @@ DECL_FFT 5, _interleave | |||
| INIT_MMX 3dnow | |||
| DECL_FFT 4 | |||
| DECL_FFT 4, _interleave | |||
| INIT_MMX 3dnow2 | |||
| INIT_MMX 3dnowext | |||
| DECL_FFT 4 | |||
| DECL_FFT 4, _interleave | |||
| %endif | |||
| @@ -845,7 +845,7 @@ INIT_XMM sse | |||
| PSWAPD m5, m3 | |||
| pfmul m2, m3 | |||
| pfmul m6, m5 | |||
| %if cpuflag(3dnow2) | |||
| %if cpuflag(3dnowext) | |||
| pfpnacc m0, m4 | |||
| pfpnacc m2, m6 | |||
| %else | |||
| @@ -1018,7 +1018,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i | |||
| xor r4, r4 | |||
| sub r4, r3 | |||
| %endif | |||
| %if notcpuflag(3dnow2) && mmsize == 8 | |||
| %if notcpuflag(3dnowext) && mmsize == 8 | |||
| movd m7, [ps_m1m1m1m1] | |||
| %endif | |||
| .pre: | |||
| @@ -1102,7 +1102,7 @@ DECL_IMDCT POSROTATESHUF | |||
| INIT_MMX 3dnow | |||
| DECL_IMDCT POSROTATESHUF_3DNOW | |||
| INIT_MMX 3dnow2 | |||
| INIT_MMX 3dnowext | |||
| DECL_IMDCT POSROTATESHUF_3DNOW | |||
| %endif | |||
| @@ -249,7 +249,7 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2 | |||
| %macro PSWAPD_SSE 2 | |||
| pshufw %1, %2, 0x4e | |||
| %endmacro | |||
| %macro PSWAPD_3DN1 2 | |||
| %macro PSWAPD_3DNOW 2 | |||
| movq %1, %2 | |||
| psrlq %1, 32 | |||
| punpckldq %1, %2 | |||
| @@ -306,10 +306,10 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4, | |||
| %define pswapd PSWAPD_SSE | |||
| FLOAT_TO_INT16_INTERLEAVE6 sse | |||
| %define cvtps2pi pf2id | |||
| %define pswapd PSWAPD_3DN1 | |||
| %define pswapd PSWAPD_3DNOW | |||
| FLOAT_TO_INT16_INTERLEAVE6 3dnow | |||
| %undef pswapd | |||
| FLOAT_TO_INT16_INTERLEAVE6 3dn2 | |||
| FLOAT_TO_INT16_INTERLEAVE6 3dnowext | |||
| %undef cvtps2pi | |||
| ;----------------------------------------------------------------------------- | |||
| @@ -46,7 +46,7 @@ void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long l | |||
| void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); | |||
| void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); | |||
| void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); | |||
| void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src, int len); | |||
| #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse | |||
| @@ -74,9 +74,11 @@ FLOAT_TO_INT16_INTERLEAVE(3dnow) | |||
| FLOAT_TO_INT16_INTERLEAVE(sse) | |||
| FLOAT_TO_INT16_INTERLEAVE(sse2) | |||
| static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){ | |||
| static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src, | |||
| long len, int channels) | |||
| { | |||
| if(channels==6) | |||
| ff_float_to_int16_interleave6_3dn2(dst, src, len); | |||
| ff_float_to_int16_interleave6_3dnowext(dst, src, len); | |||
| else | |||
| float_to_int16_interleave_3dnow(dst, src, len, channels); | |||
| } | |||
| @@ -126,7 +128,7 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx) | |||
| } | |||
| if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) { | |||
| if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | |||
| c->float_to_int16_interleave = float_to_int16_interleave_3dn2; | |||
| c->float_to_int16_interleave = float_to_int16_interleave_3dnowext; | |||
| } | |||
| } | |||
| if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) { | |||
| @@ -557,7 +557,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits | |||
| %assign cpuflags_mmx (1<<0) | |||
| %assign cpuflags_mmx2 (1<<1) | cpuflags_mmx | |||
| %assign cpuflags_3dnow (1<<2) | cpuflags_mmx | |||
| %assign cpuflags_3dnow2 (1<<3) | cpuflags_3dnow | |||
| %assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow | |||
| %assign cpuflags_sse (1<<4) | cpuflags_mmx2 | |||
| %assign cpuflags_sse2 (1<<5) | cpuflags_sse | |||
| %assign cpuflags_sse2slow (1<<6) | cpuflags_sse2 | |||