An assembler able to cope with AVX instructions is now required. (tags/n1.1)
| @@ -192,7 +192,6 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000 | |||
| INIT_YMM avx | |||
| SECTION_TEXT | |||
| %if HAVE_AVX_EXTERNAL | |||
| ; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in) | |||
| cglobal dct32_float, 2,3,8, out, in, tmp | |||
| ; pass 1 | |||
| @@ -265,7 +264,6 @@ cglobal dct32_float, 2,3,8, out, in, tmp | |||
| INIT_XMM | |||
| PASS6_AND_PERMUTE | |||
| RET | |||
| %endif | |||
| %if ARCH_X86_64 | |||
| %define SPILL SWAP | |||
| @@ -1168,10 +1168,8 @@ ALIGN 16 | |||
| INIT_XMM sse | |||
| VECTOR_FMUL_REVERSE | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_YMM avx | |||
| VECTOR_FMUL_REVERSE | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; vector_fmul_add(float *dst, const float *src0, const float *src1, | |||
| @@ -1198,10 +1196,8 @@ ALIGN 16 | |||
| INIT_XMM sse | |||
| VECTOR_FMUL_ADD | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_YMM avx | |||
| VECTOR_FMUL_ADD | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void ff_butterflies_float_interleave(float *dst, const float *src0, | |||
| @@ -1244,10 +1240,8 @@ cglobal butterflies_float_interleave, 4,4,3, dst, src0, src1, len | |||
| INIT_XMM sse | |||
| BUTTERFLIES_FLOAT_INTERLEAVE | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_YMM avx | |||
| BUTTERFLIES_FLOAT_INTERLEAVE | |||
| %endif | |||
| INIT_XMM sse2 | |||
| ; %1 = aligned/unaligned | |||
| @@ -305,7 +305,6 @@ IF%1 mova Z(1), m5 | |||
| INIT_YMM avx | |||
| %if HAVE_AVX_EXTERNAL | |||
| align 16 | |||
| fft8_avx: | |||
| mova m0, Z(0) | |||
| @@ -394,7 +393,6 @@ fft32_interleave_avx: | |||
| sub r2d, mmsize/4 | |||
| jg .deint_loop | |||
| ret | |||
| %endif | |||
| INIT_XMM sse | |||
| @@ -539,7 +537,6 @@ DEFINE_ARGS zc, w, n, o1, o3 | |||
| INIT_YMM avx | |||
| %if HAVE_AVX_EXTERNAL | |||
| %macro INTERL_AVX 5 | |||
| vunpckhps %3, %2, %1 | |||
| vunpcklps %2, %2, %1 | |||
| @@ -561,7 +558,6 @@ cglobal fft_calc, 2,5,8 | |||
| FFT_DISPATCH _interleave %+ SUFFIX, r1 | |||
| REP_RET | |||
| %endif | |||
| INIT_XMM sse | |||
| @@ -780,11 +776,9 @@ align 8 | |||
| dispatch_tab %+ fullsuffix: pointer list_of_fft | |||
| %endmacro ; DECL_FFT | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_YMM avx | |||
| DECL_FFT 6 | |||
| DECL_FFT 6, _interleave | |||
| %endif | |||
| INIT_XMM sse | |||
| DECL_FFT 5 | |||
| DECL_FFT 5, _interleave | |||
| @@ -1086,7 +1080,4 @@ DECL_IMDCT POSROTATESHUF_3DNOW | |||
| %endif | |||
| INIT_YMM avx | |||
| %if HAVE_AVX_EXTERNAL | |||
| DECL_IMDCT POSROTATESHUF_AVX | |||
| %endif | |||
| @@ -251,10 +251,8 @@ cglobal %1_h264_chroma_mc2_10, 6,7 | |||
| %define CHROMAMC_AVG NOTHING | |||
| INIT_XMM sse2 | |||
| CHROMA_MC8 put | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CHROMA_MC8 put | |||
| %endif | |||
| INIT_MMX mmx2 | |||
| CHROMA_MC4 put | |||
| CHROMA_MC2 put | |||
| @@ -262,10 +260,8 @@ CHROMA_MC2 put | |||
| %define CHROMAMC_AVG AVG | |||
| INIT_XMM sse2 | |||
| CHROMA_MC8 avg | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CHROMA_MC8 avg | |||
| %endif | |||
| INIT_MMX mmx2 | |||
| CHROMA_MC4 avg | |||
| CHROMA_MC2 avg | |||
| @@ -79,10 +79,8 @@ cglobal h264_idct_add_10, 3,3 | |||
| INIT_XMM sse2 | |||
| IDCT_ADD_10 | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| IDCT_ADD_10 | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; h264_idct_add16(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8]) | |||
| @@ -109,11 +107,9 @@ add4x4_idct %+ SUFFIX: | |||
| INIT_XMM sse2 | |||
| ALIGN 16 | |||
| ADD4x4IDCT | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| ALIGN 16 | |||
| ADD4x4IDCT | |||
| %endif | |||
| %macro ADD16_OP 2 | |||
| cmp byte [r4+%2], 0 | |||
| @@ -149,10 +145,8 @@ cglobal h264_idct_add16_10, 5,6 | |||
| INIT_XMM sse2 | |||
| IDCT_ADD16_10 | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| IDCT_ADD16_10 | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void h264_idct_dc_add(pixel *dst, dctcoef *block, int stride) | |||
| @@ -215,10 +209,8 @@ cglobal h264_idct8_dc_add_10,3,3,7 | |||
| INIT_XMM sse2 | |||
| IDCT8_DC_ADD | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| IDCT8_DC_ADD | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; h264_idct_add16intra(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8]) | |||
| @@ -286,10 +278,8 @@ cglobal h264_idct_add16intra_10,5,7,8 | |||
| INIT_XMM sse2 | |||
| IDCT_ADD16INTRA_10 | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| IDCT_ADD16INTRA_10 | |||
| %endif | |||
| %assign last_block 36 | |||
| ;----------------------------------------------------------------------------- | |||
| @@ -323,10 +313,8 @@ cglobal h264_idct_add8_10,5,8,7 | |||
| INIT_XMM sse2 | |||
| IDCT_ADD8 | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| IDCT_ADD8 | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void h264_idct8_add(pixel *dst, dctcoef *block, int stride) | |||
| @@ -500,10 +488,8 @@ h264_idct8_add1_10 %+ SUFFIX: | |||
| INIT_XMM sse2 | |||
| IDCT8_ADD | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| IDCT8_ADD | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; h264_idct8_add4(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8]) | |||
| @@ -540,7 +526,5 @@ cglobal h264_idct8_add4_10, 0,7,16 | |||
| INIT_XMM sse2 | |||
| IDCT8_ADD4 | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| IDCT8_ADD4 | |||
| %endif | |||
| @@ -82,10 +82,8 @@ INIT_XMM sse2 | |||
| PRED4x4_DR | |||
| INIT_XMM ssse3 | |||
| PRED4x4_DR | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| PRED4x4_DR | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void pred4x4_vertical_right(pixel *src, const pixel *topright, int stride) | |||
| @@ -121,10 +119,8 @@ INIT_XMM sse2 | |||
| PRED4x4_VR | |||
| INIT_XMM ssse3 | |||
| PRED4x4_VR | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| PRED4x4_VR | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void pred4x4_horizontal_down(pixel *src, const pixel *topright, int stride) | |||
| @@ -163,10 +159,8 @@ INIT_XMM sse2 | |||
| PRED4x4_HD | |||
| INIT_XMM ssse3 | |||
| PRED4x4_HD | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| PRED4x4_HD | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void pred4x4_dc(pixel *src, const pixel *topright, int stride) | |||
| @@ -234,10 +228,8 @@ cglobal pred4x4_down_left_10, 3, 3 | |||
| INIT_XMM sse2 | |||
| PRED4x4_DL | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| PRED4x4_DL | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void pred4x4_vertical_left(pixel *src, const pixel *topright, int stride) | |||
| @@ -263,10 +255,8 @@ cglobal pred4x4_vertical_left_10, 3, 3 | |||
| INIT_XMM sse2 | |||
| PRED4x4_VL | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| PRED4x4_VL | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void pred4x4_horizontal_up(pixel *src, const pixel *topright, int stride) | |||
| @@ -573,10 +563,8 @@ cglobal pred8x8l_top_dc_10, 4, 4, 6 | |||
| INIT_XMM sse2 | |||
| PRED8x8L_TOP_DC | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| PRED8x8L_TOP_DC | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ;void pred8x8l_dc(pixel *src, int has_topleft, int has_topright, int stride) | |||
| @@ -632,10 +620,8 @@ cglobal pred8x8l_dc_10, 4, 6, 6 | |||
| INIT_XMM sse2 | |||
| PRED8x8L_DC | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| PRED8x8L_DC | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void pred8x8l_vertical(pixel *src, int has_topleft, int has_topright, int stride) | |||
| @@ -667,10 +653,8 @@ cglobal pred8x8l_vertical_10, 4, 4, 6 | |||
| INIT_XMM sse2 | |||
| PRED8x8L_VERTICAL | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| PRED8x8L_VERTICAL | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void pred8x8l_horizontal(uint8_t *src, int has_topleft, int has_topright, int stride) | |||
| @@ -723,10 +707,8 @@ INIT_XMM sse2 | |||
| PRED8x8L_HORIZONTAL | |||
| INIT_XMM ssse3 | |||
| PRED8x8L_HORIZONTAL | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| PRED8x8L_HORIZONTAL | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ;void pred8x8l_down_left(pixel *src, int has_topleft, int has_topright, int stride) | |||
| @@ -791,10 +773,8 @@ INIT_XMM sse2 | |||
| PRED8x8L_DOWN_LEFT | |||
| INIT_XMM ssse3 | |||
| PRED8x8L_DOWN_LEFT | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| PRED8x8L_DOWN_LEFT | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ;void pred8x8l_down_right(pixel *src, int has_topleft, int has_topright, int stride) | |||
| @@ -865,10 +845,8 @@ INIT_XMM sse2 | |||
| PRED8x8L_DOWN_RIGHT | |||
| INIT_XMM ssse3 | |||
| PRED8x8L_DOWN_RIGHT | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| PRED8x8L_DOWN_RIGHT | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void pred8x8l_vertical_right(pixel *src, int has_topleft, int has_topright, int stride) | |||
| @@ -935,10 +913,8 @@ INIT_XMM sse2 | |||
| PRED8x8L_VERTICAL_RIGHT | |||
| INIT_XMM ssse3 | |||
| PRED8x8L_VERTICAL_RIGHT | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| PRED8x8L_VERTICAL_RIGHT | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void pred8x8l_horizontal_up(pixel *src, int has_topleft, int has_topright, int stride) | |||
| @@ -996,10 +972,8 @@ INIT_XMM sse2 | |||
| PRED8x8L_HORIZONTAL_UP | |||
| INIT_XMM ssse3 | |||
| PRED8x8L_HORIZONTAL_UP | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| PRED8x8L_HORIZONTAL_UP | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| @@ -155,10 +155,8 @@ cglobal conv_s32_to_flt, 3,3,3, dst, src, len | |||
| INIT_XMM sse2 | |||
| CONV_S32_TO_FLT | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_YMM avx | |||
| CONV_S32_TO_FLT | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_conv_flt_to_s16(int16_t *dst, const float *src, int len); | |||
| @@ -228,10 +226,8 @@ cglobal conv_flt_to_s32, 3,3,6, dst, src, len | |||
| INIT_XMM sse2 | |||
| CONV_FLT_TO_S32 | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_YMM avx | |||
| CONV_FLT_TO_S32 | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_conv_s16p_to_s16_2ch(int16_t *dst, int16_t *const *src, int len, | |||
| @@ -265,10 +261,8 @@ cglobal conv_s16p_to_s16_2ch, 3,4,5, dst, src0, len, src1 | |||
| INIT_XMM sse2 | |||
| CONV_S16P_TO_S16_2CH | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CONV_S16P_TO_S16_2CH | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_conv_s16p_to_s16_6ch(int16_t *dst, int16_t *const *src, int len, | |||
| @@ -388,10 +382,8 @@ INIT_XMM sse2 | |||
| CONV_S16P_TO_S16_6CH | |||
| INIT_XMM sse2slow | |||
| CONV_S16P_TO_S16_6CH | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CONV_S16P_TO_S16_6CH | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_conv_s16p_to_flt_2ch(float *dst, int16_t *const *src, int len, | |||
| @@ -437,10 +429,8 @@ cglobal conv_s16p_to_flt_2ch, 3,4,6, dst, src0, len, src1 | |||
| INIT_XMM sse2 | |||
| CONV_S16P_TO_FLT_2CH | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CONV_S16P_TO_FLT_2CH | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_conv_s16p_to_flt_6ch(float *dst, int16_t *const *src, int len, | |||
| @@ -541,10 +531,8 @@ INIT_XMM sse2 | |||
| CONV_S16P_TO_FLT_6CH | |||
| INIT_XMM ssse3 | |||
| CONV_S16P_TO_FLT_6CH | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CONV_S16P_TO_FLT_6CH | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_conv_fltp_to_s16_2ch(int16_t *dst, float *const *src, int len, | |||
| @@ -697,10 +685,8 @@ INIT_MMX sse | |||
| CONV_FLTP_TO_S16_6CH | |||
| INIT_XMM sse2 | |||
| CONV_FLTP_TO_S16_6CH | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CONV_FLTP_TO_S16_6CH | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_conv_fltp_to_flt_2ch(float *dst, float *const *src, int len, | |||
| @@ -734,10 +720,8 @@ cglobal conv_fltp_to_flt_2ch, 3,4,5, dst, src0, len, src1 | |||
| INIT_XMM sse | |||
| CONV_FLTP_TO_FLT_2CH | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CONV_FLTP_TO_FLT_2CH | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len, | |||
| @@ -815,10 +799,8 @@ INIT_MMX mmx | |||
| CONV_FLTP_TO_FLT_6CH | |||
| INIT_XMM sse4 | |||
| CONV_FLTP_TO_FLT_6CH | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CONV_FLTP_TO_FLT_6CH | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_conv_s16_to_s16p_2ch(int16_t *const *dst, int16_t *src, int len, | |||
| @@ -864,10 +846,8 @@ INIT_XMM sse2 | |||
| CONV_S16_TO_S16P_2CH | |||
| INIT_XMM ssse3 | |||
| CONV_S16_TO_S16P_2CH | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CONV_S16_TO_S16P_2CH | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_conv_s16_to_s16p_6ch(int16_t *const *dst, int16_t *src, int len, | |||
| @@ -923,10 +903,8 @@ INIT_XMM sse2 | |||
| CONV_S16_TO_S16P_6CH | |||
| INIT_XMM ssse3 | |||
| CONV_S16_TO_S16P_6CH | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CONV_S16_TO_S16P_6CH | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_conv_s16_to_fltp_2ch(float *const *dst, int16_t *src, int len, | |||
| @@ -961,10 +939,8 @@ cglobal conv_s16_to_fltp_2ch, 3,4,5, dst0, src, len, dst1 | |||
| INIT_XMM sse2 | |||
| CONV_S16_TO_FLTP_2CH | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CONV_S16_TO_FLTP_2CH | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_conv_s16_to_fltp_6ch(float *const *dst, int16_t *src, int len, | |||
| @@ -1042,10 +1018,8 @@ INIT_XMM ssse3 | |||
| CONV_S16_TO_FLTP_6CH | |||
| INIT_XMM sse4 | |||
| CONV_S16_TO_FLTP_6CH | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CONV_S16_TO_FLTP_6CH | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_conv_flt_to_s16p_2ch(int16_t *const *dst, float *src, int len, | |||
| @@ -1088,10 +1062,8 @@ cglobal conv_flt_to_s16p_2ch, 3,4,6, dst0, src, len, dst1 | |||
| INIT_XMM sse2 | |||
| CONV_FLT_TO_S16P_2CH | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CONV_FLT_TO_S16P_2CH | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_conv_flt_to_s16p_6ch(int16_t *const *dst, float *src, int len, | |||
| @@ -1160,10 +1132,8 @@ INIT_XMM sse2 | |||
| CONV_FLT_TO_S16P_6CH | |||
| INIT_XMM ssse3 | |||
| CONV_FLT_TO_S16P_6CH | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CONV_FLT_TO_S16P_6CH | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_conv_flt_to_fltp_2ch(float *const *dst, float *src, int len, | |||
| @@ -1192,10 +1162,8 @@ cglobal conv_flt_to_fltp_2ch, 3,4,3, dst0, src, len, dst1 | |||
| INIT_XMM sse | |||
| CONV_FLT_TO_FLTP_2CH | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CONV_FLT_TO_FLTP_2CH | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_conv_flt_to_fltp_6ch(float *const *dst, float *src, int len, | |||
| @@ -1255,7 +1223,5 @@ cglobal conv_flt_to_fltp_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5 | |||
| INIT_XMM sse2 | |||
| CONV_FLT_TO_FLTP_6CH | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| CONV_FLT_TO_FLTP_6CH | |||
| %endif | |||
| @@ -55,10 +55,8 @@ cglobal mix_2_to_1_fltp_flt, 3,4,6, src, matrix, len, src1 | |||
| INIT_XMM sse | |||
| MIX_2_TO_1_FLTP_FLT | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_YMM avx | |||
| MIX_2_TO_1_FLTP_FLT | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void ff_mix_2_to_1_s16p_flt(int16_t **src, float **matrix, int len, | |||
| @@ -174,10 +172,8 @@ cglobal mix_1_to_2_fltp_flt, 3,5,4, src0, matrix0, len, src1, matrix1 | |||
| INIT_XMM sse | |||
| MIX_1_TO_2_FLTP_FLT | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_YMM avx | |||
| MIX_1_TO_2_FLTP_FLT | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void ff_mix_1_to_2_s16p_flt(int16_t **src, float **matrix, int len, | |||
| @@ -221,10 +217,8 @@ INIT_XMM sse2 | |||
| MIX_1_TO_2_S16P_FLT | |||
| INIT_XMM sse4 | |||
| MIX_1_TO_2_S16P_FLT | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_XMM avx | |||
| MIX_1_TO_2_S16P_FLT | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void ff_mix_3_8_to_1_2_fltp/s16p_flt(float/int16_t **src, float **matrix, | |||
| @@ -489,7 +483,6 @@ cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, s | |||
| MIX_3_8_TO_1_2_FLT %%i, 1, s16p | |||
| MIX_3_8_TO_1_2_FLT %%i, 2, s16p | |||
| ; do not use ymm AVX or FMA4 in x86-32 for 6 or more channels due to stack alignment issues | |||
| %if HAVE_AVX_EXTERNAL | |||
| %if ARCH_X86_64 || %%i < 6 | |||
| INIT_YMM avx | |||
| %else | |||
| @@ -500,7 +493,6 @@ cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, s | |||
| INIT_XMM avx | |||
| MIX_3_8_TO_1_2_FLT %%i, 1, s16p | |||
| MIX_3_8_TO_1_2_FLT %%i, 2, s16p | |||
| %endif | |||
| %if HAVE_FMA4_EXTERNAL | |||
| %if ARCH_X86_64 || %%i < 6 | |||
| INIT_YMM fma4 | |||
| @@ -44,10 +44,8 @@ ALIGN 16 | |||
| INIT_XMM sse | |||
| VECTOR_FMUL | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_YMM avx | |||
| VECTOR_FMUL | |||
| %endif | |||
| ;------------------------------------------------------------------------------ | |||
| ; void ff_vector_fmac_scalar(float *dst, const float *src, float mul, int len) | |||
| @@ -85,7 +83,5 @@ cglobal vector_fmac_scalar, 4,4,3, dst, src, mul, len | |||
| INIT_XMM sse | |||
| VECTOR_FMAC_SCALAR | |||
| %if HAVE_AVX_EXTERNAL | |||
| INIT_YMM avx | |||
| VECTOR_FMAC_SCALAR | |||
| %endif | |||