This separates code relying on inline assembly from code relying on external assembly and fixes instances where the coalesced check was incorrect.
tags/n1.0
@@ -20,6 +20,7 @@ | |||||
*/ | */ | ||||
#include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "dsputil_mmx.h" | #include "dsputil_mmx.h" | ||||
#include "libavcodec/ac3dsp.h" | #include "libavcodec/ac3dsp.h" | ||||
@@ -50,29 +51,28 @@ extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_c | |||||
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) | av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) | ||||
{ | { | ||||
#if HAVE_YASM | |||||
int mm_flags = av_get_cpu_flags(); | int mm_flags = av_get_cpu_flags(); | ||||
if (mm_flags & AV_CPU_FLAG_MMX) { | |||||
if (EXTERNAL_MMX(mm_flags)) { | |||||
c->ac3_exponent_min = ff_ac3_exponent_min_mmx; | c->ac3_exponent_min = ff_ac3_exponent_min_mmx; | ||||
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx; | c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx; | ||||
c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx; | c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx; | ||||
c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx; | c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx; | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) { | |||||
if (EXTERNAL_AMD3DNOW(mm_flags)) { | |||||
c->extract_exponents = ff_ac3_extract_exponents_3dnow; | c->extract_exponents = ff_ac3_extract_exponents_3dnow; | ||||
if (!bit_exact) { | if (!bit_exact) { | ||||
c->float_to_fixed24 = ff_float_to_fixed24_3dnow; | c->float_to_fixed24 = ff_float_to_fixed24_3dnow; | ||||
} | } | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) { | |||||
if (EXTERNAL_MMXEXT(mm_flags)) { | |||||
c->ac3_exponent_min = ff_ac3_exponent_min_mmxext; | c->ac3_exponent_min = ff_ac3_exponent_min_mmxext; | ||||
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2; | c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2; | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { | |||||
if (EXTERNAL_SSE(mm_flags)) { | |||||
c->float_to_fixed24 = ff_float_to_fixed24_sse; | c->float_to_fixed24 = ff_float_to_fixed24_sse; | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { | |||||
if (EXTERNAL_SSE2(mm_flags)) { | |||||
c->ac3_exponent_min = ff_ac3_exponent_min_sse2; | c->ac3_exponent_min = ff_ac3_exponent_min_sse2; | ||||
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2; | c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2; | ||||
c->float_to_fixed24 = ff_float_to_fixed24_sse2; | c->float_to_fixed24 = ff_float_to_fixed24_sse2; | ||||
@@ -83,11 +83,10 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) | |||||
c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2; | c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2; | ||||
} | } | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) { | |||||
if (EXTERNAL_SSSE3(mm_flags)) { | |||||
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3; | c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3; | ||||
if (!(mm_flags & AV_CPU_FLAG_ATOM)) { | if (!(mm_flags & AV_CPU_FLAG_ATOM)) { | ||||
c->extract_exponents = ff_ac3_extract_exponents_ssse3; | c->extract_exponents = ff_ac3_extract_exponents_ssse3; | ||||
} | } | ||||
} | } | ||||
#endif | |||||
} | } |
@@ -24,6 +24,7 @@ | |||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
#include "libavcodec/mpegvideo.h" | #include "libavcodec/mpegvideo.h" | ||||
#include "libavcodec/mathops.h" | #include "libavcodec/mathops.h" | ||||
@@ -1180,17 +1181,16 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) | |||||
} | } | ||||
#endif /* HAVE_INLINE_ASM */ | #endif /* HAVE_INLINE_ASM */ | ||||
#if HAVE_YASM | |||||
if (mm_flags & AV_CPU_FLAG_MMX) { | |||||
if (EXTERNAL_MMX(mm_flags)) { | |||||
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx; | c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx; | ||||
c->hadamard8_diff[1] = ff_hadamard8_diff_mmx; | c->hadamard8_diff[1] = ff_hadamard8_diff_mmx; | ||||
if (mm_flags & AV_CPU_FLAG_MMXEXT) { | |||||
if (EXTERNAL_MMXEXT(mm_flags)) { | |||||
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx2; | c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx2; | ||||
c->hadamard8_diff[1] = ff_hadamard8_diff_mmx2; | c->hadamard8_diff[1] = ff_hadamard8_diff_mmx2; | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSE2){ | |||||
if (EXTERNAL_SSE2(mm_flags)) { | |||||
c->sse[0] = ff_sse16_sse2; | c->sse[0] = ff_sse16_sse2; | ||||
#if HAVE_ALIGNED_STACK | #if HAVE_ALIGNED_STACK | ||||
@@ -1199,14 +1199,11 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) | |||||
#endif | #endif | ||||
} | } | ||||
#if HAVE_SSSE3 && HAVE_ALIGNED_STACK | |||||
if (mm_flags & AV_CPU_FLAG_SSSE3) { | |||||
if (EXTERNAL_SSSE3(mm_flags) && HAVE_ALIGNED_STACK) { | |||||
c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3; | c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3; | ||||
c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3; | c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3; | ||||
} | } | ||||
#endif | |||||
} | } | ||||
#endif /* HAVE_YASM */ | |||||
ff_dsputil_init_pix_mmx(c, avctx); | ff_dsputil_init_pix_mmx(c, avctx); | ||||
} | } |
@@ -17,29 +17,29 @@ | |||||
*/ | */ | ||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
#include "libavcodec/dct.h" | #include "libavcodec/dct.h" | ||||
#include "fft.h" | #include "fft.h" | ||||
av_cold void ff_fft_init_mmx(FFTContext *s) | av_cold void ff_fft_init_mmx(FFTContext *s) | ||||
{ | { | ||||
#if HAVE_YASM | |||||
int has_vectors = av_get_cpu_flags(); | int has_vectors = av_get_cpu_flags(); | ||||
#if ARCH_X86_32 | #if ARCH_X86_32 | ||||
if (has_vectors & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) { | |||||
if (EXTERNAL_AMD3DNOW(has_vectors)) { | |||||
/* 3DNow! for K6-2/3 */ | /* 3DNow! for K6-2/3 */ | ||||
s->imdct_calc = ff_imdct_calc_3dnow; | s->imdct_calc = ff_imdct_calc_3dnow; | ||||
s->imdct_half = ff_imdct_half_3dnow; | s->imdct_half = ff_imdct_half_3dnow; | ||||
s->fft_calc = ff_fft_calc_3dnow; | s->fft_calc = ff_fft_calc_3dnow; | ||||
} | } | ||||
if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) { | |||||
if (EXTERNAL_AMD3DNOWEXT(has_vectors)) { | |||||
/* 3DNowEx for K7 */ | /* 3DNowEx for K7 */ | ||||
s->imdct_calc = ff_imdct_calc_3dnowext; | s->imdct_calc = ff_imdct_calc_3dnowext; | ||||
s->imdct_half = ff_imdct_half_3dnowext; | s->imdct_half = ff_imdct_half_3dnowext; | ||||
s->fft_calc = ff_fft_calc_3dnowext; | s->fft_calc = ff_fft_calc_3dnowext; | ||||
} | } | ||||
#endif | #endif | ||||
if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) { | |||||
if (EXTERNAL_SSE(has_vectors)) { | |||||
/* SSE for P3/P4/K8 */ | /* SSE for P3/P4/K8 */ | ||||
s->imdct_calc = ff_imdct_calc_sse; | s->imdct_calc = ff_imdct_calc_sse; | ||||
s->imdct_half = ff_imdct_half_sse; | s->imdct_half = ff_imdct_half_sse; | ||||
@@ -47,26 +47,23 @@ av_cold void ff_fft_init_mmx(FFTContext *s) | |||||
s->fft_calc = ff_fft_calc_sse; | s->fft_calc = ff_fft_calc_sse; | ||||
s->fft_permutation = FF_FFT_PERM_SWAP_LSBS; | s->fft_permutation = FF_FFT_PERM_SWAP_LSBS; | ||||
} | } | ||||
if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX && s->nbits >= 5) { | |||||
if (EXTERNAL_AVX(has_vectors) && s->nbits >= 5) { | |||||
/* AVX for SB */ | /* AVX for SB */ | ||||
s->imdct_half = ff_imdct_half_avx; | s->imdct_half = ff_imdct_half_avx; | ||||
s->fft_calc = ff_fft_calc_avx; | s->fft_calc = ff_fft_calc_avx; | ||||
s->fft_permutation = FF_FFT_PERM_AVX; | s->fft_permutation = FF_FFT_PERM_AVX; | ||||
} | } | ||||
#endif | |||||
} | } | ||||
#if CONFIG_DCT | #if CONFIG_DCT | ||||
av_cold void ff_dct_init_mmx(DCTContext *s) | av_cold void ff_dct_init_mmx(DCTContext *s) | ||||
{ | { | ||||
#if HAVE_YASM | |||||
int has_vectors = av_get_cpu_flags(); | int has_vectors = av_get_cpu_flags(); | ||||
if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) | |||||
if (EXTERNAL_SSE(has_vectors)) | |||||
s->dct32 = ff_dct32_float_sse; | s->dct32 = ff_dct32_float_sse; | ||||
if (has_vectors & AV_CPU_FLAG_SSE2 && HAVE_SSE) | |||||
if (EXTERNAL_SSE2(has_vectors)) | |||||
s->dct32 = ff_dct32_float_sse2; | s->dct32 = ff_dct32_float_sse2; | ||||
if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX) | |||||
if (EXTERNAL_AVX(has_vectors)) | |||||
s->dct32 = ff_dct32_float_avx; | s->dct32 = ff_dct32_float_avx; | ||||
#endif | |||||
} | } | ||||
#endif | #endif |
@@ -24,6 +24,7 @@ | |||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavcodec/fmtconvert.h" | #include "libavcodec/fmtconvert.h" | ||||
#include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
@@ -117,27 +118,27 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx) | |||||
#if HAVE_YASM | #if HAVE_YASM | ||||
int mm_flags = av_get_cpu_flags(); | int mm_flags = av_get_cpu_flags(); | ||||
if (mm_flags & AV_CPU_FLAG_MMX) { | |||||
if (EXTERNAL_MMX(mm_flags)) { | |||||
c->float_interleave = float_interleave_mmx; | c->float_interleave = float_interleave_mmx; | ||||
if (HAVE_AMD3DNOW && mm_flags & AV_CPU_FLAG_3DNOW) { | |||||
if (EXTERNAL_AMD3DNOW(mm_flags)) { | |||||
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | ||||
c->float_to_int16 = ff_float_to_int16_3dnow; | c->float_to_int16 = ff_float_to_int16_3dnow; | ||||
c->float_to_int16_interleave = float_to_int16_interleave_3dnow; | c->float_to_int16_interleave = float_to_int16_interleave_3dnow; | ||||
} | } | ||||
} | } | ||||
if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) { | |||||
if (EXTERNAL_AMD3DNOWEXT(mm_flags)) { | |||||
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | ||||
c->float_to_int16_interleave = float_to_int16_interleave_3dnowext; | c->float_to_int16_interleave = float_to_int16_interleave_3dnowext; | ||||
} | } | ||||
} | } | ||||
if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) { | |||||
if (EXTERNAL_SSE(mm_flags)) { | |||||
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse; | c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse; | ||||
c->float_to_int16 = ff_float_to_int16_sse; | c->float_to_int16 = ff_float_to_int16_sse; | ||||
c->float_to_int16_interleave = float_to_int16_interleave_sse; | c->float_to_int16_interleave = float_to_int16_interleave_sse; | ||||
c->float_interleave = float_interleave_sse; | c->float_interleave = float_interleave_sse; | ||||
} | } | ||||
if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE2) { | |||||
if (EXTERNAL_SSE2(mm_flags)) { | |||||
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2; | c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2; | ||||
c->float_to_int16 = ff_float_to_int16_sse2; | c->float_to_int16 = ff_float_to_int16_sse2; | ||||
c->float_to_int16_interleave = float_to_int16_interleave_sse2; | c->float_to_int16_interleave = float_to_int16_interleave_sse2; | ||||
@@ -19,6 +19,7 @@ | |||||
*/ | */ | ||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavcodec/h264pred.h" | #include "libavcodec/h264pred.h" | ||||
#define PRED4x4(TYPE, DEPTH, OPT) \ | #define PRED4x4(TYPE, DEPTH, OPT) \ | ||||
@@ -169,11 +170,10 @@ void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int s | |||||
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc) | void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc) | ||||
{ | { | ||||
#if HAVE_YASM | |||||
int mm_flags = av_get_cpu_flags(); | int mm_flags = av_get_cpu_flags(); | ||||
if (bit_depth == 8) { | if (bit_depth == 8) { | ||||
if (mm_flags & AV_CPU_FLAG_MMX) { | |||||
if (EXTERNAL_MMX(mm_flags)) { | |||||
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_mmx; | h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_mmx; | ||||
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx; | h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx; | ||||
if (chroma_format_idc == 1) { | if (chroma_format_idc == 1) { | ||||
@@ -198,7 +198,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
} | } | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_MMXEXT) { | |||||
if (EXTERNAL_MMXEXT(mm_flags)) { | |||||
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx2; | h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx2; | ||||
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmx2; | h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmx2; | ||||
if (chroma_format_idc == 1) | if (chroma_format_idc == 1) | ||||
@@ -250,11 +250,11 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
} | } | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSE) { | |||||
if (EXTERNAL_SSE(mm_flags)) { | |||||
h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse; | h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse; | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSE2) { | |||||
if (EXTERNAL_SSE2(mm_flags)) { | |||||
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2; | h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2; | ||||
h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2; | h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2; | ||||
h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_sse2; | h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_sse2; | ||||
@@ -277,7 +277,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
} | } | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSSE3) { | |||||
if (EXTERNAL_SSSE3(mm_flags)) { | |||||
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3; | h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3; | ||||
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3; | h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3; | ||||
if (chroma_format_idc == 1) | if (chroma_format_idc == 1) | ||||
@@ -308,7 +308,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
} | } | ||||
} | } | ||||
} else if (bit_depth == 10) { | } else if (bit_depth == 10) { | ||||
if (mm_flags & AV_CPU_FLAG_MMXEXT) { | |||||
if (EXTERNAL_MMXEXT(mm_flags)) { | |||||
h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext; | h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext; | ||||
h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext; | h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext; | ||||
@@ -324,7 +324,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_mmxext; | h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_mmxext; | ||||
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_mmxext; | h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_mmxext; | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSE2) { | |||||
if (EXTERNAL_SSE2(mm_flags)) { | |||||
h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2; | h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2; | ||||
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2; | h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2; | ||||
h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_sse2; | h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_sse2; | ||||
@@ -356,7 +356,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_sse2; | h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_sse2; | ||||
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_sse2; | h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_sse2; | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSSE3) { | |||||
if (EXTERNAL_SSSE3(mm_flags)) { | |||||
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3; | h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3; | ||||
h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_ssse3; | h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_ssse3; | ||||
h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_ssse3; | h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_ssse3; | ||||
@@ -367,8 +367,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_ssse3; | h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_ssse3; | ||||
h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_ssse3; | h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_ssse3; | ||||
} | } | ||||
#if HAVE_AVX | |||||
if (mm_flags & AV_CPU_FLAG_AVX) { | |||||
if (EXTERNAL_AVX(mm_flags)) { | |||||
h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx; | h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx; | ||||
h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx; | h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx; | ||||
h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_avx; | h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_avx; | ||||
@@ -384,7 +383,5 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_avx; | h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_avx; | ||||
h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_avx; | h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_avx; | ||||
} | } | ||||
#endif /* HAVE_AVX */ | |||||
} | } | ||||
#endif /* HAVE_YASM */ | |||||
} | } |
@@ -20,6 +20,7 @@ | |||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavcodec/h264dsp.h" | #include "libavcodec/h264dsp.h" | ||||
#include "dsputil_mmx.h" | #include "dsputil_mmx.h" | ||||
@@ -209,14 +210,13 @@ H264_BIWEIGHT_10_SSE(4, 10) | |||||
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, | void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, | ||||
const int chroma_format_idc) | const int chroma_format_idc) | ||||
{ | { | ||||
#if HAVE_YASM | |||||
int mm_flags = av_get_cpu_flags(); | int mm_flags = av_get_cpu_flags(); | ||||
if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMXEXT) | |||||
if (chroma_format_idc == 1 && EXTERNAL_MMXEXT(mm_flags)) | |||||
c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmx2; | c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmx2; | ||||
if (bit_depth == 8) { | if (bit_depth == 8) { | ||||
if (mm_flags & AV_CPU_FLAG_MMX) { | |||||
if (EXTERNAL_MMX(mm_flags)) { | |||||
c->h264_idct_dc_add = | c->h264_idct_dc_add = | ||||
c->h264_idct_add = ff_h264_idct_add_8_mmx; | c->h264_idct_add = ff_h264_idct_add_8_mmx; | ||||
c->h264_idct8_dc_add = | c->h264_idct8_dc_add = | ||||
@@ -230,7 +230,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, | |||||
if (mm_flags & AV_CPU_FLAG_CMOV) | if (mm_flags & AV_CPU_FLAG_CMOV) | ||||
c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx; | c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx; | ||||
if (mm_flags & AV_CPU_FLAG_MMXEXT) { | |||||
if (EXTERNAL_MMXEXT(mm_flags)) { | |||||
c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmx2; | c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmx2; | ||||
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2; | c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2; | ||||
c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2; | c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2; | ||||
@@ -259,7 +259,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, | |||||
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmx2; | c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmx2; | ||||
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmx2; | c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmx2; | ||||
if (mm_flags & AV_CPU_FLAG_SSE2) { | |||||
if (EXTERNAL_SSE2(mm_flags)) { | |||||
c->h264_idct8_add = ff_h264_idct8_add_8_sse2; | c->h264_idct8_add = ff_h264_idct8_add_8_sse2; | ||||
c->h264_idct_add16 = ff_h264_idct_add16_8_sse2; | c->h264_idct_add16 = ff_h264_idct_add16_8_sse2; | ||||
@@ -282,23 +282,21 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, | |||||
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2; | c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2; | ||||
#endif /* HAVE_ALIGNED_STACK */ | #endif /* HAVE_ALIGNED_STACK */ | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSSE3) { | |||||
if (EXTERNAL_SSSE3(mm_flags)) { | |||||
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3; | c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3; | ||||
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3; | c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3; | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_AVX) { | |||||
#if HAVE_ALIGNED_STACK | |||||
if (EXTERNAL_AVX(mm_flags) && HAVE_ALIGNED_STACK) { | |||||
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx; | c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx; | ||||
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx; | c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx; | ||||
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx; | c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx; | ||||
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx; | c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx; | ||||
#endif /* HAVE_ALIGNED_STACK */ | |||||
} | } | ||||
} | } | ||||
} | } | ||||
} else if (bit_depth == 10) { | } else if (bit_depth == 10) { | ||||
if (mm_flags & AV_CPU_FLAG_MMX) { | |||||
if (mm_flags & AV_CPU_FLAG_MMXEXT) { | |||||
if (EXTERNAL_MMX(mm_flags)) { | |||||
if (EXTERNAL_MMXEXT(mm_flags)) { | |||||
#if ARCH_X86_32 | #if ARCH_X86_32 | ||||
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_mmx2; | c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_mmx2; | ||||
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmx2; | c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmx2; | ||||
@@ -308,7 +306,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, | |||||
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmx2; | c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmx2; | ||||
#endif /* ARCH_X86_32 */ | #endif /* ARCH_X86_32 */ | ||||
c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmx2; | c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmx2; | ||||
if (mm_flags & AV_CPU_FLAG_SSE2) { | |||||
if (EXTERNAL_SSE2(mm_flags)) { | |||||
c->h264_idct_add = ff_h264_idct_add_10_sse2; | c->h264_idct_add = ff_h264_idct_add_10_sse2; | ||||
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2; | c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2; | ||||
@@ -338,7 +336,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, | |||||
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2; | c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2; | ||||
#endif /* HAVE_ALIGNED_STACK */ | #endif /* HAVE_ALIGNED_STACK */ | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSE4) { | |||||
if (EXTERNAL_SSE4(mm_flags)) { | |||||
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4; | c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4; | ||||
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4; | c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4; | ||||
c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4; | c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4; | ||||
@@ -347,8 +345,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, | |||||
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4; | c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4; | ||||
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4; | c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4; | ||||
} | } | ||||
#if HAVE_AVX_EXTERNAL | |||||
if (mm_flags & AV_CPU_FLAG_AVX) { | |||||
if (EXTERNAL_AVX(mm_flags)) { | |||||
c->h264_idct_dc_add = | c->h264_idct_dc_add = | ||||
c->h264_idct_add = ff_h264_idct_add_10_avx; | c->h264_idct_add = ff_h264_idct_add_10_avx; | ||||
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx; | c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx; | ||||
@@ -371,9 +368,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, | |||||
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx; | c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx; | ||||
#endif /* HAVE_ALIGNED_STACK */ | #endif /* HAVE_ALIGNED_STACK */ | ||||
} | } | ||||
#endif /* HAVE_AVX_EXTERNAL */ | |||||
} | } | ||||
} | } | ||||
} | } | ||||
#endif /* HAVE_YASM */ | |||||
} | } |
@@ -21,6 +21,7 @@ | |||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
#include "libavcodec/mpegaudiodsp.h" | #include "libavcodec/mpegaudiodsp.h" | ||||
@@ -247,18 +248,16 @@ void ff_mpadsp_init_mmx(MPADSPContext *s) | |||||
#endif /* HAVE_SSE2_INLINE */ | #endif /* HAVE_SSE2_INLINE */ | ||||
#if HAVE_YASM | #if HAVE_YASM | ||||
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { | |||||
if (EXTERNAL_AVX(mm_flags)) { | |||||
s->imdct36_blocks_float = imdct36_blocks_avx; | s->imdct36_blocks_float = imdct36_blocks_avx; | ||||
#if HAVE_SSE | |||||
} else if (mm_flags & AV_CPU_FLAG_SSSE3) { | |||||
} else if (EXTERNAL_SSSE3(mm_flags)) { | |||||
s->imdct36_blocks_float = imdct36_blocks_ssse3; | s->imdct36_blocks_float = imdct36_blocks_ssse3; | ||||
} else if (mm_flags & AV_CPU_FLAG_SSE3) { | |||||
} else if (EXTERNAL_SSE3(mm_flags)) { | |||||
s->imdct36_blocks_float = imdct36_blocks_sse3; | s->imdct36_blocks_float = imdct36_blocks_sse3; | ||||
} else if (mm_flags & AV_CPU_FLAG_SSE2) { | |||||
} else if (EXTERNAL_SSE2(mm_flags)) { | |||||
s->imdct36_blocks_float = imdct36_blocks_sse2; | s->imdct36_blocks_float = imdct36_blocks_sse2; | ||||
} else if (mm_flags & AV_CPU_FLAG_SSE) { | |||||
} else if (EXTERNAL_SSE(mm_flags)) { | |||||
s->imdct36_blocks_float = imdct36_blocks_sse; | s->imdct36_blocks_float = imdct36_blocks_sse; | ||||
#endif /* HAVE_SSE */ | |||||
} | } | ||||
#endif /* HAVE_YASM */ | #endif /* HAVE_YASM */ | ||||
} | } |
@@ -21,6 +21,7 @@ | |||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavcodec/avcodec.h" | #include "libavcodec/avcodec.h" | ||||
#include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
#include "libavcodec/mpegvideo.h" | #include "libavcodec/mpegvideo.h" | ||||
@@ -86,19 +87,19 @@ void ff_MPV_encode_init_x86(MpegEncContext *s) | |||||
if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) { | if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) { | ||||
#if HAVE_MMX_INLINE | #if HAVE_MMX_INLINE | ||||
if (mm_flags & AV_CPU_FLAG_MMX && HAVE_MMX) | |||||
if (INLINE_MMX(mm_flags)) | |||||
s->dct_quantize = dct_quantize_MMX; | s->dct_quantize = dct_quantize_MMX; | ||||
#endif | #endif | ||||
#if HAVE_MMXEXT_INLINE | #if HAVE_MMXEXT_INLINE | ||||
if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) | |||||
if (INLINE_MMXEXT(mm_flags)) | |||||
s->dct_quantize = dct_quantize_MMX2; | s->dct_quantize = dct_quantize_MMX2; | ||||
#endif | #endif | ||||
#if HAVE_SSE2_INLINE | #if HAVE_SSE2_INLINE | ||||
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE2) | |||||
if (INLINE_SSE2(mm_flags)) | |||||
s->dct_quantize = dct_quantize_SSE2; | s->dct_quantize = dct_quantize_SSE2; | ||||
#endif | #endif | ||||
#if HAVE_SSSE3_INLINE | #if HAVE_SSSE3_INLINE | ||||
if (mm_flags & AV_CPU_FLAG_SSSE3) | |||||
if (INLINE_SSSE3(mm_flags)) | |||||
s->dct_quantize = dct_quantize_SSSE3; | s->dct_quantize = dct_quantize_SSSE3; | ||||
#endif | #endif | ||||
} | } | ||||
@@ -20,7 +20,7 @@ | |||||
*/ | */ | ||||
#include "libavutil/common.h" | #include "libavutil/common.h" | ||||
#include "libavutil/cpu.h" | |||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavcodec/pngdsp.h" | #include "libavcodec/pngdsp.h" | ||||
void ff_add_png_paeth_prediction_mmx2 (uint8_t *dst, uint8_t *src, | void ff_add_png_paeth_prediction_mmx2 (uint8_t *dst, uint8_t *src, | ||||
@@ -34,18 +34,16 @@ void ff_add_bytes_l2_sse2(uint8_t *dst, uint8_t *src1, | |||||
void ff_pngdsp_init_x86(PNGDSPContext *dsp) | void ff_pngdsp_init_x86(PNGDSPContext *dsp) | ||||
{ | { | ||||
#if HAVE_YASM | |||||
int flags = av_get_cpu_flags(); | int flags = av_get_cpu_flags(); | ||||
#if ARCH_X86_32 | #if ARCH_X86_32 | ||||
if (flags & AV_CPU_FLAG_MMX) | |||||
if (EXTERNAL_MMX(flags)) | |||||
dsp->add_bytes_l2 = ff_add_bytes_l2_mmx; | dsp->add_bytes_l2 = ff_add_bytes_l2_mmx; | ||||
#endif | #endif | ||||
if (flags & AV_CPU_FLAG_MMXEXT) | |||||
if (EXTERNAL_MMXEXT(flags)) | |||||
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2; | dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2; | ||||
if (flags & AV_CPU_FLAG_SSE2) | |||||
if (EXTERNAL_SSE2(flags)) | |||||
dsp->add_bytes_l2 = ff_add_bytes_l2_sse2; | dsp->add_bytes_l2 = ff_add_bytes_l2_sse2; | ||||
if (flags & AV_CPU_FLAG_SSSE3) | |||||
if (EXTERNAL_SSSE3(flags)) | |||||
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3; | dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3; | ||||
#endif | |||||
} | } |
@@ -20,6 +20,7 @@ | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
*/ | */ | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavcodec/proresdsp.h" | #include "libavcodec/proresdsp.h" | ||||
void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize, | void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize, | ||||
@@ -31,24 +32,22 @@ void ff_prores_idct_put_10_avx (uint16_t *dst, int linesize, | |||||
void ff_proresdsp_x86_init(ProresDSPContext *dsp) | void ff_proresdsp_x86_init(ProresDSPContext *dsp) | ||||
{ | { | ||||
#if ARCH_X86_64 && HAVE_YASM | |||||
#if ARCH_X86_64 | |||||
int flags = av_get_cpu_flags(); | int flags = av_get_cpu_flags(); | ||||
if (flags & AV_CPU_FLAG_SSE2) { | |||||
if (EXTERNAL_SSE2(flags)) { | |||||
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | ||||
dsp->idct_put = ff_prores_idct_put_10_sse2; | dsp->idct_put = ff_prores_idct_put_10_sse2; | ||||
} | } | ||||
if (flags & AV_CPU_FLAG_SSE4) { | |||||
if (EXTERNAL_SSE4(flags)) { | |||||
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | ||||
dsp->idct_put = ff_prores_idct_put_10_sse4; | dsp->idct_put = ff_prores_idct_put_10_sse4; | ||||
} | } | ||||
#if HAVE_AVX | |||||
if (flags & AV_CPU_FLAG_AVX) { | |||||
if (EXTERNAL_AVX(flags)) { | |||||
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; | ||||
dsp->idct_put = ff_prores_idct_put_10_avx; | dsp->idct_put = ff_prores_idct_put_10_avx; | ||||
} | } | ||||
#endif /* HAVE_AVX */ | |||||
#endif /* ARCH_X86_64 && HAVE_YASM */ | |||||
#endif /* ARCH_X86_64 */ | |||||
} | } |
@@ -21,6 +21,7 @@ | |||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
#include "libavcodec/rv34dsp.h" | #include "libavcodec/rv34dsp.h" | ||||
@@ -32,16 +33,14 @@ void ff_rv34_idct_add_mmx2(uint8_t *dst, ptrdiff_t stride, DCTELEM *block); | |||||
av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp) | av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp) | ||||
{ | { | ||||
#if HAVE_YASM | |||||
int mm_flags = av_get_cpu_flags(); | int mm_flags = av_get_cpu_flags(); | ||||
if (mm_flags & AV_CPU_FLAG_MMX) | |||||
if (EXTERNAL_MMX(mm_flags)) | |||||
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx; | c->rv34_idct_dc_add = ff_rv34_idct_dc_add_mmx; | ||||
if (mm_flags & AV_CPU_FLAG_MMXEXT) { | |||||
if (EXTERNAL_MMXEXT(mm_flags)) { | |||||
c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmx2; | c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmx2; | ||||
c->rv34_idct_add = ff_rv34_idct_add_mmx2; | c->rv34_idct_add = ff_rv34_idct_add_mmx2; | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSE4) | |||||
if (EXTERNAL_SSE4(mm_flags)) | |||||
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4; | c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4; | ||||
#endif /* HAVE_YASM */ | |||||
} | } |
@@ -28,6 +28,7 @@ | |||||
#include "libavcodec/rv34dsp.h" | #include "libavcodec/rv34dsp.h" | ||||
#include "libavutil/mem.h" | #include "libavutil/mem.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "dsputil_mmx.h" | #include "dsputil_mmx.h" | ||||
#if HAVE_YASM | #if HAVE_YASM | ||||
@@ -191,7 +192,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) | |||||
#if HAVE_YASM | #if HAVE_YASM | ||||
int mm_flags = av_get_cpu_flags(); | int mm_flags = av_get_cpu_flags(); | ||||
if (mm_flags & AV_CPU_FLAG_MMX) { | |||||
if (EXTERNAL_MMX(mm_flags)) { | |||||
c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_mmx; | c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_mmx; | ||||
c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_mmx; | c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_mmx; | ||||
#if HAVE_INLINE_ASM | #if HAVE_INLINE_ASM | ||||
@@ -204,7 +205,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) | |||||
QPEL_MC_SET(put_, _mmx) | QPEL_MC_SET(put_, _mmx) | ||||
#endif | #endif | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_MMXEXT) { | |||||
if (EXTERNAL_MMXEXT(mm_flags)) { | |||||
c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2; | c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2; | ||||
c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2; | c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2; | ||||
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmx2; | c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmx2; | ||||
@@ -214,14 +215,14 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) | |||||
#if ARCH_X86_32 | #if ARCH_X86_32 | ||||
QPEL_MC_SET(avg_, _mmx2) | QPEL_MC_SET(avg_, _mmx2) | ||||
#endif | #endif | ||||
} else if (mm_flags & AV_CPU_FLAG_3DNOW) { | |||||
} else if (EXTERNAL_AMD3DNOW(mm_flags)) { | |||||
c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_3dnow; | c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_3dnow; | ||||
c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow; | c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow; | ||||
#if ARCH_X86_32 | #if ARCH_X86_32 | ||||
QPEL_MC_SET(avg_, _3dnow) | QPEL_MC_SET(avg_, _3dnow) | ||||
#endif | #endif | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSE2) { | |||||
if (EXTERNAL_SSE2(mm_flags)) { | |||||
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2; | c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2; | ||||
c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2; | c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2; | ||||
c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2; | c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2; | ||||
@@ -229,7 +230,7 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) | |||||
QPEL_MC_SET(put_, _sse2) | QPEL_MC_SET(put_, _sse2) | ||||
QPEL_MC_SET(avg_, _sse2) | QPEL_MC_SET(avg_, _sse2) | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSSE3) { | |||||
if (EXTERNAL_SSSE3(mm_flags)) { | |||||
c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3; | c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3; | ||||
c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3; | c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3; | ||||
c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3; | c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3; | ||||
@@ -21,6 +21,7 @@ | |||||
#include "config.h" | #include "config.h" | ||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavcodec/sbrdsp.h" | #include "libavcodec/sbrdsp.h" | ||||
float ff_sbr_sum_square_sse(float (*x)[2], int n); | float ff_sbr_sum_square_sse(float (*x)[2], int n); | ||||
@@ -29,12 +30,10 @@ void ff_sbr_hf_g_filt_sse(float (*Y)[2], const float (*X_high)[40][2], | |||||
void ff_sbrdsp_init_x86(SBRDSPContext *s) | void ff_sbrdsp_init_x86(SBRDSPContext *s) | ||||
{ | { | ||||
if (HAVE_YASM) { | |||||
int mm_flags = av_get_cpu_flags(); | |||||
int mm_flags = av_get_cpu_flags(); | |||||
if (mm_flags & AV_CPU_FLAG_SSE) { | |||||
s->sum_square = ff_sbr_sum_square_sse; | |||||
s->hf_g_filt = ff_sbr_hf_g_filt_sse; | |||||
} | |||||
if (EXTERNAL_SSE(mm_flags)) { | |||||
s->sum_square = ff_sbr_sum_square_sse; | |||||
s->hf_g_filt = ff_sbr_hf_g_filt_sse; | |||||
} | } | ||||
} | } |
@@ -20,6 +20,7 @@ | |||||
#include "libavutil/attributes.h" | #include "libavutil/attributes.h" | ||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavcodec/avcodec.h" | #include "libavcodec/avcodec.h" | ||||
#include "libavcodec/vp3dsp.h" | #include "libavcodec/vp3dsp.h" | ||||
#include "config.h" | #include "config.h" | ||||
@@ -38,18 +39,17 @@ void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); | |||||
av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) | av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) | ||||
{ | { | ||||
#if HAVE_YASM | |||||
int cpuflags = av_get_cpu_flags(); | int cpuflags = av_get_cpu_flags(); | ||||
#if ARCH_X86_32 | #if ARCH_X86_32 | ||||
if (HAVE_MMX && cpuflags & AV_CPU_FLAG_MMX) { | |||||
if (EXTERNAL_MMX(cpuflags)) { | |||||
c->idct_put = ff_vp3_idct_put_mmx; | c->idct_put = ff_vp3_idct_put_mmx; | ||||
c->idct_add = ff_vp3_idct_add_mmx; | c->idct_add = ff_vp3_idct_add_mmx; | ||||
c->idct_perm = FF_PARTTRANS_IDCT_PERM; | c->idct_perm = FF_PARTTRANS_IDCT_PERM; | ||||
} | } | ||||
#endif | #endif | ||||
if (HAVE_MMXEXT && cpuflags & AV_CPU_FLAG_MMXEXT) { | |||||
if (EXTERNAL_MMXEXT(cpuflags)) { | |||||
c->idct_dc_add = ff_vp3_idct_dc_add_mmx2; | c->idct_dc_add = ff_vp3_idct_dc_add_mmx2; | ||||
if (!(flags & CODEC_FLAG_BITEXACT)) { | if (!(flags & CODEC_FLAG_BITEXACT)) { | ||||
@@ -58,10 +58,9 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags) | |||||
} | } | ||||
} | } | ||||
if (cpuflags & AV_CPU_FLAG_SSE2) { | |||||
if (EXTERNAL_SSE2(cpuflags)) { | |||||
c->idct_put = ff_vp3_idct_put_sse2; | c->idct_put = ff_vp3_idct_put_sse2; | ||||
c->idct_add = ff_vp3_idct_add_sse2; | c->idct_add = ff_vp3_idct_add_sse2; | ||||
c->idct_perm = FF_TRANSPOSE_IDCT_PERM; | c->idct_perm = FF_TRANSPOSE_IDCT_PERM; | ||||
} | } | ||||
#endif | |||||
} | } |
@@ -22,6 +22,7 @@ | |||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
#include "libavcodec/vp56dsp.h" | #include "libavcodec/vp56dsp.h" | ||||
@@ -32,19 +33,17 @@ void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride, | |||||
av_cold void ff_vp56dsp_init_x86(VP56DSPContext* c, enum AVCodecID codec) | av_cold void ff_vp56dsp_init_x86(VP56DSPContext* c, enum AVCodecID codec) | ||||
{ | { | ||||
#if HAVE_YASM | |||||
int mm_flags = av_get_cpu_flags(); | int mm_flags = av_get_cpu_flags(); | ||||
if (CONFIG_VP6_DECODER && codec == AV_CODEC_ID_VP6) { | if (CONFIG_VP6_DECODER && codec == AV_CODEC_ID_VP6) { | ||||
#if ARCH_X86_32 | #if ARCH_X86_32 | ||||
if (mm_flags & AV_CPU_FLAG_MMX) { | |||||
if (EXTERNAL_MMX(mm_flags)) { | |||||
c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx; | c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx; | ||||
} | } | ||||
#endif | #endif | ||||
if (mm_flags & AV_CPU_FLAG_SSE2) { | |||||
if (EXTERNAL_SSE2(mm_flags)) { | |||||
c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2; | c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2; | ||||
} | } | ||||
} | } | ||||
#endif | |||||
} | } |
@@ -20,6 +20,7 @@ | |||||
#include "config.h" | #include "config.h" | ||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavresample/audio_convert.h" | #include "libavresample/audio_convert.h" | ||||
/* flat conversions */ | /* flat conversions */ | ||||
@@ -144,16 +145,15 @@ extern void ff_conv_flt_to_fltp_6ch_avx (float *const *dst, float *src, int len, | |||||
av_cold void ff_audio_convert_init_x86(AudioConvert *ac) | av_cold void ff_audio_convert_init_x86(AudioConvert *ac) | ||||
{ | { | ||||
#if HAVE_YASM | |||||
int mm_flags = av_get_cpu_flags(); | int mm_flags = av_get_cpu_flags(); | ||||
if (mm_flags & AV_CPU_FLAG_MMX && HAVE_MMX) { | |||||
if (EXTERNAL_MMX(mm_flags)) { | |||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32, | ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32, | ||||
0, 1, 8, "MMX", ff_conv_s32_to_s16_mmx); | 0, 1, 8, "MMX", ff_conv_s32_to_s16_mmx); | ||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, | ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, | ||||
6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx); | 6, 1, 4, "MMX", ff_conv_fltp_to_flt_6ch_mmx); | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { | |||||
if (EXTERNAL_SSE(mm_flags)) { | |||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP, | ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP, | ||||
6, 1, 2, "SSE", ff_conv_fltp_to_s16_6ch_sse); | 6, 1, 2, "SSE", ff_conv_fltp_to_s16_6ch_sse); | ||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, | ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, | ||||
@@ -161,7 +161,7 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) | |||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT, | ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT, | ||||
2, 16, 4, "SSE", ff_conv_flt_to_fltp_2ch_sse); | 2, 16, 4, "SSE", ff_conv_flt_to_fltp_2ch_sse); | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { | |||||
if (EXTERNAL_SSE2(mm_flags)) { | |||||
if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { | if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { | ||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32, | ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32, | ||||
0, 16, 16, "SSE2", ff_conv_s32_to_s16_sse2); | 0, 16, 16, "SSE2", ff_conv_s32_to_s16_sse2); | ||||
@@ -206,7 +206,7 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) | |||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT, | ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT, | ||||
6, 16, 4, "SSE2", ff_conv_flt_to_fltp_6ch_sse2); | 6, 16, 4, "SSE2", ff_conv_flt_to_fltp_6ch_sse2); | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSE) { | |||||
if (EXTERNAL_SSSE3(mm_flags)) { | |||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, | ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16P, | ||||
6, 16, 4, "SSSE3", ff_conv_s16p_to_flt_6ch_ssse3); | 6, 16, 4, "SSSE3", ff_conv_s16p_to_flt_6ch_ssse3); | ||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP, | ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP, | ||||
@@ -220,13 +220,13 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) | |||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT, | ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_FLT, | ||||
6, 16, 4, "SSSE3", ff_conv_flt_to_s16p_6ch_ssse3); | 6, 16, 4, "SSSE3", ff_conv_flt_to_s16p_6ch_ssse3); | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { | |||||
if (EXTERNAL_SSE4(mm_flags)) { | |||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, | ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, | ||||
0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4); | 0, 16, 8, "SSE4", ff_conv_s16_to_flt_sse4); | ||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, | ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP, | ||||
6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4); | 6, 16, 4, "SSE4", ff_conv_fltp_to_flt_6ch_sse4); | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { | |||||
if (EXTERNAL_AVX(mm_flags)) { | |||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32, | ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32, | ||||
0, 32, 16, "AVX", ff_conv_s32_to_flt_avx); | 0, 32, 16, "AVX", ff_conv_s32_to_flt_avx); | ||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT, | ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT, | ||||
@@ -260,5 +260,4 @@ av_cold void ff_audio_convert_init_x86(AudioConvert *ac) | |||||
ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT, | ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_FLT, | ||||
6, 16, 4, "AVX", ff_conv_flt_to_fltp_6ch_avx); | 6, 16, 4, "AVX", ff_conv_flt_to_fltp_6ch_avx); | ||||
} | } | ||||
#endif | |||||
} | } |
@@ -20,6 +20,7 @@ | |||||
#include "config.h" | #include "config.h" | ||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavresample/audio_mix.h" | #include "libavresample/audio_mix.h" | ||||
extern void ff_mix_2_to_1_fltp_flt_sse(float **src, float **matrix, int len, | extern void ff_mix_2_to_1_fltp_flt_sse(float **src, float **matrix, int len, | ||||
@@ -105,7 +106,7 @@ DEFINE_MIX_3_8_TO_1_2(7) | |||||
DEFINE_MIX_3_8_TO_1_2(8) | DEFINE_MIX_3_8_TO_1_2(8) | ||||
#define SET_MIX_3_8_TO_1_2(chan) \ | #define SET_MIX_3_8_TO_1_2(chan) \ | ||||
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { \ | |||||
if (EXTERNAL_SSE(mm_flags)) { \ | |||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ | ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ | ||||
chan, 1, 16, 4, "SSE", \ | chan, 1, 16, 4, "SSE", \ | ||||
ff_mix_ ## chan ## _to_1_fltp_flt_sse); \ | ff_mix_ ## chan ## _to_1_fltp_flt_sse); \ | ||||
@@ -113,7 +114,7 @@ DEFINE_MIX_3_8_TO_1_2(8) | |||||
chan, 2, 16, 4, "SSE", \ | chan, 2, 16, 4, "SSE", \ | ||||
ff_mix_## chan ##_to_2_fltp_flt_sse); \ | ff_mix_## chan ##_to_2_fltp_flt_sse); \ | ||||
} \ | } \ | ||||
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { \ | |||||
if (EXTERNAL_SSE2(mm_flags)) { \ | |||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ | ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ | ||||
chan, 1, 16, 8, "SSE2", \ | chan, 1, 16, 8, "SSE2", \ | ||||
ff_mix_ ## chan ## _to_1_s16p_flt_sse2); \ | ff_mix_ ## chan ## _to_1_s16p_flt_sse2); \ | ||||
@@ -121,7 +122,7 @@ DEFINE_MIX_3_8_TO_1_2(8) | |||||
chan, 2, 16, 8, "SSE2", \ | chan, 2, 16, 8, "SSE2", \ | ||||
ff_mix_ ## chan ## _to_2_s16p_flt_sse2); \ | ff_mix_ ## chan ## _to_2_s16p_flt_sse2); \ | ||||
} \ | } \ | ||||
if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { \ | |||||
if (EXTERNAL_SSE4(mm_flags)) { \ | |||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ | ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ | ||||
chan, 1, 16, 8, "SSE4", \ | chan, 1, 16, 8, "SSE4", \ | ||||
ff_mix_ ## chan ## _to_1_s16p_flt_sse4); \ | ff_mix_ ## chan ## _to_1_s16p_flt_sse4); \ | ||||
@@ -129,7 +130,7 @@ DEFINE_MIX_3_8_TO_1_2(8) | |||||
chan, 2, 16, 8, "SSE4", \ | chan, 2, 16, 8, "SSE4", \ | ||||
ff_mix_ ## chan ## _to_2_s16p_flt_sse4); \ | ff_mix_ ## chan ## _to_2_s16p_flt_sse4); \ | ||||
} \ | } \ | ||||
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { \ | |||||
if (EXTERNAL_AVX(mm_flags)) { \ | |||||
int ptr_align = 32; \ | int ptr_align = 32; \ | ||||
int smp_align = 8; \ | int smp_align = 8; \ | ||||
if (ARCH_X86_32 || chan >= 6) { \ | if (ARCH_X86_32 || chan >= 6) { \ | ||||
@@ -149,7 +150,7 @@ DEFINE_MIX_3_8_TO_1_2(8) | |||||
chan, 2, 16, 8, "AVX", \ | chan, 2, 16, 8, "AVX", \ | ||||
ff_mix_ ## chan ## _to_2_s16p_flt_avx); \ | ff_mix_ ## chan ## _to_2_s16p_flt_avx); \ | ||||
} \ | } \ | ||||
if (mm_flags & AV_CPU_FLAG_FMA4 && HAVE_FMA4) { \ | |||||
if (EXTERNAL_FMA4(mm_flags)) { \ | |||||
int ptr_align = 32; \ | int ptr_align = 32; \ | ||||
int smp_align = 8; \ | int smp_align = 8; \ | ||||
if (ARCH_X86_32 || chan >= 6) { \ | if (ARCH_X86_32 || chan >= 6) { \ | ||||
@@ -175,13 +176,13 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am) | |||||
#if HAVE_YASM | #if HAVE_YASM | ||||
int mm_flags = av_get_cpu_flags(); | int mm_flags = av_get_cpu_flags(); | ||||
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { | |||||
if (EXTERNAL_SSE(mm_flags)) { | |||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, | ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, | ||||
2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse); | 2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse); | ||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, | ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, | ||||
1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse); | 1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse); | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { | |||||
if (EXTERNAL_SSE2(mm_flags)) { | |||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, | ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, | ||||
2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2); | 2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2); | ||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8, | ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8, | ||||
@@ -189,13 +190,13 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am) | |||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, | ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, | ||||
1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2); | 1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2); | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { | |||||
if (EXTERNAL_SSE4(mm_flags)) { | |||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, | ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, | ||||
2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4); | 2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4); | ||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, | ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, | ||||
1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4); | 1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4); | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { | |||||
if (EXTERNAL_AVX(mm_flags)) { | |||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, | ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, | ||||
2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx); | 2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx); | ||||
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, | ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT, | ||||
@@ -20,6 +20,7 @@ | |||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/float_dsp.h" | #include "libavutil/float_dsp.h" | ||||
#include "cpu.h" | |||||
extern void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1, | extern void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1, | ||||
int len); | int len); | ||||
@@ -33,16 +34,14 @@ extern void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul, | |||||
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) | void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) | ||||
{ | { | ||||
#if HAVE_YASM | |||||
int mm_flags = av_get_cpu_flags(); | int mm_flags = av_get_cpu_flags(); | ||||
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { | |||||
if (EXTERNAL_SSE(mm_flags)) { | |||||
fdsp->vector_fmul = ff_vector_fmul_sse; | fdsp->vector_fmul = ff_vector_fmul_sse; | ||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse; | fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse; | ||||
} | } | ||||
if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { | |||||
if (EXTERNAL_AVX(mm_flags)) { | |||||
fdsp->vector_fmul = ff_vector_fmul_avx; | fdsp->vector_fmul = ff_vector_fmul_avx; | ||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx; | fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx; | ||||
} | } | ||||
#endif | |||||
} | } |
@@ -46,6 +46,7 @@ | |||||
#include "libavutil/opt.h" | #include "libavutil/opt.h" | ||||
#include "libavutil/pixdesc.h" | #include "libavutil/pixdesc.h" | ||||
#include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "rgb2rgb.h" | #include "rgb2rgb.h" | ||||
#include "swscale.h" | #include "swscale.h" | ||||
#include "swscale_internal.h" | #include "swscale_internal.h" | ||||
@@ -473,7 +474,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, | |||||
filterAlign = 1; | filterAlign = 1; | ||||
} | } | ||||
if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { | |||||
if (INLINE_MMX(cpu_flags)) { | |||||
// special case for unscaled vertical filtering | // special case for unscaled vertical filtering | ||||
if (minFilterSize == 1 && filterAlign == 2) | if (minFilterSize == 1 && filterAlign == 2) | ||||
filterAlign = 1; | filterAlign = 1; | ||||
@@ -973,8 +974,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, | |||||
FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, | FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, | ||||
(FFALIGN(srcW, 16) * 2 * FFALIGN(c->srcBpc, 8) >> 3) + 16, | (FFALIGN(srcW, 16) * 2 * FFALIGN(c->srcBpc, 8) >> 3) + 16, | ||||
fail); | fail); | ||||
if (HAVE_MMXEXT && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMXEXT && | |||||
c->srcBpc == 8 && c->dstBpc <= 10) { | |||||
if (INLINE_MMXEXT(cpu_flags) && c->srcBpc == 8 && c->dstBpc <= 10) { | |||||
c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 && | c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 && | ||||
(srcW & 15) == 0) ? 1 : 0; | (srcW & 15) == 0) ? 1 : 0; | ||||
if (!c->canMMX2BeUsed && dstW >= srcW && (srcW & 15) == 0 | if (!c->canMMX2BeUsed && dstW >= srcW && (srcW & 15) == 0 | ||||
@@ -1004,7 +1004,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, | |||||
c->chrXInc += 20; | c->chrXInc += 20; | ||||
} | } | ||||
// we don't use the x86 asm scaler if MMX is available | // we don't use the x86 asm scaler if MMX is available | ||||
else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { | |||||
else if (INLINE_MMX(cpu_flags)) { | |||||
c->lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20; | c->lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20; | ||||
c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20; | c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20; | ||||
} | } | ||||
@@ -1050,8 +1050,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, | |||||
} else | } else | ||||
#endif /* HAVE_MMXEXT_INLINE */ | #endif /* HAVE_MMXEXT_INLINE */ | ||||
{ | { | ||||
const int filterAlign = | |||||
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 : | |||||
const int filterAlign = INLINE_MMX(cpu_flags) ? 4 : | |||||
(HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 : | (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 : | ||||
1; | 1; | ||||
@@ -1074,8 +1073,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, | |||||
/* precalculate vertical scaler filter coefficients */ | /* precalculate vertical scaler filter coefficients */ | ||||
{ | { | ||||
const int filterAlign = | |||||
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 2 : | |||||
const int filterAlign = INLINE_MMX(cpu_flags) ? 2 : | |||||
(HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 : | (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 : | ||||
1; | 1; | ||||
@@ -1208,11 +1206,11 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, | |||||
#endif | #endif | ||||
sws_format_name(dstFormat)); | sws_format_name(dstFormat)); | ||||
if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT) | |||||
if (INLINE_MMXEXT(cpu_flags)) | |||||
av_log(c, AV_LOG_INFO, "using MMX2\n"); | av_log(c, AV_LOG_INFO, "using MMX2\n"); | ||||
else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) | |||||
else if (INLINE_AMD3DNOW(cpu_flags)) | |||||
av_log(c, AV_LOG_INFO, "using 3DNOW\n"); | av_log(c, AV_LOG_INFO, "using 3DNOW\n"); | ||||
else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) | |||||
else if (INLINE_MMX(cpu_flags)) | |||||
av_log(c, AV_LOG_INFO, "using MMX\n"); | av_log(c, AV_LOG_INFO, "using MMX\n"); | ||||
else if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) | else if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) | ||||
av_log(c, AV_LOG_INFO, "using AltiVec\n"); | av_log(c, AV_LOG_INFO, "using AltiVec\n"); | ||||
@@ -28,6 +28,7 @@ | |||||
#include "config.h" | #include "config.h" | ||||
#include "libavutil/attributes.h" | #include "libavutil/attributes.h" | ||||
#include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/bswap.h" | #include "libavutil/bswap.h" | ||||
#include "libswscale/rgb2rgb.h" | #include "libswscale/rgb2rgb.h" | ||||
@@ -133,13 +134,13 @@ av_cold void rgb2rgb_init_x86(void) | |||||
#if HAVE_INLINE_ASM | #if HAVE_INLINE_ASM | ||||
int cpu_flags = av_get_cpu_flags(); | int cpu_flags = av_get_cpu_flags(); | ||||
if (cpu_flags & AV_CPU_FLAG_MMX) | |||||
if (INLINE_MMX(cpu_flags)) | |||||
rgb2rgb_init_MMX(); | rgb2rgb_init_MMX(); | ||||
if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) | |||||
if (INLINE_AMD3DNOW(cpu_flags)) | |||||
rgb2rgb_init_3DNOW(); | rgb2rgb_init_3DNOW(); | ||||
if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT) | |||||
if (INLINE_MMXEXT(cpu_flags)) | |||||
rgb2rgb_init_MMX2(); | rgb2rgb_init_MMX2(); | ||||
if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2) | |||||
if (INLINE_SSE2(cpu_flags)) | |||||
rgb2rgb_init_SSE2(); | rgb2rgb_init_SSE2(); | ||||
#endif /* HAVE_INLINE_ASM */ | #endif /* HAVE_INLINE_ASM */ | ||||
} | } |
@@ -25,6 +25,7 @@ | |||||
#include "libavutil/attributes.h" | #include "libavutil/attributes.h" | ||||
#include "libavutil/intreadwrite.h" | #include "libavutil/intreadwrite.h" | ||||
#include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
#include "libavutil/x86/cpu.h" | |||||
#include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
#include "libavutil/pixdesc.h" | #include "libavutil/pixdesc.h" | ||||
@@ -314,7 +315,6 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c) | |||||
#endif | #endif | ||||
#endif /* HAVE_INLINE_ASM */ | #endif /* HAVE_INLINE_ASM */ | ||||
#if HAVE_YASM | |||||
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \ | #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \ | ||||
if (c->srcBpc == 8) { \ | if (c->srcBpc == 8) { \ | ||||
hscalefn = c->dstBpc <= 10 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \ | hscalefn = c->dstBpc <= 10 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \ | ||||
@@ -357,7 +357,7 @@ switch(c->dstBpc){ \ | |||||
c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \ | c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \ | ||||
break | break | ||||
#if ARCH_X86_32 | #if ARCH_X86_32 | ||||
if (cpu_flags & AV_CPU_FLAG_MMX) { | |||||
if (EXTERNAL_MMX(cpu_flags)) { | |||||
ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx); | ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx); | ||||
ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx); | ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx); | ||||
ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMXEXT); | ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMXEXT); | ||||
@@ -392,7 +392,7 @@ switch(c->dstBpc){ \ | |||||
break; | break; | ||||
} | } | ||||
} | } | ||||
if (cpu_flags & AV_CPU_FLAG_MMXEXT) { | |||||
if (EXTERNAL_MMXEXT(cpu_flags)) { | |||||
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2, , 1); | ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2, , 1); | ||||
} | } | ||||
#endif /* ARCH_X86_32 */ | #endif /* ARCH_X86_32 */ | ||||
@@ -404,7 +404,7 @@ switch(c->dstBpc){ \ | |||||
else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \ | else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \ | ||||
break; \ | break; \ | ||||
} | } | ||||
if (cpu_flags & AV_CPU_FLAG_SSE2) { | |||||
if (EXTERNAL_SSE2(cpu_flags)) { | |||||
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2); | ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2); | ||||
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2); | ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2); | ||||
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, , | ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, , | ||||
@@ -441,7 +441,7 @@ switch(c->dstBpc){ \ | |||||
break; | break; | ||||
} | } | ||||
} | } | ||||
if (cpu_flags & AV_CPU_FLAG_SSSE3) { | |||||
if (EXTERNAL_SSSE3(cpu_flags)) { | |||||
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3); | ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3); | ||||
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3); | ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3); | ||||
switch (c->srcFormat) { | switch (c->srcFormat) { | ||||
@@ -451,7 +451,7 @@ switch(c->dstBpc){ \ | |||||
break; | break; | ||||
} | } | ||||
} | } | ||||
if (cpu_flags & AV_CPU_FLAG_SSE4) { | |||||
if (EXTERNAL_SSE4(cpu_flags)) { | |||||
/* Xto15 don't need special sse4 functions */ | /* Xto15 don't need special sse4 functions */ | ||||
ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3); | ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3); | ||||
ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3); | ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3); | ||||
@@ -462,7 +462,7 @@ switch(c->dstBpc){ \ | |||||
c->yuv2plane1 = ff_yuv2plane1_16_sse4; | c->yuv2plane1 = ff_yuv2plane1_16_sse4; | ||||
} | } | ||||
if (cpu_flags & AV_CPU_FLAG_AVX) { | |||||
if (EXTERNAL_AVX(cpu_flags)) { | |||||
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, , | ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, , | ||||
HAVE_ALIGNED_STACK || ARCH_X86_64); | HAVE_ALIGNED_STACK || ARCH_X86_64); | ||||
ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); | ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); | ||||
@@ -490,5 +490,4 @@ switch(c->dstBpc){ \ | |||||
break; | break; | ||||
} | } | ||||
} | } | ||||
#endif | |||||
} | } |