* commit '4efab89332ea39a77145e8b15562b981d9dbde68': x86: Use *_FAST/*_SLOW CPU feature detection macros where appropriate. Merged-by: James Almer <jamrial@gmail.com> (tags/n3.3)
| @@ -228,16 +228,19 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) | |||||
| c->float_to_fixed24 = ff_float_to_fixed24_sse2; | c->float_to_fixed24 = ff_float_to_fixed24_sse2; | ||||
| c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2; | c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2; | ||||
| c->extract_exponents = ff_ac3_extract_exponents_sse2; | c->extract_exponents = ff_ac3_extract_exponents_sse2; | ||||
| if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) { | |||||
| c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2; | |||||
| c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2; | |||||
| } | |||||
| if (bit_exact) { | if (bit_exact) { | ||||
| c->apply_window_int16 = ff_apply_window_int16_sse2; | c->apply_window_int16 = ff_apply_window_int16_sse2; | ||||
| } else if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) { | |||||
| } | |||||
| } | |||||
| if (EXTERNAL_SSE2_FAST(cpu_flags)) { | |||||
| c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2; | |||||
| c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2; | |||||
| if (!bit_exact) { | |||||
| c->apply_window_int16 = ff_apply_window_int16_round_sse2; | c->apply_window_int16 = ff_apply_window_int16_round_sse2; | ||||
| } | } | ||||
| } | } | ||||
| if (EXTERNAL_SSSE3(cpu_flags)) { | if (EXTERNAL_SSSE3(cpu_flags)) { | ||||
| c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3; | c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3; | ||||
| if (cpu_flags & AV_CPU_FLAG_ATOM) { | if (cpu_flags & AV_CPU_FLAG_ATOM) { | ||||
| @@ -562,11 +562,6 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) | |||||
| } | } | ||||
| if (EXTERNAL_SSE2(cpu_flags)) { | if (EXTERNAL_SSE2(cpu_flags)) { | ||||
| if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) { | |||||
| // these functions are slower than mmx on AMD, but faster on Intel | |||||
| H264_QPEL_FUNCS(0, 0, sse2); | |||||
| } | |||||
| if (!high_bit_depth) { | if (!high_bit_depth) { | ||||
| H264_QPEL_FUNCS(0, 1, sse2); | H264_QPEL_FUNCS(0, 1, sse2); | ||||
| H264_QPEL_FUNCS(0, 2, sse2); | H264_QPEL_FUNCS(0, 2, sse2); | ||||
| @@ -593,6 +588,12 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) | |||||
| } | } | ||||
| } | } | ||||
| if (EXTERNAL_SSE2_FAST(cpu_flags)) { | |||||
| if (!high_bit_depth) { | |||||
| H264_QPEL_FUNCS(0, 0, sse2); | |||||
| } | |||||
| } | |||||
| if (EXTERNAL_SSSE3(cpu_flags)) { | if (EXTERNAL_SSSE3(cpu_flags)) { | ||||
| if (!high_bit_depth) { | if (!high_bit_depth) { | ||||
| H264_QPEL_FUNCS(1, 0, ssse3); | H264_QPEL_FUNCS(1, 0, ssse3); | ||||
| @@ -154,7 +154,7 @@ av_cold void ff_lpc_init_x86(LPCContext *c) | |||||
| #if HAVE_SSE2_INLINE | #if HAVE_SSE2_INLINE | ||||
| int cpu_flags = av_get_cpu_flags(); | int cpu_flags = av_get_cpu_flags(); | ||||
| if (HAVE_SSE2_INLINE && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) { | |||||
| if (INLINE_SSE2(cpu_flags) || INLINE_SSE2_SLOW(cpu_flags)) { | |||||
| c->lpc_apply_welch_window = lpc_apply_welch_window_sse2; | c->lpc_apply_welch_window = lpc_apply_welch_window_sse2; | ||||
| c->lpc_compute_autocorr = lpc_compute_autocorr_sse2; | c->lpc_compute_autocorr = lpc_compute_autocorr_sse2; | ||||
| } | } | ||||
| @@ -346,7 +346,7 @@ av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c) | |||||
| c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; | c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; | ||||
| } | } | ||||
| if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) { | |||||
| if (EXTERNAL_SSE2(cpu_flags) || EXTERNAL_SSE2_SLOW(cpu_flags)) { | |||||
| VP8_LUMA_MC_FUNC(0, 16, sse2); | VP8_LUMA_MC_FUNC(0, 16, sse2); | ||||
| VP8_MC_FUNC(1, 8, sse2); | VP8_MC_FUNC(1, 8, sse2); | ||||
| VP8_BILINEAR_MC_FUNC(0, 16, sse2); | VP8_BILINEAR_MC_FUNC(0, 16, sse2); | ||||
| @@ -416,7 +416,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c) | |||||
| c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse; | c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse; | ||||
| } | } | ||||
| if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) { | |||||
| if (EXTERNAL_SSE2(cpu_flags) || EXTERNAL_SSE2_SLOW(cpu_flags)) { | |||||
| c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; | c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; | ||||
| c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; | c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; | ||||