Split dsp.resample into dsp.resample_common and dsp.resample_linear,
and call the faster resample_common even when linear_interp=on,
provided that c->frac and c->dst_incr_mod are both zero.
This speeds up resampling when exact_rational and linear_interp are both
enabled, because exact_rational forces c->frac and c->dst_incr_mod to
be zero when soft compensation does not happen.
Benchmark with exact_rational=on:linear_interp=on:
      old     new
real  8.432s  5.097s
user  7.679s  4.989s
sys   0.125s  0.107s
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
tags/n3.3
| @@ -111,12 +111,10 @@ av_cold void swri_resample_dsp_arm_init(ResampleContext *c) | |||||
| switch(c->format) { | switch(c->format) { | ||||
| case AV_SAMPLE_FMT_FLTP: | case AV_SAMPLE_FMT_FLTP: | ||||
| if (!c->linear) | |||||
| c->dsp.resample = ff_resample_common_float_neon; | |||||
| c->dsp.resample_common = ff_resample_common_float_neon; | |||||
| break; | break; | ||||
| case AV_SAMPLE_FMT_S16P: | case AV_SAMPLE_FMT_S16P: | ||||
| if (!c->linear) | |||||
| c->dsp.resample = ff_resample_common_s16_neon; | |||||
| c->dsp.resample_common = ff_resample_common_s16_neon; | |||||
| break; | break; | ||||
| } | } | ||||
| } | } | ||||
| @@ -496,7 +496,12 @@ static int swri_resample(ResampleContext *c, | |||||
| dst_size = FFMIN(dst_size, delta_n); | dst_size = FFMIN(dst_size, delta_n); | ||||
| if (dst_size > 0) { | if (dst_size > 0) { | ||||
| *consumed = c->dsp.resample(c, dst, src, dst_size, update_ctx); | |||||
| /* resample_linear and resample_common should have same behavior | |||||
| * when frac and dst_incr_mod are zero */ | |||||
| if (c->linear && (c->frac || c->dst_incr_mod)) | |||||
| *consumed = c->dsp.resample_linear(c, dst, src, dst_size, update_ctx); | |||||
| else | |||||
| *consumed = c->dsp.resample_common(c, dst, src, dst_size, update_ctx); | |||||
| } else { | } else { | ||||
| *consumed = 0; | *consumed = 0; | ||||
| } | } | ||||
| @@ -53,8 +53,10 @@ typedef struct ResampleContext { | |||||
| struct { | struct { | ||||
| void (*resample_one)(void *dst, const void *src, | void (*resample_one)(void *dst, const void *src, | ||||
| int n, int64_t index, int64_t incr); | int n, int64_t index, int64_t incr); | ||||
| int (*resample)(struct ResampleContext *c, void *dst, | |||||
| const void *src, int n, int update_ctx); | |||||
| int (*resample_common)(struct ResampleContext *c, void *dst, | |||||
| const void *src, int n, int update_ctx); | |||||
| int (*resample_linear)(struct ResampleContext *c, void *dst, | |||||
| const void *src, int n, int update_ctx); | |||||
| } dsp; | } dsp; | ||||
| } ResampleContext; | } ResampleContext; | ||||
| @@ -48,19 +48,23 @@ void swri_resample_dsp_init(ResampleContext *c) | |||||
| switch(c->format){ | switch(c->format){ | ||||
| case AV_SAMPLE_FMT_S16P: | case AV_SAMPLE_FMT_S16P: | ||||
| c->dsp.resample_one = resample_one_int16; | c->dsp.resample_one = resample_one_int16; | ||||
| c->dsp.resample = c->linear ? resample_linear_int16 : resample_common_int16; | |||||
| c->dsp.resample_common = resample_common_int16; | |||||
| c->dsp.resample_linear = resample_linear_int16; | |||||
| break; | break; | ||||
| case AV_SAMPLE_FMT_S32P: | case AV_SAMPLE_FMT_S32P: | ||||
| c->dsp.resample_one = resample_one_int32; | c->dsp.resample_one = resample_one_int32; | ||||
| c->dsp.resample = c->linear ? resample_linear_int32 : resample_common_int32; | |||||
| c->dsp.resample_common = resample_common_int32; | |||||
| c->dsp.resample_linear = resample_linear_int32; | |||||
| break; | break; | ||||
| case AV_SAMPLE_FMT_FLTP: | case AV_SAMPLE_FMT_FLTP: | ||||
| c->dsp.resample_one = resample_one_float; | c->dsp.resample_one = resample_one_float; | ||||
| c->dsp.resample = c->linear ? resample_linear_float : resample_common_float; | |||||
| c->dsp.resample_common = resample_common_float; | |||||
| c->dsp.resample_linear = resample_linear_float; | |||||
| break; | break; | ||||
| case AV_SAMPLE_FMT_DBLP: | case AV_SAMPLE_FMT_DBLP: | ||||
| c->dsp.resample_one = resample_one_double; | c->dsp.resample_one = resample_one_double; | ||||
| c->dsp.resample = c->linear ? resample_linear_double : resample_common_double; | |||||
| c->dsp.resample_common = resample_common_double; | |||||
| c->dsp.resample_linear = resample_linear_double; | |||||
| break; | break; | ||||
| } | } | ||||
| @@ -50,40 +50,40 @@ av_cold void swri_resample_dsp_x86_init(ResampleContext *c) | |||||
| switch(c->format){ | switch(c->format){ | ||||
| case AV_SAMPLE_FMT_S16P: | case AV_SAMPLE_FMT_S16P: | ||||
| if (ARCH_X86_32 && EXTERNAL_MMXEXT(mm_flags)) { | if (ARCH_X86_32 && EXTERNAL_MMXEXT(mm_flags)) { | ||||
| c->dsp.resample = c->linear ? ff_resample_linear_int16_mmxext | |||||
| : ff_resample_common_int16_mmxext; | |||||
| c->dsp.resample_linear = ff_resample_linear_int16_mmxext; | |||||
| c->dsp.resample_common = ff_resample_common_int16_mmxext; | |||||
| } | } | ||||
| if (EXTERNAL_SSE2(mm_flags)) { | if (EXTERNAL_SSE2(mm_flags)) { | ||||
| c->dsp.resample = c->linear ? ff_resample_linear_int16_sse2 | |||||
| : ff_resample_common_int16_sse2; | |||||
| c->dsp.resample_linear = ff_resample_linear_int16_sse2; | |||||
| c->dsp.resample_common = ff_resample_common_int16_sse2; | |||||
| } | } | ||||
| if (EXTERNAL_XOP(mm_flags)) { | if (EXTERNAL_XOP(mm_flags)) { | ||||
| c->dsp.resample = c->linear ? ff_resample_linear_int16_xop | |||||
| : ff_resample_common_int16_xop; | |||||
| c->dsp.resample_linear = ff_resample_linear_int16_xop; | |||||
| c->dsp.resample_common = ff_resample_common_int16_xop; | |||||
| } | } | ||||
| break; | break; | ||||
| case AV_SAMPLE_FMT_FLTP: | case AV_SAMPLE_FMT_FLTP: | ||||
| if (EXTERNAL_SSE(mm_flags)) { | if (EXTERNAL_SSE(mm_flags)) { | ||||
| c->dsp.resample = c->linear ? ff_resample_linear_float_sse | |||||
| : ff_resample_common_float_sse; | |||||
| c->dsp.resample_linear = ff_resample_linear_float_sse; | |||||
| c->dsp.resample_common = ff_resample_common_float_sse; | |||||
| } | } | ||||
| if (EXTERNAL_AVX_FAST(mm_flags)) { | if (EXTERNAL_AVX_FAST(mm_flags)) { | ||||
| c->dsp.resample = c->linear ? ff_resample_linear_float_avx | |||||
| : ff_resample_common_float_avx; | |||||
| c->dsp.resample_linear = ff_resample_linear_float_avx; | |||||
| c->dsp.resample_common = ff_resample_common_float_avx; | |||||
| } | } | ||||
| if (EXTERNAL_FMA3_FAST(mm_flags)) { | if (EXTERNAL_FMA3_FAST(mm_flags)) { | ||||
| c->dsp.resample = c->linear ? ff_resample_linear_float_fma3 | |||||
| : ff_resample_common_float_fma3; | |||||
| c->dsp.resample_linear = ff_resample_linear_float_fma3; | |||||
| c->dsp.resample_common = ff_resample_common_float_fma3; | |||||
| } | } | ||||
| if (EXTERNAL_FMA4(mm_flags)) { | if (EXTERNAL_FMA4(mm_flags)) { | ||||
| c->dsp.resample = c->linear ? ff_resample_linear_float_fma4 | |||||
| : ff_resample_common_float_fma4; | |||||
| c->dsp.resample_linear = ff_resample_linear_float_fma4; | |||||
| c->dsp.resample_common = ff_resample_common_float_fma4; | |||||
| } | } | ||||
| break; | break; | ||||
| case AV_SAMPLE_FMT_DBLP: | case AV_SAMPLE_FMT_DBLP: | ||||
| if (EXTERNAL_SSE2(mm_flags)) { | if (EXTERNAL_SSE2(mm_flags)) { | ||||
| c->dsp.resample = c->linear ? ff_resample_linear_double_sse2 | |||||
| : ff_resample_common_double_sse2; | |||||
| c->dsp.resample_linear = ff_resample_linear_double_sse2; | |||||
| c->dsp.resample_common = ff_resample_common_double_sse2; | |||||
| } | } | ||||
| break; | break; | ||||
| } | } | ||||