@@ -34,7 +34,7 @@ SECTION .text | |||||
cglobal threshold8, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x | cglobal threshold8, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x | ||||
mov wd, dword wm | mov wd, dword wm | ||||
mov hd, dword hm | mov hd, dword hm | ||||
mova m4, [pb_128] | |||||
VBROADCASTI128 m4, [pb_128] | |||||
add inq, wq | add inq, wq | ||||
add thresholdq, wq | add thresholdq, wq | ||||
add minq, wq | add minq, wq | ||||
@@ -70,4 +70,9 @@ RET | |||||
; Invoke THRESHOLD_8 once after INIT_XMM sse4.
; NOTE(review): the THRESHOLD_8 definition is not visible in this hunk;
; presumably it wraps the `cglobal threshold8` body shown in the earlier
; hunk, so this line would emit the sse4 variant - confirm against the
; full file.
INIT_XMM sse4 | INIT_XMM sse4 | ||||
THRESHOLD_8 | THRESHOLD_8 | ||||
; Second invocation after INIT_YMM avx2. It is assembled only when
; HAVE_AVX2_EXTERNAL evaluates to non-zero.
%if HAVE_AVX2_EXTERNAL | |||||
INIT_YMM avx2 | |||||
THRESHOLD_8 | |||||
%endif | |||||
%endif | %endif |
@@ -30,12 +30,26 @@ void ff_threshold8_sse4(const uint8_t *in, const uint8_t *threshold, | |||||
ptrdiff_t flinesize, ptrdiff_t slinesize, | ptrdiff_t flinesize, ptrdiff_t slinesize, | ||||
ptrdiff_t olinesize, | ptrdiff_t olinesize, | ||||
int w, int h); | int w, int h); | ||||
/*
 * Prototype of the AVX2 8-bit threshold routine that
 * ff_threshold_init_x86() stores into s->threshold.
 *
 * in, threshold, min, max  read-only byte pointers
 * out                      writable byte pointer
 * ilinesize, tlinesize,
 * flinesize, slinesize,
 * olinesize                ptrdiff_t values; presumably the byte strides of
 *                          in/threshold/min/max/out - TODO confirm
 * w, h                     int values; presumably width and height - TODO confirm
 *
 * NOTE(review): the definition is not in this file. It is presumably
 * generated by the THRESHOLD_8 macro under INIT_YMM avx2 in the assembly
 * source - confirm.
 */
void ff_threshold8_avx2(const uint8_t *in, const uint8_t *threshold, | |||||
const uint8_t *min, const uint8_t *max, | |||||
uint8_t *out, | |||||
ptrdiff_t ilinesize, ptrdiff_t tlinesize, | |||||
ptrdiff_t flinesize, ptrdiff_t slinesize, | |||||
ptrdiff_t olinesize, | |||||
int w, int h); | |||||
/*
 * Choose an x86 SIMD implementation for s->threshold.
 *
 * Reads the CPU flags once via av_get_cpu_flags() and, when ARCH_X86_64 is
 * set and s->depth == 8, assigns ff_threshold8_sse4 and/or
 * ff_threshold8_avx2 to s->threshold. If no condition holds, s->threshold
 * is not written.
 */
av_cold void ff_threshold_init_x86(ThresholdContext *s) | av_cold void ff_threshold_init_x86(ThresholdContext *s) | ||||
{ | { | ||||
int cpu_flags = av_get_cpu_flags(); | int cpu_flags = av_get_cpu_flags(); | ||||
/*
 * NOTE(review): the next two lines look like the removed (pre-change) side
 * of this diff hunk. The hunk header reads -30,12 +30,26, and the line
 * counts only add up if these two lines are deletions. Read literally
 * together with the lines that follow, the braces do not balance.
 */
if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags) && s->depth == 8) { | |||||
s->threshold = ff_threshold8_sse4; | |||||
/*
 * Post-change form: the architecture and depth tests are nested so that
 * several instruction-set checks can share them.
 */
if (ARCH_X86_64) { | |||||
if (s->depth == 8) { | |||||
if (EXTERNAL_SSE4(cpu_flags)) { | |||||
s->threshold = ff_threshold8_sse4; | |||||
} | |||||
/* Tested after SSE4, so when both checks pass the AVX2 routine wins. */
if (EXTERNAL_AVX2_FAST(cpu_flags)) { | |||||
s->threshold = ff_threshold8_avx2; | |||||
} | |||||
} | |||||
} | } | ||||
} | } |