@@ -371,8 +371,10 @@ DEFINE_IMDCT | |||||
INIT_XMM ssse3 | INIT_XMM ssse3 | ||||
DEFINE_IMDCT | DEFINE_IMDCT | ||||
%ifdef HAVE_AVX | |||||
INIT_XMM avx | INIT_XMM avx | ||||
DEFINE_IMDCT | DEFINE_IMDCT | ||||
%endif | |||||
INIT_XMM sse | INIT_XMM sse | ||||
@@ -717,5 +719,7 @@ cglobal four_imdct36_float, 5,5,8, out, buf, in, win, tmp | |||||
INIT_XMM sse | INIT_XMM sse | ||||
DEFINE_FOUR_IMDCT | DEFINE_FOUR_IMDCT | ||||
%ifdef HAVE_AVX | |||||
INIT_XMM avx | INIT_XMM avx | ||||
DEFINE_FOUR_IMDCT | DEFINE_FOUR_IMDCT | ||||
%endif |
@@ -306,7 +306,9 @@ INIT_XMM | |||||
idct_put_fn sse2, 16 | idct_put_fn sse2, 16 | ||||
INIT_XMM | INIT_XMM | ||||
idct_put_fn sse4, 16 | idct_put_fn sse4, 16 | ||||
%ifdef HAVE_AVX | |||||
INIT_AVX | INIT_AVX | ||||
idct_put_fn avx, 16 | idct_put_fn avx, 16 | ||||
%endif | |||||
%endif | %endif |
@@ -34,14 +34,14 @@ av_cold void v210_x86_init(V210DecContext *s) | |||||
if (cpu_flags & AV_CPU_FLAG_SSSE3) | if (cpu_flags & AV_CPU_FLAG_SSSE3) | ||||
s->unpack_frame = ff_v210_planar_unpack_aligned_ssse3; | s->unpack_frame = ff_v210_planar_unpack_aligned_ssse3; | ||||
if (cpu_flags & AV_CPU_FLAG_AVX) | |||||
if (HAVE_AVX && cpu_flags & AV_CPU_FLAG_AVX) | |||||
s->unpack_frame = ff_v210_planar_unpack_aligned_avx; | s->unpack_frame = ff_v210_planar_unpack_aligned_avx; | ||||
} | } | ||||
else { | else { | ||||
if (cpu_flags & AV_CPU_FLAG_SSSE3) | if (cpu_flags & AV_CPU_FLAG_SSSE3) | ||||
s->unpack_frame = ff_v210_planar_unpack_unaligned_ssse3; | s->unpack_frame = ff_v210_planar_unpack_unaligned_ssse3; | ||||
if (cpu_flags & AV_CPU_FLAG_AVX) | |||||
if (HAVE_AVX && cpu_flags & AV_CPU_FLAG_AVX) | |||||
s->unpack_frame = ff_v210_planar_unpack_unaligned_avx; | s->unpack_frame = ff_v210_planar_unpack_unaligned_avx; | ||||
} | } | ||||
#endif | #endif | ||||
@@ -76,10 +76,14 @@ cglobal v210_planar_unpack_%1_%2, 5, 5 | |||||
INIT_XMM | INIT_XMM | ||||
v210_planar_unpack unaligned, ssse3 | v210_planar_unpack unaligned, ssse3 | ||||
%ifdef HAVE_AVX | |||||
INIT_AVX | INIT_AVX | ||||
v210_planar_unpack unaligned, avx | v210_planar_unpack unaligned, avx | ||||
%endif | |||||
INIT_XMM | INIT_XMM | ||||
v210_planar_unpack aligned, ssse3 | v210_planar_unpack aligned, ssse3 | ||||
%ifdef HAVE_AVX | |||||
INIT_AVX | INIT_AVX | ||||
v210_planar_unpack aligned, avx | v210_planar_unpack aligned, avx | ||||
%endif |
@@ -233,6 +233,7 @@ YUYV_TO_UV_FN 3, uyvy | |||||
NVXX_TO_UV_FN 5, nv12 | NVXX_TO_UV_FN 5, nv12 | ||||
NVXX_TO_UV_FN 5, nv21 | NVXX_TO_UV_FN 5, nv21 | ||||
%ifdef HAVE_AVX | |||||
INIT_XMM avx | INIT_XMM avx | ||||
; in theory, we could write a yuy2-to-y using vpand (i.e. AVX), but | ; in theory, we could write a yuy2-to-y using vpand (i.e. AVX), but | ||||
; that's not faster in practice | ; that's not faster in practice | ||||
@@ -240,3 +241,4 @@ YUYV_TO_UV_FN 3, yuyv | |||||
YUYV_TO_UV_FN 3, uyvy, 1 | YUYV_TO_UV_FN 3, uyvy, 1 | ||||
NVXX_TO_UV_FN 5, nv12 | NVXX_TO_UV_FN 5, nv12 | ||||
NVXX_TO_UV_FN 5, nv21 | NVXX_TO_UV_FN 5, nv21 | ||||
%endif |
@@ -265,10 +265,12 @@ yuv2planeX_fn sse4, 9, 7, 5 | |||||
yuv2planeX_fn sse4, 10, 7, 5 | yuv2planeX_fn sse4, 10, 7, 5 | ||||
yuv2planeX_fn sse4, 16, 8, 5 | yuv2planeX_fn sse4, 16, 8, 5 | ||||
%ifdef HAVE_AVX | |||||
INIT_AVX | INIT_AVX | ||||
yuv2planeX_fn avx, 8, 10, 7 | yuv2planeX_fn avx, 8, 10, 7 | ||||
yuv2planeX_fn avx, 9, 7, 5 | yuv2planeX_fn avx, 9, 7, 5 | ||||
yuv2planeX_fn avx, 10, 7, 5 | yuv2planeX_fn avx, 10, 7, 5 | ||||
%endif | |||||
; %1=output-bpc, %2=alignment (u/a) | ; %1=output-bpc, %2=alignment (u/a) | ||||
%macro yuv2plane1_mainloop 2 | %macro yuv2plane1_mainloop 2 | ||||
@@ -402,8 +404,10 @@ yuv2plane1_fn 16, 6, 3 | |||||
INIT_XMM sse4 | INIT_XMM sse4 | ||||
yuv2plane1_fn 16, 5, 3 | yuv2plane1_fn 16, 5, 3 | ||||
%ifdef HAVE_AVX | |||||
INIT_XMM avx | INIT_XMM avx | ||||
yuv2plane1_fn 8, 5, 5 | yuv2plane1_fn 8, 5, 5 | ||||
yuv2plane1_fn 9, 5, 3 | yuv2plane1_fn 9, 5, 3 | ||||
yuv2plane1_fn 10, 5, 3 | yuv2plane1_fn 10, 5, 3 | ||||
yuv2plane1_fn 16, 5, 3 | yuv2plane1_fn 16, 5, 3 | ||||
%endif |
@@ -465,7 +465,7 @@ switch(c->dstBpc){ \ | |||||
c->yuv2plane1 = ff_yuv2plane1_16_sse4; | c->yuv2plane1 = ff_yuv2plane1_16_sse4; | ||||
} | } | ||||
if (cpu_flags & AV_CPU_FLAG_AVX) { | |||||
if (HAVE_AVX && cpu_flags & AV_CPU_FLAG_AVX) { | |||||
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx,); | ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx,); | ||||
ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); | ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); | ||||