| @@ -371,8 +371,10 @@ DEFINE_IMDCT | |||||
| INIT_XMM ssse3 | INIT_XMM ssse3 | ||||
| DEFINE_IMDCT | DEFINE_IMDCT | ||||
| %ifdef HAVE_AVX | |||||
| INIT_XMM avx | INIT_XMM avx | ||||
| DEFINE_IMDCT | DEFINE_IMDCT | ||||
| %endif | |||||
| INIT_XMM sse | INIT_XMM sse | ||||
| @@ -717,5 +719,7 @@ cglobal four_imdct36_float, 5,5,8, out, buf, in, win, tmp | |||||
| INIT_XMM sse | INIT_XMM sse | ||||
| DEFINE_FOUR_IMDCT | DEFINE_FOUR_IMDCT | ||||
| %ifdef HAVE_AVX | |||||
| INIT_XMM avx | INIT_XMM avx | ||||
| DEFINE_FOUR_IMDCT | DEFINE_FOUR_IMDCT | ||||
| %endif | |||||
| @@ -306,7 +306,9 @@ INIT_XMM | |||||
| idct_put_fn sse2, 16 | idct_put_fn sse2, 16 | ||||
| INIT_XMM | INIT_XMM | ||||
| idct_put_fn sse4, 16 | idct_put_fn sse4, 16 | ||||
| %ifdef HAVE_AVX | |||||
| INIT_AVX | INIT_AVX | ||||
| idct_put_fn avx, 16 | idct_put_fn avx, 16 | ||||
| %endif | |||||
| %endif | %endif | ||||
| @@ -34,14 +34,14 @@ av_cold void v210_x86_init(V210DecContext *s) | |||||
| if (cpu_flags & AV_CPU_FLAG_SSSE3) | if (cpu_flags & AV_CPU_FLAG_SSSE3) | ||||
| s->unpack_frame = ff_v210_planar_unpack_aligned_ssse3; | s->unpack_frame = ff_v210_planar_unpack_aligned_ssse3; | ||||
| if (cpu_flags & AV_CPU_FLAG_AVX) | |||||
| if (HAVE_AVX && cpu_flags & AV_CPU_FLAG_AVX) | |||||
| s->unpack_frame = ff_v210_planar_unpack_aligned_avx; | s->unpack_frame = ff_v210_planar_unpack_aligned_avx; | ||||
| } | } | ||||
| else { | else { | ||||
| if (cpu_flags & AV_CPU_FLAG_SSSE3) | if (cpu_flags & AV_CPU_FLAG_SSSE3) | ||||
| s->unpack_frame = ff_v210_planar_unpack_unaligned_ssse3; | s->unpack_frame = ff_v210_planar_unpack_unaligned_ssse3; | ||||
| if (cpu_flags & AV_CPU_FLAG_AVX) | |||||
| if (HAVE_AVX && cpu_flags & AV_CPU_FLAG_AVX) | |||||
| s->unpack_frame = ff_v210_planar_unpack_unaligned_avx; | s->unpack_frame = ff_v210_planar_unpack_unaligned_avx; | ||||
| } | } | ||||
| #endif | #endif | ||||
| @@ -76,10 +76,14 @@ cglobal v210_planar_unpack_%1_%2, 5, 5 | |||||
| INIT_XMM | INIT_XMM | ||||
| v210_planar_unpack unaligned, ssse3 | v210_planar_unpack unaligned, ssse3 | ||||
| %ifdef HAVE_AVX | |||||
| INIT_AVX | INIT_AVX | ||||
| v210_planar_unpack unaligned, avx | v210_planar_unpack unaligned, avx | ||||
| %endif | |||||
| INIT_XMM | INIT_XMM | ||||
| v210_planar_unpack aligned, ssse3 | v210_planar_unpack aligned, ssse3 | ||||
| %ifdef HAVE_AVX | |||||
| INIT_AVX | INIT_AVX | ||||
| v210_planar_unpack aligned, avx | v210_planar_unpack aligned, avx | ||||
| %endif | |||||
| @@ -233,6 +233,7 @@ YUYV_TO_UV_FN 3, uyvy | |||||
| NVXX_TO_UV_FN 5, nv12 | NVXX_TO_UV_FN 5, nv12 | ||||
| NVXX_TO_UV_FN 5, nv21 | NVXX_TO_UV_FN 5, nv21 | ||||
| %ifdef HAVE_AVX | |||||
| INIT_XMM avx | INIT_XMM avx | ||||
| ; in theory, we could write a yuy2-to-y using vpand (i.e. AVX), but | ; in theory, we could write a yuy2-to-y using vpand (i.e. AVX), but | ||||
| ; that's not faster in practice | ; that's not faster in practice | ||||
| @@ -240,3 +241,4 @@ YUYV_TO_UV_FN 3, yuyv | |||||
| YUYV_TO_UV_FN 3, uyvy, 1 | YUYV_TO_UV_FN 3, uyvy, 1 | ||||
| NVXX_TO_UV_FN 5, nv12 | NVXX_TO_UV_FN 5, nv12 | ||||
| NVXX_TO_UV_FN 5, nv21 | NVXX_TO_UV_FN 5, nv21 | ||||
| %endif | |||||
| @@ -265,10 +265,12 @@ yuv2planeX_fn sse4, 9, 7, 5 | |||||
| yuv2planeX_fn sse4, 10, 7, 5 | yuv2planeX_fn sse4, 10, 7, 5 | ||||
| yuv2planeX_fn sse4, 16, 8, 5 | yuv2planeX_fn sse4, 16, 8, 5 | ||||
| %ifdef HAVE_AVX | |||||
| INIT_AVX | INIT_AVX | ||||
| yuv2planeX_fn avx, 8, 10, 7 | yuv2planeX_fn avx, 8, 10, 7 | ||||
| yuv2planeX_fn avx, 9, 7, 5 | yuv2planeX_fn avx, 9, 7, 5 | ||||
| yuv2planeX_fn avx, 10, 7, 5 | yuv2planeX_fn avx, 10, 7, 5 | ||||
| %endif | |||||
| ; %1=output-bpc, %2=alignment (u/a) | ; %1=output-bpc, %2=alignment (u/a) | ||||
| %macro yuv2plane1_mainloop 2 | %macro yuv2plane1_mainloop 2 | ||||
| @@ -402,8 +404,10 @@ yuv2plane1_fn 16, 6, 3 | |||||
| INIT_XMM sse4 | INIT_XMM sse4 | ||||
| yuv2plane1_fn 16, 5, 3 | yuv2plane1_fn 16, 5, 3 | ||||
| %ifdef HAVE_AVX | |||||
| INIT_XMM avx | INIT_XMM avx | ||||
| yuv2plane1_fn 8, 5, 5 | yuv2plane1_fn 8, 5, 5 | ||||
| yuv2plane1_fn 9, 5, 3 | yuv2plane1_fn 9, 5, 3 | ||||
| yuv2plane1_fn 10, 5, 3 | yuv2plane1_fn 10, 5, 3 | ||||
| yuv2plane1_fn 16, 5, 3 | yuv2plane1_fn 16, 5, 3 | ||||
| %endif | |||||
| @@ -465,7 +465,7 @@ switch(c->dstBpc){ \ | |||||
| c->yuv2plane1 = ff_yuv2plane1_16_sse4; | c->yuv2plane1 = ff_yuv2plane1_16_sse4; | ||||
| } | } | ||||
| if (cpu_flags & AV_CPU_FLAG_AVX) { | |||||
| if (HAVE_AVX && cpu_flags & AV_CPU_FLAG_AVX) { | |||||
| ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx,); | ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx,); | ||||
| ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); | ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); | ||||