Reviewed-by: Christophe Gisquet <christophe.gisquet@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
tags/n3.0
@@ -252,7 +252,7 @@ cglobal hevc_sao_edge_filter_%2_%1, 1, 6, 8, 5*mmsize, dst, src, dststride, a_st | |||||
%endif ; ARCH | %endif ; ARCH | ||||
%if cpuflag(avx2) | |||||
%if mmsize > 16 | |||||
SPLATW m8, [offsetq+2] | SPLATW m8, [offsetq+2] | ||||
SPLATW m9, [offsetq+4] | SPLATW m9, [offsetq+4] | ||||
SPLATW m10, [offsetq+0] | SPLATW m10, [offsetq+0] | ||||
@@ -352,11 +352,18 @@ HEVC_SAO_EDGE_FILTER 12, 48, 6 | |||||
HEVC_SAO_EDGE_FILTER 12, 64, 8 | HEVC_SAO_EDGE_FILTER 12, 64, 8 | ||||
%if HAVE_AVX2_EXTERNAL | %if HAVE_AVX2_EXTERNAL | ||||
INIT_XMM avx2 | |||||
HEVC_SAO_EDGE_FILTER 10, 8, 1 | |||||
INIT_YMM avx2 | INIT_YMM avx2 | ||||
HEVC_SAO_EDGE_FILTER 10, 16, 1 | |||||
HEVC_SAO_EDGE_FILTER 10, 32, 2 | HEVC_SAO_EDGE_FILTER 10, 32, 2 | ||||
HEVC_SAO_EDGE_FILTER 10, 48, 3 | HEVC_SAO_EDGE_FILTER 10, 48, 3 | ||||
HEVC_SAO_EDGE_FILTER 10, 64, 4 | HEVC_SAO_EDGE_FILTER 10, 64, 4 | ||||
INIT_XMM avx2 | |||||
HEVC_SAO_EDGE_FILTER 12, 8, 1 | |||||
INIT_YMM avx2 | |||||
HEVC_SAO_EDGE_FILTER 12, 16, 1 | |||||
HEVC_SAO_EDGE_FILTER 12, 32, 2 | HEVC_SAO_EDGE_FILTER 12, 32, 2 | ||||
HEVC_SAO_EDGE_FILTER 12, 48, 3 | HEVC_SAO_EDGE_FILTER 12, 48, 3 | ||||
HEVC_SAO_EDGE_FILTER 12, 64, 4 | HEVC_SAO_EDGE_FILTER 12, 64, 4 | ||||
@@ -1045,9 +1045,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) | |||||
c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2; | c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2; | ||||
} | } | ||||
SAO_BAND_INIT(10, avx2); | SAO_BAND_INIT(10, avx2); | ||||
c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_10_avx2; | |||||
c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_10_avx2; | |||||
c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_10_avx2; | |||||
SAO_EDGE_INIT(10, avx2); | |||||
c->transform_add[2] = ff_hevc_transform_add16_10_avx2; | c->transform_add[2] = ff_hevc_transform_add16_10_avx2; | ||||
c->transform_add[3] = ff_hevc_transform_add32_10_avx2; | c->transform_add[3] = ff_hevc_transform_add32_10_avx2; | ||||
@@ -1101,9 +1099,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) | |||||
c->idct_dc[3] = ff_hevc_idct32x32_dc_12_avx2; | c->idct_dc[3] = ff_hevc_idct32x32_dc_12_avx2; | ||||
SAO_BAND_INIT(12, avx2); | SAO_BAND_INIT(12, avx2); | ||||
c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_12_avx2; | |||||
c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_12_avx2; | |||||
c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_12_avx2; | |||||
SAO_EDGE_INIT(12, avx2); | |||||
} | } | ||||
} | } | ||||
} | } |