diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 303d36d109..dcf2345954 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -1612,16 +1612,26 @@ void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src, #define DIRAC_PIXOP(OPNAME2, OPNAME, EXT)\ void ff_ ## OPNAME2 ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ {\ - OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h);\ + if (h&3)\ + ff_ ## OPNAME2 ## _dirac_pixels8_c(dst, src, stride, h);\ + else\ + OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h);\ }\ void ff_ ## OPNAME2 ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ {\ - OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\ + if (h&3)\ + ff_ ## OPNAME2 ## _dirac_pixels16_c(dst, src, stride, h);\ + else\ + OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\ }\ void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ {\ - OPNAME ## _pixels16_ ## EXT(dst , src[0] , stride, h);\ - OPNAME ## _pixels16_ ## EXT(dst+16, src[0]+16, stride, h);\ + if (h&3) {\ + ff_ ## OPNAME2 ## _dirac_pixels32_c(dst, src, stride, h);\ + } else {\ + OPNAME ## _pixels16_ ## EXT(dst , src[0] , stride, h);\ + OPNAME ## _pixels16_ ## EXT(dst+16, src[0]+16, stride, h);\ + }\ } DIRAC_PIXOP(put, put, mmx) @@ -1631,21 +1641,35 @@ DIRAC_PIXOP(avg, ff_avg, mmxext) #if HAVE_YASM void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h) { + if (h&3) + ff_put_dirac_pixels16_c(dst, src, stride, h); + else ff_put_pixels16_sse2(dst, src[0], stride, h); } void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h) { + if (h&3) + ff_avg_dirac_pixels16_c(dst, src, stride, h); + else ff_avg_pixels16_sse2(dst, src[0], stride, h); } void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h) { + if (h&3) { + ff_put_dirac_pixels32_c(dst, src, stride, h); + } else { ff_put_pixels16_sse2(dst , src[0] , stride, h); ff_put_pixels16_sse2(dst+16, src[0]+16, stride, h); + } } void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h) { + if (h&3) { + ff_avg_dirac_pixels32_c(dst, src, stride, h); + } else { ff_avg_pixels16_sse2(dst , src[0] , stride, h); ff_avg_pixels16_sse2(dst+16, src[0]+16, stride, h); + } } #endif #endif