From 26585d2a7f5f13bec3ed8623119bf625fdb728f4 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Sat, 4 Feb 2012 07:52:31 +0100 Subject: [PATCH] adler32: rewrite using integer SIMD. about twice as fast as before. the non-CONFIG_SMALL case is also dropped as it is not faster than the CONFIG_SMALL case. Signed-off-by: Michael Niedermayer --- libavutil/adler32.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/libavutil/adler32.c b/libavutil/adler32.c index 571242e1cb..c5f96db5e5 100644 --- a/libavutil/adler32.c +++ b/libavutil/adler32.c @@ -24,6 +24,7 @@ #include "config.h" #include "adler32.h" #include "common.h" +#include "intreadwrite.h" #define BASE 65521L /* largest prime smaller than 65536 */ @@ -38,22 +39,42 @@ unsigned long av_adler32_update(unsigned long adler, const uint8_t * buf, unsigned long s2 = adler >> 16; while (len > 0) { - unsigned len2 = FFMIN((len-1) & ~15, 2048); +#if HAVE_FAST_64BIT && HAVE_FAST_UNALIGNED && !CONFIG_SMALL + unsigned len2 = FFMIN((len-1) & ~7, 23*8); if (len2) { + uint64_t a1= 0; + uint64_t a2= 0; + uint64_t b1= 0; + uint64_t b2= 0; len -= len2; - -#if CONFIG_SMALL - while (len2 >= 4) { - DO4(buf); - len2 -= 4; + s2 += s1*len2; + while (len2 >= 8) { + uint64_t v = AV_RN64(buf); + a2 += a1; + b2 += b1; + a1 += v &0x00FF00FF00FF00FF; + b1 += (v>>8)&0x00FF00FF00FF00FF; + len2 -= 8; + buf+=8; + } + s1 += ((a1+b1)*0x1000100010001)>>48; + s2 += ((((a2&0xFFFF0000FFFF)+(b2&0xFFFF0000FFFF)+((a2>>16)&0xFFFF0000FFFF)+((b2>>16)&0xFFFF0000FFFF))*0x800000008)>>32) +#if HAVE_BIGENDIAN + + 2*((b1*0x1000200030004)>>48) + + ((a1*0x1000100010001)>>48) + + 2*((a1*0x0000100020003)>>48); +#else + + 2*((a1*0x4000300020001)>>48) + + ((b1*0x1000100010001)>>48) + + 2*((b1*0x3000200010000)>>48); +#endif } #else - while (len2 >= 16) { - DO16(buf); - len2 -= 16; + while (len > 4 && s2 < (1U << 31)) { + DO4(buf); + len -= 4; } #endif - } DO1(buf); len--; s1 %= BASE; s2 %= BASE;