git.sesse.net Git - ffmpeg/blobdiff - libavutil/adler32.c
Merge commit '40b331e1f41cf118bd1b0807cac801437255636f'
[ffmpeg] / libavutil / adler32.c
index 8a8065c4a33964a20f79d55bffa81026aadb805c..579d0229787edf5d1912c90e4aadee287c68b8e6 100644 (file)
@@ -32,6 +32,8 @@
 
 #include "config.h"
 #include "adler32.h"
+#include "common.h"
+#include "intreadwrite.h"
 
 #define BASE 65521L /* largest prime smaller than 65536 */
 
@@ -46,16 +48,46 @@ unsigned long av_adler32_update(unsigned long adler, const uint8_t * buf,
     unsigned long s2 = adler >> 16;
 
     while (len > 0) {
-#if CONFIG_SMALL
+#if HAVE_FAST_64BIT && HAVE_FAST_UNALIGNED && !CONFIG_SMALL
+        unsigned len2 = FFMIN((len-1) & ~7, 23*8);
+        if (len2) {
+            uint64_t a1= 0;
+            uint64_t a2= 0;
+            uint64_t b1= 0;
+            uint64_t b2= 0;
+            len -= len2;
+            s2 += s1*len2;
+            while (len2 >= 8) {
+                uint64_t v = AV_RN64(buf);
+                a2 += a1;
+                b2 += b1;
+                a1 +=  v    &0x00FF00FF00FF00FF;
+                b1 += (v>>8)&0x00FF00FF00FF00FF;
+                len2 -= 8;
+                buf+=8;
+            }
+
+            // Combine the 8 interleaved Adler-32 partial sums without overflow.
+            // Lowering the iteration cap would allow the code below to be
+            // simplified, but would likely be slower because the inner loop
+            // would run fewer iterations per pass.
+            s1 += ((a1+b1)*0x1000100010001)>>48;
+            s2 += ((((a2&0xFFFF0000FFFF)+(b2&0xFFFF0000FFFF)+((a2>>16)&0xFFFF0000FFFF)+((b2>>16)&0xFFFF0000FFFF))*0x800000008)>>32)
+#if HAVE_BIGENDIAN
+                 + 2*((b1*0x1000200030004)>>48)
+                 +   ((a1*0x1000100010001)>>48)
+                 + 2*((a1*0x0000100020003)>>48);
+#else
+                 + 2*((a1*0x4000300020001)>>48)
+                 +   ((b1*0x1000100010001)>>48)
+                 + 2*((b1*0x3000200010000)>>48);
+#endif
+        }
+#else
         while (len > 4  && s2 < (1U << 31)) {
             DO4(buf);
             len -= 4;
         }
-#else
-        while (len > 16 && s2 < (1U << 31)) {
-            DO16(buf);
-            len -= 16;
-        }
 #endif
         DO1(buf); len--;
         s1 %= BASE;
@@ -65,6 +97,7 @@ unsigned long av_adler32_update(unsigned long adler, const uint8_t * buf,
 }
 
 #ifdef TEST
+// LCOV_EXCL_START
 #include <string.h>
 #include "log.h"
 #include "timer.h"
@@ -95,4 +128,5 @@ int main(int argc, char **argv)
     av_log(NULL, AV_LOG_DEBUG, "%X (expected 50E6E508)\n", checksum);
     return checksum == 0x50e6e508 ? 0 : 1;
 }
+// LCOV_EXCL_STOP
 #endif