Initial unstuff code.
authorSteinar H. Gunderson <sesse@debian.org>
Fri, 2 Jan 2009 01:45:52 +0000 (02:45 +0100)
committerSteinar H. Gunderson <sesse@debian.org>
Fri, 2 Jan 2009 01:45:52 +0000 (02:45 +0100)
unstuff.c [new file with mode: 0644]
unstuff.h [new file with mode: 0644]
unstuff_test.c [new file with mode: 0644]

diff --git a/unstuff.c b/unstuff.c
new file mode 100644 (file)
index 0000000..4a69178
--- /dev/null
+++ b/unstuff.c
@@ -0,0 +1,124 @@
+#include <string.h>
+#include <assert.h>
+#include <mmintrin.h>
+#include <xmmintrin.h>
+#include <smmintrin.h>
+
+#include "unstuff.h"
+
+#define MARKER_CHAR 0xff
+#define STUFF_MARKER 0x00
+
+int unstuff_reference(uint8_t* dst, const uint8_t* src, size_t len)
+{
+       size_t bytes_written = 0;
+
+       for (unsigned i = 0; i < len; ++i, ++dst, ++src, ++bytes_written) {
+               *dst = *src;
+               if (*src == MARKER_CHAR) {
+                       if (i == len - 1 || src[1] != STUFF_MARKER) {
+                               return -1;
+                       }
+
+                       // Skip the stuff byte.
+                       ++src, ++i;
+               }
+       }
+
+       assert(bytes_written <= len);
+       return bytes_written;
+}
+
+int unstuff_fast(uint8_t* dst, const uint8_t* src, size_t len)
+{
+       size_t bytes_read = 0;
+       size_t bytes_written = 0;
+
+       while (len - bytes_read >= 0) {
+               // Find the first marker byte in the rest of the stream.
+               const uint8_t* ptr = memchr(src, MARKER_CHAR, len - bytes_read);
+               if (ptr == NULL) {
+                       // No marker bytes left.
+                       size_t len_to_copy = len - bytes_read;
+                       memcpy(dst, src, len_to_copy);
+                       bytes_written += len_to_copy;
+                       break;
+               }
+
+               const size_t len_to_copy = ptr - src + 1;
+               memcpy(dst, src, len_to_copy);
+
+               src += len_to_copy;
+               dst += len_to_copy;
+               bytes_read += len_to_copy;
+               bytes_written += len_to_copy;
+
+               assert(bytes_read <= len);
+               if (bytes_read == len) {
+                       // Partial marker.
+                       return -1;
+               } else {
+                       if (*src != STUFF_MARKER) {
+                               return -1;
+                       }
+                       ++src;
+                       ++bytes_read;
+               }
+       }
+
+       return bytes_written;   
+}
+
+int unstuff_sse41(uint8_t* dst, const uint8_t* src, size_t len)
+{
+       __m128i marker_search = _mm_set1_epi8(MARKER_CHAR);
+
+       size_t bytes_read = 0;
+       size_t bytes_written = 0;
+       while (len - bytes_read >= 16) {
+               __m128i data = _mm_lddqu_si128((const __m128i*)src);
+
+               // The store here is safe (if there's stuff bytes, the data
+               // will simply get overwritten in the slow path); fire it off
+               // here so it can run in parallel with the compare.
+               _mm_storeu_si128((__m128i*)dst, data);
+
+               __m128i eq_mask = _mm_cmpeq_epi8(data, marker_search);
+               if (_mm_test_all_zeros(eq_mask, eq_mask)) {
+                       // Fast path; no stuff byte found.
+                       src += 16;
+                       dst += 16;
+                       bytes_read += 16;
+                       bytes_written += 16;
+                       continue;
+               }
+
+               // We found a stuff byte. If it was the last byte, we just
+               // defer that to the next chunk. Apart from that, we just keep
+               // going one by one byte. We could perhaps speed this up with
+               // the data from eq_mask(), but we're not doing that yet.
+               size_t len_this_chunk = (src[15] == 0xff ? 15 : 16);
+               for (unsigned j = 0; j < len_this_chunk; ++j, ++dst, ++src, ++bytes_written) {
+                       *dst = *src;
+
+                       if (*src == MARKER_CHAR) {
+                               assert(j != 15);
+                               if (src[1] != STUFF_MARKER) {
+                                       return -1;
+                               }
+                                       
+                               // Skip the stuff byte.
+                               ++src, ++j;
+                       }
+               }
+               bytes_read += len_this_chunk;
+       }
+
+       // Do the final bytes via the reference path.
+       int ret = unstuff_reference(dst, src, len - bytes_read);
+       if (ret == -1) {
+               return -1;
+       } else {
+               return bytes_written + ret;
+       }
+}
diff --git a/unstuff.h b/unstuff.h
new file mode 100644 (file)
index 0000000..c3ebac3
--- /dev/null
+++ b/unstuff.h
@@ -0,0 +1,30 @@
+#ifndef _UNSTUFF_H
+#define _UNSTUFF_H 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+// Copies from src to dst, removing all instances of 0xFF 0xF0.
+//
+// Returns:
+//   Number of bytes written to dst if successful.
+//   -1 if a marker was encountered (0xFF followed by something else than 0xF0).
+//   -1 if the last byte of "src" was 0xFF. 
+
+typedef int (unstuff_func_t)(uint8_t*, const uint8_t*, size_t);
+
+// Slow reference function.
+int unstuff_reference(uint8_t* dst, const uint8_t* src, size_t len);
+
+// Faster version (assuming few stuff bytes), using C standard library
+// functions (that are hopefully more efficient, assuming few stuff bytes)
+// to find the markers.
+// Slightly over twice as fast as unstuff_reference().
+int unstuff_fast(uint8_t* dst, const uint8_t* src, size_t len);
+
+// SSE4.1 version working on 16 bytes at a time.
+// 5-10% faster than unstuff_fast().
+int unstuff_sse41(uint8_t* dst, const uint8_t* src, size_t len);
+
+#endif /* !defined(_UNSTUFF_H) */
diff --git a/unstuff_test.c b/unstuff_test.c
new file mode 100644 (file)
index 0000000..754f8bf
--- /dev/null
@@ -0,0 +1,131 @@
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <time.h>
+#include <sys/time.h>
+
+#include "unstuff.h"
+
+void test_basic_unstuff(unstuff_func_t* unstuff)
+{
+       uint8_t bytes[] = { 1, 2, 3, 4, 5, 6, 0xff, 0x00, 7, 8, 9, 0xff, 0x00, 10, 11, 12, 13, 14 };
+       uint8_t ref[] = { 1, 2, 3, 4, 5, 6, 0xff, 7, 8, 9, 0xff, 10, 11, 12, 13, 14 };
+       uint8_t dst[sizeof(bytes)];
+       int ret = (*unstuff)(dst, bytes, sizeof(bytes));
+
+       assert(ret == sizeof(ref));
+       assert(memcmp(dst, ref, ret) == 0);
+}
+
+// Stuff byte crossing the 16-byte boundaries the SSE4.1 function uses.
+void test_edge_unstuff(unstuff_func_t* unstuff)
+{
+       uint8_t bytes[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xff, 0x00, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35 };
+       uint8_t ref[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xff, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35 };
+       uint8_t dst[sizeof(bytes)];
+       int ret = (*unstuff)(dst, bytes, sizeof(bytes));
+
+       assert(ret == sizeof(ref));
+       assert(memcmp(dst, ref, ret) == 0);
+}
+
+void test_marker(unstuff_func_t* unstuff)
+{
+       uint8_t bytes[] = { 1, 2, 3, 4, 5, 6, 0xff, 0x01, 7, 8, 9, 0xff, 0x00, 10, 11, 12, 13, 14 };
+       uint8_t dst[sizeof(bytes)];
+       int ret = (*unstuff)(dst, bytes, sizeof(bytes));
+
+       assert(ret == -1);
+}
+
+void test_marker_end(unstuff_func_t* unstuff)
+{
+       uint8_t bytes[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xff };
+       uint8_t dst[sizeof(bytes)];
+       int ret = (*unstuff)(dst, bytes, sizeof(bytes));
+
+       assert(ret == -1);
+}
+
+void gen_random_stuffed_data(uint8_t* dst, size_t len)
+{
+       // Standard NR LCG (we avoid rand() to get consistent behavior across platforms).
+       uint32_t seed = 1234;
+       for (int i = 0; i < len; ++i) {
+               seed = seed * 1664525 + 1013904223;
+               uint8_t byte = (uint8_t)(seed & 0xff);
+               if (byte != 0xff) {
+                       dst[i] = byte;
+                       continue;
+               } else if (i == len - 1) {
+                       // Don't make a partial marker.
+                       dst[i] = 0x00;
+               } else {
+                       dst[i] = 0xff;
+                       dst[++i] = 0x00;
+               }
+       }
+}
+
+double timediff(const struct timeval* a, const struct timeval* b)
+{
+       return (double)(b->tv_sec - a->tv_sec) +
+               (double)(b->tv_usec - a->tv_usec) * 1e-6;
+}
+
+void test_performance(unstuff_func_t* unstuff)
+{
+       const size_t len = 4096;
+       const unsigned num_runs = 400000;
+
+       uint8_t src[len], dst[len];
+       gen_random_stuffed_data(src, len);
+
+       struct timeval start, now;
+       gettimeofday(&start, NULL);
+
+       for (int i = 0; i < num_runs; ++i) {
+               int ret = unstuff(dst, src, len);
+               assert(ret != -1);
+       }
+       
+       gettimeofday(&now, NULL);
+
+       double diff = timediff(&start, &now);
+       double mb_sec = (len * num_runs) / (1048576.0 * diff);
+       printf("%u runs with %zu bytes in %.2f seconds = %.2f MB/sec\n",
+               num_runs, len, diff, mb_sec);
+}
+
+void test_all_unstuff(unstuff_func_t* unstuff)
+{
+       printf("  test_basic_unstuff()\n");
+       test_basic_unstuff(unstuff);    
+
+       printf("  test_edge_unstuff()\n");
+       test_edge_unstuff(unstuff);
+
+       printf("  test_marker()\n");
+       test_marker(unstuff);
+
+       printf("  test_marker_end()\n");
+       test_marker_end(unstuff);
+
+       printf("  performance test: ");
+       test_performance(unstuff);
+}
+
+int main(void)
+{
+       printf("unstuff_reference:\n");
+       test_all_unstuff(unstuff_reference);
+       
+       printf("unstuff_fast:\n");
+       test_all_unstuff(unstuff_fast);
+
+       printf("unstuff_sse41:\n");
+       test_all_unstuff(unstuff_sse41);
+
+       printf("All tests pass.\n");
+       return 0;
+}