From: Steinar H. Gunderson Date: Fri, 2 Jan 2009 01:45:52 +0000 (+0100) Subject: Initial unstuff code. X-Git-Url: https://git.sesse.net/?p=fjl;a=commitdiff_plain;h=d865eae732932b202a0542f721838f3c8d9c5829;ds=sidebyside Initial unstuff code. --- d865eae732932b202a0542f721838f3c8d9c5829 diff --git a/unstuff.c b/unstuff.c new file mode 100644 index 0000000..4a69178 --- /dev/null +++ b/unstuff.c @@ -0,0 +1,124 @@ +#include +#include +#include +#include +#include + +#include "unstuff.h" + +#define MARKER_CHAR 0xff +#define STUFF_MARKER 0x00 + +int unstuff_reference(uint8_t* dst, const uint8_t* src, size_t len) +{ + size_t bytes_written = 0; + + for (unsigned i = 0; i < len; ++i, ++dst, ++src, ++bytes_written) { + *dst = *src; + if (*src == MARKER_CHAR) { + if (i == len - 1 || src[1] != STUFF_MARKER) { + return -1; + } + + // Skip the stuff byte. + ++src, ++i; + } + } + + assert(bytes_written <= len); + return bytes_written; +} + +int unstuff_fast(uint8_t* dst, const uint8_t* src, size_t len) +{ + size_t bytes_read = 0; + size_t bytes_written = 0; + + while (len - bytes_read >= 0) { + // Find the first marker byte in the rest of the stream. + const uint8_t* ptr = memchr(src, MARKER_CHAR, len - bytes_read); + if (ptr == NULL) { + // No marker bytes left. + size_t len_to_copy = len - bytes_read; + memcpy(dst, src, len_to_copy); + bytes_written += len_to_copy; + break; + } + + const size_t len_to_copy = ptr - src + 1; + memcpy(dst, src, len_to_copy); + + src += len_to_copy; + dst += len_to_copy; + bytes_read += len_to_copy; + bytes_written += len_to_copy; + + assert(bytes_read <= len); + if (bytes_read == len) { + // Partial marker. + return -1; + } else { + if (*src != STUFF_MARKER) { + return -1; + } + ++src; + ++bytes_read; + } + } + + return bytes_written; +} + +int unstuff_sse41(uint8_t* dst, const uint8_t* src, size_t len) +{ + __m128i marker_search = _mm_set1_epi8(MARKER_CHAR); + + size_t bytes_read = 0; + size_t bytes_written = 0; + while (len - bytes_read >= 16) { + __m128i data = _mm_lddqu_si128((const __m128i*)src); + + // The store here is safe (if there's stuff bytes, the data + // will simply get overwritten in the slow path); fire it off + // here so it can run in parallel with the compare. + _mm_storeu_si128((__m128i*)dst, data); + + __m128i eq_mask = _mm_cmpeq_epi8(data, marker_search); + if (_mm_test_all_zeros(eq_mask, eq_mask)) { + // Fast path; no stuff byte found. + src += 16; + dst += 16; + bytes_read += 16; + bytes_written += 16; + continue; + } + + // We found a stuff byte. If it was the last byte, we just + // defer that to the next chunk. Apart from that, we just keep + // going one by one byte. We could perhaps speed this up with + // the data from eq_mask(), but we're not doing that yet. + size_t len_this_chunk = (src[15] == 0xff ? 15 : 16); + for (unsigned j = 0; j < len_this_chunk; ++j, ++dst, ++src, ++bytes_written) { + *dst = *src; + + if (*src == MARKER_CHAR) { + assert(j != 15); + if (src[1] != STUFF_MARKER) { + return -1; + } + + // Skip the stuff byte. + ++src, ++j; + } + } + bytes_read += len_this_chunk; + } + + // Do the final bytes via the reference path. + int ret = unstuff_reference(dst, src, len - bytes_read); + if (ret == -1) { + return -1; + } else { + return bytes_written + ret; + } +} diff --git a/unstuff.h b/unstuff.h new file mode 100644 index 0000000..c3ebac3 --- /dev/null +++ b/unstuff.h @@ -0,0 +1,30 @@ +#ifndef _UNSTUFF_H +#define _UNSTUFF_H 1 + +#include +#include +#include + +// Copies from src to dst, removing all instances of 0xFF 0xF0. +// +// Returns: +// Number of bytes written to dst if successful. +// -1 if a marker was encountered (0xFF followed by something else than 0xF0). +// -1 if the last byte of "src" was 0xFF. + +typedef int (unstuff_func_t)(uint8_t*, const uint8_t*, size_t); + +// Slow reference function. +int unstuff_reference(uint8_t* dst, const uint8_t* src, size_t len); + +// Faster version (assuming few stuff bytes), using C standard library +// functions (that are hopefully more efficient, assuming few stuff bytes) +// to find the markers. +// Slightly over twice as fast as unstuff_reference(). +int unstuff_fast(uint8_t* dst, const uint8_t* src, size_t len); + +// SSE4.1 version working on 16 bytes at a time. +// 5-10% faster than unstuff_fast(). +int unstuff_sse41(uint8_t* dst, const uint8_t* src, size_t len); + +#endif /* !defined(_UNSTUFF_H) */ diff --git a/unstuff_test.c b/unstuff_test.c new file mode 100644 index 0000000..754f8bf --- /dev/null +++ b/unstuff_test.c @@ -0,0 +1,131 @@ +#include +#include +#include +#include +#include + +#include "unstuff.h" + +void test_basic_unstuff(unstuff_func_t* unstuff) +{ + uint8_t bytes[] = { 1, 2, 3, 4, 5, 6, 0xff, 0x00, 7, 8, 9, 0xff, 0x00, 10, 11, 12, 13, 14 }; + uint8_t ref[] = { 1, 2, 3, 4, 5, 6, 0xff, 7, 8, 9, 0xff, 10, 11, 12, 13, 14 }; + uint8_t dst[sizeof(bytes)]; + int ret = (*unstuff)(dst, bytes, sizeof(bytes)); + + assert(ret == sizeof(ref)); + assert(memcmp(dst, ref, ret) == 0); +} + +// Stuff byte crossing the 16-byte boundaries the SSE4.1 function uses. +void test_edge_unstuff(unstuff_func_t* unstuff) +{ + uint8_t bytes[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xff, 0x00, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35 }; + uint8_t ref[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xff, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35 }; + uint8_t dst[sizeof(bytes)]; + int ret = (*unstuff)(dst, bytes, sizeof(bytes)); + + assert(ret == sizeof(ref)); + assert(memcmp(dst, ref, ret) == 0); +} + +void test_marker(unstuff_func_t* unstuff) +{ + uint8_t bytes[] = { 1, 2, 3, 4, 5, 6, 0xff, 0x01, 7, 8, 9, 0xff, 0x00, 10, 11, 12, 13, 14 }; + uint8_t dst[sizeof(bytes)]; + int ret = (*unstuff)(dst, bytes, sizeof(bytes)); + + assert(ret == -1); +} + +void test_marker_end(unstuff_func_t* unstuff) +{ + uint8_t bytes[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xff }; + uint8_t dst[sizeof(bytes)]; + int ret = (*unstuff)(dst, bytes, sizeof(bytes)); + + assert(ret == -1); +} + +void gen_random_stuffed_data(uint8_t* dst, size_t len) +{ + // Standard NR LCG (we avoid rand() to get consistent behavior across platforms). + uint32_t seed = 1234; + for (int i = 0; i < len; ++i) { + seed = seed * 1664525 + 1013904223; + uint8_t byte = (uint8_t)(seed & 0xff); + if (byte != 0xff) { + dst[i] = byte; + continue; + } else if (i == len - 1) { + // Don't make a partial marker. + dst[i] = 0x00; + } else { + dst[i] = 0xff; + dst[++i] = 0x00; + } + } +} + +double timediff(const struct timeval* a, const struct timeval* b) +{ + return (double)(b->tv_sec - a->tv_sec) + + (double)(b->tv_usec - a->tv_usec) * 1e-6; +} + +void test_performance(unstuff_func_t* unstuff) +{ + const size_t len = 4096; + const unsigned num_runs = 400000; + + uint8_t src[len], dst[len]; + gen_random_stuffed_data(src, len); + + struct timeval start, now; + gettimeofday(&start, NULL); + + for (int i = 0; i < num_runs; ++i) { + int ret = unstuff(dst, src, len); + assert(ret != -1); + } + + gettimeofday(&now, NULL); + + double diff = timediff(&start, &now); + double mb_sec = (len * num_runs) / (1048576.0 * diff); + printf("%u runs with %zu bytes in %.2f seconds = %.2f MB/sec\n", + num_runs, len, diff, mb_sec); +} + +void test_all_unstuff(unstuff_func_t* unstuff) +{ + printf(" test_basic_unstuff()\n"); + test_basic_unstuff(unstuff); + + printf(" test_edge_unstuff()\n"); + test_edge_unstuff(unstuff); + + printf(" test_marker()\n"); + test_marker(unstuff); + + printf(" test_marker_end()\n"); + test_marker_end(unstuff); + + printf(" performance test: "); + test_performance(unstuff); +} + +int main(void) +{ + printf("unstuff_reference:\n"); + test_all_unstuff(unstuff_reference); + + printf("unstuff_fast:\n"); + test_all_unstuff(unstuff_fast); + + printf("unstuff_sse41:\n"); + test_all_unstuff(unstuff_sse41); + + printf("All tests pass.\n"); + return 0; +}