// Byte value the unstuffing routines in this file look for in their input
// (each routine compares input bytes against it).
9 #define MARKER_CHAR 0xff
// Byte value the routines compare against the byte that directly follows a
// MARKER_CHAR; the code comments refer to that following byte as the
// "stuff byte" and skip over it.
10 #define STUFF_MARKER 0x00
// Byte-at-a-time unstuffing routine. unstuff_sse41() also calls it to process
// the tail of its input.
//
// dst: output buffer, advanced once per loop iteration.
// src: input buffer, `len` bytes long.
// len: number of input bytes to process.
//
// NOTE(review): this excerpt does not show every line of the body (the write
// to dst, the body of the error branch and the return statement are missing),
// so the return value is not documented here. unstuff_sse41() adds it to a
// byte count, which suggests it is the number of bytes written -- confirm
// against the full source.
12 int unstuff_reference(uint8_t* dst, const uint8_t* src, size_t len)
14 size_t bytes_written = 0;
// i counts consumed input bytes; dst, src and bytes_written all advance
// together once per iteration.
// NOTE(review): i is `unsigned` while len is `size_t`; where size_t is wider
// than unsigned, a len above UINT_MAX would make i wrap before reaching len.
16 for (unsigned i = 0; i < len; ++i, ++dst, ++src, ++bytes_written) {
18 if (*src == MARKER_CHAR) {
// The i == len - 1 test comes first so that, thanks to short-circuiting,
// src[1] is never read when the marker is the very last input byte.
// Presumably this branch is the error path (marker not followed by a
// stuff byte) -- its body is not visible here.
19 if (i == len - 1 || src[1] != STUFF_MARKER) {
23 // Skip the stuff byte.
// Sanity check on the loop's bookkeeping: the count of bytes written must
// not exceed the input length.
28 assert(bytes_written <= len);
// Unstuffing routine built on the C library: memchr() locates the next
// MARKER_CHAR and memcpy() bulk-copies the bytes in front of it, instead of
// inspecting the input one byte at a time.
//
// dst: output buffer.
// src: input buffer, `len` bytes long.
// len: number of input bytes to process.
//
// NOTE(review): the enclosing loop, the declaration of dptr, the updates of
// sptr/dptr/len/bytes_written and the return statements are not part of this
// excerpt; the comments below describe only the visible statements.
32 int unstuff_fast(uint8_t* dst, const uint8_t* src, size_t len)
34 size_t bytes_written = 0;
// Read cursor into the input.
35 const uint8_t* sptr = src;
39 // Find the first marker byte in the rest of the stream.
40 const uint8_t* ptr = memchr(sptr, MARKER_CHAR, len);
42 // No marker bytes left; copy the remaining len bytes in one go.
43 memcpy(dptr, sptr, len);
// Copy everything up to and including the marker byte itself (hence the + 1).
48 const size_t len_to_copy = ptr - sptr + 1;
49 memcpy(dptr, sptr, len_to_copy);
// Compares the byte at the read cursor against STUFF_MARKER.
// NOTE(review): presumably sptr has been advanced past the marker by this
// point, and the remaining length checked so this read stays in bounds --
// neither is visible in this excerpt; confirm.
59 if (*sptr != STUFF_MARKER) {
// Vectorised unstuffing routine. Loads the input 16 bytes at a time, stores
// each chunk to the output unconditionally, and checks the chunk for
// MARKER_CHAR with a single byte-wise compare. Chunks containing a marker are
// re-processed byte by byte; whatever input remains after the chunk loop is
// handed to unstuff_reference().
//
// dst: output buffer.
// src: input buffer, `len` bytes long.
// len: number of input bytes to process.
//
// Uses _mm_test_all_zeros(), an SSE4.1 intrinsic.
//
// NOTE(review): the chunk loop header, the declaration of dptr, the fast-path
// pointer updates and the error returns are not part of this excerpt.
70 int unstuff_sse41(uint8_t* dst, const uint8_t* src, size_t len)
// MARKER_CHAR replicated into all 16 byte lanes, for the compare below.
72 __m128i marker_search = _mm_set1_epi8(MARKER_CHAR);
// Read cursor into the input.
74 const uint8_t* sptr = src;
// Unaligned 16-byte load of the current chunk.
77 __m128i data = _mm_lddqu_si128((const __m128i*)sptr);
79 // The store here is safe (if there are stuff bytes, the data
80 // will simply get overwritten in the slow path); fire it off
81 // here so it can run in parallel with the compare.
82 _mm_storeu_si128((__m128i*)dptr, data);
// Lanes of eq_mask are all-ones where the input byte equals MARKER_CHAR.
84 __m128i eq_mask = _mm_cmpeq_epi8(data, marker_search);
85 if (_mm_test_all_zeros(eq_mask, eq_mask)) {
86 // Fast path; no marker byte found in this chunk.
93 // We found a marker byte. If it was the last byte, we just
94 // defer that to the next chunk. Apart from that, we just keep
95 // going byte by byte. We could perhaps speed this up with
96 // the data in eq_mask, but we're not doing that yet.
// NOTE(review): the literal 0xff has the same value as MARKER_CHAR; the
// named constant would make the intent clearer.
97 size_t len_this_chunk = (sptr[15] == 0xff ? 15 : 16);
98 for (unsigned j = 0; j < len_this_chunk; ++j, ++dptr, ++sptr) {
101 if (*sptr == MARKER_CHAR) {
// Checks the byte following the marker. Presumably this stays inside the
// loaded chunk because a marker in the chunk's last position is excluded
// via len_this_chunk -- confirm against the full loop body.
103 if (sptr[1] != STUFF_MARKER) {
107 // Skip the stuff byte.
// Account for the input bytes consumed from this chunk.
111 len -= len_this_chunk;
114 // Do the final bytes via the reference path.
115 int ret = unstuff_reference(dptr, sptr, len);
// Output bytes produced by the chunk loop plus the tail's result.
// NOTE(review): any check of ret between the call above and this return is
// not visible in this excerpt.
119 return (dptr - dst) + ret;