// Byte value the unstuff_* routines look for in the input stream. Each
// occurrence is checked for a following STUFF_MARKER byte.
// NOTE(review): this looks like JPEG-style 0xFF 0x00 byte stuffing -- confirm
// against the rest of the project.
9 #define MARKER_CHAR 0xff
// Byte that is compared against the position right after a MARKER_CHAR; the
// code's own comments call it the "stuff byte" and say it gets skipped.
10 #define STUFF_MARKER 0x00
// Reference (byte-at-a-time) unstuffing routine: walks `len` bytes of `src`
// and produces output in `dst`, dropping the stuff byte after each marker.
//
//   dst  output buffer; advanced once per loop iteration.
//   src  input buffer of `len` bytes.
//   len  number of input bytes to process.
//
// NOTE(review): the return statements are not visible in this excerpt.
// Presumably the function returns the number of bytes written on success and
// a negative value for a marker not followed by STUFF_MARKER -- confirm.
12 int unstuff_reference(uint8_t* dst, const uint8_t* src, size_t len)
// Counts loop iterations, i.e. one per output position.
14 size_t bytes_written = 0;
// NOTE(review): `i` is unsigned while `len` is size_t; where size_t is wider
// than unsigned, `i` would wrap before reaching a `len` above UINT_MAX.
16 for (unsigned i = 0; i < len; ++i, ++dst, ++src, ++bytes_written) {
18 if (*src == MARKER_CHAR) {
// The `i == len - 1` test comes first so that src[1] is never read when the
// marker is the last input byte (short-circuit evaluation).
19 if (i == len - 1 || src[1] != STUFF_MARKER) {
23 // Skip the stuff byte.
// Output never exceeds the input length: skipped stuff bytes only shrink it.
28 assert(bytes_written <= len);
// Unstuffing variant built on memchr()/memcpy(): instead of inspecting every
// byte, it locates the next MARKER_CHAR and bulk-copies the run before it.
// Same signature as unstuff_reference().
//
// NOTE(review): the loop header, the declaration of `dptr`, the pointer/length
// updates and the return statements are not visible in this excerpt.
32 int unstuff_fast(uint8_t* dst, const uint8_t* src, size_t len)
// Read cursor into the input.
34 const uint8_t* sptr = src;
38 // Find the first marker byte in the rest of the stream.
39 const uint8_t* ptr = memchr(sptr, MARKER_CHAR, len);
41 // No marker bytes left; copy everything that remains in one go.
42 memcpy(dptr, sptr, len);
// Copy the run up to and including the marker byte itself (hence the +1).
47 const size_t len_to_copy = ptr - sptr + 1;
48 memcpy(dptr, sptr, len_to_copy);
// Presumably `sptr` has been advanced past the copied run by this point, so
// this tests the byte that follows the marker -- TODO confirm.
58 if (*sptr != STUFF_MARKER) {
// SSE4.1 unstuffing variant: handles the input in 16-byte chunks, copying a
// chunk verbatim when it contains no MARKER_CHAR and falling back to a
// byte-by-byte loop otherwise. The remaining tail is delegated to
// unstuff_reference(). Same signature as unstuff_reference().
//
// NOTE(review): the outer loop header, the declaration of `dptr`, the body of
// the fast path and the error returns are not visible in this excerpt.
69 int unstuff_sse41(uint8_t* dst, const uint8_t* src, size_t len)
// MARKER_CHAR broadcast to all 16 byte lanes, used as the compare operand.
71 __m128i marker_search = _mm_set1_epi8(MARKER_CHAR);
73 const uint8_t* sptr = src;
// Unaligned 16-byte load of the current chunk.
76 __m128i data = _mm_lddqu_si128((const __m128i*)sptr);
78 // The store here is safe (if there are stuff bytes, the data
79 // will simply get overwritten in the slow path); fire it off
80 // here so it can run in parallel with the compare.
// Note that this always writes 16 bytes at dptr.
81 _mm_storeu_si128((__m128i*)dptr, data);
// Lanes equal to MARKER_CHAR become 0xff in eq_mask, all other lanes 0x00.
83 __m128i eq_mask = _mm_cmpeq_epi8(data, marker_search);
84 if (_mm_test_all_zeros(eq_mask, eq_mask)) {
85 // Fast path; no marker byte found in this chunk.
92 // We found a marker byte in this chunk. If it was the last byte,
93 // we just defer that to the next chunk. Apart from that, we just
94 // keep going byte by byte. We could perhaps speed this up with
95 // the data from eq_mask, but we're not doing that yet.
// NOTE(review): the literal 0xff below is the same value as MARKER_CHAR.
96 size_t len_this_chunk = (sptr[15] == 0xff ? 15 : 16);
97 for (unsigned j = 0; j < len_this_chunk; ++j, ++dptr, ++sptr) {
100 if (*sptr == MARKER_CHAR) {
// Because a marker in the chunk's last byte was deferred above, this lookahead
// appears to stay within the 16 bytes already loaded -- TODO confirm against
// the hidden skip logic.
102 if (sptr[1] != STUFF_MARKER) {
106 // Skip the stuff byte.
110 len -= len_this_chunk;
113 // Do the final bytes via the reference path.
114 int ret = unstuff_reference(dptr, sptr, len);
// Bytes produced by the chunked loop plus whatever the reference path
// reported. NOTE(review): if `ret` can be negative, the hidden lines before
// this return presumably handle it; otherwise the sum would mask the error.
118 return (dptr - dst) + ret;