Some asm() correction.
[fjl] / unstuff.c
1 #include <string.h>
2 #include <assert.h>
3 #include <mmintrin.h>
4 #include <xmmintrin.h>
5 #include <smmintrin.h>
6
7 #include "unstuff.h"
8
// Byte value that must always be followed by STUFF_MARKER in the input.
#define MARKER_CHAR 0xff
// Padding byte expected directly after every MARKER_CHAR; it is dropped
// from the output.
#define STUFF_MARKER 0x00

// Byte-at-a-time unstuffing; also used for the tail of unstuff_sse41().
//
// Copies `len` bytes from `src` to `dst`, removing the STUFF_MARKER byte
// that follows each MARKER_CHAR. `dst` needs room for at most `len` bytes.
//
// Returns the number of bytes written to `dst`, or -1 if a MARKER_CHAR is
// the last input byte or is followed by anything other than STUFF_MARKER.
// On failure, `dst` may already hold a partially written result (including
// the offending marker byte itself).
//
// NOTE: the result is reported as an int, so outputs larger than INT_MAX
// cannot be represented faithfully.
int unstuff_reference(uint8_t* dst, const uint8_t* src, size_t len)
{
        size_t bytes_written = 0;

        // The index has the same type as `len`; a narrower unsigned index
        // would wrap around and never terminate for len > UINT_MAX.
        for (size_t i = 0; i < len; ++i, ++dst, ++src, ++bytes_written) {
                *dst = *src;
                if (*src == MARKER_CHAR) {
                        // `len` is non-zero inside the loop, so `len - 1`
                        // cannot underflow. The short-circuit also keeps
                        // src[1] from being read past the end of the input.
                        if (i == len - 1 || src[1] != STUFF_MARKER) {
                                return -1;
                        }

                        // Skip the stuff byte.
                        ++src, ++i;
                }
        }

        assert(bytes_written <= len);
        return (int)bytes_written;
}
31
32 int unstuff_fast(uint8_t* dst, const uint8_t* src, size_t len)
33 {
34         const uint8_t* sptr = src;
35         uint8_t* dptr = dst;
36
37         while (len > 0) {
38                 // Find the first marker byte in the rest of the stream.
39                 const uint8_t* ptr = memchr(sptr, MARKER_CHAR, len);
40                 if (ptr == NULL) {
41                         // No marker bytes left.
42                         memcpy(dptr, sptr, len);
43                         dptr += len;
44                         break;
45                 }
46
47                 const size_t len_to_copy = ptr - sptr + 1;
48                 memcpy(dptr, sptr, len_to_copy);
49
50                 sptr += len_to_copy;
51                 dptr += len_to_copy;
52                 len -= len_to_copy;
53
54                 if (len == 0) {
55                         // Partial marker.
56                         return -1;
57                 } else {
58                         if (*sptr != STUFF_MARKER) {
59                                 return -1;
60                         }
61                         ++sptr;
62                         --len;
63                 }
64         }
65
66         return dptr - dst;      
67 }
68
69 int unstuff_sse41(uint8_t* dst, const uint8_t* src, size_t len)
70 {
71         __m128i marker_search = _mm_set1_epi8(MARKER_CHAR);
72
73         const uint8_t* sptr = src;
74         uint8_t* dptr = dst;
75         while (len >= 16) {
76                 __m128i data = _mm_lddqu_si128((const __m128i*)sptr);
77
78                 // The store here is safe (if there's stuff bytes, the data
79                 // will simply get overwritten in the slow path); fire it off
80                 // here so it can run in parallel with the compare.
81                 _mm_storeu_si128((__m128i*)dptr, data);
82
83                 __m128i eq_mask = _mm_cmpeq_epi8(data, marker_search);
84                 if (_mm_test_all_zeros(eq_mask, eq_mask)) {
85                         // Fast path; no stuff byte found.
86                         sptr += 16;
87                         dptr += 16;
88                         len -= 16;
89                         continue;
90                 }
91
92                 // We found a stuff byte. If it was the last byte, we just
93                 // defer that to the next chunk. Apart from that, we just keep
94                 // going one by one byte. We could perhaps speed this up with
95                 // the data from eq_mask(), but we're not doing that yet.
96                 size_t len_this_chunk = (sptr[15] == 0xff ? 15 : 16);
97                 for (unsigned j = 0; j < len_this_chunk; ++j, ++dptr, ++sptr) {
98                         *dptr = *sptr;
99
100                         if (*sptr == MARKER_CHAR) {
101                                 assert(j != 15);
102                                 if (sptr[1] != STUFF_MARKER) {
103                                         return -1;
104                                 }
105                                         
106                                 // Skip the stuff byte.
107                                 ++sptr, ++j;
108                         }
109                 }
110                 len -= len_this_chunk;
111         }
112
113         // Do the final bytes via the reference path.
114         int ret = unstuff_reference(dptr, sptr, len);
115         if (ret == -1) {
116                 return -1;
117         } else {
118                 return (dptr - dst) + ret;
119         }
120 }