Also fixed a bug in the "slow source".
CC=gcc
-CFLAGS=-std=gnu99 -O2 -msse4.1 -g
+CFLAGS=-std=gnu99 -msse4.1 -g
LDFLAGS=
all: tests
dehuff_test: $(DEHUFF_TEST_OBJS)
$(CC) $(LDFLAGS) -o $@ $(DEHUFF_TEST_OBJS)
-tests: unstuff_test input_test dehuff_test
+# Object files linked into the bytesource_test binary.
+BYTESOURCE_TEST_OBJS=bytesource.o choice.o unstuff.o bytesource_test.o
+# Link the byte-source unit test from the objects listed above.
+bytesource_test: $(BYTESOURCE_TEST_OBJS)
+ $(CC) $(LDFLAGS) -o $@ $(BYTESOURCE_TEST_OBJS)
+
+tests: unstuff_test input_test dehuff_test bytesource_test
+# Remove each test binary together with the object files it is linked from.
clean:
$(RM) $(UNSTUFF_TEST_OBJS) unstuff_test
$(RM) $(INPUT_TEST_OBJS) input_test
$(RM) $(DEHUFF_TEST_OBJS) dehuff_test
+ $(RM) $(BYTESOURCE_TEST_OBJS) bytesource_test
+# Build every test binary (via `tests`), then run them one after another.
test: tests
./unstuff_test
./input_test
./dehuff_test
+ ./bytesource_test
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+
+#include "choice.h"
+#include "bytesource.h"
+
+#define MARKER_CHAR 0xff
+#define STUFF_MARKER 0x00
+
+// Prepares `source` for use: zeroes the whole struct, allocates its
+// BYTESOURCE_CHUNK_SIZE-byte staging buffer, and records the upstream read
+// function together with the opaque `userdata` pointer handed to it.
+//
+// The function has no way to report failure, so an allocation failure
+// terminates the process instead of leaving a NULL buffer behind for the
+// first read to dereference.
+void init_byte_source(struct byte_source* source, raw_input_func_t* input_func, void* userdata)
+{
+	memset(source, 0, sizeof(*source));
+
+	// TODO: should this size be a different constant?
+	source->bytes = (uint8_t*)malloc(BYTESOURCE_CHUNK_SIZE);
+	if (source->bytes == NULL) {
+		perror("malloc");
+		abort();
+	}
+
+	source->input_func = input_func;
+	source->userdata = userdata;
+}
+
+// Consumes the two-byte marker sitting at the front of the buffer and
+// returns its second byte (the marker code). The caller must only invoke
+// this when the buffer really starts with MARKER_CHAR followed by something
+// other than STUFF_MARKER; the asserts below check exactly that.
+uint8_t byte_source_read_marker(struct byte_source* source)
+{
+	assert(source->bytes_available >= 2);
+	assert(source->bytes[0] == MARKER_CHAR);
+	assert(source->bytes[1] != STUFF_MARKER);
+
+	const uint8_t marker_code = source->bytes[1];
+
+	// Slide whatever follows the marker down to the start of the buffer.
+	const unsigned remaining = source->bytes_available - 2;
+	memmove(source->bytes, source->bytes + 2, remaining);
+	source->bytes_available = remaining;
+
+	return marker_code;
+}
+
+// Input function that hands out unstuffed bytes from a byte_source.
+//
+//   source: the struct byte_source to read from, passed as void*.
+//   buf:    destination for the unstuffed bytes.
+//   len:    upper bound on how many raw bytes are requested from upstream.
+//
+// Returns the number of bytes written to `buf`; 0 on upstream EOF or when the
+// buffered data starts with a marker (call byte_source_read_marker() to get
+// past it); -1 on an upstream read error or on EOF in the middle of a marker.
+//
+// NOTE(review): `len` only limits the upstream read. Bytes left in the buffer
+// after a marker are unstuffed into `buf` regardless of `len`, so this looks
+// like it assumes `buf` can hold BYTESOURCE_CHUNK_SIZE bytes -- confirm
+// against callers.
+ssize_t byte_source_input_func(void* source, uint8_t* buf, size_t len)
+{
+	struct byte_source* src = (struct byte_source*)source;
+
+	// If there's no data in the buffer (or only a partial marker), we have
+	// to read in more from our upstream src.
+	while (src->bytes_available == 0 ||
+	       (src->bytes_available == 1 && src->bytes[0] == MARKER_CHAR)) {
+		const unsigned space_left = BYTESOURCE_CHUNK_SIZE - src->bytes_available;
+		const size_t bytes_to_read = (len > space_left ? space_left : len);
+		assert(bytes_to_read <= BYTESOURCE_CHUNK_SIZE);
+		const ssize_t bytes_read =
+			(*src->input_func)(src->userdata,
+			                   src->bytes + src->bytes_available,
+			                   bytes_to_read);
+		assert(bytes_read >= -1);
+
+		// Handle the error/EOF returns before comparing against the
+		// unsigned bytes_to_read: a signed -1 would otherwise be converted
+		// to a huge unsigned value and trip the range assert below.
+		if (bytes_read < 0) {
+			return -1;
+		}
+		if (bytes_read == 0) {
+			if (src->bytes_available == 1) {
+				// EOF in the middle of a marker => read error
+				return -1;
+			}
+			assert(src->bytes_available == 0);
+			return 0;
+		}
+		assert((size_t)bytes_read <= bytes_to_read);
+
+		src->bytes_available += bytes_read;
+	}
+
+	// Now unstuff as much as we can. First of all, if there's a 0xFF at the
+	// end of the buffer, we don't include it this time; the unstuff function
+	// will only give us an error since it can't decide if it's a marker or
+	// a stuffed 0xFF.
+	unsigned bytes_to_unstuff = src->bytes_available;
+	bool end_marker = false;
+	assert(bytes_to_unstuff > 0);
+	if (src->bytes[bytes_to_unstuff - 1] == MARKER_CHAR) {
+		--bytes_to_unstuff;
+		end_marker = true;
+	}
+
+	int unstuffed_bytes = (*unstuff_choice)(buf, src->bytes, bytes_to_unstuff);
+	assert(unstuffed_bytes != 0);
+	if (unstuffed_bytes > 0) {
+		// Fast path: No markers in the data. We can basically just
+		// return it, keeping only a trailing 0xFF (if any) for next time.
+		if (end_marker) {
+			src->bytes_available = 1;
+			src->bytes[0] = MARKER_CHAR;
+		} else {
+			src->bytes_available = 0;
+			src->bytes[0] = 0;
+		}
+		return unstuffed_bytes;
+	}
+
+	// Slow path: There was a marker in the data. Unstuff manually until
+	// we hit the marker, then return what came before it.
+	assert(unstuffed_bytes == -1);
+	unsigned bytes_read;
+	unsigned bytes_written = 0;
+	for (bytes_read = 0; bytes_read < src->bytes_available; ++bytes_read) {
+		const uint8_t byte = src->bytes[bytes_read];
+		if (byte == MARKER_CHAR) {
+			if (bytes_read + 1 >= src->bytes_available) {
+				// 0xFF is the last buffered byte, so we cannot tell yet
+				// whether it is stuffed or a marker. Leave it in the
+				// buffer instead of peeking past the valid data.
+				break;
+			}
+			if (src->bytes[bytes_read + 1] != STUFF_MARKER) {
+				// OK, this is our marker. It stays in the buffer for
+				// byte_source_read_marker().
+				break;
+			}
+			// Stuffed 0xFF: skip the stuff byte and emit the 0xFF below.
+			++bytes_read;
+		}
+		buf[bytes_written++] = byte;
+	}
+
+	// Drop the consumed prefix; the buffer now starts at the marker (or at
+	// the undecided trailing 0xFF).
+	memmove(src->bytes, src->bytes + bytes_read, src->bytes_available - bytes_read);
+	src->bytes_available -= bytes_read;
+	return bytes_written;
+}
--- /dev/null
+#ifndef _BYTESOURCE_H
+#define _BYTESOURCE_H 1
+
+#include <stdint.h>
+#include <sys/types.h>
+
+static const unsigned BYTESOURCE_CHUNK_SIZE = 4096;
+
+// Same as input_func_t, although it expects raw bytes, ie. with markers
+// and all.
+typedef ssize_t (raw_input_func_t)(void*, uint8_t*, size_t);
+
+// A data source taking in a byte stream and returning unstuffed bytes until
+// there's a marker. When there's a marker, it artificially returns EOF until
+// byte_source_read_marker() is called.
+struct byte_source {
+ // Staging buffer for raw (still stuffed) input; init_byte_source()
+ // allocates BYTESOURCE_CHUNK_SIZE bytes for it.
+ uint8_t* bytes;
+ // Number of valid bytes currently held at the start of `bytes`.
+ unsigned bytes_available;
+
+ // Upstream function the raw bytes are pulled from, and the opaque
+ // pointer passed to it as its first argument.
+ raw_input_func_t* input_func;
+ void* userdata;
+};
+// Zeroes `source`, allocates its buffer and stores the upstream reader.
+void init_byte_source(struct byte_source* source, raw_input_func_t* input_func, void* userdata);
+// Removes the two-byte marker at the front of the buffer and returns its
+// second byte. Asserts that the buffer really starts with a marker.
+uint8_t byte_source_read_marker(struct byte_source* source);
+// Reads unstuffed bytes into `buf`; `source` is a struct byte_source*.
+// Returns the byte count, 0 at EOF or at a marker, -1 on error.
+ssize_t byte_source_input_func(void* source, uint8_t* buf, size_t len);
+
+#endif /* !defined(_BYTESOURCE_H) */
--- /dev/null
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "bytesource.h"
+#include "choice.h"
+
+struct custom_read_userdata {
+ uint8_t* bytes;
+ unsigned bytes_left;
+};
+
+// Test reader: copies as many of the remaining bytes as fit into `count`,
+// advances the cursor in `userdata` and returns how many bytes were copied
+// (0 once the data is exhausted).
+ssize_t custom_read(void* userdata, uint8_t* buf, size_t count)
+{
+	struct custom_read_userdata* state = (struct custom_read_userdata*)userdata;
+
+	// Clamp the request to what is actually left.
+	size_t chunk = state->bytes_left;
+	if (chunk > count) {
+		chunk = count;
+	}
+
+	memcpy(buf, state->bytes, chunk);
+	state->bytes_left -= chunk;
+	state->bytes += chunk;
+	return chunk;
+}
+
+// Test reader that trickles data: hands out at most one byte per call, and
+// returns 0 when either the caller asked for nothing or no data is left.
+ssize_t custom_read_slow(void* userdata, uint8_t* buf, size_t count)
+{
+	struct custom_read_userdata* state = (struct custom_read_userdata*)userdata;
+
+	if (count == 0 || state->bytes_left == 0) {
+		return 0;
+	}
+
+	buf[0] = state->bytes[0];
+	++state->bytes;
+	--state->bytes_left;
+	return 1;
+}
+
+// Raw test input: two streams, separated by a marker. Each stream contains
+// one stuffed 0xff (0xff followed by 0x00).
+uint8_t bytes[] = {
+ 0x01, 0x02, 0x03, 0xff, 0x00, 0x04, // some bytes
+ 0xff, 0xf7, // a marker
+ 0x05, 0x06, 0x07, 0x08, 0xff, 0x00 // more bytes
+};
+
+// Expected data: the two streams above with the stuff bytes removed and
+// the marker left out.
+uint8_t first_stream[] = {
+ 0x01, 0x02, 0x03, 0xff, 0x04
+};
+uint8_t second_stream[] = {
+ 0x05, 0x06, 0x07, 0x08, 0xff
+};
+
+// Reading with a regular source.
+// Reads both streams through a source that delivers everything at once:
+// each stream must come back in a single call, followed by an EOF, with the
+// marker retrievable in between.
+void test_basic_reading()
+{
+	struct custom_read_userdata ud;
+	ud.bytes = bytes;
+	ud.bytes_left = sizeof(bytes);
+
+	struct byte_source source;
+	init_byte_source(&source, custom_read, &ud);
+
+	uint8_t buf[4096];
+	ssize_t got;
+
+	// First stream arrives in one go...
+	got = byte_source_input_func(&source, buf, sizeof(buf));
+	assert(got == sizeof(first_stream));
+	assert(memcmp(buf, first_stream, sizeof(first_stream)) == 0);
+
+	// ...then the marker shows up as an EOF.
+	got = byte_source_input_func(&source, buf, sizeof(buf));
+	assert(got == 0);
+
+	// Step over the marker.
+	const uint8_t marker = byte_source_read_marker(&source);
+	assert(marker == 0xf7);
+
+	// Second stream, again in one go...
+	got = byte_source_input_func(&source, buf, sizeof(buf));
+	assert(got == sizeof(second_stream));
+	assert(memcmp(buf, second_stream, sizeof(second_stream)) == 0);
+
+	// ...and the real EOF at the end of the input.
+	got = byte_source_input_func(&source, buf, sizeof(buf));
+	assert(got == 0);
+}
+
+// Reading with a slow source.
+// Reading with a slow source: the upstream reader yields one raw byte per
+// call, so every call to byte_source_input_func() must produce exactly one
+// unstuffed byte until the marker / end of input is reached.
+void test_slow_source()
+{
+	struct custom_read_userdata ud;
+	ud.bytes = bytes;
+	ud.bytes_left = sizeof(bytes);
+
+	struct byte_source source;
+	init_byte_source(&source, custom_read_slow, &ud);
+
+	uint8_t buf[4096];
+	ssize_t ret;
+
+	// Read the first stream. Since the source is slow, we'll get one by
+	// one byte, even though we asked for 4096.
+	for (unsigned i = 0; i < sizeof(first_stream); ++i) {
+		ret = byte_source_input_func(&source, buf, 4096);
+		assert(ret == 1);
+		assert(buf[0] == first_stream[i]);
+	}
+
+	// Now we should get EOF.
+	ret = byte_source_input_func(&source, buf, 4096);
+	assert(ret == 0);
+
+	// Read the marker.
+	uint8_t marker = byte_source_read_marker(&source);
+	assert(marker == 0xf7);
+
+	// Read the second stream, reusing the outer buf/ret rather than
+	// shadowing them with loop-local copies.
+	for (unsigned i = 0; i < sizeof(second_stream); ++i) {
+		ret = byte_source_input_func(&source, buf, 4096);
+		assert(ret == 1);
+		assert(buf[0] == second_stream[i]);
+	}
+
+	// ...and EOF again.
+	ret = byte_source_input_func(&source, buf, 4096);
+	assert(ret == 0);
+}
+
+// Test driver: initialises the implementation choices, then runs each test
+// case in order, announcing it on stdout first. A failing assert aborts the
+// run, so reaching the final message means everything passed.
+int main(void)
+{
+	static const struct {
+		const char* banner;
+		void (*run)();
+	} cases[] = {
+		{ "test_basic_reading()\n", test_basic_reading },
+		{ "test_slow_source()\n", test_slow_source },
+	};
+
+	init_choices();
+
+	for (unsigned i = 0; i < sizeof(cases) / sizeof(cases[0]); ++i) {
+		fputs(cases[i].banner, stdout);
+		cases[i].run();
+	}
+
+	fputs("All tests pass.\n", stdout);
+	return 0;
+}
#include <stdlib.h>
#include <assert.h>
+#include "bytesource.h"
#include "dehuff.h"
void reliable_read(raw_input_func_t* input_func, void* userdata, uint8_t* buf, size_t len)
#include <stdint.h>
#include <sys/types.h>
+#include "bytesource.h"
#include "input.h"
// About 99% of all Huffman codes are <= 8 bits long (see codelen.txt),
#define DEHUF_TABLE_SIZE (1 << DEHUF_TABLE_BITS)
static const int DEHUF_SLOW_PATH = -1;
-// A function to read bytes from some input source. The bytes should be
-// already unstuffed (and thus without markers).
-// A return value of -1 indicates error, a return value of 0 indicates EOF.
-typedef ssize_t (raw_input_func_t)(void*, uint8_t*, size_t);
-
struct huffman_table {
unsigned num_codes[17]; // BITS
unsigned char codes[256]; // HUFFVAL
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#include <string.h>
#include "input.h"
+#define MARKER_CHAR 0xff
+#define STUFF_MARKER 0x00
+
void init_bit_source(struct bit_source* source, input_func_t* input_func, void* userdata)
{
memset(source, 0, sizeof(*source));
ssize_t custom_read_slow(void* userdata, uint8_t* buf, size_t count)
{
struct custom_read_userdata* ud = (struct custom_read_userdata*)userdata;
- size_t num_to_read = (count > 0 ? 1 : 0);
+ size_t num_to_read = ((count > 0 && ud->bytes_left > 0) ? 1 : 0);
memcpy(buf, ud->bytes, num_to_read);
ud->bytes += num_to_read;
ud->bytes_left -= num_to_read;