From: Steinar H. Gunderson
Date: Sat, 3 Jan 2009 01:15:21 +0000 (+0100)
Subject: Add a new input source converting JPEG-format bytes into unstuffed bytes.
X-Git-Url: https://git.sesse.net/?p=fjl;a=commitdiff_plain;h=30860bda5fd2474a3b45b05f6b89dcf7230a75a8

Add a new input source converting JPEG-format bytes into unstuffed bytes.

Also fixed a bug in the "slow source".
---

diff --git a/Makefile b/Makefile
index 37dfc95..e1c226a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 CC=gcc
-CFLAGS=-std=gnu99 -O2 -msse4.1 -g
+CFLAGS=-std=gnu99 -msse4.1 -g
 LDFLAGS=
 
 all: tests
@@ -16,13 +16,19 @@ DEHUFF_TEST_OBJS=dehuff.o input.o dehuff_test.o
 dehuff_test: $(DEHUFF_TEST_OBJS)
 	$(CC) $(LDFLAGS) -o $@ $(DEHUFF_TEST_OBJS)
 
-tests: unstuff_test input_test dehuff_test
+BYTESOURCE_TEST_OBJS=bytesource.o choice.o unstuff.o bytesource_test.o
+bytesource_test: $(BYTESOURCE_TEST_OBJS)
+	$(CC) $(LDFLAGS) -o $@ $(BYTESOURCE_TEST_OBJS)
+
+tests: unstuff_test input_test dehuff_test bytesource_test
 clean:
 	$(RM) $(UNSTUFF_TEST_OBJS) unstuff_test
 	$(RM) $(INPUT_TEST_OBJS) input_test
 	$(RM) $(DEHUFF_TEST_OBJS) dehuff_test
+	$(RM) $(BYTESOURCE_TEST_OBJS) bytesource_test
 
 test: tests
 	./unstuff_test
 	./input_test
 	./dehuff_test
+	./bytesource_test
diff --git a/bytesource.c b/bytesource.c
new file mode 100644
index 0000000..25cea22
--- /dev/null
+++ b/bytesource.c
@@ -0,0 +1,144 @@
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "choice.h"
+#include "bytesource.h"
+
+#define MARKER_CHAR 0xff
+#define STUFF_MARKER 0x00
+
+// Prepare <source> to pull raw (stuffed) bytes from input_func(userdata, ...).
+// Allocates the BYTESOURCE_CHUNK_SIZE-byte staging buffer; aborts if that fails.
+void init_byte_source(struct byte_source* source, raw_input_func_t* input_func, void* userdata)
+{
+	// TODO: should this size be a different constant?
+	memset(source, 0, sizeof(*source));
+	source->bytes = (uint8_t*)malloc(BYTESOURCE_CHUNK_SIZE);
+	if (source->bytes == NULL) {
+		// No way to report failure from here; don't limp on with a NULL buffer.
+		abort();
+	}
+	source->input_func = input_func;
+	source->userdata = userdata;
+}
+
+// Strip the marker sitting at the front of the buffer and return its second
+// byte. The buffer must start with 0xFF followed by a non-stuffing byte.
+uint8_t byte_source_read_marker(struct byte_source* source)
+{
+	assert(source->bytes_available >= 2);
+	assert(source->bytes[0] == MARKER_CHAR);
+	assert(source->bytes[1] != STUFF_MARKER);
+
+	uint8_t ret = source->bytes[1];
+
+	memmove(source->bytes, source->bytes + 2, source->bytes_available - 2);
+	source->bytes_available -= 2;
+
+	return ret;
+}
+
+// Discard the first <count> bytes of the staging buffer.
+static void consume_bytes(struct byte_source* src, unsigned count)
+{
+	assert(count <= src->bytes_available);
+	memmove(src->bytes, src->bytes + count, src->bytes_available - count);
+	src->bytes_available -= count;
+}
+
+// Write up to <len> unstuffed bytes into <buf>. Returns the number of bytes
+// written; 0 on upstream EOF, or when a marker is next in the stream (it stays
+// buffered until byte_source_read_marker() removes it); -1 on error.
+ssize_t byte_source_input_func(void* source, uint8_t* buf, size_t len)
+{
+	struct byte_source* src = (struct byte_source*)source;
+
+	if (len == 0) {
+		return 0;
+	}
+
+	// If there's no data in the buffer (or only a partial marker), we have
+	// to read in more from our upstream src.
+	while (src->bytes_available == 0 ||
+	       (src->bytes_available == 1 && src->bytes[0] == MARKER_CHAR)) {
+		const unsigned space_left = BYTESOURCE_CHUNK_SIZE - src->bytes_available;
+		const size_t bytes_to_read = (len > space_left ? space_left : len);
+		assert(bytes_to_read <= BYTESOURCE_CHUNK_SIZE);
+		const ssize_t bytes_read =
+			(*src->input_func)(src->userdata,
+			                   src->bytes + src->bytes_available,
+			                   bytes_to_read);
+
+		// Test for errors before comparing with the unsigned request size;
+		// a -1 would be converted to SIZE_MAX and trip the assert below.
+		if (bytes_read < 0) {
+			return -1;
+		}
+		assert((size_t)bytes_read <= bytes_to_read);
+
+		if (bytes_read == 0) {
+			// EOF in the middle of a marker => read error
+			return (src->bytes_available == 0) ? 0 : -1;
+		}
+
+		src->bytes_available += bytes_read;
+	}
+
+	// Now unstuff as much as we can, but never more than the caller has
+	// room for. If the span ends in 0xFF we don't include that byte this
+	// time; the unstuff function will only give us an error since it can't
+	// decide if it's a marker or a stuffed 0xFF.
+	unsigned bytes_to_unstuff = src->bytes_available;
+	if (bytes_to_unstuff > len) {
+		bytes_to_unstuff = (unsigned)len;
+	}
+	if (src->bytes[bytes_to_unstuff - 1] == MARKER_CHAR) {
+		--bytes_to_unstuff;
+	}
+
+	if (bytes_to_unstuff > 0) {
+		const int unstuffed_bytes = (*unstuff_choice)(buf, src->bytes, bytes_to_unstuff);
+		assert(unstuffed_bytes != 0);
+		if (unstuffed_bytes > 0) {
+			// Fast path: No markers in the data. We can basically just
+			// return it and keep whatever we did not look at.
+			consume_bytes(src, bytes_to_unstuff);
+			return unstuffed_bytes;
+		}
+		assert(unstuffed_bytes == -1);
+	}
+
+	// Slow path: There was a marker in the data (or the span was too short
+	// for the fast path). Unstuff manually until we hit the marker, run out
+	// of classifiable input, or fill the caller's buffer.
+	unsigned bytes_consumed = 0;
+	size_t bytes_written = 0;
+	while (bytes_consumed < src->bytes_available && bytes_written < len) {
+		const uint8_t byte = src->bytes[bytes_consumed];
+		if (byte != MARKER_CHAR) {
+			buf[bytes_written++] = byte;
+			++bytes_consumed;
+			continue;
+		}
+
+		if (bytes_consumed + 1 >= src->bytes_available) {
+			// 0xFF is the last buffered byte; we need its successor to
+			// tell a marker from a stuffed 0xFF, so leave it for next time.
+			break;
+		}
+		if (src->bytes[bytes_consumed + 1] != STUFF_MARKER) {
+			// OK, this is our marker; it stays in the buffer.
+			break;
+		}
+
+		// Stuffed 0xFF: emit it and skip the stuff byte.
+		buf[bytes_written++] = MARKER_CHAR;
+		bytes_consumed += 2;
+	}
+
+	consume_bytes(src, bytes_consumed);
+	return (ssize_t)bytes_written;
+}
diff --git a/bytesource.h b/bytesource.h
new file mode 100644
index 0000000..fe17259
--- /dev/null
+++ b/bytesource.h
@@ -0,0 +1,27 @@
+#ifndef _BYTESOURCE_H
+#define _BYTESOURCE_H 1
+
+#include <stdint.h>
+#include <sys/types.h>
+
+static const unsigned BYTESOURCE_CHUNK_SIZE = 4096;
+
+// Same as input_func_t, although it expects raw bytes, ie. with markers
+// and all.
+typedef ssize_t (raw_input_func_t)(void*, uint8_t*, size_t);
+
+// A data source taking in a byte stream and returning unstuffed bytes until
+// there's a marker. When there's a marker, it artificially returns EOF until
+// byte_source_read_marker() is called.
+struct byte_source {
+	uint8_t* bytes;
+	unsigned bytes_available;
+
+	raw_input_func_t* input_func;
+	void* userdata;
+};
+void init_byte_source(struct byte_source* source, raw_input_func_t* input_func, void* userdata);
+uint8_t byte_source_read_marker(struct byte_source* source);
+ssize_t byte_source_input_func(void* source, uint8_t* buf, size_t len);
+
+#endif /* !defined(_BYTESOURCE_H) */
diff --git a/bytesource_test.c b/bytesource_test.c
new file mode 100644
index 0000000..79c242a
--- /dev/null
+++ b/bytesource_test.c
@@ -0,0 +1,141 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "bytesource.h"
+#include "choice.h"
+
+struct custom_read_userdata {
+	uint8_t* bytes;
+	unsigned bytes_left;
+};
+
+ssize_t custom_read(void* userdata, uint8_t* buf, size_t count)
+{
+	struct custom_read_userdata* ud = (struct custom_read_userdata*)userdata;
+	size_t num_to_read = (ud->bytes_left > count ? count : ud->bytes_left);
+	memcpy(buf, ud->bytes, num_to_read);
+	ud->bytes += num_to_read;
+	ud->bytes_left -= num_to_read;
+	return num_to_read;
+}
+
+ssize_t custom_read_slow(void* userdata, uint8_t* buf, size_t count)
+{
+	struct custom_read_userdata* ud = (struct custom_read_userdata*)userdata;
+	size_t num_to_read = ((count > 0 && ud->bytes_left > 0) ? 1 : 0);
+	memcpy(buf, ud->bytes, num_to_read);
+	ud->bytes += num_to_read;
+	ud->bytes_left -= num_to_read;
+	return num_to_read;
+}
+
+// Two streams, separated by a marker.
+uint8_t bytes[] = {
+	0x01, 0x02, 0x03, 0xff, 0x00, 0x04, // some bytes
+	0xff, 0xf7, // a marker
+	0x05, 0x06, 0x07, 0x08, 0xff, 0x00 // more bytes
+};
+
+// Expected data.
+uint8_t first_stream[] = {
+	0x01, 0x02, 0x03, 0xff, 0x04
+};
+uint8_t second_stream[] = {
+	0x05, 0x06, 0x07, 0x08, 0xff
+};
+
+// Reading with a regular source.
+void test_basic_reading(void)
+{
+	struct custom_read_userdata ud;
+	ud.bytes = bytes;
+	ud.bytes_left = sizeof(bytes);
+
+	struct byte_source source;
+	init_byte_source(&source, custom_read, &ud);
+
+	// Read the first stream.
+	uint8_t buf[4096];
+	ssize_t ret;
+	ret = byte_source_input_func(&source, buf, 4096);
+	assert(ret == sizeof(first_stream));
+	assert(memcmp(buf, first_stream, sizeof(first_stream)) == 0);
+
+	// Now we should get EOF.
+	ret = byte_source_input_func(&source, buf, 4096);
+	assert(ret == 0);
+
+	// Read the marker.
+	uint8_t marker = byte_source_read_marker(&source);
+	assert(marker == 0xf7);
+
+	// Read the second stream.
+	ret = byte_source_input_func(&source, buf, 4096);
+	assert(ret == sizeof(second_stream));
+	assert(memcmp(buf, second_stream, sizeof(second_stream)) == 0);
+
+	// ...and EOF again.
+	ret = byte_source_input_func(&source, buf, 4096);
+	assert(ret == 0);
+
+	free(source.bytes);  // allocated by init_byte_source()
+}
+
+// Reading with a slow source.
+void test_slow_source(void)
+{
+	struct custom_read_userdata ud;
+	ud.bytes = bytes;
+	ud.bytes_left = sizeof(bytes);
+
+	struct byte_source source;
+	init_byte_source(&source, custom_read_slow, &ud);
+
+	uint8_t buf[4096];
+	ssize_t ret;
+
+	// Read the first stream. Since the source is slow, we'll get one by
+	// one byte, even though we asked for 4096.
+	for (unsigned i = 0; i < sizeof(first_stream); ++i) {
+		ret = byte_source_input_func(&source, buf, 4096);
+		assert(ret == 1);
+		assert(buf[0] == first_stream[i]);
+	}
+
+	// Now we should get EOF.
+	ret = byte_source_input_func(&source, buf, 4096);
+	assert(ret == 0);
+
+	// Read the marker.
+	uint8_t marker = byte_source_read_marker(&source);
+	assert(marker == 0xf7);
+
+	// Read the second stream.
+	for (unsigned i = 0; i < sizeof(second_stream); ++i) {
+		ret = byte_source_input_func(&source, buf, 4096);
+		assert(ret == 1);
+		assert(buf[0] == second_stream[i]);
+	}
+
+	// ...and EOF again.
+	ret = byte_source_input_func(&source, buf, 4096);
+	assert(ret == 0);
+
+	free(source.bytes);  // allocated by init_byte_source()
+}
+
+int main(void)
+{
+	init_choices();
+
+	printf("test_basic_reading()\n");
+	test_basic_reading();
+
+	printf("test_slow_source()\n");
+	test_slow_source();
+
+	printf("All tests pass.\n");
+	return 0;
+}
diff --git a/dehuff.c b/dehuff.c
index 0a9fbbc..dd5c57d 100644
--- a/dehuff.c
+++ b/dehuff.c
@@ -2,6 +2,7 @@
 #include
 #include
 
+#include "bytesource.h"
 #include "dehuff.h"
 
 void reliable_read(raw_input_func_t* input_func, void* userdata, uint8_t* buf, size_t len)
diff --git a/dehuff.h b/dehuff.h
index d93e793..6865dae 100644
--- a/dehuff.h
+++ b/dehuff.h
@@ -5,6 +5,7 @@
 #include
 #include
 
+#include "bytesource.h"
 #include "input.h"
 
 // About 99% of all Huffman codes are <= 8 bits long (see codelen.txt),
@@ -13,11 +14,6 @@
 #define DEHUF_TABLE_SIZE (1 << DEHUF_TABLE_BITS)
 static const int DEHUF_SLOW_PATH = -1;
 
-// A function to read bytes from some input source. The bytes should be
-// already unstuffed (and thus without markers).
-// A return value of -1 indicates error, a return value of 0 indicates EOF.
-typedef ssize_t (raw_input_func_t)(void*, uint8_t*, size_t);
-
 struct huffman_table {
 	unsigned num_codes[17]; // BITS
 	unsigned char codes[256]; // HUFFVAL
diff --git a/input.c b/input.c
index a36d83f..7db6e1e 100644
--- a/input.c
+++ b/input.c
@@ -1,9 +1,13 @@
 #include
 #include
+#include
 #include
 
 #include "input.h"
 
+#define MARKER_CHAR 0xff
+#define STUFF_MARKER 0x00
+
 void init_bit_source(struct bit_source* source, input_func_t* input_func, void* userdata)
 {
 	memset(source, 0, sizeof(*source));
diff --git a/input_test.c b/input_test.c
index 108b3b6..3ff7ba0 100644
--- a/input_test.c
+++ b/input_test.c
@@ -24,7 +24,7 @@ ssize_t custom_read(void* userdata, uint8_t* buf, size_t count)
 ssize_t custom_read_slow(void* userdata, uint8_t* buf, size_t count)
 {
 	struct custom_read_userdata* ud = (struct custom_read_userdata*)userdata;
-	size_t num_to_read = (count > 0 ? 1 : 0);
+	size_t num_to_read = ((count > 0 && ud->bytes_left > 0) ? 1 : 0);
 	memcpy(buf, ud->bytes, num_to_read);
 	ud->bytes += num_to_read;
 	ud->bytes_left -= num_to_read;