Add a new input source converting JPEG-format bytes into unstuffed bytes.
author Steinar H. Gunderson <sesse@debian.org>
Sat, 3 Jan 2009 01:15:21 +0000 (02:15 +0100)
committer Steinar H. Gunderson <sesse@debian.org>
Sat, 3 Jan 2009 01:15:21 +0000 (02:15 +0100)
Also fixed a bug in the "slow source".

Makefile
bytesource.c [new file with mode: 0644]
bytesource.h [new file with mode: 0644]
bytesource_test.c [new file with mode: 0644]
dehuff.c
dehuff.h
input.c
input_test.c

index 37dfc95..e1c226a 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 CC=gcc
-CFLAGS=-std=gnu99 -O2 -msse4.1 -g
+CFLAGS=-std=gnu99 -msse4.1 -g
 LDFLAGS=
 
 all: tests
@@ -16,13 +16,19 @@ DEHUFF_TEST_OBJS=dehuff.o input.o dehuff_test.o
 dehuff_test: $(DEHUFF_TEST_OBJS)
        $(CC) $(LDFLAGS) -o $@ $(DEHUFF_TEST_OBJS)
 
-tests: unstuff_test input_test dehuff_test
+BYTESOURCE_TEST_OBJS=bytesource.o choice.o unstuff.o bytesource_test.o
+bytesource_test: $(BYTESOURCE_TEST_OBJS)
+       $(CC) $(LDFLAGS) -o $@ $(BYTESOURCE_TEST_OBJS)
+
+tests: unstuff_test input_test dehuff_test bytesource_test
 clean:
        $(RM) $(UNSTUFF_TEST_OBJS) unstuff_test
        $(RM) $(INPUT_TEST_OBJS) input_test
        $(RM) $(DEHUFF_TEST_OBJS) dehuff_test
+       $(RM) $(DEHUFF_TEST_OBJS) bytesource_test
 
 test: tests
        ./unstuff_test
        ./input_test
        ./dehuff_test
+       ./bytesource_test
diff --git a/bytesource.c b/bytesource.c
new file mode 100644 (file)
index 0000000..25cea22
--- /dev/null
@@ -0,0 +1,122 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+
+#include "choice.h"
+#include "bytesource.h"
+
+#define MARKER_CHAR 0xff
+#define STUFF_MARKER 0x00
+
+void init_byte_source(struct byte_source* source, raw_input_func_t* input_func, void* userdata)
+{
+       // Start from zeroed state (bytes_available == 0) with a freshly allocated
+       // chunk buffer. TODO: should this size be a different constant?
+       uint8_t* chunk = (uint8_t*)malloc(BYTESOURCE_CHUNK_SIZE);
+       if (chunk == NULL) { abort(); }  // no error return here; stop before a NULL buffer is used
+       *source = (struct byte_source){ .bytes = chunk, .input_func = input_func, .userdata = userdata };
+}
+
+// Consumes the two-byte marker at the front of the buffer and returns its
+// second byte. The buffer must begin with 0xFF followed by a non-stuff byte.
+uint8_t byte_source_read_marker(struct byte_source* source)
+{
+       assert(source->bytes_available >= 2);
+       assert(source->bytes[0] == MARKER_CHAR);
+       assert(source->bytes[1] != STUFF_MARKER);
+       const uint8_t marker_code = source->bytes[1];
+       source->bytes_available -= 2;
+       // Slide whatever follows the marker down to the start of the buffer.
+       memmove(source->bytes, source->bytes + 2, source->bytes_available);
+       return marker_code;
+}
+
+// Reads raw bytes from the upstream input_func and writes them, unstuffed,
+// to buf. Returns the number of bytes written; 0 at upstream EOF or while a
+// marker is next in the buffer (until byte_source_read_marker() consumes
+// it); -1 on an upstream error or on EOF in the middle of a marker.
+// NOTE(review): len only limits the upstream read; buf must have room for
+// everything already buffered (up to BYTESOURCE_CHUNK_SIZE) -- confirm callers.
+ssize_t byte_source_input_func(void* source, uint8_t* buf, size_t len)
+{
+       struct byte_source* src = (struct byte_source*)source;
+       // If there's no data in the buffer (or only a partial marker), we have
+       // to read in more from our upstream src.
+       while (src->bytes_available == 0 ||
+              (src->bytes_available == 1 && src->bytes[0] == MARKER_CHAR)) {
+               const unsigned space_left = BYTESOURCE_CHUNK_SIZE - src->bytes_available;
+               const size_t bytes_to_read = (len > space_left ? space_left : len);
+               assert(bytes_to_read <= BYTESOURCE_CHUNK_SIZE);
+               const ssize_t bytes_read = (*src->input_func)(
+                       src->userdata, src->bytes + src->bytes_available, bytes_to_read);
+               assert(bytes_read >= -1);
+               // Cast before comparing: ssize_t vs. size_t would convert the -1 error
+               // code to SIZE_MAX and trip the assert instead of returning the error.
+               assert(bytes_read == -1 || (size_t)bytes_read <= bytes_to_read);
+               if (bytes_read == -1) {
+                       return -1;
+               } else if (bytes_read == 0) {
+                       if (src->bytes_available == 1) {
+                               // EOF in the middle of a marker => read error
+                               return -1;
+                       } else {
+                               assert(src->bytes_available == 0);
+                               return 0;
+                       }
+               }
+               src->bytes_available += bytes_read;
+       }
+
+       // Now unstuff as much as we can. First of all, if there's a 0xFF at the
+       // end of the buffer, we don't include it this time; the unstuff function
+       // will only give us an error since it can't decide if it's a marker or
+       // a stuffed 0xFF.
+       unsigned bytes_to_unstuff = src->bytes_available;
+       bool end_marker = false;
+       assert(bytes_to_unstuff > 0);
+       if (src->bytes[bytes_to_unstuff - 1] == 0xff) {
+               --bytes_to_unstuff;
+               end_marker = true;
+       }
+
+       int unstuffed_bytes = (*unstuff_choice)(buf, src->bytes, bytes_to_unstuff);
+       assert(unstuffed_bytes != 0);
+       if (unstuffed_bytes > 0) {
+               // Fast path: No markers in the data. We can basically just
+               // return it.
+               if (end_marker) {
+                       src->bytes_available = 1;
+                       src->bytes[0] = 0xff;
+               } else {
+                       src->bytes_available = 0;
+                       src->bytes[0] = 0;
+               }
+               return unstuffed_bytes;
+       }
+
+       // Slow path: There was a marker in the data. Unstuff manually until
+       // we hit the marker, then return that.
+       assert(unstuffed_bytes == -1);
+       unsigned bytes_read;
+       unsigned bytes_written = 0;
+       for (bytes_read = 0; bytes_read < src->bytes_available; ++bytes_read) {
+               buf[bytes_written++] = src->bytes[bytes_read];
+               if (src->bytes[bytes_read] != MARKER_CHAR) {
+                       continue;
+               }
+               // The byte after the 0xFF is inspected next, so it has to be buffered too.
+               assert(bytes_read + 1 < src->bytes_available);
+               if (src->bytes[bytes_read + 1] != STUFF_MARKER) {
+                       // OK, this is our marker.
+                       break;
+               }
+               // Skip the stuff byte.
+               ++bytes_read;
+       }
+       memmove(src->bytes, src->bytes + bytes_read, src->bytes_available - bytes_read);
+       src->bytes_available -= bytes_read;
+       assert(bytes_written >= 1);
+       return bytes_written - 1;
+}
diff --git a/bytesource.h b/bytesource.h
new file mode 100644 (file)
index 0000000..fe17259
--- /dev/null
@@ -0,0 +1,27 @@
+#ifndef _BYTESOURCE_H
+#define _BYTESOURCE_H 1
+
+#include <stdint.h>
+#include <sys/types.h>
+
+static const unsigned BYTESOURCE_CHUNK_SIZE = 4096;
+
+// Same as input_func_t, although it expects raw bytes, ie. with markers
+// and all.
+typedef ssize_t (raw_input_func_t)(void*, uint8_t*, size_t);
+
+// A data source taking in a byte stream and returning unstuffed bytes until
+// there's a marker. When there's a marker, it artificially returns EOF until
+// byte_source_read_marker() is called.
+struct byte_source {
+       uint8_t* bytes;               // BYTESOURCE_CHUNK_SIZE-byte buffer, malloc'ed by init_byte_source()
+       unsigned bytes_available;     // number of valid raw bytes at the start of `bytes`
+
+       raw_input_func_t* input_func; // upstream reader supplying raw (still stuffed) bytes
+       void* userdata;               // opaque pointer handed to input_func as its first argument
+};
+void init_byte_source(struct byte_source* source, raw_input_func_t* input_func, void* userdata);
+uint8_t byte_source_read_marker(struct byte_source* source);
+ssize_t byte_source_input_func(void* source, uint8_t* buf, size_t len);
+
+#endif /* !defined(_BYTESOURCE_H) */
diff --git a/bytesource_test.c b/bytesource_test.c
new file mode 100644 (file)
index 0000000..79c242a
--- /dev/null
@@ -0,0 +1,138 @@
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "bytesource.h"
+#include "choice.h"
+
+struct custom_read_userdata {   // read cursor over an in-memory byte array, used by the test readers
+       uint8_t* bytes;         // next byte the readers will hand out
+       unsigned bytes_left;    // how many bytes remain from `bytes` onward
+};
+
+ssize_t custom_read(void* userdata, uint8_t* buf, size_t count)
+{
+       struct custom_read_userdata* const state = (struct custom_read_userdata*)userdata;
+       const size_t chunk = (count < state->bytes_left) ? count : state->bytes_left;
+       memcpy(buf, state->bytes, chunk);  // hand out at most what is left
+       state->bytes_left -= chunk;
+       state->bytes += chunk;             // advance the cursor past the copied bytes
+       return chunk;
+}
+
+ssize_t custom_read_slow(void* userdata, uint8_t* buf, size_t count)
+{
+       struct custom_read_userdata* const state = (struct custom_read_userdata*)userdata;
+       if (count == 0 || state->bytes_left == 0) {
+               return 0;  // nothing requested, or the source is exhausted (EOF)
+       }
+       *buf = *state->bytes++;  // a "slow" source: exactly one byte per call
+       --state->bytes_left;
+       return 1;
+}
+
+// Two streams, separated by a marker.
+uint8_t bytes[] = {
+       0x01, 0x02, 0x03, 0xff, 0x00, 0x04,     // some bytes
+       0xff, 0xf7,                             // a marker
+       0x05, 0x06, 0x07, 0x08, 0xff, 0x00      // more bytes
+};
+
+// Expected data.
+uint8_t first_stream[] = {
+       0x01, 0x02, 0x03, 0xff, 0x04
+};
+uint8_t second_stream[] = {
+       0x05, 0x06, 0x07, 0x08, 0xff
+};
+
+// Reading with a regular source: one call per stream, EOF at each marker.
+void test_basic_reading()
+{
+       struct custom_read_userdata input = {
+               .bytes = bytes,
+               .bytes_left = sizeof(bytes),
+       };
+       struct byte_source source;
+       init_byte_source(&source, custom_read, &input);
+
+       uint8_t out[4096];
+
+       // The first stream arrives in a single call, already unstuffed.
+       ssize_t got = byte_source_input_func(&source, out, sizeof(out));
+       assert(got == sizeof(first_stream));
+       assert(memcmp(out, first_stream, sizeof(first_stream)) == 0);
+
+       // The marker is next, so the source reports EOF...
+       got = byte_source_input_func(&source, out, sizeof(out));
+       assert(got == 0);
+
+       // ...until the marker itself has been consumed.
+       const uint8_t marker = byte_source_read_marker(&source);
+       assert(marker == 0xf7);
+
+       // The second stream follows the marker.
+       got = byte_source_input_func(&source, out, sizeof(out));
+       assert(got == sizeof(second_stream));
+       assert(memcmp(out, second_stream, sizeof(second_stream)) == 0);
+
+       // ...and EOF again, this time because the input is exhausted.
+       got = byte_source_input_func(&source, out, sizeof(out));
+       assert(got == 0);
+}
+
+// Reading with a slow source: the upstream reader yields a single raw byte
+// per call, so every unstuffed byte needs its own call.
+void test_slow_source()
+{
+       struct custom_read_userdata input = {
+               .bytes = bytes,
+               .bytes_left = sizeof(bytes),
+       };
+       struct byte_source source;
+       init_byte_source(&source, custom_read_slow, &input);
+
+       uint8_t out[4096];
+       ssize_t got;
+
+       // First stream: one byte comes back per call even though we ask for
+       // a full buffer each time.
+       for (size_t pos = 0; pos < sizeof(first_stream); ++pos) {
+               got = byte_source_input_func(&source, out, sizeof(out));
+               assert(got == 1);
+               assert(out[0] == first_stream[pos]);
+       }
+
+       // The marker is next, so the source reports EOF...
+       got = byte_source_input_func(&source, out, sizeof(out));
+       assert(got == 0);
+
+       // ...until the marker itself has been consumed.
+       const uint8_t marker = byte_source_read_marker(&source);
+       assert(marker == 0xf7);
+
+       // Second stream, again byte by byte. The buffer and result variable
+       // declared above are reused here.
+       for (size_t pos = 0; pos < sizeof(second_stream); ++pos) {
+               got = byte_source_input_func(&source, out, sizeof(out));
+               assert(got == 1);
+               assert(out[0] == second_stream[pos]);
+       }
+
+       // ...and EOF again, this time because the input is exhausted.
+       got = byte_source_input_func(&source, out, sizeof(out));
+       assert(got == 0);
+}
+
+int main(void)
+{
+       init_choices();  // from choice.h; presumably selects the unstuff_choice implementation -- confirm
+
+       printf("test_basic_reading()\n");  // each test's name is printed before it runs
+       test_basic_reading();
+
+       printf("test_slow_source()\n");
+       test_slow_source();
+       
+       printf("All tests pass.\n");  // only reached if no assert in the tests fired
+       return 0;
+}
index 0a9fbbc..dd5c57d 100644 (file)
--- a/dehuff.c
+++ b/dehuff.c
@@ -2,6 +2,7 @@
 #include <stdlib.h>
 #include <assert.h>
 
+#include "bytesource.h"
 #include "dehuff.h"
 
 void reliable_read(raw_input_func_t* input_func, void* userdata, uint8_t* buf, size_t len)
index d93e793..6865dae 100644 (file)
--- a/dehuff.h
+++ b/dehuff.h
@@ -5,6 +5,7 @@
 #include <stdint.h>
 #include <sys/types.h>
 
+#include "bytesource.h"
 #include "input.h"
 
 // About 99% of all Huffman codes are <= 8 bits long (see codelen.txt),
 #define DEHUF_TABLE_SIZE (1 << DEHUF_TABLE_BITS)
 static const int DEHUF_SLOW_PATH = -1;
 
-// A function to read bytes from some input source. The bytes should be
-// already unstuffed (and thus without markers).
-// A return value of -1 indicates error, a return value of 0 indicates EOF.
-typedef ssize_t (raw_input_func_t)(void*, uint8_t*, size_t);
-
 struct huffman_table {
        unsigned num_codes[17];     // BITS
        unsigned char codes[256];   // HUFFVAL
diff --git a/input.c b/input.c
index a36d83f..7db6e1e 100644 (file)
--- a/input.c
+++ b/input.c
@@ -1,9 +1,13 @@
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdbool.h>
 #include <string.h>
 
 #include "input.h"
 
+#define MARKER_CHAR 0xff
+#define STUFF_MARKER 0x00
+
 void init_bit_source(struct bit_source* source, input_func_t* input_func, void* userdata)
 {
        memset(source, 0, sizeof(*source));
index 108b3b6..3ff7ba0 100644 (file)
@@ -24,7 +24,7 @@ ssize_t custom_read(void* userdata, uint8_t* buf, size_t count)
 ssize_t custom_read_slow(void* userdata, uint8_t* buf, size_t count)
 {
        struct custom_read_userdata* ud = (struct custom_read_userdata*)userdata;
-       size_t num_to_read = (count > 0 ? 1 : 0);
+       size_t num_to_read = ((count > 0 && ud->bytes_left > 0) ? 1 : 0);
        memcpy(buf, ud->bytes, num_to_read);
        ud->bytes += num_to_read;
        ud->bytes_left -= num_to_read;