]> git.sesse.net Git - fjl/blobdiff - driver.c
Add an x86 optimized version of extend().
[fjl] / driver.c
index dd1b1ae8d8796840a51b7dbd7c4d8119b25fbad9..616b613f944ee5ae3ba12a69476eb37d9b20a336 100644 (file)
--- a/driver.c
+++ b/driver.c
@@ -1,9 +1,11 @@
 #include <stdio.h>
+#include <string.h>
 #include <stdlib.h>
 
 #include "bytesource.h"
 #include "choice.h"
 #include "dehuff.h"
+#include "idct.h"
 #include "input.h"
 #include "zigzag.h"
 
@@ -12,6 +14,13 @@ struct jpeg_image {
        unsigned width, height;
        unsigned num_components;
        unsigned hsample[256], vsample[256], qtable[256];
+       unsigned max_hsample, max_vsample;
+       unsigned stride[256];
+       unsigned num_blocks_horizontal, num_blocks_vertical;
+       uint32_t qvalues[256][DCTSIZE2];
+       void* idct_data[256];
+       uint8_t* pixel_data[256];
+       uint8_t* pixel_write_pointer[256];
 };
 
 ssize_t stdio_read(void* userdata, uint8_t* buf, size_t count) 
@@ -19,14 +28,47 @@ ssize_t stdio_read(void* userdata, uint8_t* buf, size_t count)
        return fread(buf, 1, count, (FILE*)userdata);
 }
 
+void read_dqt(struct byte_source* source, struct jpeg_image* image)  // Reads one quantization table from `source` into image->qvalues[table] and rebuilds image->idct_data[table]; called from main() for marker 0xdb (DQT).
+{
+       unsigned len = read_uint16(byte_source_input_func, source);  // Segment length field; counts its own two bytes (see the 67/131 checks below).
+       assert(len >= 67);  // NOTE(review): validation is assert-only, so it disappears when built with NDEBUG.
+       uint8_t precision_table = read_uint8(byte_source_input_func, source);  // One byte: high nibble = precision flag, low nibble = table index.
+       int precision = precision_table >> 4;  // 0 = 8 bits, otherwise 16 bits.
+       int table = precision_table & 0x0f;  // Table index, 0..15. NOTE(review): declared int but printed with %u below.
+
+       if (image->idct_data[table] != NULL) {  // Slot already populated by an earlier call: release it before it is overwritten at the end.
+               idct_choice_free(image->idct_data[table]);
+       }
+
+       if (precision != 0) {
+               assert(len == 131);  // 2 (length) + 1 (precision/index) + 64 * 2 bytes.
+               fprintf(stderr, "Quantization table %u: 16 bits/entry\n", table);
+       } else {
+               assert(len == 67);  // 2 (length) + 1 (precision/index) + 64 * 1 bytes. NOTE(review): only one table per segment is accepted; the JPEG spec appears to allow several -- confirm.
+               fprintf(stderr, "Quantization table %u: 8 bits/entry\n", table);
+       }
+       
+       for (unsigned i = 0; i < 64; ++i) {  // Entries are consumed in stream order and stored at position unzigzag[i].
+               if (precision != 0) {
+                       image->qvalues[table][unzigzag[i]] =
+                               read_uint16(byte_source_input_func, source);
+               } else {
+                       image->qvalues[table][unzigzag[i]] =
+                               read_uint8(byte_source_input_func, source);
+               }       
+       }
+
+       image->idct_data[table] = idct_choice_alloc(image->qvalues[table]);  // Per-table IDCT state built from the stored values; read_scan() later hands it to idct_choice().
+}
+
 void read_sof(struct byte_source* source, struct jpeg_image* image)
 {
        unsigned len = read_uint16(byte_source_input_func, source);
        assert(len >= 8);
        image->precision = read_uint8(byte_source_input_func, source);
        assert(image->precision == 8);
-       image->width = read_uint16(byte_source_input_func, source);
        image->height = read_uint16(byte_source_input_func, source);
+       image->width = read_uint16(byte_source_input_func, source);
        image->num_components = read_uint8(byte_source_input_func, source);
        len -= 8;
 
@@ -42,9 +84,34 @@ void read_sof(struct byte_source* source, struct jpeg_image* image)
                image->qtable[c] = read_uint8(byte_source_input_func, source);
                len -= 3;
 
+               if (image->hsample[c] > image->max_hsample) {
+                       image->max_hsample = image->hsample[c];
+               }
+               if (image->vsample[c] > image->max_vsample) {
+                       image->max_vsample = image->vsample[c];
+               }
+
                fprintf(stderr, "Component %u: sampling factors %u x %x, quantization table %u\n",
                        c, image->hsample[c], image->vsample[c], image->qtable[c]);
        }
+       
+       image->num_blocks_horizontal = (image->width + image->max_hsample * DCTSIZE - 1) / (image->max_hsample * DCTSIZE);
+       image->num_blocks_vertical = (image->height + image->max_vsample * DCTSIZE - 1) / (image->max_vsample * DCTSIZE);
+
+       for (unsigned c = 0; c < 256; ++c) {
+               if (image->hsample[c] == 0) {
+                       continue;
+               }
+
+               unsigned width = image->num_blocks_horizontal * image->hsample[c] * DCTSIZE;
+               unsigned height = image->num_blocks_vertical * image->vsample[c] * DCTSIZE;
+               image->stride[c] = width;
+               image->pixel_data[c] = (uint8_t*)malloc(width * height);
+               assert(image->pixel_data[c] != NULL);
+               image->pixel_write_pointer[c] = image->pixel_data[c];
+
+               fprintf(stderr, "Component %u: allocating %d x %d\n", c, width, height);
+       }
 }
 
 void read_scan(struct byte_source* source, struct jpeg_image* image, huffman_tables_t* tables)
@@ -84,52 +151,80 @@ void read_scan(struct byte_source* source, struct jpeg_image* image, huffman_tab
        }
 
        struct bit_source bits;
-       init_bit_source(&bits, byte_source_input_func, source);
+       init_bit_source(&bits, byte_source_input_func, 8, source);
+               
+       unsigned mcu_x = 0, mcu_y = 0;
 
-       for ( ;; ) {
+       while (!bits.source_eof) {
                for (unsigned c = 0; c < num_components; ++c) {
                        unsigned cn = component_num[c];
-                       unsigned nc = image->vsample[cn] * image->hsample[cn];
-                       for (unsigned n = 0; n < nc; ++n) {
-                               const struct huffman_table* dc_table = &((*tables)[DC_CLASS][dc_huffman_table[c]]);
-                               const struct huffman_table* ac_table = &((*tables)[AC_CLASS][ac_huffman_table[c]]);
-
-                               // decode DC component
-                               unsigned dc_category = read_huffman_symbol(dc_table, &bits);
-                               possibly_refill(&bits, dc_category);
-                               last_dc[c] += extend(read_bits(&bits, dc_category), dc_category);
-
-               //              printf("dc=%d ac=", last_dc[c]);
-                               putchar(last_dc[c]);
-
-                               // decode AC components
-                               int zz[63] = { 0 };
-                               for (unsigned i = 0; i < 63; ++i) {
-                                       unsigned rs = read_huffman_symbol(ac_table, &bits);
-                                       unsigned r = rs >> 4;
-                                       unsigned s = rs & 0xf;
-
-                                       if (rs == 0x00) {
-                                               /* end of block */
-                                               break;
-                                       }
-                                       if (rs == 0xf0) {
-                                               /* 16 zero coefficients */
-                                               i += 15;
-                                               continue;
-                                       }
+                       assert(image->idct_data[image->qtable[cn]] != NULL);
 
-                                       possibly_refill(&bits, s);
+                       uint8_t* pixel_write_pointer_y = image->pixel_write_pointer[cn];
+                       for (unsigned local_yb = 0; local_yb < image->vsample[cn]; ++local_yb, pixel_write_pointer_y += image->stride[cn] * DCTSIZE) {
+                               uint8_t* pixel_write_pointer = pixel_write_pointer_y;
+                               for (unsigned local_xb = 0; local_xb < image->hsample[cn]; ++local_xb, pixel_write_pointer += DCTSIZE) {
+                                       const struct huffman_table* dc_table = &((*tables)[DC_CLASS][dc_huffman_table[c]]);
+                                       const struct huffman_table* ac_table = &((*tables)[AC_CLASS][ac_huffman_table[c]]);
 
-                                       i += r;
-                                       zz[unzigzag[i]] = extend(read_bits(&bits, s), s);
-                               }
-                               
-                               for (unsigned i = 0; i < 63; ++i) {
-                                       putchar(zz[i]);
-                                       //printf("%d ", zz[i]);
+                                       // decode DC component
+                                       unsigned dc_category = read_huffman_symbol(dc_table, &bits);
+                                       possibly_refill(&bits, dc_category + DEHUF_TABLE_BITS);
+                                       last_dc[c] += extend(read_bits(&bits, dc_category), dc_category);
+                                       
+                                       int16_t coeff[DCTSIZE2] = { 0 };
+                                       coeff[0] = last_dc[c];
+
+                                       // decode AC components
+                                       for (unsigned i = 1; i < DCTSIZE2; ++i) {
+                                               unsigned rs = read_huffman_symbol_no_refill(ac_table, &bits);
+                                               unsigned r = rs >> 4;
+                                               unsigned s = rs & 0xf;
+
+                                               if (rs == 0x00) {
+                                                       /* end of block */
+                                                       break;
+                                               }
+                                               if (rs == 0xf0) {
+                                                       /* 16 zero coefficients */
+                                                       possibly_refill(&bits, DEHUF_TABLE_BITS);
+                                                       i += 15;
+                                                       continue;
+                                               }
+                                               i += r;
+
+                                               possibly_refill(&bits, s + DEHUF_TABLE_BITS);
+                                               coeff[unzigzag[i]] = extend(read_bits(&bits, s), s);
+                                       }
+                       
+                                       uint8_t pixdata[DCTSIZE2];      
+                                       idct_choice(coeff, image->idct_data[image->qtable[cn]], pixdata);
+
+                                       uint8_t* dest_pixdata = pixel_write_pointer;
+                                       for (unsigned y = 0; y < DCTSIZE; ++y, dest_pixdata += image->stride[cn]) {
+                                               memcpy(dest_pixdata, pixdata + y * DCTSIZE, DCTSIZE);
+                                       }
                                }
-                               //printf("\n");
+                       }
+                       image->pixel_write_pointer[cn] += DCTSIZE * image->hsample[cn];
+               }
+       
+               if (++mcu_x == image->num_blocks_horizontal) {
+                       ++mcu_y;
+                       mcu_x = 0;
+               
+                       for (unsigned c = 0; c < num_components; ++c) {
+                               unsigned cn = component_num[c];
+                               image->pixel_write_pointer[cn] += (image->vsample[cn] * DCTSIZE - 1) * image->stride[cn];
+                       }
+
+                       // Some debug code.
+                       const int c = 1;
+                       if (mcu_y == image->num_blocks_vertical) {
+                               unsigned stride = image->num_blocks_horizontal * image->hsample[c] * DCTSIZE;
+                               unsigned height = image->num_blocks_vertical * image->vsample[c] * DCTSIZE;
+                               printf("P5\n%u %u\n255\n", stride, height);
+                               fwrite(image->pixel_data[c], stride * height, 1, stdout);
                        }
                }
        }
@@ -156,6 +251,7 @@ void skip_segment(struct byte_source* source)
 int main(void)
 {
        struct jpeg_image jpeg;
+       memset(&jpeg, 0, sizeof(jpeg));
        init_choices();
 
        struct byte_source source;
@@ -193,9 +289,11 @@ int main(void)
                        /* comment */
                case 0xff:
                        /* ignore */
+                       skip_segment(&source);
+                       break;
                case 0xdb:
                        /* DQT */
-                       skip_segment(&source);
+                       read_dqt(&source, &jpeg);
                        break;
                case 0xc0:
                        /* SOF0 (baseline DCT, Huffman encoded) */
@@ -204,6 +302,9 @@ int main(void)
                case 0xd8:
                        /* SOI */
                        break;
+               case 0xd9:
+                       /* EOI */
+                       exit(0);
                case 0xc4:
                        /* DHT (define Huffman tables) */
                        read_huffman_tables(&tables, byte_source_input_func, &source);