From 1cfb04470add4e31406785980e7ce14a8e9d0672 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Mon, 1 Jun 2009 17:27:18 +0200 Subject: [PATCH] Make a shiny new lookup table for looking up complete AC coefficients. No tests yet. --- dehuff.c | 44 ++++++++++++++++++++++++++++++++++++ dehuff.h | 18 +++++++++++++++ driver.c | 69 +++++++++++++++++++++++++++++++++++++++----------------- 3 files changed, 110 insertions(+), 21 deletions(-) diff --git a/dehuff.c b/dehuff.c index c4a65ee..695da0b 100644 --- a/dehuff.c +++ b/dehuff.c @@ -119,6 +119,50 @@ void read_huffman_tables(huffman_tables_t* dst, input_func_t* input_func, void* } } } + + // Generate the AC lookup tables. + for (unsigned i = 0; i < DEHUF_AC_TABLE_SIZE; ++i) { + tbl->ac_table_codes[i] = AC_DEHUF_SLOW_PATH; + tbl->ac_table_length[i] = AC_DEHUF_SLOW_PATH; + tbl->ac_table_skip[i] = AC_DEHUF_SLOW_PATH; + + int lookup = i >> (DEHUF_AC_TABLE_BITS - DEHUF_TABLE_BITS); + int rs = tbl->lookup_table_codes[lookup]; + unsigned length = tbl->lookup_table_length[lookup]; + if (rs == DEHUF_SLOW_PATH) { + // Not enough bits to decode even the length. + continue; + } + if (rs == 0x00) { + // End of block. + tbl->ac_table_codes[i] = AC_END_OF_BLOCK; + tbl->ac_table_length[i] = length; + tbl->ac_table_skip[i] = 0; + continue; + } + if (rs == 0xf0) { + // 16 zero coefficients. + tbl->ac_table_codes[i] = AC_SIXTEEN_ZEROS; + tbl->ac_table_length[i] = length; + tbl->ac_table_skip[i] = 15; + continue; + } + + unsigned r = rs >> 4; + unsigned s = rs & 0xf; + if (s > DEHUF_AC_TABLE_BITS - length) { + // Not enough bits to decode this coefficient. + continue; + } + + unsigned bits = (i >> (DEHUF_AC_TABLE_BITS - length - s)) & ((1 << s) - 1); + + tbl->ac_table_codes[i] = extend(bits, s); + tbl->ac_table_length[i] = length + s; + tbl->ac_table_skip[i] = r; + + assert(tbl->ac_table_length[i] <= DEHUF_AC_TABLE_BITS); + } } free(buf); diff --git a/dehuff.h b/dehuff.h index 2669034..ec9c976 100644 --- a/dehuff.h +++ b/dehuff.h @@ -14,6 +14,15 @@ #define DEHUF_TABLE_SIZE (1 << DEHUF_TABLE_BITS) static const int DEHUF_SLOW_PATH = -1; +// About 98% of all AC coefficients (control byte + coefficient) are <= 10 bits +// long; again, see codelen.txt. This will cost us about 12 kB of data to store +// in L1 cache. +#define DEHUF_AC_TABLE_BITS 10 +#define DEHUF_AC_TABLE_SIZE (1 << DEHUF_AC_TABLE_BITS) +static const int AC_DEHUF_SLOW_PATH = 0xf0000000; +static const int AC_END_OF_BLOCK = 0xf0000001; +static const int AC_SIXTEEN_ZEROS = 0xf0000002; + struct huffman_table { unsigned num_codes[17]; // BITS unsigned char codes[256]; // HUFFVAL @@ -33,6 +42,15 @@ struct huffman_table { // the lookup tables is int to avoid extra zero extending. int lookup_table_codes[DEHUF_TABLE_SIZE]; int lookup_table_length[DEHUF_TABLE_SIZE]; + + // Further lookup tables for decoding AC coefficients. + // (Generated but obviously not used for DC coefficients.) + // Maps from 10-bit lookahead values to the signed coeffient (_codes), + // number of bits to skip (_length) and the number of zero coefficients + // after this one (_skip). + int ac_table_codes[DEHUF_AC_TABLE_SIZE]; + int ac_table_length[DEHUF_AC_TABLE_SIZE]; + int ac_table_skip[DEHUF_AC_TABLE_SIZE]; }; enum coefficient_class { diff --git a/driver.c b/driver.c index 5b45453..cda257e 100644 --- a/driver.c +++ b/driver.c @@ -114,6 +114,52 @@ void read_sof(struct byte_source* source, struct jpeg_image* image) } } +void decode_ac_coefficients(const struct huffman_table* tbl, struct bit_source* bits, int16_t* coeff) +{ + for (unsigned i = 1; i < DCTSIZE2; ++i) { + possibly_refill(bits, DEHUF_AC_TABLE_BITS); + unsigned lookup = peek_bits(bits, DEHUF_AC_TABLE_BITS); + int code = tbl->ac_table_codes[lookup]; + int length = tbl->ac_table_length[lookup]; + int r = tbl->ac_table_skip[lookup]; + + assert(length == AC_DEHUF_SLOW_PATH || (length > 0 && length <= DEHUF_AC_TABLE_BITS)); + + if (__builtin_expect(code == AC_DEHUF_SLOW_PATH, 0)) { + unsigned rs = read_huffman_symbol_no_refill(tbl, bits); + unsigned r = rs >> 4; + unsigned s = rs & 0xf; + i += r; + possibly_refill(bits, s); + + if (rs == 0x00) { + assert(code == AC_DEHUF_SLOW_PATH || code == AC_END_OF_BLOCK); + /* end of block */ + break; + } + if (rs == 0xf0) { + assert(code == AC_DEHUF_SLOW_PATH || code == AC_SIXTEEN_ZEROS); + /* 16 zero coefficients */ + continue; + } + + coeff[unzigzag[i]] = extend(read_bits(bits, s), s); + } else { + assert(r >= 0); + i += r; + assert(bits->bits_available >= length); + read_bits(bits, length); + if (code == AC_END_OF_BLOCK) { + break; + } + if (code == AC_SIXTEEN_ZEROS) { + continue; + } + coeff[unzigzag[i]] = code; + } + } +} + void read_scan(struct byte_source* source, struct jpeg_image* image, huffman_tables_t* tables) { unsigned len = read_uint16(byte_source_input_func, source); @@ -169,32 +215,13 @@ void read_scan(struct byte_source* source, struct jpeg_image* image, huffman_tab // decode DC component unsigned dc_category = read_huffman_symbol(dc_table, &bits); - possibly_refill(&bits, dc_category + DEHUF_TABLE_BITS); + possibly_refill(&bits, dc_category); last_dc[c] += extend(read_bits(&bits, dc_category), dc_category); int16_t coeff[DCTSIZE2] = { 0 }; coeff[0] = last_dc[c]; + decode_ac_coefficients(ac_table, &bits, coeff); - // decode AC components - for (unsigned i = 1; i < DCTSIZE2; ++i) { - unsigned rs = read_huffman_symbol_no_refill(ac_table, &bits); - unsigned r = rs >> 4; - unsigned s = rs & 0xf; - i += r; - possibly_refill(&bits, s + DEHUF_TABLE_BITS); - - if (rs == 0x00) { - /* end of block */ - break; - } - if (rs == 0xf0) { - /* 16 zero coefficients */ - continue; - } - - coeff[unzigzag[i]] = extend(read_bits(&bits, s), s); - } - uint8_t pixdata[DCTSIZE2]; idct_choice(coeff, image->idct_data[image->qtable[cn]], pixdata); -- 2.39.2