X-Git-Url: https://git.sesse.net/?p=fjl;a=blobdiff_plain;f=idct.c;fp=idct.c;h=0000000000000000000000000000000000000000;hp=c71d599833affb62944c4f915f89c9009aed034b;hb=56779091c47e5c61376d7f942cc58b831673e1d7;hpb=def75cfab6ca4da59ddc2e924898f3a8ed11ec89 diff --git a/idct.c b/idct.c deleted file mode 100644 index c71d599..0000000 --- a/idct.c +++ /dev/null @@ -1,220 +0,0 @@ -#include -#include -#include - -#include "idct.h" - -void* idct_reference_alloc(const uint32_t* quant_table) -{ - uint32_t* qt_copy = (uint32_t*)malloc(DCTSIZE2 * sizeof(uint32_t)); - // FIXME: check for NULL return - - memcpy(qt_copy, quant_table, DCTSIZE2 * sizeof(uint32_t)); - - return qt_copy; -} - -void idct_reference_free(void* userdata) -{ - free(userdata); -} - -void idct_reference(const int16_t* input, const void* userdata, uint8_t* output) -{ - const uint32_t* quant_table = (const uint32_t*)userdata; - double temp[DCTSIZE2]; - - for (unsigned y = 0; y < 8; ++y) { - for (unsigned x = 0; x < 8; ++x) { - double acc = 0.0; - for (unsigned u = 0; u < 8; ++u) { - for (unsigned v = 0; v < 8; ++v) { - double c_u = (u == 0) ? 1/sqrt(2.0) : 1.0; - double c_v = (v == 0) ? 1/sqrt(2.0) : 1.0; - acc += c_u * c_v - * input[u * DCTSIZE + v] * quant_table[u * DCTSIZE + v] - * cos((2 * x + 1) * v * M_PI / 16.0) - * cos((2 * y + 1) * u * M_PI / 16.0); - } - } - temp[y * DCTSIZE + x] = 0.25 * acc; - } - } - - for (unsigned y = 0; y < 8; ++y) { - for (unsigned x = 0; x < 8; ++x) { - double val = temp[y * DCTSIZE + x]; - if (val < 0.0) { - output[y * DCTSIZE + x] = 0; - } else if (val >= 255.0) { - output[y * DCTSIZE + x] = 255; - } else { - output[y * DCTSIZE + x] = (uint8_t)(val + 0.5); - } - } - } -} - -// AA&N (Arai, Agui and Nakajima) floating-point IDCT. -// This IDCT is based on the same DCT that libjpeg uses -- in fact, exactly the -// same figure from the same book ("JPEG: Still Image Data Compression Standard", -// page 52, figure 4-8). However, it is coded from scratch, and uses the -// transposition method for converting DCT -> IDCT suggested in the book. -// (libjpeg seems to use some other method that yields similar, but not -// the same, code.) - -// As this is generally meant as a reference and not useful code (we expect -// a SIMD fixed-point algorithm to be used in most cases), it has not been -// attempted significantly optimized. We assume the compiler will be smart -// enough to do all the variable propagation for us anyway. - -// Scale factors; 1.0 / (sqrt(2.0) * cos(k * M_PI / 16.0)), except for the first which is 1. -static const double scalefac[] = { - 1.0, 0.7209598220069479, 0.765366864730180, 0.8504300947672564, - 1.0, 1.2727585805728336, 1.847759065022573, 3.6245097854115502 -}; - -// Premultiply the scale factors and the overall 1/8 factor into the quantization -// table entries (and convert to double). -void* idct_float_alloc(const uint32_t* quant_table) -{ - double* qt_copy = (double*)malloc(DCTSIZE2 * sizeof(double)); - - for (unsigned y = 0; y < DCTSIZE; ++y) { - for (unsigned x = 0; x < DCTSIZE; ++x) { - qt_copy[y * DCTSIZE + x] = (1.0/DCTSIZE) * quant_table[y * DCTSIZE + x] * - scalefac[x] * scalefac[y]; - } - } - - return qt_copy; -} - -void idct_float_free(void* userdata) -{ - free(userdata); -} - -// 1D 8-point DCT. -static inline void idct1d_float(double y0, double y1, double y2, double y3, double y4, double y5, double y6, double y7, double *x) -{ - // constants - static const double a1 = 0.7071067811865474; // sqrt(2) - static const double a2 = 0.5411961001461971; // cos(3/8 pi) * sqrt(2) - static const double a3 = a1; - static const double a4 = 1.3065629648763766; // cos(pi/8) * sqrt(2) - static const double a5 = 0.5 * (a4 - a2); - - // phase 1 - const double p1_0 = y0; - const double p1_1 = y4; - const double p1_2 = y2; - const double p1_3 = y6; - const double p1_4 = y5; - const double p1_5 = y1; - const double p1_6 = y7; - const double p1_7 = y3; - - // phase 2 - const double p2_0 = p1_0; - const double p2_1 = p1_1; - const double p2_2 = p1_2; - const double p2_3 = p1_3; - const double p2_4 = p1_4 - p1_7; - const double p2_5 = p1_5 + p1_6; - const double p2_6 = p1_5 - p1_6; - const double p2_7 = p1_4 + p1_7; - - // phase 3 - const double p3_0 = p2_0; - const double p3_1 = p2_1; - const double p3_2 = p2_2 - p2_3; - const double p3_3 = p2_2 + p2_3; - const double p3_4 = p2_4; - const double p3_5 = p2_5 - p2_7; - const double p3_6 = p2_6; - const double p3_7 = p2_5 + p2_7; - - // phase 4 - const double p4_0 = p3_0; - const double p4_1 = p3_1; - const double p4_2 = a1 * p3_2; - const double p4_3 = p3_3; - const double p4_4 = p3_4 * -a2 + (p3_4 + p3_6) * -a5; - const double p4_5 = a3 * p3_5; - const double p4_6 = p3_6 * a4 + (p3_4 + p3_6) * -a5; - const double p4_7 = p3_7; - - // phase 5 - const double p5_0 = p4_0 + p4_1; - const double p5_1 = p4_0 - p4_1; - const double p5_2 = p4_2; - const double p5_3 = p4_2 + p4_3; - const double p5_4 = p4_4; - const double p5_5 = p4_5; - const double p5_6 = p4_6; - const double p5_7 = p4_7; - - // phase 6 - const double p6_0 = p5_0 + p5_3; - const double p6_1 = p5_1 + p5_2; - const double p6_2 = p5_1 - p5_2; - const double p6_3 = p5_0 - p5_3; - const double p6_4 = -p5_4; - const double p6_5 = p5_5 - p5_4; - const double p6_6 = p5_5 + p5_6; - const double p6_7 = p5_6 + p5_7; - - // phase 7 - x[0] = p6_0 + p6_7; - x[1] = p6_1 + p6_6; - x[2] = p6_2 + p6_5; - x[3] = p6_3 + p6_4; - x[4] = p6_3 - p6_4; - x[5] = p6_2 - p6_5; - x[6] = p6_1 - p6_6; - x[7] = p6_0 - p6_7; -} - -void idct_float(const int16_t* input, const void* userdata, uint8_t* output) -{ - const double* quant_table = (const double*)userdata; - double temp[DCTSIZE2]; - - // IDCT columns. - for (unsigned x = 0; x < DCTSIZE; ++x) { - idct1d_float(input[DCTSIZE * 0 + x] * quant_table[DCTSIZE * 0 + x], - input[DCTSIZE * 1 + x] * quant_table[DCTSIZE * 1 + x], - input[DCTSIZE * 2 + x] * quant_table[DCTSIZE * 2 + x], - input[DCTSIZE * 3 + x] * quant_table[DCTSIZE * 3 + x], - input[DCTSIZE * 4 + x] * quant_table[DCTSIZE * 4 + x], - input[DCTSIZE * 5 + x] * quant_table[DCTSIZE * 5 + x], - input[DCTSIZE * 6 + x] * quant_table[DCTSIZE * 6 + x], - input[DCTSIZE * 7 + x] * quant_table[DCTSIZE * 7 + x], - temp + x * DCTSIZE); - } - - // IDCT rows. - for (unsigned y = 0; y < DCTSIZE; ++y) { - double temp2[DCTSIZE]; - idct1d_float(temp[DCTSIZE * 0 + y], - temp[DCTSIZE * 1 + y], - temp[DCTSIZE * 2 + y], - temp[DCTSIZE * 3 + y], - temp[DCTSIZE * 4 + y], - temp[DCTSIZE * 5 + y], - temp[DCTSIZE * 6 + y], - temp[DCTSIZE * 7 + y], - temp2); - for (unsigned x = 0; x < DCTSIZE; ++x) { - const double val = temp2[x]; - if (val < 0.0) { - output[y * DCTSIZE + x] = 0; - } else if (val >= 255.0) { - output[y * DCTSIZE + x] = 255; - } else { - output[y * DCTSIZE + x] = (uint8_t)(val + 0.5); - } - } - } -}