X-Git-Url: https://git.sesse.net/?p=fjl;a=blobdiff_plain;f=idct.c;h=ad1cc364642582b1711fb57122a7df186214f72c;hp=b187a90c07996b4af5e8a6cd0ba5d954688b4293;hb=13b48c4e2de5ceb4922ee47db32dd1a18edfffe4;hpb=4b87e80c1ee4dd6a5d5c13e7b4321a956f53378f diff --git a/idct.c b/idct.c index b187a90..ad1cc36 100644 --- a/idct.c +++ b/idct.c @@ -1,9 +1,27 @@ #include +#include +#include #include "idct.h" -void idct_reference(const int16_t* input, const uint32_t* quant_table, uint8_t* output) +void* idct_reference_alloc(const uint32_t* quant_table) { + uint32_t* qt_copy = (uint32_t*)malloc(DCTSIZE2 * sizeof(uint32_t)); + // FIXME: check for NULL return + + memcpy(qt_copy, quant_table, DCTSIZE2 * sizeof(uint32_t)); + + return qt_copy; +} + +void idct_reference_free(void* userdata) +{ + free(userdata); +} + +void idct_reference(const int16_t* input, const void* userdata, uint8_t* output) +{ + const uint32_t* quant_table = (const uint32_t*)userdata; double temp[DCTSIZE2]; for (unsigned y = 0; y < 8; ++y) { @@ -56,6 +74,27 @@ static const double scalefac[] = { 1.0, 1.2727585805728336, 1.847759065022573, 3.6245097854115502 }; +// Premultiply the scale factors and the overall 1/8 factor into the quantization +// table entries (and convert to double). +void* idct_float_alloc(const uint32_t* quant_table) +{ + double* qt_copy = (double*)malloc(DCTSIZE2 * sizeof(double)); + + for (unsigned y = 0; y < DCTSIZE; ++y) { + for (unsigned x = 0; x < DCTSIZE; ++x) { + qt_copy[y * DCTSIZE + x] = 0.125 * quant_table[y * DCTSIZE + x] * + scalefac[x] * scalefac[y]; + } + } + + return qt_copy; +} + +void idct_float_free(void* userdata) +{ + free(userdata); +} + // 1D 8-point DCT. static inline void idct1d_float(double y0, double y1, double y2, double y3, double y4, double y5, double y6, double y7, double *x) { @@ -67,14 +106,14 @@ static inline void idct1d_float(double y0, double y1, double y2, double y3, doub static const double a5 = 0.5 * (a4 - a2); // phase 1 - const double p1_0 = y0 * scalefac[0]; - const double p1_1 = y4 * scalefac[4]; - const double p1_2 = y2 * scalefac[2]; - const double p1_3 = y6 * scalefac[6]; - const double p1_4 = y5 * scalefac[5]; - const double p1_5 = y1 * scalefac[1]; - const double p1_6 = y7 * scalefac[7]; - const double p1_7 = y3 * scalefac[3]; + const double p1_0 = y0; + const double p1_1 = y4; + const double p1_2 = y2; + const double p1_3 = y6; + const double p1_4 = y5; + const double p1_5 = y1; + const double p1_6 = y7; + const double p1_7 = y3; // phase 2 const double p2_0 = p1_0; @@ -137,20 +176,21 @@ static inline void idct1d_float(double y0, double y1, double y2, double y3, doub x[7] = p6_0 - p6_7; } -void idct_float(const int16_t* input, const uint32_t* quant_table, uint8_t* output) +void idct_float(const int16_t* input, const void* userdata, uint8_t* output) { + const double* quant_table = (const double*)userdata; double temp[DCTSIZE2]; // IDCT columns. for (unsigned x = 0; x < DCTSIZE; ++x) { - idct1d_float(input[DCTSIZE * 0 + x] * (int32_t)quant_table[DCTSIZE * 0 + x], - input[DCTSIZE * 1 + x] * (int32_t)quant_table[DCTSIZE * 1 + x], - input[DCTSIZE * 2 + x] * (int32_t)quant_table[DCTSIZE * 2 + x], - input[DCTSIZE * 3 + x] * (int32_t)quant_table[DCTSIZE * 3 + x], - input[DCTSIZE * 4 + x] * (int32_t)quant_table[DCTSIZE * 4 + x], - input[DCTSIZE * 5 + x] * (int32_t)quant_table[DCTSIZE * 5 + x], - input[DCTSIZE * 6 + x] * (int32_t)quant_table[DCTSIZE * 6 + x], - input[DCTSIZE * 7 + x] * (int32_t)quant_table[DCTSIZE * 7 + x], + idct1d_float(input[DCTSIZE * 0 + x] * quant_table[DCTSIZE * 0 + x], + input[DCTSIZE * 1 + x] * quant_table[DCTSIZE * 1 + x], + input[DCTSIZE * 2 + x] * quant_table[DCTSIZE * 2 + x], + input[DCTSIZE * 3 + x] * quant_table[DCTSIZE * 3 + x], + input[DCTSIZE * 4 + x] * quant_table[DCTSIZE * 4 + x], + input[DCTSIZE * 5 + x] * quant_table[DCTSIZE * 5 + x], + input[DCTSIZE * 6 + x] * quant_table[DCTSIZE * 6 + x], + input[DCTSIZE * 7 + x] * quant_table[DCTSIZE * 7 + x], temp + x * DCTSIZE); } @@ -167,7 +207,7 @@ void idct_float(const int16_t* input, const uint32_t* quant_table, uint8_t* outp temp[DCTSIZE * 7 + y], temp2); for (unsigned x = 0; x < DCTSIZE; ++x) { - double val = (1.0/8.0) * temp2[x]; + const double val = temp2[x]; if (val < 0.0) { output[y * DCTSIZE + x] = 0; } else if (val >= 255.0) {