#include <math.h>
+#include <string.h>
+#include <stdlib.h>
#include "idct.h"
-void idct_reference(const int16_t* input, const uint32_t* quant_table, uint8_t* output)
+// Builds the per-table state for idct_reference(): a heap copy of the
+// DCTSIZE2-entry quantization table.
+//
+// The caller owns the returned pointer and must release it with
+// idct_reference_free(). Returns NULL if the allocation fails, so callers
+// must check the result before handing it to idct_reference().
+void* idct_reference_alloc(const uint32_t* quant_table)
{
+	uint32_t* qt_copy = (uint32_t*)malloc(DCTSIZE2 * sizeof(uint32_t));
+	if (qt_copy == NULL) {
+		// Out of memory: report it rather than passing NULL to memcpy(),
+		// which would be undefined behavior.
+		return NULL;
+	}
+
+	memcpy(qt_copy, quant_table, DCTSIZE2 * sizeof(uint32_t));
+
+	return qt_copy;
+}
+
+// Releases the state returned by idct_reference_alloc(). The pointer is
+// passed directly to free(), so a NULL userdata is accepted and ignored.
+void idct_reference_free(void* userdata)
+{
+ free(userdata);
+}
+
+void idct_reference(const int16_t* input, const void* userdata, uint8_t* output)
+{
+ const uint32_t* quant_table = (const uint32_t*)userdata;
double temp[DCTSIZE2];
for (unsigned y = 0; y < 8; ++y) {
1.0, 1.2727585805728336, 1.847759065022573, 3.6245097854115502
};
+// Premultiply the scale factors and the overall 1/8 factor into the quantization
+// table entries (and convert to double), so that idct_float() only needs one
+// multiplication per coefficient and no final rescaling.
+//
+// Entry (y, x) becomes 0.125 * quant_table[y][x] * scalefac[x] * scalefac[y].
+//
+// The caller owns the returned table of DCTSIZE2 doubles and must release it
+// with idct_float_free(). Returns NULL if the allocation fails, so callers
+// must check the result before handing it to idct_float().
+void* idct_float_alloc(const uint32_t* quant_table)
+{
+	double* qt_copy = (double*)malloc(DCTSIZE2 * sizeof(double));
+	if (qt_copy == NULL) {
+		// Out of memory: report it rather than writing through a NULL pointer.
+		return NULL;
+	}
+
+	for (unsigned y = 0; y < DCTSIZE; ++y) {
+		for (unsigned x = 0; x < DCTSIZE; ++x) {
+			qt_copy[y * DCTSIZE + x] = 0.125 * quant_table[y * DCTSIZE + x] *
+				scalefac[x] * scalefac[y];
+		}
+	}
+
+	return qt_copy;
+}
+
+// Releases the premultiplied table returned by idct_float_alloc(). The
+// pointer is passed directly to free(), so a NULL userdata is accepted and
+// ignored.
+void idct_float_free(void* userdata)
+{
+ free(userdata);
+}
+
// 1D 8-point DCT.
static inline void idct1d_float(double y0, double y1, double y2, double y3, double y4, double y5, double y6, double y7, double *x)
{
static const double a5 = 0.5 * (a4 - a2);
// phase 1
- const double p1_0 = y0 * scalefac[0];
- const double p1_1 = y4 * scalefac[4];
- const double p1_2 = y2 * scalefac[2];
- const double p1_3 = y6 * scalefac[6];
- const double p1_4 = y5 * scalefac[5];
- const double p1_5 = y1 * scalefac[1];
- const double p1_6 = y7 * scalefac[7];
- const double p1_7 = y3 * scalefac[3];
+ const double p1_0 = y0;
+ const double p1_1 = y4;
+ const double p1_2 = y2;
+ const double p1_3 = y6;
+ const double p1_4 = y5;
+ const double p1_5 = y1;
+ const double p1_6 = y7;
+ const double p1_7 = y3;
// phase 2
const double p2_0 = p1_0;
x[7] = p6_0 - p6_7;
}
-void idct_float(const int16_t* input, const uint32_t* quant_table, uint8_t* output)
+void idct_float(const int16_t* input, const void* userdata, uint8_t* output)
{
+ const double* quant_table = (const double*)userdata;
double temp[DCTSIZE2];
// IDCT columns.
for (unsigned x = 0; x < DCTSIZE; ++x) {
- idct1d_float(input[DCTSIZE * 0 + x] * (int32_t)quant_table[DCTSIZE * 0 + x],
- input[DCTSIZE * 1 + x] * (int32_t)quant_table[DCTSIZE * 1 + x],
- input[DCTSIZE * 2 + x] * (int32_t)quant_table[DCTSIZE * 2 + x],
- input[DCTSIZE * 3 + x] * (int32_t)quant_table[DCTSIZE * 3 + x],
- input[DCTSIZE * 4 + x] * (int32_t)quant_table[DCTSIZE * 4 + x],
- input[DCTSIZE * 5 + x] * (int32_t)quant_table[DCTSIZE * 5 + x],
- input[DCTSIZE * 6 + x] * (int32_t)quant_table[DCTSIZE * 6 + x],
- input[DCTSIZE * 7 + x] * (int32_t)quant_table[DCTSIZE * 7 + x],
+ idct1d_float(input[DCTSIZE * 0 + x] * quant_table[DCTSIZE * 0 + x],
+ input[DCTSIZE * 1 + x] * quant_table[DCTSIZE * 1 + x],
+ input[DCTSIZE * 2 + x] * quant_table[DCTSIZE * 2 + x],
+ input[DCTSIZE * 3 + x] * quant_table[DCTSIZE * 3 + x],
+ input[DCTSIZE * 4 + x] * quant_table[DCTSIZE * 4 + x],
+ input[DCTSIZE * 5 + x] * quant_table[DCTSIZE * 5 + x],
+ input[DCTSIZE * 6 + x] * quant_table[DCTSIZE * 6 + x],
+ input[DCTSIZE * 7 + x] * quant_table[DCTSIZE * 7 + x],
temp + x * DCTSIZE);
}
temp[DCTSIZE * 7 + y],
temp2);
for (unsigned x = 0; x < DCTSIZE; ++x) {
- double val = (1.0/8.0) * temp2[x];
+ const double val = temp2[x];
if (val < 0.0) {
output[y * DCTSIZE + x] = 0;
} else if (val >= 255.0) {
#define DCTSIZE 8
#define DCTSIZE2 64
-typedef void (idct_func_t)(const int16_t*, const uint32_t*, uint8_t*);
+// void* idct_example_alloc(const uint32_t* quant_table);
+typedef void* (idct_alloc_t)(const uint32_t*);
+
+// void idct_example_free(void* userdata);
+// userdata is the same as returned by the alloc function.
+typedef void (idct_free_t)(void*);
+
+// void idct_example(const int16_t* input, const void* userdata, uint8_t* output);
+// userdata is the same as returned by the alloc function.
+typedef void (idct_func_t)(const int16_t*, const void*, uint8_t*);
// Non-factorized reference version (section A.3.3 of the JPEG standard).
-void idct_reference(const int16_t* input, const uint32_t* quant_table, uint8_t* output);
+void* idct_reference_alloc(const uint32_t* quant_table);
+void idct_reference_free(void* userdata);
+void idct_reference(const int16_t* input, const void* userdata, uint8_t* output);
// Floating-point IDCT due to Arai, Agui and Nakajima (also known as AA&N).
// See idct.c for more details.
-void idct_float(const int16_t* input, const uint32_t* quant_table, uint8_t* output);
+void* idct_float_alloc(const uint32_t* quant_table);
+void idct_float_free(void* userdata);
+void idct_float(const int16_t* input, const void* userdata, uint8_t* output);
#endif /* !defined(_IDCT_H) */
// Test that the output is pretty close to the reference for random inputs.
// (If the reference function is passed in, this becomes a simple test of its
// determinism.)
-void test_random_inputs(idct_func_t* idct)
+void test_random_inputs(idct_alloc_t* idct_alloc, idct_free_t* idct_free, idct_func_t* idct)
{
int16_t coeff[DCTSIZE2];
uint32_t quant[DCTSIZE2];
uint8_t output[DCTSIZE2];
uint8_t reference[DCTSIZE2];
-
+
// Unit quantization (ie., no scaling).
for (unsigned i = 0; i < DCTSIZE2; ++i) {
quant[i] = 1;
}
+
+ void* userdata_reference = idct_reference_alloc(quant);
+ void* userdata = idct_alloc(quant);
for (unsigned i = 0; i < 1000; ++i) {
gen_random_coeffs(coeff, DCTSIZE2);
- (*idct)(coeff, quant, output);
- (idct_reference)(coeff, quant, reference);
+ (*idct)(coeff, userdata, output);
+ (idct_reference)(coeff, userdata_reference, reference);
// Find the RMS difference.
int diff_squared = 0;
assert(diff_squared <= 5);
}
+
+ idct_reference_free(userdata_reference);
+ idct_free(userdata);
}
// Test that a single DC coefficient becomes spread out to all blocks.
-void test_dc_becomes_spread_out(idct_func_t* idct)
+void test_dc_becomes_spread_out(idct_alloc_t* idct_alloc, idct_free_t* idct_free, idct_func_t* idct)
{
int16_t coeff[DCTSIZE2] = { 0 };
uint32_t quant[DCTSIZE2];
uint8_t output[DCTSIZE2];
-
+
// Unit quantization (ie., no scaling).
for (unsigned i = 0; i < DCTSIZE2; ++i) {
quant[i] = 1;
}
+ // Let the implementation under test build its own representation of the
+ // quantization table once, before the loop.
+ // NOTE(review): the result is not checked for NULL before being used below.
+ void* userdata = idct_alloc(quant);
+
+ // Sweep the DC coefficient over 0..255*8-1 with all AC coefficients zero;
+ // every output sample must then be within 1 of i / 8.
for (unsigned i = 0; i < 255*8; ++i) {
uint32_t reference_value = i / 8;
coeff[0] = i;
- (*idct)(coeff, quant, output);
+ (*idct)(coeff, userdata, output);
+ // NOTE(review): this inner `i` shadows the outer loop counter. That is
+ // harmless here because reference_value is already computed, but a
+ // different name would avoid -Wshadow noise.
+ // NOTE(review): reference_value is uint32_t, so the subtraction below is
+ // done in unsigned arithmetic and a negative difference wraps before
+ // abs() converts it back to int; consider casting to int first.
for (unsigned i = 0; i < DCTSIZE2; ++i) {
assert(abs(output[i] - reference_value) <= 1);
}
}
+
+ idct_free(userdata);
}
double timediff(const struct timeval* a, const struct timeval* b)
(double)(b->tv_usec - a->tv_usec) * 1e-6;
}
-void test_performance(idct_func_t* idct)
+void test_performance(idct_alloc_t* idct_alloc, idct_free_t* idct_free, idct_func_t* idct)
{
const unsigned num_runs = (idct == idct_reference) ? 5000 : 5000000;
quant[i] = 1;
}
+ void* userdata = idct_alloc(quant);
+
start_benchmark_timer();
for (unsigned i = 0; i < num_runs; ++i) {
- (*idct)(coeff, quant, output);
+ (*idct)(coeff, userdata, output);
}
double diff = stop_benchmark_timer();
printf("%u runs in %.2f CPU seconds = %.2f IDCTs/sec\n",
num_runs, diff, num_runs / diff);
+
+ idct_free(userdata);
}
-void test_all_idct(idct_func_t* idct)
+// Runs the two correctness tests and the performance test against a single
+// IDCT implementation, given as its alloc / free / transform functions. The
+// three pointers are forwarded unchanged to each test.
+void test_all_idct(idct_alloc_t* idct_alloc, idct_free_t* idct_free, idct_func_t* idct)
{
printf(" test_dc_becomes_spread_out()\n");
- test_dc_becomes_spread_out(idct);
+ test_dc_becomes_spread_out(idct_alloc, idct_free, idct);
printf(" test_random_inputs()\n");
- test_random_inputs(idct);
+ test_random_inputs(idct_alloc, idct_free, idct);
printf(" performance test: ");
- test_performance(idct);
+ test_performance(idct_alloc, idct_free, idct);
}
int main(void)
{
printf("idct_reference:\n");
- test_all_idct(idct_reference);
+ test_all_idct(idct_reference_alloc, idct_reference_free, idct_reference);
printf("idct_float:\n");
- test_all_idct(idct_float);
+ test_all_idct(idct_float_alloc, idct_float_free, idct_float);
printf("All tests pass.\n");
return 0;