7 void* idct_reference_alloc(const uint32_t* quant_table)
9 uint32_t* qt_copy = (uint32_t*)malloc(DCTSIZE2 * sizeof(uint32_t));
10 // FIXME: check for NULL return
12 memcpy(qt_copy, quant_table, DCTSIZE2 * sizeof(uint32_t));
// Releases the table returned by idct_reference_alloc(). That function
// obtains its block with malloc(), so free() is the matching release;
// passing NULL is harmless.
void idct_reference_free(void* userdata)
{
    free(userdata);
}
22 void idct_reference(const int16_t* input, const void* userdata, uint8_t* output)
24 const uint32_t* quant_table = (const uint32_t*)userdata;
25 double temp[DCTSIZE2];
27 for (unsigned y = 0; y < 8; ++y) {
28 for (unsigned x = 0; x < 8; ++x) {
30 for (unsigned u = 0; u < 8; ++u) {
31 for (unsigned v = 0; v < 8; ++v) {
32 double c_u = (u == 0) ? 1/sqrt(2.0) : 1.0;
33 double c_v = (v == 0) ? 1/sqrt(2.0) : 1.0;
35 * input[u * DCTSIZE + v] * quant_table[u * DCTSIZE + v]
36 * cos((2 * x + 1) * v * M_PI / 16.0)
37 * cos((2 * y + 1) * u * M_PI / 16.0);
40 temp[y * DCTSIZE + x] = 0.25 * acc;
44 for (unsigned y = 0; y < 8; ++y) {
45 for (unsigned x = 0; x < 8; ++x) {
46 double val = temp[y * DCTSIZE + x];
48 output[y * DCTSIZE + x] = 0;
49 } else if (val >= 255.0) {
50 output[y * DCTSIZE + x] = 255;
52 output[y * DCTSIZE + x] = (uint8_t)(val + 0.5);
// AA&N (Arai, Agui and Nakajima) floating-point IDCT.
// This IDCT is based on the same DCT that libjpeg uses -- in fact, exactly the
// same figure from the same book ("JPEG: Still Image Data Compression Standard",
// page 52, figure 4-8). However, it is coded from scratch, and uses the
// transposition method for converting DCT -> IDCT suggested in the book.
// (libjpeg seems to use some other method that yields similar, but not
// identical, results.)
//
// As this is generally meant as a reference rather than as code for real use
// (we expect a SIMD fixed-point algorithm to be used in most cases), no
// significant attempt has been made to optimize it. We assume the compiler
// will be smart enough to do all the variable propagation for us anyway.
// Per-frequency scale factors for the AA&N flow graph:
//   scalefac[k] = 1.0 / (sqrt(2.0) * cos(k * M_PI / 16.0))   for k = 1..7,
// while the k = 0 entry is simply 1.0.
static const double scalefac[] = {
    1.0, 0.7209598220069479, 0.765366864730180, 0.8504300947672564,
    1.0, 1.2727585805728336, 1.847759065022573, 3.6245097854115502
};
77 // Premultiply the scale factors and the overall 1/8 factor into the quantization
78 // table entries (and convert to double).
79 void* idct_float_alloc(const uint32_t* quant_table)
81 double* qt_copy = (double*)malloc(DCTSIZE2 * sizeof(double));
83 for (unsigned y = 0; y < DCTSIZE; ++y) {
84 for (unsigned x = 0; x < DCTSIZE; ++x) {
85 qt_copy[y * DCTSIZE + x] = 0.125 * quant_table[y * DCTSIZE + x] *
86 scalefac[x] * scalefac[y];
// Releases the table returned by idct_float_alloc(). That function obtains
// its block with malloc(), so free() is the matching release; passing NULL
// is harmless.
void idct_float_free(void* userdata)
{
    free(userdata);
}
// One 8-point pass of the AA&N inverse DCT flow graph.
//
// y0..y7 are the frequency-domain inputs for k = 0..7; x receives the eight
// outputs. For a single non-zero input y_k = 1 (k >= 1) the outputs are
//   x[n] = 2 * cos(k * pi / 16) * cos((2 * n + 1) * k * pi / 16),
// and y0 = 1 yields x[n] = 1, so callers are expected to fold the matching
// normalization into the inputs beforehand.
static inline void idct1d_float(double y0, double y1, double y2, double y3, double y4, double y5, double y6, double y7, double *x)
{
    // Multiplier constants of the flow graph. Plain (non-static) locals,
    // because a3 and a5 are derived from the others.
    const double a1 = 0.7071067811865474;  // cos(pi/4) = sqrt(2) / 2
    const double a2 = 0.5411961001461971;  // cos(3/8 pi) * sqrt(2)
    const double a3 = a1;
    const double a4 = 1.3065629648763766;  // cos(pi/8) * sqrt(2)
    const double a5 = 0.5 * (a4 - a2);     // = cos(3/8 pi)

    // Stage 1: input permutation.
    const double p1_0 = y0;
    const double p1_1 = y4;
    const double p1_2 = y2;
    const double p1_3 = y6;
    const double p1_4 = y5;
    const double p1_5 = y1;
    const double p1_6 = y7;
    const double p1_7 = y3;

    // Stage 2: butterflies on the odd half.
    const double p2_0 = p1_0;
    const double p2_1 = p1_1;
    const double p2_2 = p1_2;
    const double p2_3 = p1_3;
    const double p2_4 = p1_4 - p1_7;
    const double p2_5 = p1_5 + p1_6;
    const double p2_6 = p1_5 - p1_6;
    const double p2_7 = p1_4 + p1_7;

    // Stage 3.
    const double p3_0 = p2_0;
    const double p3_1 = p2_1;
    const double p3_2 = p2_2 - p2_3;
    const double p3_3 = p2_2 + p2_3;
    const double p3_4 = p2_4;
    const double p3_5 = p2_5 - p2_7;
    const double p3_6 = p2_6;
    const double p3_7 = p2_5 + p2_7;

    // Stage 4: the multiplications. The (p3_4 + p3_6) * -a5 term is shared
    // between outputs 4 and 6.
    const double p4_0 = p3_0;
    const double p4_1 = p3_1;
    const double p4_2 = a1 * p3_2;
    const double p4_3 = p3_3;
    const double p4_4 = p3_4 * -a2 + (p3_4 + p3_6) * -a5;
    const double p4_5 = a3 * p3_5;
    const double p4_6 = p3_6 * a4 + (p3_4 + p3_6) * -a5;
    const double p4_7 = p3_7;

    // Stage 5.
    const double p5_0 = p4_0 + p4_1;
    const double p5_1 = p4_0 - p4_1;
    const double p5_2 = p4_2;
    const double p5_3 = p4_2 + p4_3;
    const double p5_4 = p4_4;
    const double p5_5 = p4_5;
    const double p5_6 = p4_6;
    const double p5_7 = p4_7;

    // Stage 6.
    const double p6_0 = p5_0 + p5_3;
    const double p6_1 = p5_1 + p5_2;
    const double p6_2 = p5_1 - p5_2;
    const double p6_3 = p5_0 - p5_3;
    const double p6_4 = -p5_4;
    const double p6_5 = p5_5 - p5_4;
    const double p6_6 = p5_5 + p5_6;
    const double p6_7 = p5_6 + p5_7;

    // Stage 7: final butterflies combining the even half (p6_0..p6_3) with
    // the odd half (p6_4..p6_7); outputs n and 7 - n differ only in the sign
    // of the odd contribution.
    x[0] = p6_0 + p6_7;
    x[1] = p6_1 + p6_6;
    x[2] = p6_2 + p6_5;
    x[3] = p6_3 + p6_4;
    x[4] = p6_3 - p6_4;
    x[5] = p6_2 - p6_5;
    x[6] = p6_1 - p6_6;
    x[7] = p6_0 - p6_7;
}
179 void idct_float(const int16_t* input, const void* userdata, uint8_t* output)
181 const double* quant_table = (const double*)userdata;
182 double temp[DCTSIZE2];
185 for (unsigned x = 0; x < DCTSIZE; ++x) {
186 idct1d_float(input[DCTSIZE * 0 + x] * quant_table[DCTSIZE * 0 + x],
187 input[DCTSIZE * 1 + x] * quant_table[DCTSIZE * 1 + x],
188 input[DCTSIZE * 2 + x] * quant_table[DCTSIZE * 2 + x],
189 input[DCTSIZE * 3 + x] * quant_table[DCTSIZE * 3 + x],
190 input[DCTSIZE * 4 + x] * quant_table[DCTSIZE * 4 + x],
191 input[DCTSIZE * 5 + x] * quant_table[DCTSIZE * 5 + x],
192 input[DCTSIZE * 6 + x] * quant_table[DCTSIZE * 6 + x],
193 input[DCTSIZE * 7 + x] * quant_table[DCTSIZE * 7 + x],
198 for (unsigned y = 0; y < DCTSIZE; ++y) {
199 double temp2[DCTSIZE];
200 idct1d_float(temp[DCTSIZE * 0 + y],
201 temp[DCTSIZE * 1 + y],
202 temp[DCTSIZE * 2 + y],
203 temp[DCTSIZE * 3 + y],
204 temp[DCTSIZE * 4 + y],
205 temp[DCTSIZE * 5 + y],
206 temp[DCTSIZE * 6 + y],
207 temp[DCTSIZE * 7 + y],
209 for (unsigned x = 0; x < DCTSIZE; ++x) {
210 const double val = temp2[x];
212 output[y * DCTSIZE + x] = 0;
213 } else if (val >= 255.0) {
214 output[y * DCTSIZE + x] = 255;
216 output[y * DCTSIZE + x] = (uint8_t)(val + 0.5);