Add a stupid integerization of the AA&N IDCT -- 30% faster or so, mostly
[fjl] / idct_test.c
1 #include <stdio.h>
2 #include <string.h>
3 #include <stdlib.h>
4 #include <math.h>
5 #include <assert.h>
6
7 #include "benchmark.h"
8 #include "idct.h"
9 #include "idct_reference.h"
10 #include "idct_float.h"
11 #include "idct_imprecise_int.h"
12
// Generate pseudo-random coefficients in the range [-7..7].
//
// Each output value consumes one step of the generator: the top bit of the
// state picks the sign and bits 27..29 supply the magnitude (0..7). The
// generator state is a function-local static, so successive calls continue
// the same sequence, and the sequence is identical on every run.
//
//   dst: receives len values.
//   len: number of values to write; 0 writes nothing and leaves the
//        generator state untouched.
void gen_random_coeffs(int16_t* dst, size_t len)
{
        // Standard NR LCG (we avoid rand() to get consistent behavior across platforms).
        static uint32_t seed = 1234;

        // The index is a size_t so that it has the same width as len.
        for (size_t i = 0; i < len; ++i) {
                seed = seed * 1664525U + 1013904223U;

                const int16_t magnitude = (int16_t)((seed >> 27) & 0x7);
                dst[i] = (seed >> 31) ? magnitude : (int16_t)-magnitude;
        }
}
27
28 // Test that the input is pretty close to the reference for random inputs. 
29 // (If the reference funtion is given in, this becomes a simple test of its
30 // determinism.)
31 void test_random_inputs(idct_alloc_t* idct_alloc, idct_free_t* idct_free, idct_func_t* idct)
32 {
33         int16_t coeff[DCTSIZE2]; 
34         uint32_t quant[DCTSIZE2];
35         uint8_t output[DCTSIZE2];
36         uint8_t reference[DCTSIZE2];
37
38         // Unit quantization (ie., no scaling).
39         for (unsigned i = 0; i < DCTSIZE2; ++i) {
40                 quant[i] = 1;
41         }
42         
43         void* userdata_reference = idct_reference_alloc(quant);
44         void* userdata = idct_alloc(quant);
45
46         for (unsigned i = 0; i < 1000; ++i) {   
47                 gen_random_coeffs(coeff, DCTSIZE2);
48
49                 (*idct)(coeff, userdata, output);
50                 (idct_reference)(coeff, userdata_reference, reference);
51
52                 // Find the RMS difference.
53                 int diff_squared = 0;
54                 for (unsigned i = 0; i < DCTSIZE2; ++i) {
55                         diff_squared += (output[i] - reference[i]) * (output[i] - reference[i]);
56                 }
57
58                 assert(diff_squared <= 5);
59         }
60
61         idct_reference_free(userdata_reference);
62         idct_free(userdata);
63 }
64
65 // Test that a single DC coefficient becomes spread out to all blocks.
66 void test_dc_becomes_spread_out(idct_alloc_t* idct_alloc, idct_free_t* idct_free, idct_func_t* idct)
67 {
68         int16_t coeff[DCTSIZE2] = { 0 }; 
69         uint32_t quant[DCTSIZE2];
70         uint8_t output[DCTSIZE2];
71         
72         // Unit quantization (ie., no scaling).
73         for (unsigned i = 0; i < DCTSIZE2; ++i) {
74                 quant[i] = 1;
75         }
76
77         void* userdata = idct_alloc(quant);
78
79         for (unsigned i = 0; i < 255*8; ++i) {  
80                 uint32_t reference_value = i / 8;
81                 coeff[0] = i;
82
83                 (*idct)(coeff, userdata, output);
84
85                 for (unsigned i = 0; i < DCTSIZE2; ++i) {
86                         assert(abs(output[i] - reference_value) <= 1);
87                 }
88         }
89         
90         idct_free(userdata);
91 }
92
// Return the time elapsed from *a to *b, in seconds (negative when b is
// earlier than a).
double timediff(const struct timeval* a, const struct timeval* b)
{
        const double whole_seconds = (double)(b->tv_sec - a->tv_sec);
        const double microseconds = (double)(b->tv_usec - a->tv_usec);

        return whole_seconds + microseconds * 1e-6;
}
98
99 void test_performance(idct_alloc_t* idct_alloc, idct_free_t* idct_free, idct_func_t* idct)
100 {
101         const unsigned num_runs = (idct == idct_reference) ? 5000 : 5000000;
102
103         int16_t coeff[DCTSIZE2]; 
104         uint32_t quant[DCTSIZE2];
105         uint8_t output[DCTSIZE2];
106                 
107         gen_random_coeffs(coeff, DCTSIZE2);
108         
109         // Unit quantization (ie., no scaling).
110         for (unsigned i = 0; i < DCTSIZE2; ++i) {
111                 quant[i] = 1;
112         }
113
114         void* userdata = idct_alloc(quant);
115
116         start_benchmark_timer();
117
118         for (unsigned i = 0; i < num_runs; ++i) {
119                 (*idct)(coeff, userdata, output);
120         }
121         
122         double diff = stop_benchmark_timer();
123         printf("%u runs in %.2f CPU seconds = %.2f IDCTs/sec\n",
124                 num_runs, diff, num_runs / diff);
125
126         idct_free(userdata);
127 }
128
129 void test_all_idct(idct_alloc_t* idct_alloc, idct_free_t* idct_free, idct_func_t* idct)
130 {
131         printf("  test_dc_becomes_spread_out()\n");
132         test_dc_becomes_spread_out(idct_alloc, idct_free, idct);        
133
134         printf("  test_random_inputs()\n");
135         test_random_inputs(idct_alloc, idct_free, idct);        
136
137         printf("  performance test: ");
138         test_performance(idct_alloc, idct_free, idct);
139 }
140
141 int main(void)
142 {
143         printf("idct_reference:\n");
144         test_all_idct(idct_reference_alloc, idct_reference_free, idct_reference);
145
146         printf("idct_float:\n");
147         test_all_idct(idct_float_alloc, idct_float_free, idct_float);
148         
149         printf("idct_imprecise_int:\n");
150         test_all_idct(idct_imprecise_int_alloc, idct_imprecise_int_free, idct_imprecise_int);
151
152         printf("All tests pass.\n");
153         return 0;
154 }