Let IDCTs do precalculation outside the inner loops. Speeds up (as expected)
[fjl] / idct_test.c
1 #include <stdio.h>
2 #include <string.h>
3 #include <stdlib.h>
4 #include <math.h>
5 #include <assert.h>
6
7 #include "benchmark.h"
8 #include "idct.h"
9
// Fill dst[0..len-1] with pseudo-random coefficients in the range [-7..7].
//
// Each value takes its sign from bit 31 of the generator state and its
// magnitude from bits 27..29 (three bits, hence 0..7). Zero can come out of
// either branch, so it is twice as likely as any other value.
//
// The generator state is a function-local static, so successive calls continue
// one single sequence: the output is reproducible from run to run, but the
// function is not reentrant.
void gen_random_coeffs(int16_t* dst, size_t len)
{
        // Standard NR LCG (we avoid rand() to get consistent behavior across platforms).
        static uint32_t seed = 1234;

        // The index is a size_t like len; an unsigned counter could never
        // reach a len above UINT_MAX and would loop forever.
        for (size_t i = 0; i < len; ++i) {
                seed = seed * 1664525U + 1013904223U;

                const int16_t magnitude = (int16_t)((seed >> 27) & 0x7);
                dst[i] = (seed >> 31) ? magnitude : (int16_t)-magnitude;
        }
}
24
25 // Test that the input is pretty close to the reference for random inputs. 
26 // (If the reference funtion is given in, this becomes a simple test of its
27 // determinism.)
28 void test_random_inputs(idct_alloc_t* idct_alloc, idct_free_t* idct_free, idct_func_t* idct)
29 {
30         int16_t coeff[DCTSIZE2]; 
31         uint32_t quant[DCTSIZE2];
32         uint8_t output[DCTSIZE2];
33         uint8_t reference[DCTSIZE2];
34
35         // Unit quantization (ie., no scaling).
36         for (unsigned i = 0; i < DCTSIZE2; ++i) {
37                 quant[i] = 1;
38         }
39         
40         void* userdata_reference = idct_reference_alloc(quant);
41         void* userdata = idct_alloc(quant);
42
43         for (unsigned i = 0; i < 1000; ++i) {   
44                 gen_random_coeffs(coeff, DCTSIZE2);
45
46                 (*idct)(coeff, userdata, output);
47                 (idct_reference)(coeff, userdata_reference, reference);
48
49                 // Find the RMS difference.
50                 int diff_squared = 0;
51                 for (unsigned i = 0; i < DCTSIZE2; ++i) {
52                         diff_squared += (output[i] - reference[i]) * (output[i] - reference[i]);
53                 }
54
55                 assert(diff_squared <= 5);
56         }
57
58         idct_reference_free(userdata_reference);
59         idct_free(userdata);
60 }
61
62 // Test that a single DC coefficient becomes spread out to all blocks.
63 void test_dc_becomes_spread_out(idct_alloc_t* idct_alloc, idct_free_t* idct_free, idct_func_t* idct)
64 {
65         int16_t coeff[DCTSIZE2] = { 0 }; 
66         uint32_t quant[DCTSIZE2];
67         uint8_t output[DCTSIZE2];
68         
69         // Unit quantization (ie., no scaling).
70         for (unsigned i = 0; i < DCTSIZE2; ++i) {
71                 quant[i] = 1;
72         }
73
74         void* userdata = idct_alloc(quant);
75
76         for (unsigned i = 0; i < 255*8; ++i) {  
77                 uint32_t reference_value = i / 8;
78                 coeff[0] = i;
79
80                 (*idct)(coeff, userdata, output);
81
82                 for (unsigned i = 0; i < DCTSIZE2; ++i) {
83                         assert(abs(output[i] - reference_value) <= 1);
84                 }
85         }
86         
87         idct_free(userdata);
88 }
89
// Return the time from *a to *b in seconds (b minus a), combining the
// whole-second and microsecond fields of the two timestamps.
double timediff(const struct timeval* a, const struct timeval* b)
{
        const double whole_seconds = (double)(b->tv_sec - a->tv_sec);
        const double micros = (double)(b->tv_usec - a->tv_usec);

        return whole_seconds + micros * 1e-6;
}
95
96 void test_performance(idct_alloc_t* idct_alloc, idct_free_t* idct_free, idct_func_t* idct)
97 {
98         const unsigned num_runs = (idct == idct_reference) ? 5000 : 5000000;
99
100         int16_t coeff[DCTSIZE2]; 
101         uint32_t quant[DCTSIZE2];
102         uint8_t output[DCTSIZE2];
103                 
104         gen_random_coeffs(coeff, DCTSIZE2);
105         
106         // Unit quantization (ie., no scaling).
107         for (unsigned i = 0; i < DCTSIZE2; ++i) {
108                 quant[i] = 1;
109         }
110
111         void* userdata = idct_alloc(quant);
112
113         start_benchmark_timer();
114
115         for (unsigned i = 0; i < num_runs; ++i) {
116                 (*idct)(coeff, userdata, output);
117         }
118         
119         double diff = stop_benchmark_timer();
120         printf("%u runs in %.2f CPU seconds = %.2f IDCTs/sec\n",
121                 num_runs, diff, num_runs / diff);
122
123         idct_free(userdata);
124 }
125
126 void test_all_idct(idct_alloc_t* idct_alloc, idct_free_t* idct_free, idct_func_t* idct)
127 {
128         printf("  test_dc_becomes_spread_out()\n");
129         test_dc_becomes_spread_out(idct_alloc, idct_free, idct);        
130
131         printf("  test_random_inputs()\n");
132         test_random_inputs(idct_alloc, idct_free, idct);        
133
134         printf("  performance test: ");
135         test_performance(idct_alloc, idct_free, idct);
136 }
137
138 int main(void)
139 {
140         printf("idct_reference:\n");
141         test_all_idct(idct_reference_alloc, idct_reference_free, idct_reference);
142
143         printf("idct_float:\n");
144         test_all_idct(idct_float_alloc, idct_float_free, idct_float);
145
146         printf("All tests pass.\n");
147         return 0;
148 }