]> git.sesse.net Git - qscale/blob - qscale.c
Invert directions again, increasing speed but adding bugs.
[qscale] / qscale.c
1 #include <stdio.h>
2 #include <math.h>
3 #include <string.h>
4 #include <stdlib.h>
5 #include "jpeglib.h"
6
/*
 * Number of adjacent image columns vscale() accumulates together in its
 * blocked inner loop.  When this is 1 or less, vscale() is compiled with
 * only its plain one-column-at-a-time loop.
 */
#define CACHE_LINE_FACTOR 8
8
9 double sinc(double x)
10 {
11         // This is bad for very small x, should use power series instead.
12         if (x == 0.0)
13                 return 1.0;
14         else
15                 return sin(x) / x;
16 }
17
18 double lanczos_tap(double x)
19 {
20         if (x < -3.0 || x > 3.0)
21                 return 0.0;
22         if (x < 0.0)
23                 return sinc(-x*M_PI) * sinc(-x*M_PI / 3.0);
24         else
25                 return sinc(x*M_PI) * sinc(x*M_PI / 3.0);
26 }
27
28
/*
 * Filter footprint of one output sample: which source samples feed it and
 * where its weights live in the shared coefficient array.
 */
struct pix_desc {
        unsigned start;         /* first contributing source index (inclusive) */
        unsigned end;           /* last contributing source index (inclusive) */
        unsigned startcoeff;    /* index of the first weight for this sample */
};
33
34 void hscale(float *pix, unsigned char *npix, unsigned w, unsigned h, unsigned nw, unsigned dstride)
35 {
36         struct pix_desc *pd = (struct pix_desc *)malloc(nw * sizeof(struct pix_desc));
37         int size_coeffs = 8;
38         float *coeffs = (float *)malloc(size_coeffs * sizeof(float));
39         int num_coeffs = 0;
40         int x, y, sx;
41         double sf = (double)w / (double)nw;
42         double support = (w > nw) ? (3.0 * sf) : (3.0 / sf);
43
44         /* calculate the filter */
45         for (x = 0; x < nw; ++x) {
46                 int start = ceil(x * sf - support);
47                 int end = floor(x * sf + support);
48                 double sum = 0.0;
49
50                 if (start < 0) {
51                         start = 0;
52                 }
53                 if (end > w - 1) {
54                         end = w - 1;
55                 }
56
57                 pd[x].start = start;
58                 pd[x].end = end;
59                 pd[x].startcoeff = num_coeffs;
60
61                 for (sx = start; sx <= end; ++sx) {
62                         double nd = (w > nw) ? (sx/sf - x) : (sx - x*sf);
63                         double f = lanczos_tap(nd);
64                         if (num_coeffs == size_coeffs) {
65                                 size_coeffs <<= 1;
66                                 coeffs = (float *)realloc(coeffs, size_coeffs * sizeof(float));
67                         }
68                 
69                         coeffs[num_coeffs++] = f;
70                         sum += f;
71                 }
72
73                 for (sx = start; sx <= end; ++sx) {
74                         coeffs[pd[x].startcoeff + sx - start] /= sum;
75                 }
76         }
77         
78         for (y = 0; y < h; ++y) {
79                 float *sptr = pix + y*w;
80                 unsigned char *dptr = npix + y*dstride;
81                 unsigned char ch = 0;
82                 for (x = 0; x < nw; ++x) {
83                         float acc = 0.0;
84                         float *cf = &coeffs[pd[x].startcoeff];
85                         unsigned sx;
86                         
87                         for (sx = pd[x].start; sx <= pd[x].end; ++sx) {
88                                 acc += sptr[sx] * *cf++;
89                         }
90
91                         if (acc < 0.0)
92                                 ch = 0;
93                         else if (acc > 255.0)
94                                 ch = 255;
95                         else
96                                 ch = (unsigned char)acc;
97                         *dptr++ = ch;
98                 }
99                 for ( ; x < dstride; ++x) {
100                         *dptr++ = ch;
101                 }
102         }
103 }
104
105 void vscale(unsigned char *pix, float *npix, unsigned w, unsigned h, unsigned nh, unsigned dstride)
106 {
107         struct pix_desc *pd = (struct pix_desc *)malloc(nh * sizeof(struct pix_desc));
108         int size_coeffs = 8;
109         float *coeffs = (float *)malloc(size_coeffs * sizeof(float));
110         int num_coeffs = 0;
111         int x, y, sy;
112         double sf = (double)h / (double)nh;
113         double support = (h > nh) ? (3.0 * sf) : (3.0 / sf);
114
115         /* calculate the filter */
116         for (y = 0; y < nh; ++y) {
117                 int start = ceil(y * sf - support);
118                 int end = floor(y * sf + support);
119                 double sum = 0.0;
120
121                 if (start < 0) {
122                         start = 0;
123                 }
124                 if (end > h - 1) {
125                         end = h - 1;
126                 }
127
128                 pd[y].start = start;
129                 pd[y].end = end;
130                 pd[y].startcoeff = num_coeffs;
131
132                 for (sy = start; sy <= end; ++sy) {
133                         double nd = (h > nh) ? (sy/sf - y) : (sy - y*sf);
134                         double f = lanczos_tap(nd);
135                         if (num_coeffs == size_coeffs) {
136                                 size_coeffs <<= 1;
137                                 coeffs = (float *)realloc(coeffs, size_coeffs * sizeof(float));
138                         }
139                         
140                         coeffs[num_coeffs++] = f;
141                         sum += f;
142                 }
143
144                 for (sy = start; sy <= end; ++sy) {
145                         coeffs[pd[y].startcoeff + sy - start] /= sum;
146                 }
147         }
148
149 #if CACHE_LINE_FACTOR > 1
150         for (x = 0; x < w; x += CACHE_LINE_FACTOR) {
151                 unsigned char *sptr = pix + x;
152                 float *dptr = npix + x;
153                 for (y = 0; y < nh; ++y) {
154                         int i;
155                         float acc[CACHE_LINE_FACTOR];
156                         for (i = 0; i < CACHE_LINE_FACTOR; ++i)
157                                 acc[i] = 0.0;
158                         float *cf = &coeffs[pd[y].startcoeff];
159                         unsigned sy;
160                         
161                         for (sy = pd[y].start; sy <= pd[y].end; ++sy) {
162                                 //asm volatile ("prefetcht0 %0" :: "m" (sptr[(sy+1) * w]));
163                                 for (i = 0; i < CACHE_LINE_FACTOR; ++i) {
164                                         acc[i] += sptr[sy * w + i] * *cf;
165                                 }
166                                 ++cf;
167                         }
168
169                         for (i = 0; i < CACHE_LINE_FACTOR; ++i) {
170                                 dptr[i] = acc[i];
171                         }
172                         dptr += dstride;
173                 }
174         }
175         for (x = (x/CACHE_LINE_FACTOR)*CACHE_LINE_FACTOR; x < w; ++x) {
176 #else
177         for (x = 0; x < w; ++x) {
178 #endif
179                 unsigned char *sptr = pix + x;
180                 float *dptr = npix + x;
181                 for (y = 0; y < nh; ++y) {
182                         float acc = 0.0;
183                         float *cf = &coeffs[pd[y].startcoeff];
184                         unsigned sy;
185                         
186                         for (sy = pd[y].start; sy <= pd[y].end; ++sy) {
187                                 acc += sptr[sy * w] * *cf++;
188                         }
189
190                         *dptr = acc;
191                         dptr += dstride;
192                 }
193         }
194 }
195
196 int main(int argc, char **argv)
197 {
198         unsigned nominal_w = atoi(argv[1]);
199         unsigned nominal_h = atoi(argv[2]);
200
201         unsigned samp_h0 = 2, samp_v0 = 2;
202         unsigned samp_h1 = 1, samp_v1 = 1;
203         unsigned samp_h2 = 1, samp_v2 = 1;
204         unsigned max_samp_h = 2, max_samp_v = 2;
205
206         unsigned nw0 = nominal_w * samp_h0 / max_samp_h, nh0 = nominal_h * samp_v0 / max_samp_v;
207         unsigned nw1 = nominal_w * samp_h1 / max_samp_h, nh1 = nominal_h * samp_v1 / max_samp_v;
208         unsigned nw2 = nominal_w * samp_h2 / max_samp_h, nh2 = nominal_h * samp_v2 / max_samp_v;
209
210         unsigned stride0 = (nw0 + DCTSIZE-1) & ~(DCTSIZE-1);
211         unsigned stride1 = (nw1 + DCTSIZE-1) & ~(DCTSIZE-1);
212         unsigned stride2 = (nw2 + DCTSIZE-1) & ~(DCTSIZE-1);
213
214         struct jpeg_decompress_struct dinfo;
215         struct jpeg_error_mgr jerr;
216         dinfo.err = jpeg_std_error(&jerr);
217         jpeg_create_decompress(&dinfo);
218         jpeg_stdio_src(&dinfo, stdin);
219         jpeg_read_header(&dinfo, TRUE);
220         dinfo.raw_data_out = TRUE;
221         jpeg_start_decompress(&dinfo);
222
223         fprintf(stderr, "Scaling using Lanczos filter:\n");
224         fprintf(stderr, "  Y component: %ux%u -> %ux%u\n", dinfo.comp_info[0].width_in_blocks * DCTSIZE, dinfo.comp_info[0].height_in_blocks * DCTSIZE, nw0, nh0);
225         fprintf(stderr, "  Cb component: %ux%u -> %ux%u\n", dinfo.comp_info[1].width_in_blocks * DCTSIZE, dinfo.comp_info[1].height_in_blocks * DCTSIZE, nw1, nh1);
226         fprintf(stderr, "  Cr component: %ux%u -> %ux%u\n", dinfo.comp_info[2].width_in_blocks * DCTSIZE, dinfo.comp_info[2].height_in_blocks * DCTSIZE, nw2, nh2);
227
228         JSAMPLE *data_y  = (JSAMPLE*)malloc(dinfo.comp_info[0].height_in_blocks * dinfo.comp_info[0].width_in_blocks * DCTSIZE * DCTSIZE);
229         JSAMPLE *data_cb = (JSAMPLE*)malloc(dinfo.comp_info[1].height_in_blocks * dinfo.comp_info[1].width_in_blocks * DCTSIZE * DCTSIZE);
230         JSAMPLE *data_cr = (JSAMPLE*)malloc(dinfo.comp_info[2].height_in_blocks * dinfo.comp_info[2].width_in_blocks * DCTSIZE * DCTSIZE);
231         JSAMPLE *data_ny, *data_ncb, *data_ncr;
232
233         int total_lines = 0, blocks = 0;
234         while (total_lines < dinfo.comp_info[0].height_in_blocks * DCTSIZE) {
235                 unsigned max_lines = dinfo.max_v_samp_factor * DCTSIZE;
236
237                 JSAMPROW y_row_ptrs[max_lines];
238                 JSAMPROW cb_row_ptrs[max_lines];
239                 JSAMPROW cr_row_ptrs[max_lines];
240                 JSAMPROW* ptrs[] = { y_row_ptrs, cb_row_ptrs, cr_row_ptrs };
241                 int i;
242
243                 for (i = 0; i < max_lines; ++i) {
244                         y_row_ptrs[i]  = data_y  + (i+blocks*DCTSIZE*dinfo.comp_info[0].v_samp_factor) * dinfo.comp_info[0].width_in_blocks * DCTSIZE;
245                         cb_row_ptrs[i] = data_cb + (i+blocks*DCTSIZE*dinfo.comp_info[1].v_samp_factor) * dinfo.comp_info[1].width_in_blocks * DCTSIZE;
246                         cr_row_ptrs[i] = data_cr + (i+blocks*DCTSIZE*dinfo.comp_info[2].v_samp_factor) * dinfo.comp_info[2].width_in_blocks * DCTSIZE;
247                 }
248                 
249                 total_lines += max_lines;
250                 ++blocks;
251
252                 if (jpeg_read_raw_data(&dinfo, ptrs, max_lines) == 0)
253                         break;
254         }
255
256         {
257                 float *npix = (float*)malloc(dinfo.comp_info[0].width_in_blocks * DCTSIZE * nh0 * sizeof(float));       
258                 vscale(data_y, npix, dinfo.comp_info[0].width_in_blocks * DCTSIZE, dinfo.comp_info[0].height_in_blocks * DCTSIZE, nh0, dinfo.comp_info[0].width_in_blocks * DCTSIZE);
259                 data_ny = (unsigned char *)malloc(nw0 * stride0);
260                 hscale(npix, data_ny, dinfo.comp_info[0].width_in_blocks * DCTSIZE, nh0, nw0, stride0);
261                 free(npix);
262         }
263         {
264                 float *npix = (float*)malloc(dinfo.comp_info[1].width_in_blocks * DCTSIZE * nh1 * sizeof(float));       
265                 vscale(data_cr, npix, dinfo.comp_info[1].width_in_blocks * DCTSIZE, dinfo.comp_info[1].height_in_blocks * DCTSIZE, nh1, dinfo.comp_info[1].width_in_blocks * DCTSIZE);
266                 data_ncr = (unsigned char *)malloc(nw1 * stride1);
267                 hscale(npix, data_ncr, dinfo.comp_info[1].width_in_blocks * DCTSIZE, nh1, nw1, stride1);
268                 free(npix);
269         }
270         {
271                 float *npix = (float*)malloc(dinfo.comp_info[2].width_in_blocks * DCTSIZE * nh2 * sizeof(float));       
272                 vscale(data_cb, npix, dinfo.comp_info[2].width_in_blocks * DCTSIZE, dinfo.comp_info[2].height_in_blocks * DCTSIZE, nh2, dinfo.comp_info[2].width_in_blocks * DCTSIZE);
273                 data_ncb = (unsigned char *)malloc(nw2 * stride2);
274                 hscale(npix, data_ncb, dinfo.comp_info[2].width_in_blocks * DCTSIZE, nh2, nw2, stride2);
275                 free(npix);
276         }
277         jpeg_destroy_decompress(&dinfo);
278         
279         struct jpeg_compress_struct cinfo;
280         cinfo.err = jpeg_std_error(&jerr);
281         jpeg_create_compress(&cinfo);
282         jpeg_stdio_dest(&cinfo, stdout);
283         cinfo.input_components = 3;
284         jpeg_set_defaults(&cinfo);
285         jpeg_set_quality(&cinfo, 85, FALSE);
286         cinfo.image_width = nominal_w;
287         cinfo.image_height = nominal_h;
288         cinfo.raw_data_in = TRUE;
289         jpeg_set_colorspace(&cinfo, JCS_YCbCr);
290         cinfo.comp_info[0].h_samp_factor = samp_h0;
291         cinfo.comp_info[0].v_samp_factor = samp_v0;
292         cinfo.comp_info[1].h_samp_factor = samp_h1;
293         cinfo.comp_info[1].v_samp_factor = samp_v1;
294         cinfo.comp_info[2].h_samp_factor = samp_h2;
295         cinfo.comp_info[2].v_samp_factor = samp_v2;
296         jpeg_start_compress(&cinfo, TRUE);
297
298         total_lines = 0;
299         blocks = 0;
300         while (total_lines < cinfo.comp_info[0].height_in_blocks * DCTSIZE) {
301                 unsigned max_lines = cinfo.max_v_samp_factor * DCTSIZE;
302
303                 JSAMPROW y_row_ptrs[max_lines];
304                 JSAMPROW cb_row_ptrs[max_lines];
305                 JSAMPROW cr_row_ptrs[max_lines];
306                 JSAMPROW* ptrs[] = { y_row_ptrs, cb_row_ptrs, cr_row_ptrs };
307                 int i;
308
309                 for (i = 0; i < max_lines; ++i) {
310                         // simple edge extension
311                         int yline = i + blocks*DCTSIZE*cinfo.comp_info[0].v_samp_factor;
312                         if (yline > nh0 - 1)
313                                 yline = nh0 - 1;
314
315                         int cbline = i + blocks*DCTSIZE*cinfo.comp_info[1].v_samp_factor;
316                         if (cbline > nh1 - 1)
317                                 cbline = nh1 - 1;
318
319                         int crline = i + blocks*DCTSIZE*cinfo.comp_info[2].v_samp_factor;
320                         if (crline > nh2 - 1)
321                                 crline = nh2 - 1;
322
323                         y_row_ptrs[i]  = data_ny  + yline * stride0;
324                         cb_row_ptrs[i] = data_ncb + cbline * stride1;
325                         cr_row_ptrs[i] = data_ncr + crline * stride2;
326                 }
327                 
328                 total_lines += max_lines;
329                 ++blocks;
330
331                 jpeg_write_raw_data(&cinfo, ptrs, max_lines);
332         }
333         jpeg_finish_compress(&cinfo);
334         jpeg_destroy_compress(&cinfo);
335
336         return 0;
337 }
338