/*
 * Number of adjacent columns that vscale() accumulates together in its
 * blocked loop (it sizes the acc[] array and is the step of the column
 * loop).  The blocked path is only compiled when this value is > 1.
 * The name suggests the group is sized relative to a cache line; nothing in
 * the visible code verifies that.
 */
7 #define CACHE_LINE_FACTOR 8
11 // This is bad for very small x, should use power series instead.
/*
 * lanczos_tap: evaluate the resampling kernel at offset x.
 *
 * Both visible return statements compute sinc(t) * sinc(t / 3.0), once with
 * t = -x*M_PI and once with t = x*M_PI.  That matches a three-lobe Lanczos
 * kernel if sinc() is sin(t)/t -- sinc() is defined outside this excerpt,
 * TODO confirm.
 *
 * NOTE(review): the embedded line numbers skip (18, 20, 23, 25), so the
 * value returned when x is out of range and the test that selects between
 * the two returns are not shown.  Presumably the out-of-range case returns
 * 0 and the -x form is used for negative x; verify against the full file.
 */
18 double lanczos_tap(double x)
// Offsets outside [-3, 3] are handled separately (result not visible here).
20 if (x < -3.0 || x > 3.0)
23 return sinc(-x*M_PI) * sinc(-x*M_PI / 3.0);
25 return sinc(x*M_PI) * sinc(x*M_PI / 3.0);
/*
 * hscale_calc_filter: precompute the horizontal resampling filter that maps
 * a row of w source samples to nw output samples, and store the coefficient
 * array in *filter.
 *
 * For each output column x, a run of taps [start, end] is evaluated with
 * lanczos_tap() and appended to a growable float array; the run is then
 * divided by `sum` (presumably the total of the run's taps, accumulated in
 * lines not shown).  pd[x].startcoeff records where the run for column x
 * begins in that array.
 *
 * filter  receives the coefficient array (filter->coeffs).  hscale() also
 *         reads filter->pd; the store to it is not visible in this listing.
 * w       source width in samples.
 * h       not referenced by any visible line.
 * nw      output width in samples.
 *
 * NOTE(review): the embedded line numbers skip, so several original lines
 * are not shown: the declarations of size_coeffs / num_coeffs / sum, the
 * lines between computing `end` and storing startcoeff (presumably clamping
 * start/end to the row and recording them in pd[x]), the update of
 * size_coeffs before realloc, and the accumulation of `sum`.
 */
37 void hscale_calc_filter(struct filter *filter, unsigned w, unsigned h, unsigned nw)
// One descriptor per output column.
// NOTE(review): no NULL check on this or the next malloc is visible.
39 struct pix_desc *pd = (struct pix_desc *)malloc(nw * sizeof(struct pix_desc));
41 float *coeffs = (float *)malloc(size_coeffs * sizeof(float));
// sf: source samples per output sample (> 1 when shrinking).
44 double sf = (double)w / (double)nw;
// Half-width of the tap window in source samples: 3*sf when shrinking,
// 3/sf otherwise.
// NOTE(review): when enlarging (sf < 1) this gives a window wider than 3
// while nd below is measured in unscaled source samples, so taps with
// |nd| > 3 land in the range rejected by lanczos_tap's
// `x < -3.0 || x > 3.0` test -- confirm this is intended.
45 double support = (w > nw) ? (3.0 * sf) : (3.0 / sf);
47 /* calculate the filter */
48 for (x = 0; x < nw; ++x) {
// Window of source indices centered on x*sf.
49 int start = ceil(x * sf - support);
50 int end = floor(x * sf + support);
// Coefficients for column x begin at the current end of the array.
62 pd[x].startcoeff = num_coeffs;
64 for (sx = start; sx <= end; ++sx) {
// nd: tap distance handed to the kernel.  When shrinking it is divided by
// sf, so the window [x*sf - 3*sf, x*sf + 3*sf] maps onto [-3, 3].
65 double nd = (w > nw) ? (sx/sf - x) : (sx - x*sf);
66 double f = lanczos_tap(nd);
// Array full: grow it (the new size_coeffs is set in a line not shown).
// NOTE(review): realloc's result overwrites coeffs directly, so the old
// block is leaked and the store below writes through NULL if realloc fails.
67 if (num_coeffs == size_coeffs) {
69 coeffs = (float *)realloc(coeffs, size_coeffs * sizeof(float));
// The double tap value is narrowed to float on store.
72 coeffs[num_coeffs++] = f;
// Second pass over the same run: divide every tap by sum.
76 for (sx = start; sx <= end; ++sx) {
77 coeffs[pd[x].startcoeff + sx - start] /= sum;
// Hand the coefficient array to the caller through *filter.
82 filter->coeffs = coeffs;
/*
 * hscale: apply a precomputed horizontal filter to h rows of 8-bit samples,
 * accumulating in float.
 *
 * filter   supplies pd (per-output-column descriptors) and coeffs.
 * pix      source samples; row y starts at pix + y*sstride.
 * npix     destination; row y starts at npix + y*dstride.
 * w        not referenced by any visible line.
 * h        number of rows processed.
 * nw       number of output columns computed per row.
 * sstride  source row pitch, in unsigned char elements.
 * dstride  destination row pitch, in float elements.
 *
 * NOTE(review): the embedded line numbers skip; the initialization of acc,
 * the store of the result through dptr, and the body of the trailing
 * padding loop are not shown in this listing.
 */
85 void hscale(struct filter *filter, unsigned char *pix, float *npix, unsigned w, unsigned h, unsigned nw, unsigned sstride, unsigned dstride)
87 struct pix_desc *pd = filter->pd;
88 float *coeffs = filter->coeffs;
91 for (y = 0; y < h; ++y) {
92 unsigned char *sptr = pix + y*sstride;
93 float *dptr = npix + y*dstride;
96 for (x = 0; x < nw; ++x) {
// First coefficient of the run belonging to output column x.
97 float *cf = &coeffs[pd[x].startcoeff];
// Weighted sum over source samples pd[x].start..pd[x].end (inclusive),
// consuming one coefficient per sample.
101 for (sx = pd[x].start; sx <= pd[x].end; ++sx) {
102 acc += sptr[sx] * *cf++;
// Continues from x == nw up to dstride, i.e. over the padding columns of
// the destination row (what is written there is not shown).
106 for ( ; x < dstride; ++x) {
/*
 * vscale: resample a float image vertically from h rows to nh rows and
 * convert the result to 8-bit samples.
 *
 * The vertical filter is computed inside this function on every call, using
 * the same scheme as the horizontal one: per output row y, a run of
 * lanczos_tap() values over [start, end], later divided by `sum`.
 *
 * pix      float source; the sample at row sy, column x is read as
 *          pix[x + sy*w].
 * npix     8-bit destination; each column's output pointer starts at
 *          npix + x (the stores themselves are not shown).
 * w        number of columns, also used as the source row pitch.
 * h        number of source rows.
 * nh       number of output rows.
 * dstride  not referenced by any visible line (presumably the destination
 *          row pitch used by the elided stores -- TODO confirm).
 *
 * NOTE(review): the embedded line numbers skip; declarations, the lines
 * between computing `end` and storing startcoeff, the size_coeffs update,
 * the accumulation of `sum`, the accumulator initialization, the advance of
 * cf in the blocked loop, the lower clamp, the stores through dptr, and
 * anything after the last visible line (e.g. freeing pd/coeffs) are not
 * shown in this listing.
 */
112 void vscale(float *pix, unsigned char *npix, unsigned w, unsigned h, unsigned nh, unsigned dstride)
// One descriptor per output row.
// NOTE(review): no NULL check on this or the next malloc is visible.
114 struct pix_desc *pd = (struct pix_desc *)malloc(nh * sizeof(struct pix_desc));
116 float *coeffs = (float *)malloc(size_coeffs * sizeof(float));
// sf: source rows per output row (> 1 when shrinking).
119 double sf = (double)h / (double)nh;
// Half-height of the tap window in source rows: 3*sf when shrinking,
// 3/sf otherwise.
120 double support = (h > nh) ? (3.0 * sf) : (3.0 / sf);
122 /* calculate the filter */
123 for (y = 0; y < nh; ++y) {
// Window of source rows centered on y*sf.
124 int start = ceil(y * sf - support);
125 int end = floor(y * sf + support);
// Coefficients for row y begin at the current end of the array.
137 pd[y].startcoeff = num_coeffs;
139 for (sy = start; sy <= end; ++sy) {
// nd: tap distance handed to the kernel; divided by sf when shrinking.
140 double nd = (h > nh) ? (sy/sf - y) : (sy - y*sf);
141 double f = lanczos_tap(nd);
// Array full: grow it.
// NOTE(review): realloc's result overwrites coeffs directly, so the old
// block is leaked and the store below writes through NULL if realloc fails.
142 if (num_coeffs == size_coeffs) {
144 coeffs = (float *)realloc(coeffs, size_coeffs * sizeof(float));
147 coeffs[num_coeffs++] = f;
// Second pass over the same run: divide every tap by sum.
151 for (sy = start; sy <= end; ++sy) {
152 coeffs[pd[y].startcoeff + sy - start] /= sum;
// Blocked path: handle CACHE_LINE_FACTOR adjacent columns per iteration.
156 #if CACHE_LINE_FACTOR > 1
// NOTE(review): the bound is x < w while the inner loops index columns
// x + i for i < CACHE_LINE_FACTOR, so if w is not a multiple of
// CACHE_LINE_FACTOR the last group reads past column w - 1.  The visible
// callers pass a width rounded up to a multiple of DCTSIZE -- confirm
// DCTSIZE is a multiple of CACHE_LINE_FACTOR.
157 for (x = 0; x < w; x += CACHE_LINE_FACTOR) {
158 float *sptr = pix + x;
159 unsigned char *dptr = npix + x;
160 for (y = 0; y < nh; ++y) {
// One accumulator per column of the group; this loop presumably zeroes
// them (its body is not shown).
162 float acc[CACHE_LINE_FACTOR];
163 for (i = 0; i < CACHE_LINE_FACTOR; ++i)
165 float *cf = &coeffs[pd[y].startcoeff];
// For each source row of the run, add that row's sample times the current
// coefficient into every column's accumulator.
168 for (sy = pd[y].start; sy <= pd[y].end; ++sy) {
169 for (i = 0; i < CACHE_LINE_FACTOR; ++i) {
170 acc[i] += sptr[sy * w + i] * *cf;
// Convert each accumulator to a byte.  Only the upper test (> 255.0) and
// the plain cast are visible; the preceding `if` is not shown.
175 for (i = 0; i < CACHE_LINE_FACTOR; ++i) {
179 else if (acc[i] > 255.0)
182 ch = (unsigned char)acc[i];
// Column-at-a-time loop.  Two headers are visible back to back: this one
// resumes at x rounded down to a multiple of CACHE_LINE_FACTOR, the next
// starts at 0.  Presumably they are the two arms of the #if opened above
// and share the body below -- the #else/#endif lines are not shown.
188 for (x = (x/CACHE_LINE_FACTOR)*CACHE_LINE_FACTOR; x < w; ++x) {
190 for (x = 0; x < w; ++x) {
192 float *sptr = pix + x;
193 unsigned char *dptr = npix + x;
194 for (y = 0; y < nh; ++y) {
196 float *cf = &coeffs[pd[y].startcoeff];
// Weighted sum down one column: source rows are w elements apart.
199 for (sy = pd[y].start; sy <= pd[y].end; ++sy) {
200 acc += sptr[sy * w] * *cf++;
// Same byte conversion as in the blocked path.
206 else if (acc > 255.0)
209 ch = (unsigned char)acc;
/*
 * main: read a JPEG from stdin, rescale its three component planes, and
 * write a JPEG to stdout.
 *
 * argv[1], argv[2]  nominal output width and height, parsed with atoi().
 *
 * Steps, as far as the visible lines show:
 *   1. Start a libjpeg decompressor on stdin with raw_data_out = TRUE and
 *      read component data with jpeg_read_raw_data(), one group of
 *      max_v_samp_factor * DCTSIZE lines per iteration.
 *   2. Run hscale() on each group, writing into per-component float planes.
 *   3. Run vscale() once per component to produce the 8-bit output planes.
 *   4. Start a compressor on stdout (quality 85, JCS_YCbCr, raw_data_in =
 *      TRUE, sampling factors 2x2 / 1x1 / 1x1) and feed it the planes with
 *      jpeg_write_raw_data().
 *
 * NOTE(review): the embedded line numbers skip, so parts of this function
 * are not shown: the trailing arguments of the hscale_calc_filter() calls,
 * the `from=` / `nw=` arguments of the hscale() calls, the updates of
 * `blocks`, any use of `lines`, the reset of total_lines/blocks before the
 * second loop, the bodies of the edge-extension tests, and the return.
 * NOTE(review): no argc check is visible before argv[1] / argv[2] are read,
 * and atoi() gives no error indication for non-numeric input.
 * NOTE(review): none of the visible malloc() results is checked, and no
 * free() is visible.
 */
216 int main(int argc, char **argv)
218 unsigned nominal_w = atoi(argv[1]);
219 unsigned nominal_h = atoi(argv[2]);
// Output sampling factors are fixed: component 0 at 2x2, components 1 and
// 2 at 1x1, with 2 as the maximum in both directions.
221 unsigned samp_h0 = 2, samp_v0 = 2;
222 unsigned samp_h1 = 1, samp_v1 = 1;
223 unsigned samp_h2 = 1, samp_v2 = 1;
224 unsigned max_samp_h = 2, max_samp_v = 2;
// Output size of each component: nominal size scaled by samp / max_samp
// (integer division).
226 unsigned nw0 = nominal_w * samp_h0 / max_samp_h, nh0 = nominal_h * samp_v0 / max_samp_v;
227 unsigned nw1 = nominal_w * samp_h1 / max_samp_h, nh1 = nominal_h * samp_v1 / max_samp_v;
228 unsigned nw2 = nominal_w * samp_h2 / max_samp_h, nh2 = nominal_h * samp_v2 / max_samp_v;
// Row pitch: output width rounded up to a multiple of DCTSIZE.  The mask
// form is only a round-up if DCTSIZE is a power of two; DCTSIZE is not
// defined in this excerpt (presumably libjpeg's).
230 unsigned stride0 = (nw0 + DCTSIZE-1) & ~(DCTSIZE-1);
231 unsigned stride1 = (nw1 + DCTSIZE-1) & ~(DCTSIZE-1);
232 unsigned stride2 = (nw2 + DCTSIZE-1) & ~(DCTSIZE-1);
// Decompressor reading from stdin, switched to raw data output before
// decompression starts.
234 struct jpeg_decompress_struct dinfo;
235 struct jpeg_error_mgr jerr;
236 dinfo.err = jpeg_std_error(&jerr);
237 jpeg_create_decompress(&dinfo);
238 jpeg_stdio_src(&dinfo, stdin);
239 jpeg_read_header(&dinfo, TRUE);
240 dinfo.raw_data_out = TRUE;
241 jpeg_start_decompress(&dinfo);
// Diagnostics on stderr: block-padded source size -> target size for each
// component.
243 fprintf(stderr, "Scaling using Lanczos filter:\n");
244 fprintf(stderr, " Y component: %ux%u -> %ux%u\n", dinfo.comp_info[0].width_in_blocks * DCTSIZE, dinfo.comp_info[0].height_in_blocks * DCTSIZE, nw0, nh0);
245 fprintf(stderr, " Cb component: %ux%u -> %ux%u\n", dinfo.comp_info[1].width_in_blocks * DCTSIZE, dinfo.comp_info[1].height_in_blocks * DCTSIZE, nw1, nh1);
246 fprintf(stderr, " Cr component: %ux%u -> %ux%u\n", dinfo.comp_info[2].width_in_blocks * DCTSIZE, dinfo.comp_info[2].height_in_blocks * DCTSIZE, nw2, nh2);
// Intermediate float planes (horizontally scaled, source height): rows =
// (height_in_blocks + v_samp_factor - 1) * DCTSIZE, pitch = output stride.
248 float *npix_y = (float *)malloc((dinfo.comp_info[0].height_in_blocks + dinfo.comp_info[0].v_samp_factor - 1) * DCTSIZE * stride0 * sizeof(float));
249 float *npix_cb = (float *)malloc((dinfo.comp_info[1].height_in_blocks + dinfo.comp_info[1].v_samp_factor - 1) * DCTSIZE * stride1 * sizeof(float));
250 float *npix_cr = (float *)malloc((dinfo.comp_info[2].height_in_blocks + dinfo.comp_info[2].v_samp_factor - 1) * DCTSIZE * stride2 * sizeof(float));
// Final 8-bit planes: nh rows of stride bytes each.
251 JSAMPLE *data_y = (unsigned char *)malloc(nh0 * stride0);
252 JSAMPLE *data_cb = (unsigned char *)malloc(nh1 * stride1);
253 JSAMPLE *data_cr = (unsigned char *)malloc(nh2 * stride2);
// One horizontal filter per component; w is the image width scaled by the
// component's horizontal sampling ratio.  The remaining arguments of each
// call are not shown in this listing.
255 struct filter filt0, filt1, filt2;
257 hscale_calc_filter(&filt0,
258 /* w= */ dinfo.image_width * dinfo.comp_info[0].h_samp_factor / dinfo.max_h_samp_factor,
259 /* h= */ dinfo.comp_info[0].v_samp_factor * DCTSIZE,
262 hscale_calc_filter(&filt1,
263 /* w= */ dinfo.image_width * dinfo.comp_info[1].h_samp_factor / dinfo.max_h_samp_factor,
264 /* h= */ dinfo.comp_info[1].v_samp_factor * DCTSIZE,
267 hscale_calc_filter(&filt2,
268 /* w= */ dinfo.image_width * dinfo.comp_info[2].h_samp_factor / dinfo.max_h_samp_factor,
269 /* h= */ dinfo.comp_info[2].v_samp_factor * DCTSIZE,
// Decode loop: runs until total_lines covers component 0's block-padded
// height.  total_lines is int and the bound is unsigned, so the comparison
// is carried out in unsigned arithmetic.
273 int total_lines = 0, blocks = 0;
274 while (total_lines < dinfo.comp_info[0].height_in_blocks * DCTSIZE) {
275 unsigned max_lines = dinfo.max_v_samp_factor * DCTSIZE;
// Per-iteration scratch for one group of decoded rows.
// NOTE(review): these are variable-length arrays sized from the image
// header, so stack use grows with the input image's width.
276 JSAMPLE tmp_y [(dinfo.comp_info[0].width_in_blocks * DCTSIZE) * (DCTSIZE * dinfo.comp_info[0].v_samp_factor)];
277 JSAMPLE tmp_cb[(dinfo.comp_info[1].width_in_blocks * DCTSIZE) * (DCTSIZE * dinfo.comp_info[1].v_samp_factor)];
278 JSAMPLE tmp_cr[(dinfo.comp_info[2].width_in_blocks * DCTSIZE) * (DCTSIZE * dinfo.comp_info[2].v_samp_factor)];
280 JSAMPROW y_row_ptrs[max_lines];
281 JSAMPROW cb_row_ptrs[max_lines];
282 JSAMPROW cr_row_ptrs[max_lines];
283 JSAMPROW* ptrs[] = { y_row_ptrs, cb_row_ptrs, cr_row_ptrs };
// Row i of each component starts i block-padded widths into its scratch.
// NOTE(review): pointers are formed for all max_lines rows of every
// component, but each tmp_* array holds only DCTSIZE * v_samp_factor rows;
// for a component with v_samp_factor < max_v_samp_factor the later
// pointers lie outside its array.  Presumably libjpeg never dereferences
// them -- verify.
286 for (i = 0; i < max_lines; ++i) {
287 y_row_ptrs[i] = tmp_y + i * dinfo.comp_info[0].width_in_blocks * DCTSIZE;
288 cb_row_ptrs[i] = tmp_cb + i * dinfo.comp_info[1].width_in_blocks * DCTSIZE;
289 cr_row_ptrs[i] = tmp_cr + i * dinfo.comp_info[2].width_in_blocks * DCTSIZE;
// The returned count is stored; how it is used is not shown.
292 int lines = jpeg_read_raw_data(&dinfo, ptrs, max_lines);
// Horizontally scale this group of each component.  The destination is
// offset by `blocks` groups of DCTSIZE * v_samp_factor rows, each stride
// floats wide; source pitch is the block-padded width.
296 hscale(/* filter= */ &filt0,
298 /* to= */ npix_y + blocks * DCTSIZE * dinfo.comp_info[0].v_samp_factor * stride0,
299 /* w= */ dinfo.image_width * dinfo.comp_info[0].h_samp_factor / dinfo.max_h_samp_factor,
300 /* h= */ dinfo.comp_info[0].v_samp_factor * DCTSIZE,
302 /* sstride= */ dinfo.comp_info[0].width_in_blocks * DCTSIZE,
303 /* dstride= */ stride0);
305 hscale(/* filter= */ &filt1,
307 /* to= */ npix_cb + blocks * DCTSIZE * dinfo.comp_info[1].v_samp_factor * stride1,
308 /* w= */ dinfo.image_width * dinfo.comp_info[1].h_samp_factor / dinfo.max_h_samp_factor,
309 /* h= */ dinfo.comp_info[1].v_samp_factor * DCTSIZE,
311 /* sstride= */ dinfo.comp_info[1].width_in_blocks * DCTSIZE,
312 /* dstride= */ stride1);
314 hscale(/* filter= */ &filt2,
316 /* to= */ npix_cr + blocks * DCTSIZE * dinfo.comp_info[2].v_samp_factor * stride2,
317 /* w= */ dinfo.image_width * dinfo.comp_info[2].h_samp_factor / dinfo.max_h_samp_factor,
318 /* h= */ dinfo.comp_info[2].v_samp_factor * DCTSIZE,
320 /* sstride= */ dinfo.comp_info[2].width_in_blocks * DCTSIZE,
321 /* dstride= */ stride2);
// Advances by the requested count, not by the value returned above.
323 total_lines += max_lines;
// Vertical pass over each whole float plane: width and destination pitch
// are both the output stride; source height is the image height scaled by
// the component's vertical sampling ratio.
327 vscale(npix_y, data_y, stride0, dinfo.image_height * dinfo.comp_info[0].v_samp_factor / dinfo.max_v_samp_factor, nh0, stride0);
328 vscale(npix_cb, data_cb, stride1, dinfo.image_height * dinfo.comp_info[1].v_samp_factor / dinfo.max_v_samp_factor, nh1, stride1);
329 vscale(npix_cr, data_cr, stride2, dinfo.image_height * dinfo.comp_info[2].v_samp_factor / dinfo.max_v_samp_factor, nh2, stride2);
// NOTE(review): no jpeg_finish_decompress() call is visible before the
// decompressor is destroyed -- confirm that is intended.
330 jpeg_destroy_decompress(&dinfo);
// Compressor writing to stdout; reuses the jerr error manager.
332 struct jpeg_compress_struct cinfo;
333 cinfo.err = jpeg_std_error(&jerr);
334 jpeg_create_compress(&cinfo);
335 jpeg_stdio_dest(&cinfo, stdout);
336 cinfo.input_components = 3;
337 jpeg_set_defaults(&cinfo);
338 jpeg_set_quality(&cinfo, 85, FALSE);
339 cinfo.image_width = nominal_w;
340 cinfo.image_height = nominal_h;
341 cinfo.raw_data_in = TRUE;
342 jpeg_set_colorspace(&cinfo, JCS_YCbCr);
// Apply the fixed output sampling factors chosen at the top of main.
343 cinfo.comp_info[0].h_samp_factor = samp_h0;
344 cinfo.comp_info[0].v_samp_factor = samp_v0;
345 cinfo.comp_info[1].h_samp_factor = samp_h1;
346 cinfo.comp_info[1].v_samp_factor = samp_v1;
347 cinfo.comp_info[2].h_samp_factor = samp_h2;
348 cinfo.comp_info[2].v_samp_factor = samp_v2;
349 jpeg_start_compress(&cinfo, TRUE);
// Encode loop, again in groups of max_v_samp_factor * DCTSIZE lines.
// total_lines and blocks are reused here; their reset is not shown.
353 while (total_lines < cinfo.comp_info[0].height_in_blocks * DCTSIZE) {
354 unsigned max_lines = cinfo.max_v_samp_factor * DCTSIZE;
356 JSAMPROW y_row_ptrs[max_lines];
357 JSAMPROW cb_row_ptrs[max_lines];
358 JSAMPROW cr_row_ptrs[max_lines];
359 JSAMPROW* ptrs[] = { y_row_ptrs, cb_row_ptrs, cr_row_ptrs };
362 for (i = 0; i < max_lines; ++i) {
363 // simple edge extension
// Row index of each component for slot i of this group.  The visible
// tests catch indices past the last output row (nh - 1); the statements
// they guard are not shown (presumably clamping to the last row).
364 int yline = i + blocks*DCTSIZE*cinfo.comp_info[0].v_samp_factor;
368 int cbline = i + blocks*DCTSIZE*cinfo.comp_info[1].v_samp_factor;
369 if (cbline > nh1 - 1)
372 int crline = i + blocks*DCTSIZE*cinfo.comp_info[2].v_samp_factor;
373 if (crline > nh2 - 1)
// Point each slot at the chosen row of the finished output plane.
376 y_row_ptrs[i] = data_y + yline * stride0;
377 cb_row_ptrs[i] = data_cb + cbline * stride1;
378 cr_row_ptrs[i] = data_cr + crline * stride2;
381 total_lines += max_lines;
// NOTE(review): the return value of jpeg_write_raw_data() is not used in
// the visible code.
384 jpeg_write_raw_data(&cinfo, ptrs, max_lines);
386 jpeg_finish_compress(&cinfo);
387 jpeg_destroy_compress(&cinfo);