/*
 * Number of adjacent columns that vscale() accumulates together in one pass.
 * Values greater than 1 select the blocked column loop guarded by
 * "#if CACHE_LINE_FACTOR > 1"; the macro also sizes the per-block accumulator
 * array and is the step of the outer column loop.
 */
7 #define CACHE_LINE_FACTOR 8
11 // This is bad for very small x, should use power series instead.
/*
 * Evaluate one tap of the Lanczos filter at offset x.
 *
 * Both visible return statements compute sinc(pi*x) * sinc(pi*x / 3), one of
 * them with the argument negated. sinc() is defined elsewhere in the file.
 *
 * NOTE(review): the statement executed when the range guard fires, and the
 * condition that selects between the two returns, are not visible in this
 * excerpt -- presumably 0.0 is returned outside [-3, 3] and the negated form
 * is used for x < 0; confirm against the full file.
 */
18 double lanczos_tap(double x)
// x lies outside the interval [-3, 3].
20 if (x < -3.0 || x > 3.0)
// Product of the two sinc terms, evaluated at -x.
23 return sinc(-x*M_PI) * sinc(-x*M_PI / 3.0);
// Product of the two sinc terms, evaluated at x.
25 return sinc(x*M_PI) * sinc(x*M_PI / 3.0);
/*
 * Horizontally resample a float image into an 8-bit image using Lanczos taps.
 *
 * pix      source samples; row y starts at pix + y*w.
 * npix     destination bytes; row y starts at npix + y*dstride.
 * w        number of source samples per row.
 * h        number of rows processed.
 * nw       number of output samples computed per row.
 * dstride  destination row pitch; after the nw computed samples a trailing
 *          loop visits positions nw..dstride-1 (its body is not visible here
 *          -- presumably it writes padding; confirm).
 *
 * The function first builds, for every output column, a run of coefficients
 * in a shared pool and divides them by `sum`, then applies that table to
 * each row.
 */
34 void hscale(float *pix, unsigned char *npix, unsigned w, unsigned h, unsigned nw, unsigned dstride)
// One descriptor per output column.
36 struct pix_desc *pd = (struct pix_desc *)malloc(nw * sizeof(struct pix_desc));
// Shared coefficient pool; size_coeffs is declared on a line not shown here.
38 float *coeffs = (float *)malloc(size_coeffs * sizeof(float));
// Source samples per output sample.
41 double sf = (double)w / (double)nw;
// Half-width of the source window examined per output sample: 3*sf when
// w > nw, 3/sf otherwise.
42 double support = (w > nw) ? (3.0 * sf) : (3.0 / sf);
44 /* calculate the filter */
45 for (x = 0; x < nw; ++x) {
// Integer source indices within `support` of the position x*sf.
// NOTE(review): pd[x].start / pd[x].end are read later but are assigned on
// lines not shown here; any clamping to [0, w-1] would also be there.
46 int start = ceil(x * sf - support);
47 int end = floor(x * sf + support);
// Remember where this column's coefficients begin in the pool.
59 pd[x].startcoeff = num_coeffs;
61 for (sx = start; sx <= end; ++sx) {
// Argument handed to lanczos_tap(): sx/sf - x when w > nw, sx - x*sf otherwise.
62 double nd = (w > nw) ? (sx/sf - x) : (sx - x*sf);
63 double f = lanczos_tap(nd);
// Pool is full: grow it (the new size_coeffs is computed on a line not shown).
64 if (num_coeffs == size_coeffs) {
// NOTE(review): the realloc result overwrites coeffs directly, so the original
// buffer is lost if realloc fails; no failure check is visible here.
66 coeffs = (float *)realloc(coeffs, size_coeffs * sizeof(float));
// Append the tap (stored as float).
69 coeffs[num_coeffs++] = f;
// Divide each coefficient of this column by `sum` (accumulated on lines not
// shown -- presumably the sum of the taps, giving unit gain; confirm).
73 for (sx = start; sx <= end; ++sx) {
74 coeffs[pd[x].startcoeff + sx - start] /= sum;
// Apply the filter table to every row.
78 for (y = 0; y < h; ++y) {
79 float *sptr = pix + y*w;
80 unsigned char *dptr = npix + y*dstride;
82 for (x = 0; x < nw; ++x) {
// cf walks this column's run of coefficients.
84 float *cf = &coeffs[pd[x].startcoeff];
// Weighted sum over the inclusive source range pd[x].start..pd[x].end.
87 for (sx = pd[x].start; sx <= pd[x].end; ++sx) {
88 acc += sptr[sx] * *cf++;
// Convert the accumulator to a byte (any range clamping is on lines not shown).
96 ch = (unsigned char)acc;
// Continue from x == nw up to the row pitch.
99 for ( ; x < dstride; ++x) {
/*
 * Vertically resample an 8-bit image into a float image using Lanczos taps.
 *
 * pix      source bytes; the sample at row sy, column x is pix[x + sy*w].
 * npix     destination floats; column x is addressed from npix + x.
 * w        number of columns.
 * h        number of source rows.
 * nh       number of output rows.
 * dstride  not used on any visible line -- presumably the destination row
 *          pitch applied where the result is stored; confirm.
 *
 * The function first builds, for every output row, a run of coefficients in
 * a shared pool and divides them by `sum`, then applies that table column
 * by column. When CACHE_LINE_FACTOR > 1 the columns are processed in groups
 * of CACHE_LINE_FACTOR.
 */
105 void vscale(unsigned char *pix, float *npix, unsigned w, unsigned h, unsigned nh, unsigned dstride)
// One descriptor per output row.
107 struct pix_desc *pd = (struct pix_desc *)malloc(nh * sizeof(struct pix_desc));
// Shared coefficient pool; size_coeffs is declared on a line not shown here.
109 float *coeffs = (float *)malloc(size_coeffs * sizeof(float));
// Source rows per output row.
112 double sf = (double)h / (double)nh;
// Half-height of the source window examined per output row: 3*sf when
// h > nh, 3/sf otherwise.
113 double support = (h > nh) ? (3.0 * sf) : (3.0 / sf);
115 /* calculate the filter */
116 for (y = 0; y < nh; ++y) {
// Integer source rows within `support` of the position y*sf.
// NOTE(review): pd[y].start / pd[y].end are read later but are assigned on
// lines not shown here; any clamping to [0, h-1] would also be there.
117 int start = ceil(y * sf - support);
118 int end = floor(y * sf + support);
// Remember where this row's coefficients begin in the pool.
130 pd[y].startcoeff = num_coeffs;
132 for (sy = start; sy <= end; ++sy) {
// Argument handed to lanczos_tap(): sy/sf - y when h > nh, sy - y*sf otherwise.
133 double nd = (h > nh) ? (sy/sf - y) : (sy - y*sf);
134 double f = lanczos_tap(nd);
// Pool is full: grow it (the new size_coeffs is computed on a line not shown).
135 if (num_coeffs == size_coeffs) {
// NOTE(review): the realloc result overwrites coeffs directly, so the original
// buffer is lost if realloc fails; no failure check is visible here.
137 coeffs = (float *)realloc(coeffs, size_coeffs * sizeof(float));
// Append the tap (stored as float).
140 coeffs[num_coeffs++] = f;
// Divide each coefficient of this row by `sum` (accumulated on lines not
// shown -- presumably the sum of the taps; confirm).
144 for (sy = start; sy <= end; ++sy) {
145 coeffs[pd[y].startcoeff + sy - start] /= sum;
// Blocked variant: handle CACHE_LINE_FACTOR adjacent columns per pass.
149 #if CACHE_LINE_FACTOR > 1
150 for (x = 0; x < w; x += CACHE_LINE_FACTOR) {
151 unsigned char *sptr = pix + x;
152 float *dptr = npix + x;
153 for (y = 0; y < nh; ++y) {
// One accumulator per column of the block; the loop below visits each
// (its body, presumably the zeroing, is not shown).
155 float acc[CACHE_LINE_FACTOR];
156 for (i = 0; i < CACHE_LINE_FACTOR; ++i)
158 float *cf = &coeffs[pd[y].startcoeff];
// For each source row in the inclusive range, add its contribution to all
// columns of the block using the same coefficient *cf (cf is advanced on a
// line not shown).
161 for (sy = pd[y].start; sy <= pd[y].end; ++sy) {
162 //asm volatile ("prefetcht0 %0" :: "m" (sptr[(sy+1) * w]));
163 for (i = 0; i < CACHE_LINE_FACTOR; ++i) {
// NOTE(review): columns x+i are read without a visible check against w, so
// the final block reads past the row end when w is not a multiple of
// CACHE_LINE_FACTOR -- confirm callers always pass such a multiple.
164 acc[i] += sptr[sy * w + i] * *cf;
// Visit the block's accumulators again (the body, presumably the store
// through dptr, is not shown).
169 for (i = 0; i < CACHE_LINE_FACTOR; ++i) {
// Loop over columns left after the blocked pass.
// NOTE(review): if this follows the blocked loop, x is already a multiple of
// CACHE_LINE_FACTOR that is >= w, so this loop would never execute; confirm.
175 for (x = (x/CACHE_LINE_FACTOR)*CACHE_LINE_FACTOR; x < w; ++x) {
// One-column-at-a-time variant (presumably the #else branch of the #if above;
// the directive itself is not shown).
177 for (x = 0; x < w; ++x) {
179 unsigned char *sptr = pix + x;
180 float *dptr = npix + x;
181 for (y = 0; y < nh; ++y) {
183 float *cf = &coeffs[pd[y].startcoeff];
// Weighted sum down the column over the inclusive source-row range.
186 for (sy = pd[y].start; sy <= pd[y].end; ++sy) {
187 acc += sptr[sy * w] * *cf++;
/*
 * Read a JPEG from stdin in raw-data mode, rescale each of its three
 * component planes with vscale() followed by hscale(), and write a new JPEG
 * to stdout at quality 85.
 *
 * argv[1] and argv[2] give the output width and height. Output sampling
 * factors are fixed at 2x2 for component 0 and 1x1 for components 1 and 2.
 */
196 int main(int argc, char **argv)
// NOTE(review): no argc check is visible before argv[1]/argv[2] are read, and
// atoi() cannot report malformed input.
198 unsigned nominal_w = atoi(argv[1]);
199 unsigned nominal_h = atoi(argv[2]);
// Output sampling factors per component and their maxima.
201 unsigned samp_h0 = 2, samp_v0 = 2;
202 unsigned samp_h1 = 1, samp_v1 = 1;
203 unsigned samp_h2 = 1, samp_v2 = 1;
204 unsigned max_samp_h = 2, max_samp_v = 2;
// Output size of each plane: nominal size scaled by samp / max_samp
// (integer division, so odd sizes round down for components 1 and 2).
206 unsigned nw0 = nominal_w * samp_h0 / max_samp_h, nh0 = nominal_h * samp_v0 / max_samp_v;
207 unsigned nw1 = nominal_w * samp_h1 / max_samp_h, nh1 = nominal_h * samp_v1 / max_samp_v;
208 unsigned nw2 = nominal_w * samp_h2 / max_samp_h, nh2 = nominal_h * samp_v2 / max_samp_v;
// Row pitch of each output plane: width rounded up to a multiple of DCTSIZE.
210 unsigned stride0 = (nw0 + DCTSIZE-1) & ~(DCTSIZE-1);
211 unsigned stride1 = (nw1 + DCTSIZE-1) & ~(DCTSIZE-1);
212 unsigned stride2 = (nw2 + DCTSIZE-1) & ~(DCTSIZE-1);
// Set up the decompressor on stdin and request raw component data.
214 struct jpeg_decompress_struct dinfo;
215 struct jpeg_error_mgr jerr;
216 dinfo.err = jpeg_std_error(&jerr);
217 jpeg_create_decompress(&dinfo);
218 jpeg_stdio_src(&dinfo, stdin);
219 jpeg_read_header(&dinfo, TRUE);
220 dinfo.raw_data_out = TRUE;
221 jpeg_start_decompress(&dinfo);
// Report the block-padded input size and the target size of each plane.
223 fprintf(stderr, "Scaling using Lanczos filter:\n");
224 fprintf(stderr, " Y component: %ux%u -> %ux%u\n", dinfo.comp_info[0].width_in_blocks * DCTSIZE, dinfo.comp_info[0].height_in_blocks * DCTSIZE, nw0, nh0);
225 fprintf(stderr, " Cb component: %ux%u -> %ux%u\n", dinfo.comp_info[1].width_in_blocks * DCTSIZE, dinfo.comp_info[1].height_in_blocks * DCTSIZE, nw1, nh1);
226 fprintf(stderr, " Cr component: %ux%u -> %ux%u\n", dinfo.comp_info[2].width_in_blocks * DCTSIZE, dinfo.comp_info[2].height_in_blocks * DCTSIZE, nw2, nh2);
// One byte per sample for each block-padded input plane.
// NOTE(review): none of the malloc results in this function are checked on a
// visible line.
228 JSAMPLE *data_y = (JSAMPLE*)malloc(dinfo.comp_info[0].height_in_blocks * dinfo.comp_info[0].width_in_blocks * DCTSIZE * DCTSIZE);
229 JSAMPLE *data_cb = (JSAMPLE*)malloc(dinfo.comp_info[1].height_in_blocks * dinfo.comp_info[1].width_in_blocks * DCTSIZE * DCTSIZE);
230 JSAMPLE *data_cr = (JSAMPLE*)malloc(dinfo.comp_info[2].height_in_blocks * dinfo.comp_info[2].width_in_blocks * DCTSIZE * DCTSIZE);
231 JSAMPLE *data_ny, *data_ncb, *data_ncr;
// Read the image in batches of max_v_samp_factor * DCTSIZE lines.
// `blocks` counts batches; its increment is on a line not shown.
233 int total_lines = 0, blocks = 0;
234 while (total_lines < dinfo.comp_info[0].height_in_blocks * DCTSIZE) {
235 unsigned max_lines = dinfo.max_v_samp_factor * DCTSIZE;
// Per-batch row pointer tables (variable-length arrays), one per component;
// index 0 = data_y, 1 = data_cb, 2 = data_cr.
237 JSAMPROW y_row_ptrs[max_lines];
238 JSAMPROW cb_row_ptrs[max_lines];
239 JSAMPROW cr_row_ptrs[max_lines];
240 JSAMPROW* ptrs[] = { y_row_ptrs, cb_row_ptrs, cr_row_ptrs };
// Row i of this batch lands at row (i + blocks*DCTSIZE*v_samp_factor) of the
// component's plane, whose pitch is width_in_blocks * DCTSIZE.
243 for (i = 0; i < max_lines; ++i) {
244 y_row_ptrs[i] = data_y + (i+blocks*DCTSIZE*dinfo.comp_info[0].v_samp_factor) * dinfo.comp_info[0].width_in_blocks * DCTSIZE;
245 cb_row_ptrs[i] = data_cb + (i+blocks*DCTSIZE*dinfo.comp_info[1].v_samp_factor) * dinfo.comp_info[1].width_in_blocks * DCTSIZE;
246 cr_row_ptrs[i] = data_cr + (i+blocks*DCTSIZE*dinfo.comp_info[2].v_samp_factor) * dinfo.comp_info[2].width_in_blocks * DCTSIZE;
249 total_lines += max_lines;
// A return of 0 is treated specially; the handling is on lines not shown.
252 if (jpeg_read_raw_data(&dinfo, ptrs, max_lines) == 0)
// Component 0: vertical pass into a float buffer of (padded width x nh0),
// then horizontal pass into data_ny.
257 float *npix = (float*)malloc(dinfo.comp_info[0].width_in_blocks * DCTSIZE * nh0 * sizeof(float));
258 vscale(data_y, npix, dinfo.comp_info[0].width_in_blocks * DCTSIZE, dinfo.comp_info[0].height_in_blocks * DCTSIZE, nh0, dinfo.comp_info[0].width_in_blocks * DCTSIZE);
// NOTE(review): hscale() below writes nh0 rows of pitch stride0, so
// nh0 * stride0 looks like the intended size rather than nw0 * stride0; the
// buffer is too small whenever nh0 > nw0. Same pattern for the two
// allocations further down. Confirm.
259 data_ny = (unsigned char *)malloc(nw0 * stride0);
260 hscale(npix, data_ny, dinfo.comp_info[0].width_in_blocks * DCTSIZE, nh0, nw0, stride0);
// NOTE(review): data_cr was filled through ptrs[2], yet it is scaled here with
// comp_info[1] geometry and nw1/nh1/stride1; likewise data_cb below uses
// comp_info[2]. This is only harmless if components 1 and 2 have identical
// dimensions -- confirm whether the indices are swapped.
264 float *npix = (float*)malloc(dinfo.comp_info[1].width_in_blocks * DCTSIZE * nh1 * sizeof(float));
265 vscale(data_cr, npix, dinfo.comp_info[1].width_in_blocks * DCTSIZE, dinfo.comp_info[1].height_in_blocks * DCTSIZE, nh1, dinfo.comp_info[1].width_in_blocks * DCTSIZE);
266 data_ncr = (unsigned char *)malloc(nw1 * stride1);
267 hscale(npix, data_ncr, dinfo.comp_info[1].width_in_blocks * DCTSIZE, nh1, nw1, stride1);
271 float *npix = (float*)malloc(dinfo.comp_info[2].width_in_blocks * DCTSIZE * nh2 * sizeof(float));
272 vscale(data_cb, npix, dinfo.comp_info[2].width_in_blocks * DCTSIZE, dinfo.comp_info[2].height_in_blocks * DCTSIZE, nh2, dinfo.comp_info[2].width_in_blocks * DCTSIZE);
273 data_ncb = (unsigned char *)malloc(nw2 * stride2);
274 hscale(npix, data_ncb, dinfo.comp_info[2].width_in_blocks * DCTSIZE, nh2, nw2, stride2);
// NOTE(review): no jpeg_finish_decompress() call is visible before the destroy.
277 jpeg_destroy_decompress(&dinfo);
// Set up the compressor on stdout: three components, quality 85, YCbCr,
// raw-data input, with the sampling factors chosen above.
279 struct jpeg_compress_struct cinfo;
280 cinfo.err = jpeg_std_error(&jerr);
281 jpeg_create_compress(&cinfo);
282 jpeg_stdio_dest(&cinfo, stdout);
283 cinfo.input_components = 3;
284 jpeg_set_defaults(&cinfo);
285 jpeg_set_quality(&cinfo, 85, FALSE);
286 cinfo.image_width = nominal_w;
287 cinfo.image_height = nominal_h;
288 cinfo.raw_data_in = TRUE;
289 jpeg_set_colorspace(&cinfo, JCS_YCbCr);
290 cinfo.comp_info[0].h_samp_factor = samp_h0;
291 cinfo.comp_info[0].v_samp_factor = samp_v0;
292 cinfo.comp_info[1].h_samp_factor = samp_h1;
293 cinfo.comp_info[1].v_samp_factor = samp_v1;
294 cinfo.comp_info[2].h_samp_factor = samp_h2;
295 cinfo.comp_info[2].v_samp_factor = samp_v2;
296 jpeg_start_compress(&cinfo, TRUE);
// Feed the scaled planes to the compressor in batches of
// max_v_samp_factor * DCTSIZE lines. total_lines and blocks are presumably
// reset on lines not shown; confirm.
300 while (total_lines < cinfo.comp_info[0].height_in_blocks * DCTSIZE) {
301 unsigned max_lines = cinfo.max_v_samp_factor * DCTSIZE;
303 JSAMPROW y_row_ptrs[max_lines];
304 JSAMPROW cb_row_ptrs[max_lines];
305 JSAMPROW cr_row_ptrs[max_lines];
306 JSAMPROW* ptrs[] = { y_row_ptrs, cb_row_ptrs, cr_row_ptrs };
309 for (i = 0; i < max_lines; ++i) {
310 // simple edge extension
// Source line for row i of this batch, per component. Lines beyond the last
// valid row are tested for below (the bodies, presumably a clamp to the last
// row, are not shown; the check for yline is also not shown).
311 int yline = i + blocks*DCTSIZE*cinfo.comp_info[0].v_samp_factor;
315 int cbline = i + blocks*DCTSIZE*cinfo.comp_info[1].v_samp_factor;
// NOTE(review): cbline/crline are int while nh1/nh2 are unsigned, so these
// comparisons are done in unsigned arithmetic and nh - 1 wraps when nh is 0.
316 if (cbline > nh1 - 1)
319 int crline = i + blocks*DCTSIZE*cinfo.comp_info[2].v_samp_factor;
320 if (crline > nh2 - 1)
323 y_row_ptrs[i] = data_ny + yline * stride0;
// NOTE(review): data_ncb was written with pitch stride2 and data_ncr with
// pitch stride1 above, but here they are indexed with stride1 and stride2
// respectively; equivalent only while stride1 == stride2. Confirm.
324 cb_row_ptrs[i] = data_ncb + cbline * stride1;
325 cr_row_ptrs[i] = data_ncr + crline * stride2;
328 total_lines += max_lines;
331 jpeg_write_raw_data(&cinfo, ptrs, max_lines);
333 jpeg_finish_compress(&cinfo);
334 jpeg_destroy_compress(&cinfo);