git.sesse.net Git - qscale/blobdiff - qscale.c
Move JPEG loading into libqscale.
[qscale] / qscale.c
index ee70208c28e5307511324c73f6a3800084cb6ad8..440f9dee287328cb77e7761a5ac7a516d5c7e903 100644 (file)
--- a/qscale.c
+++ b/qscale.c
@@ -1,12 +1,43 @@
+/*
+ * qscale: Quick, high-quality JPEG-to-JPEG scaler.
+ * Copyright (C) 2008 Steinar H. Gunderson <sgunderson@bigfoot.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
 #include <stdio.h>
 #include <malloc.h>
 #include <math.h>
 #include <string.h>
 #include <stdlib.h>
 #include "jpeglib.h"
+#include "libqscale.h"
 
+/* The number of horizontally adjacent pixels (columns) to process at a time when scaling vertically. */
 #define CACHE_LINE_FACTOR 16
 
+/* Whether to use SSE for horizontal scaling or not (requires SSE3). */
+#define USE_HORIZONTAL_SSE 0
+
+/* Whether to use SSE for vertical scaling or not (requires only SSE1). */
+#define USE_VERTICAL_SSE 0
+
+#if USE_VERTICAL_SSE
+#undef CACHE_LINE_FACTOR
+#define CACHE_LINE_FACTOR 16
+#endif
+
 #ifndef M_PI
 #define M_PI 3.14159265358979323846264
 #endif
@@ -66,6 +97,7 @@ void hscale(float *pix, unsigned char *npix, unsigned w, unsigned h, unsigned nw
                        end = w - 1;
                }
 
+#if USE_HORIZONTAL_SSE
                /* round up so we get a multiple of four for the SSE code */
                int num = (end - start + 1);
                if (num % 4 != 0) {
@@ -78,6 +110,7 @@ void hscale(float *pix, unsigned char *npix, unsigned w, unsigned h, unsigned nw
                                end += 4 - (num % 4);
                        }
                }
+#endif
 
                pd[x].start = start;
                pd[x].end = end;
@@ -105,6 +138,7 @@ void hscale(float *pix, unsigned char *npix, unsigned w, unsigned h, unsigned nw
                unsigned char *dptr = npix + y*dstride;
                unsigned char ch;
                for (x = 0; x < nw; ++x) {
+#if USE_HORIZONTAL_SSE
                        int result;
                        float acc;
                        long tmp;
@@ -136,7 +170,16 @@ void hscale(float *pix, unsigned char *npix, unsigned w, unsigned h, unsigned nw
                                : "memory", "xmm1", "xmm2"
                        );
 
-#if 0
+                       *dptr++ = (unsigned char)result;
+#else
+                       float acc = 0.0;
+                       float *cf = &coeffs[pd[x].startcoeff];
+                       unsigned sx;
+                       
+                       for (sx = pd[x].start; sx <= pd[x].end; ++sx) {
+                               acc += sptr[sx] * *cf++;
+                       }
+
                        if (acc < 0.0)
                                ch = 0;
                        else if (acc > 255.0)
@@ -145,7 +188,6 @@ void hscale(float *pix, unsigned char *npix, unsigned w, unsigned h, unsigned nw
                                ch = (unsigned char)acc;
                        *dptr++ = ch;
 #endif
-                       *dptr++ = (unsigned char)result;
                }
                ch = dptr[-1];
                for ( ; x < dstride; ++x) {
@@ -203,25 +245,7 @@ void vscale(unsigned char *pix, float *npix, unsigned w, unsigned h, unsigned nh
                unsigned char *sptr = pix + x;
                float *dptr = npix + x;
                for (y = 0; y < nh; ++y) {
-#if 0
-                       int i;
-                       float acc[CACHE_LINE_FACTOR];
-                       for (i = 0; i < CACHE_LINE_FACTOR; ++i)
-                               acc[i] = 0.0;
-                       float *cf = &coeffs[pd[y].startcoeff];
-                       unsigned sy;
-               
-                       for (sy = pd[y].start; sy <= pd[y].end; ++sy) {
-                               for (i = 0; i < CACHE_LINE_FACTOR; ++i) {
-                                       acc[i] += sptr[sy * w + i] * *cf;
-                               }
-                               ++cf;
-                       }
-
-                       for (i = 0; i < CACHE_LINE_FACTOR; ++i) {
-                               dptr[i] = acc[i];
-                       }
-#else
+#if USE_VERTICAL_SSE
                        /*
                         * xmm0 - xmm3: acc[0..15]
                         * xmm4: current filter coefficient
@@ -309,6 +333,24 @@ void vscale(unsigned char *pix, float *npix, unsigned w, unsigned h, unsigned nh
                                "r" (dptr)                           /* 4: dstptr base */
                                : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
                        );
+#else
+                       int i;
+                       float acc[CACHE_LINE_FACTOR];
+                       for (i = 0; i < CACHE_LINE_FACTOR; ++i)
+                               acc[i] = 0.0;
+                       float *cf = &coeffs[pd[y].startcoeff];
+                       unsigned sy;
+               
+                       for (sy = pd[y].start; sy <= pd[y].end; ++sy) {
+                               for (i = 0; i < CACHE_LINE_FACTOR; ++i) {
+                                       acc[i] += sptr[sy * w + i] * *cf;
+                               }
+                               ++cf;
+                       }
+
+                       for (i = 0; i < CACHE_LINE_FACTOR; ++i) {
+                               dptr[i] = acc[i];
+                       }
 #endif
                        dptr += dstride;
                }
@@ -336,103 +378,73 @@ void vscale(unsigned char *pix, float *npix, unsigned w, unsigned h, unsigned nh
 
 int main(int argc, char **argv)
 {
+       /* user-settable parameters */
        unsigned nominal_w = atoi(argv[1]);
        unsigned nominal_h = atoi(argv[2]);
-
        unsigned samp_h0 = 2, samp_v0 = 2;
        unsigned samp_h1 = 1, samp_v1 = 1;
        unsigned samp_h2 = 1, samp_v2 = 1;
-       unsigned max_samp_h = 2, max_samp_v = 2;
+       unsigned jpeg_quality = 85;
+       /* end */
+
+       unsigned max_samp_h, max_samp_v;
+       max_samp_h = samp_h0;
+       if (samp_h1 > max_samp_h)
+               max_samp_h = samp_h1;
+       if (samp_h2 > max_samp_h)
+               max_samp_h = samp_h2;
+       
+       max_samp_v = samp_v0;
+       if (samp_v1 > max_samp_v)
+               max_samp_v = samp_v1;
+       if (samp_v2 > max_samp_v)
+               max_samp_v = samp_v2;
 
        unsigned nw0 = nominal_w * samp_h0 / max_samp_h, nh0 = nominal_h * samp_v0 / max_samp_v;
        unsigned nw1 = nominal_w * samp_h1 / max_samp_h, nh1 = nominal_h * samp_v1 / max_samp_v;
        unsigned nw2 = nominal_w * samp_h2 / max_samp_h, nh2 = nominal_h * samp_v2 / max_samp_v;
 
-       unsigned stride0 = (nw0 + DCTSIZE-1) & ~(DCTSIZE-1);
-       unsigned stride1 = (nw1 + DCTSIZE-1) & ~(DCTSIZE-1);
-       unsigned stride2 = (nw2 + DCTSIZE-1) & ~(DCTSIZE-1);
+       unsigned dstride0 = (nw0 + DCTSIZE-1) & ~(DCTSIZE-1);
+       unsigned dstride1 = (nw1 + DCTSIZE-1) & ~(DCTSIZE-1);
+       unsigned dstride2 = (nw2 + DCTSIZE-1) & ~(DCTSIZE-1);
 
-       struct jpeg_decompress_struct dinfo;
-       struct jpeg_error_mgr jerr;
-       dinfo.err = jpeg_std_error(&jerr);
-       jpeg_create_decompress(&dinfo);
-       jpeg_stdio_src(&dinfo, stdin);
-       jpeg_read_header(&dinfo, TRUE);
-       dinfo.raw_data_out = TRUE;
-       jpeg_start_decompress(&dinfo);
-
-       unsigned w0 = dinfo.image_width * dinfo.comp_info[0].h_samp_factor / dinfo.max_h_samp_factor;
-       unsigned h0 = dinfo.image_height * dinfo.comp_info[0].v_samp_factor / dinfo.max_v_samp_factor;
-
-       unsigned w1 = dinfo.image_width * dinfo.comp_info[1].h_samp_factor / dinfo.max_h_samp_factor;
-       unsigned h1 = dinfo.image_height * dinfo.comp_info[1].v_samp_factor / dinfo.max_v_samp_factor;
-
-       unsigned w2 = dinfo.image_width * dinfo.comp_info[2].h_samp_factor / dinfo.max_h_samp_factor;
-       unsigned h2 = dinfo.image_height * dinfo.comp_info[2].v_samp_factor / dinfo.max_v_samp_factor;
-
-       fprintf(stderr, "Scaling using Lanczos filter:\n");
-       fprintf(stderr, "  Y component: %ux%u -> %ux%u\n", dinfo.comp_info[0].width_in_blocks * DCTSIZE, dinfo.comp_info[0].height_in_blocks * DCTSIZE, nw0, nh0);
-       fprintf(stderr, "  Cb component: %ux%u -> %ux%u\n", dinfo.comp_info[1].width_in_blocks * DCTSIZE, dinfo.comp_info[1].height_in_blocks * DCTSIZE, nw1, nh1);
-       fprintf(stderr, "  Cr component: %ux%u -> %ux%u\n", dinfo.comp_info[2].width_in_blocks * DCTSIZE, dinfo.comp_info[2].height_in_blocks * DCTSIZE, nw2, nh2);
-
-       JSAMPLE *data_y  = (JSAMPLE*)memalign(16, dinfo.comp_info[0].height_in_blocks * dinfo.comp_info[0].width_in_blocks * DCTSIZE * DCTSIZE);
-       JSAMPLE *data_cb = (JSAMPLE*)memalign(16, dinfo.comp_info[1].height_in_blocks * dinfo.comp_info[1].width_in_blocks * DCTSIZE * DCTSIZE);
-       JSAMPLE *data_cr = (JSAMPLE*)memalign(16, dinfo.comp_info[2].height_in_blocks * dinfo.comp_info[2].width_in_blocks * DCTSIZE * DCTSIZE);
+       qscale_img *img = qscale_load_jpeg_from_stdio(stdin);
+       unsigned sstride0 = (img->w0 + DCTSIZE-1) & ~(DCTSIZE-1);
+       unsigned sstride1 = (img->w1 + DCTSIZE-1) & ~(DCTSIZE-1);
+       unsigned sstride2 = (img->w2 + DCTSIZE-1) & ~(DCTSIZE-1);
+       
        JSAMPLE *data_ny, *data_ncb, *data_ncr;
 
-       int total_lines = 0, blocks = 0;
-       while (total_lines < dinfo.comp_info[0].height_in_blocks * DCTSIZE) {
-               unsigned max_lines = dinfo.max_v_samp_factor * DCTSIZE;
-
-               JSAMPROW y_row_ptrs[max_lines];
-               JSAMPROW cb_row_ptrs[max_lines];
-               JSAMPROW cr_row_ptrs[max_lines];
-               JSAMPROW* ptrs[] = { y_row_ptrs, cb_row_ptrs, cr_row_ptrs };
-               int i;
-
-               for (i = 0; i < max_lines; ++i) {
-                       y_row_ptrs[i]  = data_y  + (i+blocks*DCTSIZE*dinfo.comp_info[0].v_samp_factor) * dinfo.comp_info[0].width_in_blocks * DCTSIZE;
-                       cb_row_ptrs[i] = data_cb + (i+blocks*DCTSIZE*dinfo.comp_info[1].v_samp_factor) * dinfo.comp_info[1].width_in_blocks * DCTSIZE;
-                       cr_row_ptrs[i] = data_cr + (i+blocks*DCTSIZE*dinfo.comp_info[2].v_samp_factor) * dinfo.comp_info[2].width_in_blocks * DCTSIZE;
-               }
-               
-               total_lines += max_lines;
-               ++blocks;
-
-               if (jpeg_read_raw_data(&dinfo, ptrs, max_lines) == 0)
-                       break;
-       }
-
        {
-               float *npix = (float*)memalign(16, dinfo.comp_info[0].width_in_blocks * DCTSIZE * nh0 * sizeof(float)); 
-               vscale(data_y, npix, dinfo.comp_info[0].width_in_blocks * DCTSIZE, h0, nh0, dinfo.comp_info[0].width_in_blocks * DCTSIZE);
-               data_ny = (unsigned char *)malloc(nw0 * stride0);
-               hscale(npix, data_ny, w0, nh0, nw0, dinfo.comp_info[0].width_in_blocks * DCTSIZE, stride0);
+               float *npix = (float*)memalign(16, sstride0 * nh0 * sizeof(float));
+               vscale(img->data_y, npix, sstride0, img->h0, nh0, sstride0);
+               data_ny = (unsigned char *)malloc(nh0 * dstride0);
+               hscale(npix, data_ny, img->w0, nh0, nw0, sstride0, dstride0);
                free(npix);
        }
        {
-               float *npix = (float*)memalign(16, dinfo.comp_info[1].width_in_blocks * DCTSIZE * nh1 * sizeof(float)); 
-               vscale(data_cr, npix, dinfo.comp_info[1].width_in_blocks * DCTSIZE, h1, nh1, dinfo.comp_info[1].width_in_blocks * DCTSIZE);
-               data_ncr = (unsigned char *)malloc(nw1 * stride1);
-               hscale(npix, data_ncr, w1, nh1, nw1, dinfo.comp_info[1].width_in_blocks * DCTSIZE, stride1);
+               float *npix = (float*)memalign(16, sstride1 * nh1 * sizeof(float));     
+               vscale(img->data_cr, npix, sstride1, img->h1, nh1, sstride1);
+               data_ncr = (unsigned char *)malloc(nh1 * dstride1);
+               hscale(npix, data_ncr, img->w1, nh1, nw1, sstride1, dstride1);
                free(npix);
        }
        {
-               float *npix = (float*)memalign(16, dinfo.comp_info[2].width_in_blocks * DCTSIZE * nh2 * sizeof(float)); 
-               vscale(data_cb, npix, dinfo.comp_info[2].width_in_blocks * DCTSIZE, h2, nh2, dinfo.comp_info[2].width_in_blocks * DCTSIZE);
-               data_ncb = (unsigned char *)malloc(nw2 * stride2);
-               hscale(npix, data_ncb, w2, nh2, nw2, dinfo.comp_info[2].width_in_blocks * DCTSIZE, stride2);
+               float *npix = (float*)memalign(16, sstride2 * nh2 * sizeof(float));     
+               vscale(img->data_cb, npix, sstride2, img->h2, nh2, sstride2);
+               data_ncb = (unsigned char *)malloc(nh2 * dstride2);
+               hscale(npix, data_ncb, img->w2, nh2, nw2, sstride2, dstride2);
                free(npix);
        }
-       jpeg_destroy_decompress(&dinfo);
        
        struct jpeg_compress_struct cinfo;
+       struct jpeg_error_mgr jerr;
        cinfo.err = jpeg_std_error(&jerr);
        jpeg_create_compress(&cinfo);
        jpeg_stdio_dest(&cinfo, stdout);
        cinfo.input_components = 3;
        jpeg_set_defaults(&cinfo);
-       jpeg_set_quality(&cinfo, 85, FALSE);
+       jpeg_set_quality(&cinfo, jpeg_quality, FALSE);
        cinfo.image_width = nominal_w;
        cinfo.image_height = nominal_h;
        cinfo.raw_data_in = TRUE;
@@ -445,8 +457,8 @@ int main(int argc, char **argv)
        cinfo.comp_info[2].v_samp_factor = samp_v2;
        jpeg_start_compress(&cinfo, TRUE);
 
-       total_lines = 0;
-       blocks = 0;
+       int total_lines = 0;
+       int blocks = 0;
        while (total_lines < cinfo.comp_info[0].height_in_blocks * DCTSIZE) {
                unsigned max_lines = cinfo.max_v_samp_factor * DCTSIZE;
 
@@ -470,9 +482,9 @@ int main(int argc, char **argv)
                        if (crline > nh2 - 1)
                                crline = nh2 - 1;
 
-                       y_row_ptrs[i]  = data_ny  + yline * stride0;
-                       cb_row_ptrs[i] = data_ncb + cbline * stride1;
-                       cr_row_ptrs[i] = data_ncr + crline * stride2;
+                       y_row_ptrs[i]  = data_ny  + yline * dstride0;
+                       cb_row_ptrs[i] = data_ncb + cbline * dstride1;
+                       cr_row_ptrs[i] = data_ncr + crline * dstride2;
                }
                
                total_lines += max_lines;
@@ -483,6 +495,8 @@ int main(int argc, char **argv)
        jpeg_finish_compress(&cinfo);
        jpeg_destroy_compress(&cinfo);
 
+       qscale_destroy(img);
+
        return 0;
 }