]> git.sesse.net Git - ffmpeg/blobdiff - libswscale/swscale.c
Merge remote branch 'qatar/master'
[ffmpeg] / libswscale / swscale.c
index 1e412b3a25f1043f0e97dbcde874361575565f73..d53af2771da5128305ceecf60cd461cd56df0328 100644 (file)
@@ -341,14 +341,47 @@ DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
   { 112, 16,104,  8,118, 22,110, 14,},
 }};
 
+uint16_t dither_scale[15][16]={
+{    2,    3,    3,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,},
+{    2,    3,    7,    7,   13,   13,   25,   25,   25,   25,   25,   25,   25,   25,   25,   25,},
+{    3,    3,    4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  113,  113,  113,  113,},
+{    3,    4,    4,    5,   31,   31,   61,  121,  241,  241,  241,  241,  481,  481,  481,  481,},
+{    3,    4,    5,    5,    6,   63,   63,  125,  249,  497,  993,  993,  993,  993,  993, 1985,},
+{    3,    5,    6,    6,    6,    7,  127,  127,  253,  505, 1009, 2017, 4033, 4033, 4033, 4033,},
+{    3,    5,    6,    7,    7,    7,    8,  255,  255,  509, 1017, 2033, 4065, 8129,16257,16257,},
+{    3,    5,    6,    8,    8,    8,    8,    9,  511,  511, 1021, 2041, 4081, 8161,16321,32641,},
+{    3,    5,    7,    8,    9,    9,    9,    9,   10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
+{    3,    5,    7,    8,   10,   10,   10,   10,   10,   11, 2047, 2047, 4093, 8185,16369,32737,},
+{    3,    5,    7,    8,   10,   11,   11,   11,   11,   11,   12, 4095, 4095, 8189,16377,32753,},
+{    3,    5,    7,    9,   10,   12,   12,   12,   12,   12,   12,   13, 8191, 8191,16381,32761,},
+{    3,    5,    7,    9,   10,   12,   13,   13,   13,   13,   13,   13,   14,16383,16383,32765,},
+{    3,    5,    7,    9,   10,   12,   14,   14,   14,   14,   14,   14,   14,   15,32767,32767,},
+{    3,    5,    7,    9,   11,   12,   14,   15,   15,   15,   15,   15,   15,   15,   16,65535,},
+};
+
 static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                                     const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
                                                     const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest,
-                                                    int dstW, int chrDstW, int big_endian)
+                                                    int dstW, int chrDstW, int big_endian, int output_bits)
 {
     //FIXME Optimize (just quickly written not optimized..)
     int i;
-
+    int shift = 11 + 16 - output_bits;
+
+#define output_pixel(pos, val) \
+    if (big_endian) { \
+        if (output_bits == 16) { \
+            AV_WB16(pos, av_clip_uint16(val >> shift)); \
+        } else { \
+            AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
+        } \
+    } else { \
+        if (output_bits == 16) { \
+            AV_WL16(pos, av_clip_uint16(val >> shift)); \
+        } else { \
+            AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
+        } \
+    }
     for (i = 0; i < dstW; i++) {
         int val = 1 << 10;
         int j;
@@ -356,11 +389,7 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co
         for (j = 0; j < lumFilterSize; j++)
             val += lumSrc[j][i] * lumFilter[j];
 
-        if (big_endian) {
-            AV_WB16(&dest[i], av_clip_uint16(val >> 11));
-        } else {
-            AV_WL16(&dest[i], av_clip_uint16(val >> 11));
-        }
+        output_pixel(&dest[i], val);
     }
 
     if (uDest) {
@@ -374,13 +403,8 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co
                 v += chrSrc[j][i + VOFW] * chrFilter[j];
             }
 
-            if (big_endian) {
-                AV_WB16(&uDest[i], av_clip_uint16(u >> 11));
-                AV_WB16(&vDest[i], av_clip_uint16(v >> 11));
-            } else {
-                AV_WL16(&uDest[i], av_clip_uint16(u >> 11));
-                AV_WL16(&vDest[i], av_clip_uint16(v >> 11));
-            }
+            output_pixel(&uDest[i], u);
+            output_pixel(&vDest[i], v);
         }
     }
 
@@ -392,11 +416,7 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co
             for (j = 0; j < lumFilterSize; j++)
                 val += alpSrc[j][i] * lumFilter[j];
 
-            if (big_endian) {
-                AV_WB16(&aDest[i], av_clip_uint16(val >> 11));
-            } else {
-                AV_WL16(&aDest[i], av_clip_uint16(val >> 11));
-            }
+            output_pixel(&aDest[i], val);
         }
     }
 }
@@ -463,13 +483,13 @@ static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSr
                                    chrFilter, chrSrc, chrFilterSize,
                                    alpSrc,
                                    dest, uDest, vDest, aDest,
-                                   dstW, chrDstW, 1);
+                                   dstW, chrDstW, 1, 16);
         } else {
             yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
                                    chrFilter, chrSrc, chrFilterSize,
                                    alpSrc,
                                    dest, uDest, vDest, aDest,
-                                   dstW, chrDstW, 0);
+                                   dstW, chrDstW, 0, 16);
         }
     }
 }
@@ -1857,22 +1877,23 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[
     return srcSliceH;
 }
 
-#define DITHER_COPY(dst, dstStride, src, srcStride, bswap)\
+#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
+    uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
+    int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
     for (i = 0; i < height; i++) {\
-        int shift= src_depth-dst_depth;\
         uint8_t *dither= dithers[src_depth-9][i&7];\
         for (j = 0; j < length-7; j+=8){\
-            dst[j+0] = (bswap(src[j+0]) + dither[0])>>shift;\
-            dst[j+1] = (bswap(src[j+1]) + dither[1])>>shift;\
-            dst[j+2] = (bswap(src[j+2]) + dither[2])>>shift;\
-            dst[j+3] = (bswap(src[j+3]) + dither[3])>>shift;\
-            dst[j+4] = (bswap(src[j+4]) + dither[4])>>shift;\
-            dst[j+5] = (bswap(src[j+5]) + dither[5])>>shift;\
-            dst[j+6] = (bswap(src[j+6]) + dither[6])>>shift;\
-            dst[j+7] = (bswap(src[j+7]) + dither[7])>>shift;\
+            dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\
+            dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\
+            dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\
+            dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\
+            dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\
+            dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\
+            dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\
+            dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\
         }\
         for (; j < length; j++)\
-            dst[j] = (bswap(src[j]) + dither[j&7])>>shift;\
+            dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\
         dst += dstStride;\
         src += srcStride;\
     }
@@ -1897,59 +1918,72 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[
                 length*=2;
             fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128);
         } else {
-            if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat)) {
+            if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat)
+               || (is16BPS(c->srcFormat) != is16BPS(c->dstFormat))
+            ) {
                 const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1;
                 const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1;
-                uint16_t *srcPtr2 = (uint16_t*)srcPtr;
+                const uint16_t *srcPtr2 = (const uint16_t*)srcPtr;
                 uint16_t *dstPtr2 = (uint16_t*)dstPtr;
 
                 if (dst_depth == 8) {
-                    DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, )
+                    if(isBE(c->srcFormat) == HAVE_BIGENDIAN){
+                        DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , )
+                    } else {
+                        DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, )
+                    }
                 } else if (src_depth == 8) {
                     for (i = 0; i < height; i++) {
-                        for (j = 0; j < length; j++)
-                            dstPtr2[j] = (srcPtr[j]<<(dst_depth-8)) |
-                                (srcPtr[j]>>(2*8-dst_depth));
+                        if(isBE(c->dstFormat)){
+                            for (j = 0; j < length; j++)
+                                AV_WB16(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) |
+                                                     (srcPtr[j]>>(2*8-dst_depth)));
+                        } else {
+                            for (j = 0; j < length; j++)
+                                AV_WL16(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) |
+                                                     (srcPtr[j]>>(2*8-dst_depth)));
+                        }
                         dstPtr2 += dstStride[plane]/2;
                         srcPtr  += srcStride[plane];
                     }
-                } else if (src_depth < dst_depth) {
+                } else if (src_depth <= dst_depth) {
                     for (i = 0; i < height; i++) {
-                        if(isBE(c->dstFormat)){
-                            for (j = 0; j < length; j++)
-                                AV_WB16(&dstPtr2[j], (srcPtr2[j]<<(dst_depth-src_depth)) |
-                                                     (srcPtr2[j]>>(2*src_depth-dst_depth)));
-                        }else{
-                            for (j = 0; j < length; j++)
-                                AV_WL16(&dstPtr2[j], (srcPtr2[j]<<(dst_depth-src_depth)) |
-                                                     (srcPtr2[j]>>(2*src_depth-dst_depth)));
+#define COPY_UP(r,w) \
+    for (j = 0; j < length; j++){ \
+        unsigned int v= r(&srcPtr2[j]);\
+        w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \
+                       (v>>(2*src_depth-dst_depth)));\
+    }
+                        if(isBE(c->srcFormat)){
+                            if(isBE(c->dstFormat)){
+                                COPY_UP(AV_RB16, AV_WB16)
+                            } else {
+                                COPY_UP(AV_RB16, AV_WL16)
+                            }
+                        } else {
+                            if(isBE(c->dstFormat)){
+                                COPY_UP(AV_RL16, AV_WB16)
+                            } else {
+                                COPY_UP(AV_RL16, AV_WL16)
+                            }
                         }
                         dstPtr2 += dstStride[plane]/2;
                         srcPtr2 += srcStride[plane]/2;
                     }
                 } else {
                     if(isBE(c->srcFormat) == HAVE_BIGENDIAN){
-                        DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, )
+                        if(isBE(c->dstFormat) == HAVE_BIGENDIAN){
+                            DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , )
+                        } else {
+                            DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16)
+                        }
                     }else{
-                        DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16)
-                    }
-                }
-            } else if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) {
-                //FIXME add dither
-                if (!isBE(c->srcFormat)) srcPtr++;
-                for (i=0; i<height; i++) {
-                    for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
-                    srcPtr+= srcStride[plane];
-                    dstPtr+= dstStride[plane];
-                }
-            } else if(!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) {
-                for (i=0; i<height; i++) {
-                    for (j=0; j<length; j++) {
-                        dstPtr[ j<<1   ] = srcPtr[j];
-                        dstPtr[(j<<1)+1] = srcPtr[j];
+                        if(isBE(c->dstFormat) == HAVE_BIGENDIAN){
+                            DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, )
+                        } else {
+                            DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16)
+                        }
                     }
-                    srcPtr+= srcStride[plane];
-                    dstPtr+= dstStride[plane];
                 }
             } else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat)
                   && isBE(c->srcFormat) != isBE(c->dstFormat)) {