X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libswscale%2Foutput.c;h=26b0ff3d480c7ada0c0618e08007c2ccb2a30cf5;hb=eae251ead9e380c722dce7ac3f4e97017bff9a7b;hp=0af2fffea4e83db7d3e978950b7949d78d9a90aa;hpb=217ad40aef9eeeff4e805dec3e980fb16aec3345;p=ffmpeg

diff --git a/libswscale/output.c b/libswscale/output.c
index 0af2fffea4e..26b0ff3d480 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -208,6 +208,105 @@ static void yuv2p016cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterS
     }
 }
 
+static av_always_inline void
+yuv2plane1_float_c_template(const int32_t *src, float *dest, int dstW)
+{
+    static const int big_endian = HAVE_BIGENDIAN;
+    static const int shift = 3;
+    static const float float_mult = 1.0f / 65535.0f;
+    int i, val;
+    uint16_t val_uint;
+
+    for (i = 0; i < dstW; ++i){
+        val = src[i] + (1 << (shift - 1));
+        output_pixel(&val_uint, val, 0, uint);
+        dest[i] = float_mult * (float)val_uint;
+    }
+}
+
+static av_always_inline void
+yuv2plane1_float_bswap_c_template(const int32_t *src, uint32_t *dest, int dstW)
+{
+    static const int big_endian = HAVE_BIGENDIAN;
+    static const int shift = 3;
+    static const float float_mult = 1.0f / 65535.0f;
+    int i, val;
+    uint16_t val_uint;
+
+    for (i = 0; i < dstW; ++i){
+        val = src[i] + (1 << (shift - 1));
+        output_pixel(&val_uint, val, 0, uint);
+        dest[i] = av_bswap32(av_float2int(float_mult * (float)val_uint));
+    }
+}
+
+static av_always_inline void
+yuv2planeX_float_c_template(const int16_t *filter, int filterSize, const int32_t **src,
+                            float *dest, int dstW)
+{
+    static const int big_endian = HAVE_BIGENDIAN;
+    static const int shift = 15;
+    static const float float_mult = 1.0f / 65535.0f;
+    int i, j, val;
+    uint16_t val_uint;
+
+    for (i = 0; i < dstW; ++i){
+        val = (1 << (shift - 1)) - 0x40000000;
+        for (j = 0; j < filterSize; ++j){
+            val += src[j][i] * (unsigned)filter[j];
+        }
+        output_pixel(&val_uint, val, 0x8000, int);
+        dest[i] = float_mult * (float)val_uint;
+    }
+}
+
+static av_always_inline void
+yuv2planeX_float_bswap_c_template(const int16_t *filter, int filterSize, const int32_t **src,
+                            uint32_t *dest, int dstW)
+{
+    static const int big_endian = HAVE_BIGENDIAN;
+    static const int shift = 15;
+    static const float float_mult = 1.0f / 65535.0f;
+    int i, j, val;
+    uint16_t val_uint;
+
+    for (i = 0; i < dstW; ++i){
+        val = (1 << (shift - 1)) - 0x40000000;
+        for (j = 0; j < filterSize; ++j){
+            val += src[j][i] * (unsigned)filter[j];
+        }
+        output_pixel(&val_uint, val, 0x8000, int);
+        dest[i] = av_bswap32(av_float2int(float_mult * (float)val_uint));
+    }
+}
+
+#define yuv2plane1_float(template, dest_type, BE_LE) \
+static void yuv2plane1_float ## BE_LE ## _c(const int16_t *src, uint8_t *dest, int dstW, \
+                                            const uint8_t *dither, int offset) \
+{ \
+    template((const int32_t *)src, (dest_type *)dest, dstW); \
+}
+
+#define yuv2planeX_float(template, dest_type, BE_LE) \
+static void yuv2planeX_float ## BE_LE ## _c(const int16_t *filter, int filterSize, \
+                                            const int16_t **src, uint8_t *dest, int dstW, \
+                                            const uint8_t *dither, int offset) \
+{ \
+    template(filter, filterSize, (const int32_t **)src, (dest_type *)dest, dstW); \
+}
+
+#if HAVE_BIGENDIAN
+yuv2plane1_float(yuv2plane1_float_c_template,       float,    BE)
+yuv2plane1_float(yuv2plane1_float_bswap_c_template, uint32_t, LE)
+yuv2planeX_float(yuv2planeX_float_c_template,       float,    BE)
+yuv2planeX_float(yuv2planeX_float_bswap_c_template, uint32_t, LE)
+#else
+yuv2plane1_float(yuv2plane1_float_c_template,       float,    LE)
+yuv2plane1_float(yuv2plane1_float_bswap_c_template, uint32_t, BE)
+yuv2planeX_float(yuv2planeX_float_c_template,       float,    LE)
+yuv2planeX_float(yuv2planeX_float_bswap_c_template, uint32_t, BE)
+#endif
+
 #undef output_pixel
 
 #define output_pixel(pos, val) \
@@ -311,7 +410,8 @@ static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterS
     const uint8_t *chrDither = c->chrDither8;
     int i;
 
-    if (dstFormat == AV_PIX_FMT_NV12)
+    if (dstFormat == AV_PIX_FMT_NV12 ||
+        dstFormat == AV_PIX_FMT_NV24)
         for (i=0; i<chrDstW; i++) {
             int u = chrDither[i & 7] << 12;
             int v = chrDither[(i + 3) & 7] << 12;
@@ -754,11 +854,6 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
                 V  = av_clip_uint8(V);
             }
 
-            Y1 = av_clip_uint8(Y1);
-            Y2 = av_clip_uint8(Y2);
-            U  = av_clip_uint8(U);
-            V  = av_clip_uint8(V);
-
             output_pixels(i * 4, Y1, U, Y2, V);
         }
     } else {
@@ -776,11 +871,6 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
                 V  = av_clip_uint8(V);
             }
 
-            Y1 = av_clip_uint8(Y1);
-            Y2 = av_clip_uint8(Y2);
-            U  = av_clip_uint8(U);
-            V  = av_clip_uint8(V);
-
             output_pixels(i * 4, Y1, U, Y2, V);
         }
     }
@@ -801,6 +891,99 @@ YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422)
         AV_WL16(pos, val); \
     }
 
+static av_always_inline void
+yuv2ya16_X_c_template(SwsContext *c, const int16_t *lumFilter,
+                        const int32_t **lumSrc, int lumFilterSize,
+                        const int16_t *chrFilter, const int32_t **unused_chrUSrc,
+                        const int32_t **unused_chrVSrc, int unused_chrFilterSize,
+                        const int32_t **alpSrc, uint16_t *dest, int dstW,
+                        int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
+{
+    int hasAlpha = !!alpSrc;
+    int i;
+
+    for (i = 0; i < dstW; i++) {
+        int j;
+        int Y = 1 << 18;
+        int64_t A = 0xffff<<14;
+
+        for (j = 0; j < lumFilterSize; j++)
+            Y += lumSrc[j][i] * lumFilter[j];
+
+        Y >>= 15;
+        Y = av_clip_uint16(Y);
+
+        if (hasAlpha) {
+            for (j = 0; j < lumFilterSize; j++)
+                A += alpSrc[j][i] * lumFilter[j];
+
+            A >>= 15;
+            A = av_clip_uint16(A);
+        }
+
+        output_pixel(&dest[2 * i    ], Y);
+        output_pixel(&dest[2 * i + 1], hasAlpha ? A : 65535);
+    }
+}
+
+static av_always_inline void
+yuv2ya16_2_c_template(SwsContext *c, const int32_t *buf[2],
+                        const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2],
+                        const int32_t *abuf[2], uint16_t *dest, int dstW,
+                        int yalpha, int unused_uvalpha, int y,
+                        enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
+{
+    int hasAlpha = abuf && abuf[0] && abuf[1];
+    const int32_t *buf0  = buf[0],  *buf1  = buf[1],
+    *abuf0 = hasAlpha ? abuf[0] : NULL,
+    *abuf1 = hasAlpha ? abuf[1] : NULL;
+    int  yalpha1 = 4096 - yalpha;
+    int i;
+
+    av_assert2(yalpha  <= 4096U);
+
+    for (i = 0; i < dstW; i++) {
+        int Y = (buf0[i] * yalpha1 + buf1[i] * yalpha) >> 15;
+        int A;
+
+        Y = av_clip_uint16(Y);
+
+        if (hasAlpha) {
+            A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 15;
+            A = av_clip_uint16(A);
+        }
+
+        output_pixel(&dest[2 * i    ], Y);
+        output_pixel(&dest[2 * i + 1], hasAlpha ? A : 65535);
+    }
+}
+
+static av_always_inline void
+yuv2ya16_1_c_template(SwsContext *c, const int32_t *buf0,
+                        const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2],
+                        const int32_t *abuf0, uint16_t *dest, int dstW,
+                        int unused_uvalpha, int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
+{
+    int hasAlpha = !!abuf0;
+    int i;
+
+    for (i = 0; i < dstW; i++) {
+        int Y = buf0[i] >> 3;/* 19 - 16 */
+        int A;
+
+        Y = av_clip_uint16(Y);
+
+        if (hasAlpha) {
+            A = abuf0[i] >> 3;
+            if (A & 0x100)
+                A = av_clip_uint16(A);
+        }
+
+        output_pixel(&dest[2 * i    ], Y);
+        output_pixel(&dest[2 * i + 1], hasAlpha ? A : 65535);
+    }
+}
+
 static av_always_inline void
 yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
                        const int32_t **lumSrc, int lumFilterSize,
@@ -1306,6 +1489,8 @@ YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1)
 YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1)
 YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1)
 YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16BE, 1, 0)
+YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16LE, 1, 0)
 
 YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0, 0)
 YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0, 0)
@@ -2303,10 +2488,17 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_14BE_c  : yuv2plane1_14LE_c;
         } else
             av_assert0(0);
+    } else if (dstFormat == AV_PIX_FMT_GRAYF32BE) {
+        *yuv2planeX = yuv2planeX_floatBE_c;
+        *yuv2plane1 = yuv2plane1_floatBE_c;
+    } else if (dstFormat == AV_PIX_FMT_GRAYF32LE) {
+        *yuv2planeX = yuv2planeX_floatLE_c;
+        *yuv2plane1 = yuv2plane1_floatLE_c;
     } else {
         *yuv2plane1 = yuv2plane1_8_c;
         *yuv2planeX = yuv2planeX_8_c;
-        if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21)
+        if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21 ||
+            dstFormat == AV_PIX_FMT_NV24 || dstFormat == AV_PIX_FMT_NV42)
             *yuv2nv12cX = yuv2nv12cX_c;
     }
 
@@ -2730,6 +2922,16 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
         *yuv2packed2 = yuv2ya8_2_c;
         *yuv2packedX = yuv2ya8_X_c;
         break;
+    case AV_PIX_FMT_YA16LE:
+        *yuv2packed1 = yuv2ya16le_1_c;
+        *yuv2packed2 = yuv2ya16le_2_c;
+        *yuv2packedX = yuv2ya16le_X_c;
+        break;
+    case AV_PIX_FMT_YA16BE:
+        *yuv2packed1 = yuv2ya16be_1_c;
+        *yuv2packed2 = yuv2ya16be_2_c;
+        *yuv2packedX = yuv2ya16be_X_c;
+        break;
     case AV_PIX_FMT_AYUV64LE:
         *yuv2packedX = yuv2ayuv64le_X_c;
         break;