vaapi_h264: Convert to use coded bitstream infrastructure

[ffmpeg] / libswscale / output.c
diff --git a/libswscale/output.c b/libswscale/output.c

index 4ea2e4858abce7334f11970d06d9f8c16e7ee82d..07edcfd4cc45bc2d1546c29334bae9745107dce4 100644 (file)
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -18,7 +18,6 @@
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
-#include <assert.h>
  #include <math.h>
  #include <stdint.h>
  #include <stdio.h>
@@ -46,14 +45,14 @@ DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
  {  0,   4,   0,   4,   0,   4,   0,   4, },
  };
  
-DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_4x4_16)[4][8] = {
  {  8,   4,  11,   7,   8,   4,  11,   7, },
  {  2,  14,   1,  13,   2,  14,   1,  13, },
  { 10,   6,   9,   5,  10,   6,   9,   5, },
  {  0,  12,   3,  15,   0,  12,   3,  15, },
  };
  
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_32)[8][8] = {
  { 17,   9,  23,  15,  16,   8,  22,  14, },
  {  5,  29,   3,  27,   4,  28,   2,  26, },
  { 21,  13,  19,  11,  20,  12,  18,  10, },
@@ -64,7 +63,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
  {  1,  25,   7,  31,   0,  24,   6,  30, },
  };
  
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_73)[8][8] = {
  {  0,  55,  14,  68,   3,  58,  17,  72, },
  { 37,  18,  50,  32,  40,  22,  54,  35, },
  {  9,  64,   5,  59,  13,  67,   8,  63, },
@@ -76,7 +75,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
  };
  
  #if 1
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[8][8] = {
  {117,  62, 158, 103, 113,  58, 155, 100, },
  { 34, 199,  21, 186,  31, 196,  17, 182, },
  {144,  89, 131,  76, 141,  86, 127,  72, },
@@ -88,7 +87,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
  };
  #elif 1
  // tries to correct a gamma of 1.5
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[8][8] = {
  {  0, 143,  18, 200,   2, 156,  25, 215, },
  { 78,  28, 125,  64,  89,  36, 138,  74, },
  { 10, 180,   3, 161,  16, 195,   8, 175, },
@@ -100,7 +99,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
  };
  #elif 1
  // tries to correct a gamma of 2.0
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[8][8] = {
  {  0, 124,   8, 193,   0, 140,  12, 213, },
  { 55,  14, 104,  42,  66,  19, 119,  52, },
  {  3, 168,   1, 145,   6, 187,   3, 162, },
@@ -112,7 +111,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
  };
  #else
  // tries to correct a gamma of 2.5
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[8][8] = {
  {  0, 107,   3, 187,   0, 125,   6, 212, },
  { 39,   7,  86,  28,  49,  11, 102,  36, },
  {  1, 158,   0, 131,   3, 180,   1, 151, },
@@ -131,6 +130,9 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
          AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
      }
  
+// Left-shifting a negative value is undefined behavior in C; multiply by a power of two instead
+#define SHIFT_LEFT(val, shift) ((val) * (1 << (shift)))
+
  static av_always_inline void
  yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
                           int big_endian, int output_bits)
@@ -232,6 +234,8 @@ yuv2NBPS( 9, BE, 1, 10, int16_t)
  yuv2NBPS( 9, LE, 0, 10, int16_t)
  yuv2NBPS(10, BE, 1, 10, int16_t)
  yuv2NBPS(10, LE, 0, 10, int16_t)
+yuv2NBPS(12, BE, 1, 10, int16_t)
+yuv2NBPS(12, LE, 0, 10, int16_t)
  yuv2NBPS(16, BE, 1, 16, int32_t)
  yuv2NBPS(16, LE, 0, 16, int32_t)
  
@@ -314,7 +318,7 @@ yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
                        const int16_t **alpSrc, uint8_t *dest, int dstW,
                        int y, enum AVPixelFormat target)
  {
-    const uint8_t * const d128=dither_8x8_220[y&7];
+    const uint8_t * const d128 = ff_dither_8x8_220[y&7];
      int i;
      unsigned acc = 0;
  
@@ -353,7 +357,7 @@ yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
                        enum AVPixelFormat target)
  {
      const int16_t *buf0  = buf[0],  *buf1  = buf[1];
-    const uint8_t * const d128 = dither_8x8_220[y & 7];
+    const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
      int  yalpha1 = 4096 - yalpha;
      int i;
  
@@ -387,7 +391,7 @@ yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
                        const int16_t *abuf0, uint8_t *dest, int dstW,
                        int uvalpha, int y, enum AVPixelFormat target)
  {
-    const uint8_t * const d128 = dither_8x8_220[y & 7];
+    const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
      int i;
  
      for (i = 0; i < dstW; i += 8) {
@@ -450,7 +454,12 @@ YUV2PACKEDWRAPPER(yuv2mono,, black, AV_PIX_FMT_MONOBLACK)
          dest[pos + 1] = U;  \
          dest[pos + 2] = Y2; \
          dest[pos + 3] = V;  \
-    } else { \
+    } else if (target == AV_PIX_FMT_YVYU422) { \
+        dest[pos + 0] = Y1; \
+        dest[pos + 1] = V;  \
+        dest[pos + 2] = Y2; \
+        dest[pos + 3] = U;  \
+    } else { /* AV_PIX_FMT_UYVY422 */ \
          dest[pos + 0] = U;  \
          dest[pos + 1] = Y1; \
          dest[pos + 2] = V;  \
@@ -569,6 +578,7 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
  #undef output_pixels
  
  YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, AV_PIX_FMT_YUYV422)
+YUV2PACKEDWRAPPER(yuv2, 422, yvyu422, AV_PIX_FMT_YVYU422)
  YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422)
  
  #define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE) ? R : B)
@@ -594,8 +604,8 @@ yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
          int j;
          int Y1 = -0x40000000;
          int Y2 = -0x40000000;
-        int U  = -128 << 23; // 19
-        int V  = -128 << 23;
+        int U  = SHIFT_LEFT(-128, 23); // 19
+        int V  = SHIFT_LEFT(-128, 23);
          int R, G, B;
  
          for (j = 0; j < lumFilterSize; j++) {
@@ -607,7 +617,7 @@ yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
              V += chrVSrc[j][i] * chrFilter[j];
          }
  
-        // 8bit: 12+15=27; 16-bit: 12+19=31
+        // 8 bits: 12+15=27; 16 bits: 12+19=31
          Y1 >>= 14; // 10
          Y1 += 0x10000;
          Y2 >>= 14;
@@ -615,20 +625,20 @@ yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
          U  >>= 14;
          V  >>= 14;
  
-        // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
+        // 8 bits: 27 -> 17 bits, 16 bits: 31 - 14 = 17 bits
          Y1 -= c->yuv2rgb_y_offset;
          Y2 -= c->yuv2rgb_y_offset;
          Y1 *= c->yuv2rgb_y_coeff;
          Y2 *= c->yuv2rgb_y_coeff;
          Y1 += 1 << 13; // 21
          Y2 += 1 << 13;
-        // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
+        // 8 bits: 17 + 13 bits = 30 bits, 16 bits: 17 + 13 bits = 30 bits
  
          R = V * c->yuv2rgb_v2r_coeff;
          G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
          B =                            U * c->yuv2rgb_u2b_coeff;
  
-        // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
+        // 8 bits: 30 - 22 = 8 bits, 16 bits: 30 bits - 14 = 16 bits
          output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
          output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
          output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
@@ -656,8 +666,8 @@ yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
      for (i = 0; i < ((dstW + 1) >> 1); i++) {
          int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha) >> 14;
          int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha) >> 14;
-        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha + (-128 << 23)) >> 14;
-        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha + (-128 << 23)) >> 14;
+        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha + SHIFT_LEFT(-128, 23)) >> 14;
+        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha + SHIFT_LEFT(-128, 23)) >> 14;
          int R, G, B;
  
          Y1 -= c->yuv2rgb_y_offset;
@@ -694,8 +704,8 @@ yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
          for (i = 0; i < ((dstW + 1) >> 1); i++) {
              int Y1 = (buf0[i * 2]    ) >> 2;
              int Y2 = (buf0[i * 2 + 1]) >> 2;
-            int U  = (ubuf0[i] + (-128 << 11)) >> 2;
-            int V  = (vbuf0[i] + (-128 << 11)) >> 2;
+            int U  = (ubuf0[i] + SHIFT_LEFT(-128, 11)) >> 2;
+            int V  = (vbuf0[i] + SHIFT_LEFT(-128, 11)) >> 2;
              int R, G, B;
  
              Y1 -= c->yuv2rgb_y_offset;
@@ -722,8 +732,8 @@ yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
          for (i = 0; i < ((dstW + 1) >> 1); i++) {
              int Y1 = (buf0[i * 2]    ) >> 2;
              int Y2 = (buf0[i * 2 + 1]) >> 2;
-            int U  = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
-            int V  = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
+            int U  = (ubuf0[i] + ubuf1[i] + SHIFT_LEFT(-128, 12)) >> 3;
+            int V  = (vbuf0[i] + vbuf1[i] + SHIFT_LEFT(-128, 12)) >> 3;
              int R, G, B;
  
              Y1 -= c->yuv2rgb_y_offset;
@@ -880,17 +890,17 @@ yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
              dg2 = dither_2x2_8[ y & 1     ][0];
              db2 = dither_2x2_8[(y & 1) ^ 1][1];
          } else {
-            dr1 = dither_4x4_16[ y & 3     ][0];
-            dg1 = dither_4x4_16[ y & 3     ][1];
-            db1 = dither_4x4_16[(y & 3) ^ 3][0];
-            dr2 = dither_4x4_16[ y & 3     ][1];
-            dg2 = dither_4x4_16[ y & 3     ][0];
-            db2 = dither_4x4_16[(y & 3) ^ 3][1];
+            dr1 = ff_dither_4x4_16[ y & 3     ][0];
+            dg1 = ff_dither_4x4_16[ y & 3     ][1];
+            db1 = ff_dither_4x4_16[(y & 3) ^ 3][0];
+            dr2 = ff_dither_4x4_16[ y & 3     ][1];
+            dg2 = ff_dither_4x4_16[ y & 3     ][0];
+            db2 = ff_dither_4x4_16[(y & 3) ^ 3][1];
          }
  
          dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
          dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
-    } else /* 8/4-bit */ {
+    } else /* 8/4 bits */ {
          uint8_t *dest = (uint8_t *) _dest;
          const uint8_t *r = (const uint8_t *) _r;
          const uint8_t *g = (const uint8_t *) _g;
@@ -898,15 +908,15 @@ yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
          int dr1, dg1, db1, dr2, dg2, db2;
  
          if (target == AV_PIX_FMT_RGB8 || target == AV_PIX_FMT_BGR8) {
-            const uint8_t * const d64 = dither_8x8_73[y & 7];
-            const uint8_t * const d32 = dither_8x8_32[y & 7];
+            const uint8_t * const d64 = ff_dither_8x8_73[y & 7];
+            const uint8_t * const d32 = ff_dither_8x8_32[y & 7];
              dr1 = dg1 = d32[(i * 2 + 0) & 7];
              db1 =       d64[(i * 2 + 0) & 7];
              dr2 = dg2 = d32[(i * 2 + 1) & 7];
              db2 =       d64[(i * 2 + 1) & 7];
          } else {
-            const uint8_t * const d64  = dither_8x8_73 [y & 7];
-            const uint8_t * const d128 = dither_8x8_220[y & 7];
+            const uint8_t * const d64  = ff_dither_8x8_73 [y & 7];
+            const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
              dr1 = db1 = d128[(i * 2 + 0) & 7];
              dg1 =        d64[(i * 2 + 0) & 7];
              dr2 = db2 = d128[(i * 2 + 1) & 7];
@@ -1165,8 +1175,8 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
      for (i = 0; i < dstW; i++) {
          int j;
          int Y = 0;
-        int U = -128 << 19;
-        int V = -128 << 19;
+        int U = SHIFT_LEFT(-128, 19);
+        int V = SHIFT_LEFT(-128, 19);
          int R, G, B, A;
  
          for (j = 0; j < lumFilterSize; j++) {
@@ -1269,8 +1279,11 @@ yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter,
                    const int16_t **alpSrc, uint8_t **dest,
                    int dstW, int y)
  {
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
      int i;
-    int hasAlpha = 0;
+    int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrc;
+    uint16_t **dest16 = (uint16_t**)dest;
+    int SH = 22 + 8 - desc->comp[0].depth;
  
      for (i = 0; i < dstW; i++) {
          int j;
@@ -1316,9 +1329,28 @@ yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter,
              B = av_clip_uintp2(B, 30);
          }
  
-        dest[0][i] = G >> 22;
-        dest[1][i] = B >> 22;
-        dest[2][i] = R >> 22;
+        if (SH != 22) {
+            dest16[0][i] = G >> SH;
+            dest16[1][i] = B >> SH;
+            dest16[2][i] = R >> SH;
+            if (hasAlpha)
+                dest16[3][i] = A;
+        } else {
+            dest[0][i] = G >> 22;
+            dest[1][i] = B >> 22;
+            dest[2][i] = R >> 22;
+            if (hasAlpha)
+                dest[3][i] = A;
+        }
+    }
+    if (SH != 22 && (!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
+        for (i = 0; i < dstW; i++) {
+            dest16[0][i] = av_bswap16(dest16[0][i]);
+            dest16[1][i] = av_bswap16(dest16[1][i]);
+            dest16[2][i] = av_bswap16(dest16[2][i]);
+            if (hasAlpha)
+                dest16[3][i] = av_bswap16(dest16[3][i]);
+        }
      }
  }
  
@@ -1337,13 +1369,18 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
      if (is16BPS(dstFormat)) {
          *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c  : yuv2planeX_16LE_c;
          *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c  : yuv2plane1_16LE_c;
-    } else if (is9_OR_10BPS(dstFormat)) {
-        if (desc->comp[0].depth_minus1 == 8) {
+    } else if (is9_15BPS(dstFormat)) {
+        if (desc->comp[0].depth == 9) {
              *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c  : yuv2planeX_9LE_c;
              *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c  : yuv2plane1_9LE_c;
-        } else {
+        } else if (desc->comp[0].depth == 10) {
              *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c  : yuv2planeX_10LE_c;
              *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c  : yuv2plane1_10LE_c;
+        } else if (desc->comp[0].depth == 12) {
+            *yuv2planeX = isBE(dstFormat) ? yuv2planeX_12BE_c  : yuv2planeX_12LE_c;
+            *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_12BE_c  : yuv2plane1_12LE_c;
+        } else {
+            assert(0);
          }
      } else {
          *yuv2plane1 = yuv2plane1_8_c;
@@ -1417,6 +1454,19 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
              *yuv2packedX = yuv2bgr24_full_X_c;
              break;
          case AV_PIX_FMT_GBRP:
+        case AV_PIX_FMT_GBRP9BE:
+        case AV_PIX_FMT_GBRP9LE:
+        case AV_PIX_FMT_GBRP10BE:
+        case AV_PIX_FMT_GBRP10LE:
+        case AV_PIX_FMT_GBRP12BE:
+        case AV_PIX_FMT_GBRP12LE:
+        case AV_PIX_FMT_GBRAP10BE:
+        case AV_PIX_FMT_GBRAP10LE:
+        case AV_PIX_FMT_GBRAP12BE:
+        case AV_PIX_FMT_GBRAP12LE:
+        case AV_PIX_FMT_GBRP16BE:
+        case AV_PIX_FMT_GBRP16LE:
+        case AV_PIX_FMT_GBRAP:
              *yuv2anyX = yuv2gbrp_full_X_c;
              break;
          }
@@ -1554,6 +1604,11 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
          *yuv2packed2 = yuv2yuyv422_2_c;
          *yuv2packedX = yuv2yuyv422_X_c;
          break;
+    case AV_PIX_FMT_YVYU422:
+        *yuv2packed1 = yuv2yvyu422_1_c;
+        *yuv2packed2 = yuv2yvyu422_2_c;
+        *yuv2packedX = yuv2yvyu422_X_c;
+        break;
      case AV_PIX_FMT_UYVY422:
          *yuv2packed1 = yuv2uyvy422_1_c;
          *yuv2packed2 = yuv2uyvy422_2_c;