avcodec/mobiclip: Check that Motion vectors are within the input frame

[ffmpeg] / libavcodec / tiff.c
diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c

index 411bf4dd40b0d1d493e792d6d84d95fc3fef6ed3..2e4546421888e1539a027c380d7caf4548bd2e62 100644 (file)
--- a/libavcodec/tiff.c
+++ b/libavcodec/tiff.c
@@ -79,12 +79,13 @@ typedef struct TiffContext {
      int fill_order;
      uint32_t res[4];
      int is_thumbnail;
+    unsigned last_tag;
  
      int is_bayer;
      uint8_t pattern[4];
      unsigned black_level;
      unsigned white_level;
-    const uint16_t *dng_lut; // Pointer to DNG linearization table
+    uint16_t dng_lut[65536];
  
      uint32_t sub_ifd;
      uint16_t cur_page;
@@ -393,7 +394,7 @@ static int tiff_uncompress(uint8_t *dst, unsigned long *len, const uint8_t *src,
      z_stream zstream = { 0 };
      int zret;
  
-    zstream.next_in   = (uint8_t *)src;
+    zstream.next_in   = src;
      zstream.avail_in  = size;
      zstream.next_out  = dst;
      zstream.avail_out = *len;
@@ -589,7 +590,7 @@ static int tiff_unpack_strip(TiffContext *s, AVFrame *p, uint8_t *dst, int strid
          av_assert0(s->bpp == 24);
      }
      if (s->is_bayer) {
-        width = (s->bpp * s->width + 7) >> 3;
+        av_assert0(width == (s->bpp * s->width + 7) >> 3);
      }
      if (p->format == AV_PIX_FMT_GRAY12) {
          av_fast_padded_malloc(&s->yuv_line, &s->yuv_line_size, width);
@@ -679,6 +680,9 @@ static int tiff_unpack_strip(TiffContext *s, AVFrame *p, uint8_t *dst, int strid
          return 0;
      }
  
+    if (is_dng && stride == 0)
+        return AVERROR_INVALIDDATA;
+
      for (line = 0; line < lines; line++) {
          if (src - ssrc > size) {
              av_log(s->avctx, AV_LOG_ERROR, "Source data overread\n");
@@ -704,18 +708,20 @@ static int tiff_unpack_strip(TiffContext *s, AVFrame *p, uint8_t *dst, int strid
  
              /* Color processing for DNG images with uncompressed strips (non-tiled) */
              if (is_dng) {
-                int is_u16, pixel_size_bytes, pixel_size_bits;
+                int is_u16, pixel_size_bytes, pixel_size_bits, elements;
  
-                is_u16 = (s->bpp > 8);
+                is_u16 = (s->bpp / s->bppcount > 8);
                  pixel_size_bits = (is_u16 ? 16 : 8);
                  pixel_size_bytes = (is_u16 ? sizeof(uint16_t) : sizeof(uint8_t));
  
+                elements = width / pixel_size_bytes * pixel_size_bits / s->bpp * s->bppcount; // need to account for [1, 16] bpp
+                av_assert0 (elements * pixel_size_bytes <= FFABS(stride));
                  dng_blit(s,
                           dst,
                           0, // no stride, only 1 line
                           dst,
                           0, // no stride, only 1 line
-                         width / pixel_size_bytes * pixel_size_bits / s->bpp * s->bppcount, // need to account for [1, 16] bpp
+                         elements,
                           1,
                           0, // single-component variation is only preset in JPEG-encoded DNGs
                           is_u16);
@@ -773,16 +779,8 @@ static int tiff_unpack_strip(TiffContext *s, AVFrame *p, uint8_t *dst, int strid
      return 0;
  }
  
-static float av_always_inline linear_to_srgb(float value) {
-    if (value <= 0.0031308f)
-        return value * 12.92f;
-    else
-        return powf(value * 1.055f, 1.0f / 2.4f) - 0.055f;
-}
-
  /**
   * Map stored raw sensor values into linear reference values (see: DNG Specification - Chapter 5)
- * Then convert to sRGB color space.
   */
  static uint16_t av_always_inline dng_process_color16(uint16_t value,
                                                       const uint16_t *lut,
@@ -800,8 +798,7 @@ static uint16_t av_always_inline dng_process_color16(uint16_t value,
      // Color scaling
      value_norm = (float)value * scale_factor;
  
-    // Color space conversion (sRGB)
-    value = av_clip_uint16_c((uint16_t)(linear_to_srgb(value_norm) * 0xFFFF));
+    value = av_clip_uint16_c(value_norm * 65535);
  
      return value;
  }
@@ -863,8 +860,11 @@ static void dng_blit(TiffContext *s, uint8_t *dst, int dst_stride,
              }
          } else {
              for (line = 0; line < height; line++) {
+                uint8_t *dst_u8 = dst;
+                const uint8_t *src_u8 = src;
+
                  for (col = 0; col < width; col++)
-                    *dst++ = dng_process_color8(*src++, s->dng_lut, s->black_level, scale_factor);
+                    *dst_u8++ = dng_process_color8(*src_u8++, s->dng_lut, s->black_level, scale_factor);
  
                  dst += dst_stride;
                  src += src_stride;
@@ -883,6 +883,9 @@ static int dng_decode_jpeg(AVCodecContext *avctx, AVFrame *frame,
      int is_single_comp, is_u16, pixel_size;
      int ret;
  
+    if (tile_byte_count < 0 || tile_byte_count > bytestream2_get_bytes_left(&s->gb))
+        return AVERROR_INVALIDDATA;
+
      /* Prepare a packet and send to the MJPEG decoder */
      av_init_packet(&jpkt);
      jpkt.data = (uint8_t*)s->gb.buffer;
@@ -912,12 +915,28 @@ static int dng_decode_jpeg(AVCodecContext *avctx, AVFrame *frame,
              return 0;
      }
  
+    is_u16 = (s->bpp > 8);
+
      /* Copy the outputted tile's pixels from 'jpgframe' to 'frame' (final buffer) */
  
+    if (s->jpgframe->width  != s->avctx_mjpeg->width  ||
+        s->jpgframe->height != s->avctx_mjpeg->height ||
+        s->jpgframe->format != s->avctx_mjpeg->pix_fmt)
+        return AVERROR_INVALIDDATA;
+
      /* See dng_blit for explanation */
-    is_single_comp = (s->avctx_mjpeg->width == w * 2 && s->avctx_mjpeg->height == h / 2);
+    if (s->avctx_mjpeg->width  == w * 2 &&
+        s->avctx_mjpeg->height == h / 2 &&
+        s->avctx_mjpeg->pix_fmt == AV_PIX_FMT_GRAY16LE) {
+        is_single_comp = 1;
+    } else if (s->avctx_mjpeg->width  >= w &&
+               s->avctx_mjpeg->height >= h &&
+               s->avctx_mjpeg->pix_fmt == (is_u16 ? AV_PIX_FMT_GRAY16 : AV_PIX_FMT_GRAY8)
+              ) {
+        is_single_comp = 0;
+    } else
+        return AVERROR_INVALIDDATA;
  
-    is_u16 = (s->bpp > 8);
      pixel_size = (is_u16 ? sizeof(uint16_t) : sizeof(uint8_t));
  
      if (is_single_comp && !is_u16) {
@@ -1088,50 +1107,19 @@ static int init_image(TiffContext *s, ThreadFrame *frame)
      case 10101:
      case 10121:
      case 10141:
-        switch (AV_RL32(s->pattern)) {
-        case 0x02010100:
-            s->avctx->pix_fmt = s->le ? AV_PIX_FMT_BAYER_RGGB16LE : AV_PIX_FMT_BAYER_RGGB16BE;
-            break;
-        case 0x00010102:
-            s->avctx->pix_fmt = s->le ? AV_PIX_FMT_BAYER_BGGR16LE : AV_PIX_FMT_BAYER_BGGR16BE;
-            break;
-        case 0x01000201:
-            s->avctx->pix_fmt = s->le ? AV_PIX_FMT_BAYER_GBRG16LE : AV_PIX_FMT_BAYER_GBRG16BE;
-            break;
-        case 0x01020001:
-            s->avctx->pix_fmt = s->le ? AV_PIX_FMT_BAYER_GRBG16LE : AV_PIX_FMT_BAYER_GRBG16BE;
-            break;
-        default:
-            av_log(s->avctx, AV_LOG_ERROR, "Unsupported Bayer pattern: 0x%X\n",
-                   AV_RL32(s->pattern));
-            return AVERROR_PATCHWELCOME;
-        }
-        /* Force endianness as mentioned in 'DNG Specification: Chapter 3: BitsPerSample'
-           NOTE: The spec actually specifies big-endian, not sure why we need little-endian, but
-                 such images don't work otherwise. Examples are images produced by Zenmuse X7. */
-        if ((s->tiff_type == TIFF_TYPE_DNG || s->tiff_type == TIFF_TYPE_CINEMADNG)
-            && (s->bpp != 8 && s->bpp != 16 && s->bpp != 32)) {
-            switch (s->avctx->pix_fmt) {
-            case AV_PIX_FMT_BAYER_RGGB16BE: s->avctx->pix_fmt = AV_PIX_FMT_BAYER_RGGB16LE; break;
-            case AV_PIX_FMT_BAYER_BGGR16BE: s->avctx->pix_fmt = AV_PIX_FMT_BAYER_BGGR16LE; break;
-            case AV_PIX_FMT_BAYER_GBRG16BE: s->avctx->pix_fmt = AV_PIX_FMT_BAYER_GBRG16LE; break;
-            case AV_PIX_FMT_BAYER_GRBG16BE: s->avctx->pix_fmt = AV_PIX_FMT_BAYER_GRBG16LE; break;
-            }
-        }
-        break;
      case 10161:
          switch (AV_RL32(s->pattern)) {
          case 0x02010100:
-            s->avctx->pix_fmt = s->le ? AV_PIX_FMT_BAYER_RGGB16LE : AV_PIX_FMT_BAYER_RGGB16BE;
+            s->avctx->pix_fmt = AV_PIX_FMT_BAYER_RGGB16;
              break;
          case 0x00010102:
-            s->avctx->pix_fmt = s->le ? AV_PIX_FMT_BAYER_BGGR16LE : AV_PIX_FMT_BAYER_BGGR16BE;
+            s->avctx->pix_fmt = AV_PIX_FMT_BAYER_BGGR16;
              break;
          case 0x01000201:
-            s->avctx->pix_fmt = s->le ? AV_PIX_FMT_BAYER_GBRG16LE : AV_PIX_FMT_BAYER_GBRG16BE;
+            s->avctx->pix_fmt = AV_PIX_FMT_BAYER_GBRG16;
              break;
          case 0x01020001:
-            s->avctx->pix_fmt = s->le ? AV_PIX_FMT_BAYER_GRBG16LE : AV_PIX_FMT_BAYER_GRBG16BE;
+            s->avctx->pix_fmt = AV_PIX_FMT_BAYER_GRBG16;
              break;
          default:
              av_log(s->avctx, AV_LOG_ERROR, "Unsupported Bayer pattern: 0x%X\n",
@@ -1258,6 +1246,8 @@ static void set_sar(TiffContext *s, unsigned tag, unsigned num, unsigned den)
  
  static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
  {
+    AVFrameSideData *sd;
+    GetByteContext gb_temp;
      unsigned tag, type, count, off, value = 0, value2 = 1; // value2 is a denominator so init. to 1
      int i, start;
      int pos;
@@ -1268,6 +1258,12 @@ static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
      if (ret < 0) {
          goto end;
      }
+    if (tag <= s->last_tag)
+        return AVERROR_INVALIDDATA;
+
+    // TIFF_STRIP_SIZE is not recorded as the last tag: some files store it in a logical but out-of-order position relative to TIFF_STRIP_OFFS
+    if (tag != TIFF_STRIP_SIZE)
+        s->last_tag = tag;
  
      off = bytestream2_tell(&s->gb);
      if (count == 1) {
@@ -1280,6 +1276,11 @@ static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
          case TIFF_RATIONAL:
              value  = ff_tget(&s->gb, TIFF_LONG, s->le);
              value2 = ff_tget(&s->gb, TIFF_LONG, s->le);
+            if (!value2) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid denominator in rational\n");
+                return AVERROR_INVALIDDATA;
+            }
+
              break;
          case TIFF_STRING:
              if (count <= 4) {
@@ -1301,7 +1302,7 @@ static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
          s->height = value;
          break;
      case TIFF_BPP:
-        if (count > 5U) {
+        if (count > 5 || count <= 0) {
              av_log(s->avctx, AV_LOG_ERROR,
                     "This format is not supported (bpp=%d, %d components)\n",
                     value, count);
@@ -1332,9 +1333,9 @@ static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
                     "Samples per pixel requires a single value, many provided\n");
              return AVERROR_INVALIDDATA;
          }
-        if (value > 5U) {
+        if (value > 5 || value <= 0) {
              av_log(s->avctx, AV_LOG_ERROR,
-                   "Samples per pixel %d is too large\n", value);
+                   "Invalid samples per pixel %d\n", value);
              return AVERROR_INVALIDDATA;
          }
          if (s->bppcount == 1)
@@ -1444,23 +1445,21 @@ static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
          else if (count > 1)
              s->sub_ifd = ff_tget(&s->gb, TIFF_LONG, s->le); /** Only get the first SubIFD */
          break;
-    case DNG_LINEARIZATION_TABLE: {
-        uint32_t lut_offset = value;
-        uint32_t lut_size = count;
-        uint32_t lut_wanted_size = 1 << s->bpp;
-        if (lut_wanted_size != lut_size)
-            av_log(s->avctx, AV_LOG_WARNING, "DNG contains LUT with invalid size (%"PRIu32"), disabling LUT\n", lut_size);
-        else if (lut_offset >= bytestream2_size(&s->gb))
-            av_log(s->avctx, AV_LOG_WARNING, "DNG contains LUT with invalid offset (%"PRIu32"), disabling LUT\n", lut_offset);
-        else
-            s->dng_lut = (uint16_t*)(s->gb.buffer + lut_offset);
+    case DNG_LINEARIZATION_TABLE:
+        if (count > FF_ARRAY_ELEMS(s->dng_lut))
+            return AVERROR_INVALIDDATA;
+        for (int i = 0; i < count; i++)
+            s->dng_lut[i] = ff_tget(&s->gb, type, s->le);
          break;
-    }
      case DNG_BLACK_LEVEL:
          if (count > 1) {    /* Use the first value in the pattern (assume they're all the same) */
              if (type == TIFF_RATIONAL) {
                  value  = ff_tget(&s->gb, TIFF_LONG, s->le);
                  value2 = ff_tget(&s->gb, TIFF_LONG, s->le);
+                if (!value2) {
+                    av_log(s->avctx, AV_LOG_ERROR, "Invalid black level denominator\n");
+                    return AVERROR_INVALIDDATA;
+                }
  
                  s->black_level = value / value2;
              } else
@@ -1496,6 +1495,7 @@ static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
          case TIFF_PHOTOMETRIC_SEPARATED:
          case TIFF_PHOTOMETRIC_YCBCR:
          case TIFF_PHOTOMETRIC_CFA:
+        case TIFF_PHOTOMETRIC_LINEAR_RAW: // Used by DNG images
              s->photometric = value;
              break;
          case TIFF_PHOTOMETRIC_ALPHA_MASK:
@@ -1504,7 +1504,6 @@ static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
          case TIFF_PHOTOMETRIC_ITU_LAB:
          case TIFF_PHOTOMETRIC_LOG_L:
          case TIFF_PHOTOMETRIC_LOG_LUV:
-        case TIFF_PHOTOMETRIC_LINEAR_RAW:
              avpriv_report_missing_feature(s->avctx,
                                            "PhotometricInterpretation 0x%04X",
                                            value);
@@ -1591,7 +1590,7 @@ static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
          break;
      case TIFF_GEO_KEY_DIRECTORY:
          if (s->geotag_count) {
-            avpriv_request_sample(s->avctx, "Multiple geo key directories\n");
+            avpriv_request_sample(s->avctx, "Multiple geo key directories");
              return AVERROR_INVALIDDATA;
          }
          ADD_METADATA(1, "GeoTIFF_Version", NULL);
@@ -1682,6 +1681,19 @@ static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
              }
          }
          break;
+    case TIFF_ICC_PROFILE:
+        gb_temp = s->gb;
+        bytestream2_seek(&gb_temp, SEEK_SET, off);
+
+        if (bytestream2_get_bytes_left(&gb_temp) < count)
+            return AVERROR_INVALIDDATA;
+
+        sd = av_frame_new_side_data(frame, AV_FRAME_DATA_ICC_PROFILE, count);
+        if (!sd)
+            return AVERROR(ENOMEM);
+
+        bytestream2_get_bufferu(&gb_temp, sd->data, count);
+        break;
      case TIFF_ARTIST:
          ADD_METADATA(count, "artist", NULL);
          break;
@@ -1776,6 +1788,8 @@ static int decode_frame(AVCodecContext *avctx,
      GetByteContext stripsizes;
      GetByteContext stripdata;
      int retry_for_subifd, retry_for_page;
+    int is_dng;
+    int has_tile_bits, has_strip_bits;
  
      bytestream2_init(&s->gb, avpkt->data, avpkt->size);
  
@@ -1802,7 +1816,11 @@ again:
      s->is_tiled    = 0;
      s->is_jpeg     = 0;
      s->cur_page    = 0;
-    s->dng_lut     = NULL;
+    s->last_tag    = 0;
+
+    for (i = 0; i < 65536; i++)
+        s->dng_lut[i] = i;
+
      free_geotags(s);
  
      // Reset these offsets so we can tell if they were set this frame
@@ -1842,7 +1860,7 @@ again:
              return AVERROR_INVALIDDATA;
          }
          if (off <= last_off) {
-            avpriv_request_sample(s->avctx, "non increasing IFD offset\n");
+            avpriv_request_sample(s->avctx, "non increasing IFD offset");
              return AVERROR_INVALIDDATA;
          }
          if (off >= UINT_MAX - 14 || avpkt->size < off + 14) {
@@ -1853,6 +1871,10 @@ again:
          goto again;
      }
  
+    /* At this point we've decided on which (Sub)IFD to process */
+
+    is_dng = (s->tiff_type == TIFF_TYPE_DNG || s->tiff_type == TIFF_TYPE_CINEMADNG);
+
      for (i = 0; i<s->geotag_count; i++) {
          const char *keyname = get_geokey_name(s->geotags[i].key);
          if (!keyname) {
@@ -1870,10 +1892,40 @@ again:
          }
      }
  
+    if (is_dng) {
+        int bps;
+
+        if (s->bpp % s->bppcount)
+            return AVERROR_INVALIDDATA;
+        bps = s->bpp / s->bppcount;
+        if (bps < 8 || bps > 32)
+            return AVERROR_INVALIDDATA;
+
+        if (s->white_level == 0)
+            s->white_level = (1LL << bps) - 1; /* Default value as per the spec */
+
+        if (s->white_level <= s->black_level) {
+            av_log(avctx, AV_LOG_ERROR, "BlackLevel (%"PRId32") must be less than WhiteLevel (%"PRId32")\n",
+                s->black_level, s->white_level);
+            return AVERROR_INVALIDDATA;
+        }
+
+        if (s->planar)
+            return AVERROR_PATCHWELCOME;
+    }
+
      if (!s->is_tiled && !s->strippos && !s->stripoff) {
          av_log(avctx, AV_LOG_ERROR, "Image data is missing\n");
          return AVERROR_INVALIDDATA;
      }
+
+    has_tile_bits  = s->is_tiled || s->tile_byte_counts_offset || s->tile_offsets_offset || s->tile_width || s->tile_length || s->tile_count;
+    has_strip_bits = s->strippos || s->strips || s->stripoff || s->rps || s->sot || s->sstype || s->stripsize || s->stripsizesoff;
+
+    if (has_tile_bits && has_strip_bits) {
+        av_log(avctx, AV_LOG_WARNING, "Tiled TIFF is not allowed to strip\n");
+    }
+
      /* now we have the data and may start decoding */
      if ((ret = init_image(s, &frame)) < 0)
          return ret;
@@ -1903,9 +1955,16 @@ again:
          }
      }
  
+    if (s->photometric == TIFF_PHOTOMETRIC_LINEAR_RAW ||
+        s->photometric == TIFF_PHOTOMETRIC_CFA) {
+        p->color_trc = AVCOL_TRC_LINEAR;
+    } else if (s->photometric == TIFF_PHOTOMETRIC_BLACK_IS_ZERO) {
+        p->color_trc = AVCOL_TRC_GAMMA22;
+    }
+
      /* Handle DNG images with JPEG-compressed tiles */
  
-    if ((s->tiff_type == TIFF_TYPE_DNG || s->tiff_type == TIFF_TYPE_CINEMADNG) && s->is_tiled) {
+    if (is_dng && s->is_tiled) {
          if (!s->is_jpeg) {
              avpriv_report_missing_feature(avctx, "DNG uncompressed tiled images");
              return AVERROR_PATCHWELCOME;
@@ -2063,8 +2122,7 @@ again:
          FFSWAP(int,      p->linesize[0], p->linesize[1]);
      }
  
-    if (s->is_bayer && s->white_level && s->bpp == 16 &&
-        !(s->tiff_type == TIFF_TYPE_DNG || s->tiff_type == TIFF_TYPE_CINEMADNG)) {
+    if (s->is_bayer && s->white_level && s->bpp == 16 && !is_dng) {
          uint16_t *dst = (uint16_t *)p->data[0];
          for (i = 0; i < s->height; i++) {
              for (j = 0; j < s->width; j++)
@@ -2112,8 +2170,6 @@ static av_cold int tiff_init(AVCodecContext *avctx)
      s->avctx_mjpeg->idct_algo = avctx->idct_algo;
      ret = ff_codec_open2_recursive(s->avctx_mjpeg, codec, NULL);
      if (ret < 0) {
-        av_frame_free(&s->jpgframe);
-        avcodec_free_context(&s->avctx_mjpeg);
          return ret;
      }
  
@@ -2162,7 +2218,7 @@ AVCodec ff_tiff_decoder = {
      .init           = tiff_init,
      .close          = tiff_end,
      .decode         = decode_frame,
-    .init_thread_copy = ONLY_IF_THREADS_ENABLED(tiff_init),
      .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
      .priv_class     = &tiff_decoder_class,
  };