aarch64: vp9: Add NEON optimizations of VP9 MC functions

[ffmpeg] / libavcodec / ffv1dec.c
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c

index f9b094dab7c18e37727a5e9e15391aef7f1cd5ed..d3169ec7c52ae400797ffe4adaaf1440dc01e60a 100644 (file)
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -30,6 +30,7 @@
  #include "libavutil/crc.h"
  #include "libavutil/opt.h"
  #include "libavutil/imgutils.h"
+#include "libavutil/timer.h"
  #include "avcodec.h"
  #include "internal.h"
  #include "get_bits.h"
@@ -79,15 +80,10 @@ static inline int get_vlc_symbol(GetBitContext *gb, VlcState *const state,
      assert(k <= 8);
  
      v = get_sr_golomb(gb, k, 12, bits);
-    av_dlog(NULL, "v:%d bias:%d error:%d drift:%d count:%d k:%d",
+    ff_dlog(NULL, "v:%d bias:%d error:%d drift:%d count:%d k:%d",
              v, state->bias, state->error_sum, state->drift, state->count, k);
  
-#if 0 // JPEG LS
-    if (k == 0 && 2 * state->drift <= -state->count)
-        v ^= (-1);
-#else
      v ^= ((2 * state->drift + state->count) >> 31);
-#endif
  
      ret = fold(v + state->bias, bits);
  
@@ -119,7 +115,7 @@ static av_always_inline void decode_line(FFV1Context *s, int w,
  
          av_assert2(context < p->context_count);
  
-        if (s->ac) {
+        if (s->ac != AC_GOLOMB_RICE) {
              diff = get_symbol_inline(c, p->state[context], 1);
          } else {
              if (context == 0 && run_mode == 0)
@@ -154,7 +150,7 @@ static av_always_inline void decode_line(FFV1Context *s, int w,
              } else
                  diff = get_vlc_symbol(&s->gb, &p->vlc_state[context], bits);
  
-            av_dlog(s->avctx, "count:%d index:%d, mode:%d, x:%d pos:%d\n",
+            ff_dlog(s->avctx, "count:%d index:%d, mode:%d, x:%d pos:%d\n",
                      run_count, run_index, run_mode, x, get_bits_count(&s->gb));
          }
  
@@ -273,7 +269,7 @@ static int decode_slice_header(FFV1Context *f, FFV1Context *fs)
      unsigned ps, i, context_count;
      memset(state, 128, sizeof(state));
  
-    if (fs->ac > 1) {
+    if (fs->ac == AC_RANGE_CUSTOM_TAB) {
          for (i = 1; i < 256; i++) {
              fs->c.one_state[i]        = f->state_transition[i];
              fs->c.zero_state[256 - i] = 256 - fs->c.one_state[i];
@@ -327,6 +323,14 @@ static int decode_slice_header(FFV1Context *f, FFV1Context *fs)
      f->cur->sample_aspect_ratio.num = get_symbol(c, state, 0);
      f->cur->sample_aspect_ratio.den = get_symbol(c, state, 0);
  
+    if (av_image_check_sar(f->width, f->height,
+                           f->cur->sample_aspect_ratio) < 0) {
+        av_log(f->avctx, AV_LOG_WARNING, "ignoring invalid SAR: %u/%u\n",
+               f->cur->sample_aspect_ratio.num,
+               f->cur->sample_aspect_ratio.den);
+        f->cur->sample_aspect_ratio = (AVRational){ 0, 1 };
+    }
+
      return 0;
  }
  
@@ -335,7 +339,7 @@ static int decode_slice(AVCodecContext *c, void *arg)
      FFV1Context *fs = *(void **)arg;
      FFV1Context *f  = fs->avctx->priv_data;
      int width, height, x, y, ret;
-    const int ps = (av_pix_fmt_desc_get(c->pix_fmt)->flags & PIX_FMT_PLANAR)
+    const int ps = (av_pix_fmt_desc_get(c->pix_fmt)->flags & AV_PIX_FMT_FLAG_PLANAR)
                     ? (c->bits_per_raw_sample > 8) + 1
                     : 4;
      AVFrame *const p = f->cur;
@@ -355,7 +359,7 @@ static int decode_slice(AVCodecContext *c, void *arg)
      x      = fs->slice_x;
      y      = fs->slice_y;
  
-    if (!fs->ac) {
+    if (fs->ac == AC_GOLOMB_RICE) {
          if (f->version == 3 && f->minor_version > 1 || f->version > 3)
              get_rac(&fs->c, (uint8_t[]) { 129 });
          fs->ac_byte_count = f->version > 2 || (!x && !y) ? fs->c.bytestream - fs->c.bytestream_start - 1 : 0;
@@ -366,8 +370,8 @@ static int decode_slice(AVCodecContext *c, void *arg)
  
      av_assert1(width && height);
      if (f->colorspace == 0) {
-        const int chroma_width  = -((-width) >> f->chroma_h_shift);
-        const int chroma_height = -((-height) >> f->chroma_v_shift);
+        const int chroma_width  = AV_CEIL_RSHIFT(width,  f->chroma_h_shift);
+        const int chroma_height = AV_CEIL_RSHIFT(height, f->chroma_v_shift);
          const int cx            = x >> f->chroma_h_shift;
          const int cy            = y >> f->chroma_v_shift;
          decode_plane(fs, p->data[0] + ps * x + y * p->linesize[0], width,
@@ -392,7 +396,7 @@ static int decode_slice(AVCodecContext *c, void *arg)
                                 p->data[2] + ps * x + y * p->linesize[2] };
          decode_rgb_frame(fs, planes, width, height, p->linesize);
      }
-    if (fs->ac && f->version > 2) {
+    if (fs->ac != AC_GOLOMB_RICE && f->version > 2) {
          int v;
          get_rac(&fs->c, (uint8_t[]) { 129 });
          v = fs->c.bytestream_end - fs->c.bytestream - 2 - 5 * f->ec;
@@ -468,9 +472,9 @@ static int read_extra_header(FFV1Context *f)
          c->bytestream_end -= 4;
          f->minor_version   = get_symbol(c, state, 0);
      }
-    f->ac = f->avctx->coder_type = get_symbol(c, state, 0);
+    f->ac = get_symbol(c, state, 0);
  
-    if (f->ac > 1) {
+    if (f->ac == AC_RANGE_CUSTOM_TAB) {
          for (i = 1; i < 256; i++)
              f->state_transition[i] = get_symbol(c, state, 1) + c->one_state[i];
      }
@@ -528,6 +532,13 @@ static int read_extra_header(FFV1Context *f)
          }
      }
  
+    av_log(f->avctx, AV_LOG_VERBOSE,
+           "FFV1 version %d.%d colorspace %d - %d bits - %d/%d planes, %s transparent - tile geometry %dx%d - %s\n",
+           f->version, f->minor_version, f->colorspace, f->avctx->bits_per_raw_sample,
+           f->plane_count, f->chroma_planes, f->transparency ? "" : "not",
+           f->num_h_slices, f->num_v_slices,
+           f->ec ? "per-slice crc" : "no crc");
+
      return 0;
  }
  
@@ -541,6 +552,7 @@ static int read_header(FFV1Context *f)
      memset(state, 128, sizeof(state));
  
      if (f->version < 2) {
+        int chroma_planes, chroma_h_shift, chroma_v_shift, transparency, colorspace, bits_per_raw_sample;
          unsigned v = get_symbol(c, state, 0);
          if (v > 1) {
              av_log(f->avctx, AV_LOG_ERROR,
@@ -549,27 +561,50 @@ static int read_header(FFV1Context *f)
          }
          f->version = v;
  
-        f->ac = f->avctx->coder_type = get_symbol(c, state, 0);
+        f->ac = get_symbol(c, state, 0);
  
-        if (f->ac > 1) {
+        if (f->ac == AC_RANGE_CUSTOM_TAB) {
              for (i = 1; i < 256; i++)
                  f->state_transition[i] =
                      get_symbol(c, state, 1) + c->one_state[i];
          }
  
-        f->colorspace = get_symbol(c, state, 0); //YUV cs type
+        colorspace          = get_symbol(c, state, 0); //YUV cs type
+        bits_per_raw_sample = f->version > 0 ? get_symbol(c, state, 0) : f->avctx->bits_per_raw_sample;
+        chroma_planes       = get_rac(c, state);
+        chroma_h_shift      = get_symbol(c, state, 0);
+        chroma_v_shift      = get_symbol(c, state, 0);
+        transparency        = get_rac(c, state);
+
+        if (f->plane_count) {
+            if (colorspace          != f->colorspace                 ||
+                bits_per_raw_sample != f->avctx->bits_per_raw_sample ||
+                chroma_planes       != f->chroma_planes              ||
+                chroma_h_shift      != f->chroma_h_shift             ||
+                chroma_v_shift      != f->chroma_v_shift             ||
+                transparency        != f->transparency) {
+                av_log(f->avctx, AV_LOG_ERROR, "Invalid change of global parameters\n");
+                return AVERROR_INVALIDDATA;
+            }
+        }
  
-        if (f->version > 0)
-            f->avctx->bits_per_raw_sample = get_symbol(c, state, 0);
+        f->colorspace                 = colorspace;
+        f->avctx->bits_per_raw_sample = bits_per_raw_sample;
+        f->chroma_planes              = chroma_planes;
+        f->chroma_h_shift             = chroma_h_shift;
+        f->chroma_v_shift             = chroma_v_shift;
+        f->transparency               = transparency;
  
-        f->chroma_planes  = get_rac(c, state);
-        f->chroma_h_shift = get_symbol(c, state, 0);
-        f->chroma_v_shift = get_symbol(c, state, 0);
-        f->transparency   = get_rac(c, state);
          f->plane_count    = 2 + f->transparency;
      }
  
      if (f->colorspace == 0) {
+        if (f->transparency && f->avctx->bits_per_raw_sample > 8) {
+            av_log(f->avctx, AV_LOG_ERROR,
+                   "Transparency not supported for bit depth %d\n",
+                   f->avctx->bits_per_raw_sample);
+            return AVERROR(ENOSYS);
+        }
          if (!f->transparency && !f->chroma_planes) {
              if (f->avctx->bits_per_raw_sample <= 8)
                  f->avctx->pix_fmt = AV_PIX_FMT_GRAY8;
@@ -668,7 +703,13 @@ static int read_header(FFV1Context *f)
                     "chroma subsampling not supported in this colorspace\n");
              return AVERROR(ENOSYS);
          }
+        if (f->transparency) {
+            av_log(f->avctx, AV_LOG_ERROR,
+                   "Transparency not supported in this colorspace\n");
+            return AVERROR(ENOSYS);
+        }
          switch (f->avctx->bits_per_raw_sample) {
+        case 0:
          case 8:
              f->avctx->pix_fmt = AV_PIX_FMT_RGB32;
              break;
@@ -689,7 +730,7 @@ static int read_header(FFV1Context *f)
          return AVERROR(ENOSYS);
      }
  
-    av_dlog(f->avctx, "%d %d %d\n",
+    ff_dlog(f->avctx, "%d %d %d\n",
              f->chroma_h_shift, f->chroma_v_shift, f->avctx->pix_fmt);
      if (f->version < 2) {
          context_count = read_quant_tables(c, f->quant_table);
@@ -734,8 +775,8 @@ static int read_header(FFV1Context *f)
  
              fs->slice_x      /= f->num_h_slices;
              fs->slice_y      /= f->num_v_slices;
-            fs->slice_width  /= f->num_h_slices - fs->slice_x;
-            fs->slice_height /= f->num_v_slices - fs->slice_y;
+            fs->slice_width  = fs->slice_width  / f->num_h_slices - fs->slice_x;
+            fs->slice_height = fs->slice_height / f->num_v_slices - fs->slice_y;
              if ((unsigned)fs->slice_width > f->width ||
                  (unsigned)fs->slice_height > f->height)
                  return AVERROR_INVALIDDATA;
@@ -783,6 +824,10 @@ static av_cold int ffv1_decode_init(AVCodecContext *avctx)
  
      ffv1_common_init(avctx);
  
+    f->last_picture = av_frame_alloc();
+    if (!f->last_picture)
+        return AVERROR(ENOMEM);
+
      if (avctx->extradata && (ret = read_extra_header(f)) < 0)
          return ret;
  
@@ -795,13 +840,13 @@ static av_cold int ffv1_decode_init(AVCodecContext *avctx)
  static int ffv1_decode_frame(AVCodecContext *avctx, void *data,
                               int *got_frame, AVPacket *avpkt)
  {
-    const uint8_t *buf  = avpkt->data;
+    uint8_t *buf        = avpkt->data;
      int buf_size        = avpkt->size;
      FFV1Context *f      = avctx->priv_data;
      RangeCoder *const c = &f->slice_context[0]->c;
      int i, ret;
      uint8_t keystate = 128;
-    const uint8_t *buf_p;
+    uint8_t *buf_p;
      AVFrame *const p    = data;
  
      f->cur = p;
@@ -863,7 +908,7 @@ static int ffv1_decode_frame(AVCodecContext *avctx, void *data,
          if (i) {
              ff_init_range_decoder(&fs->c, buf_p, v);
          } else
-            fs->c.bytestream_end = (uint8_t *)(buf_p + v);
+            fs->c.bytestream_end = buf_p + v;
  
          fs->cur = p;
      }
@@ -875,7 +920,7 @@ static int ffv1_decode_frame(AVCodecContext *avctx, void *data,
      for (i = f->slice_count - 1; i >= 0; i--) {
          FFV1Context *fs = f->slice_context[i];
          int j;
-        if (fs->slice_damaged && f->last_picture.data[0]) {
+        if (fs->slice_damaged && f->last_picture->data[0]) {
              const uint8_t *src[4];
              uint8_t *dst[4];
              for (j = 0; j < 4; j++) {
@@ -883,12 +928,12 @@ static int ffv1_decode_frame(AVCodecContext *avctx, void *data,
                  int sv = (j == 1 || j == 2) ? f->chroma_v_shift : 0;
                  dst[j] = p->data[j] + p->linesize[j] *
                           (fs->slice_y >> sv) + (fs->slice_x >> sh);
-                src[j] = f->last_picture.data[j] +
-                         f->last_picture.linesize[j] *
+                src[j] = f->last_picture->data[j] +
+                         f->last_picture->linesize[j] *
                           (fs->slice_y >> sv) + (fs->slice_x >> sh);
              }
-            av_image_copy(dst, p->linesize, (const uint8_t **)src,
-                          f->last_picture.linesize,
+            av_image_copy(dst, p->linesize, src,
+                          f->last_picture->linesize,
                            avctx->pix_fmt, fs->slice_width,
                            fs->slice_height);
          }
@@ -896,8 +941,8 @@ static int ffv1_decode_frame(AVCodecContext *avctx, void *data,
  
      f->picture_number++;
  
-    av_frame_unref(&f->last_picture);
-    if ((ret = av_frame_ref(&f->last_picture, p)) < 0)
+    av_frame_unref(f->last_picture);
+    if ((ret = av_frame_ref(f->last_picture, p)) < 0)
          return ret;
      f->cur = NULL;
  
@@ -906,15 +951,26 @@ static int ffv1_decode_frame(AVCodecContext *avctx, void *data,
      return buf_size;
  }
  
+static av_cold int ffv1_decode_close(AVCodecContext *avctx)
+{
+    FFV1Context *s = avctx->priv_data;
+
+    av_frame_free(&s->last_picture); /* allocated in ffv1_decode_init() */
+
+    ffv1_close(avctx); /* shared teardown; its return value is not used */
+
+    return 0;
+}
+
  AVCodec ff_ffv1_decoder = {
      .name           = "ffv1",
+    .long_name      = NULL_IF_CONFIG_SMALL("FFmpeg video codec #1"),
      .type           = AVMEDIA_TYPE_VIDEO,
      .id             = AV_CODEC_ID_FFV1,
      .priv_data_size = sizeof(FFV1Context),
      .init           = ffv1_decode_init,
-    .close          = ffv1_close,
+    .close          = ffv1_decode_close, /* frees last_picture, then calls ffv1_close() */
      .decode         = ffv1_decode_frame,
-    .capabilities   = CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/ |
-                      CODEC_CAP_SLICE_THREADS,
-    .long_name      = NULL_IF_CONFIG_SMALL("FFmpeg video codec #1"),
+    .capabilities   = AV_CODEC_CAP_DR1 /*| AV_CODEC_CAP_DRAW_HORIZ_BAND*/ |
+                      AV_CODEC_CAP_SLICE_THREADS,
  };