Merge commit '9b30f8dd8fa5bef5f16904cb98745b4a58f8f776'

[ffmpeg] / libavcodec / svq3.c
diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c

index 57205c6ad1127c12ae5e8a6e87b3b1b25a7d0e53..608f8172e218cbe7212d745b2c00c56bc40694be 100644 (file)
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -47,12 +47,11 @@
  #include "avcodec.h"
  #include "mpegutils.h"
  #include "h264.h"
-
-#include "h264data.h" // FIXME FIXME FIXME
-
  #include "h264_mvpred.h"
+#include "h264data.h"
  #include "golomb.h"
  #include "hpeldsp.h"
+#include "mathops.h"
  #include "rectangle.h"
  #include "tpeldsp.h"
  #include "vdpau_internal.h"
@@ -76,10 +75,12 @@ typedef struct SVQ3Context {
      H264Picture *cur_pic;
      H264Picture *next_pic;
      H264Picture *last_pic;
+    GetBitContext gb;
+    uint8_t *slice_buf;
+    int slice_size;
      int halfpel_flag;
      int thirdpel_flag;
      int has_watermark;
-    int next_slice_index;
      uint32_t watermark_key;
      uint8_t *buf;
      int buf_size;
@@ -242,8 +243,9 @@ void ff_svq3_add_idct_c(uint8_t *dst, int16_t *block,
  static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
                                      int index, const int type)
  {
-    static const uint8_t *const scan_patterns[4] =
-    { luma_dc_zigzag_scan, zigzag_scan, svq3_scan, chroma_dc_scan };
+    static const uint8_t *const scan_patterns[4] = {
+        luma_dc_zigzag_scan, ff_zigzag_scan, svq3_scan, ff_h264_chroma_dc_scan
+    };
  
      int run, level, sign, limit;
      unsigned vlc;
@@ -487,6 +489,118 @@ static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
      return 0;
  }
  
+/*
+ * Add the residual of the 16 luma 4x4 blocks of a macroblock onto dest_y.
+ *
+ * INTRA4x4 macroblocks are left untouched by this function. For every
+ * other macroblock type, block i (located at dest_y + block_offset[i]) is
+ * passed to ff_svq3_add_idct_c() when either its non_zero_count_cache
+ * entry or its first coefficient in sl->mb is non-zero.
+ *
+ * The h argument is not used by this function.
+ */
+static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264SliceContext *sl,
+                                                    int mb_type, const int *block_offset,
+                                                    int linesize, uint8_t *dest_y)
+{
+    /* last argument handed to ff_svq3_add_idct_c(): 1 for intra, else 0 */
+    const int intra_arg = IS_INTRA(mb_type) ? 1 : 0;
+    int blk;
+
+    if (IS_INTRA4x4(mb_type))
+        return;
+
+    for (blk = 0; blk < 16; blk++) {
+        int16_t *const coeffs = sl->mb + blk * 16;
+
+        /* skip blocks with neither a coded-count entry nor a first coefficient */
+        if (!sl->non_zero_count_cache[scan8[blk]] && !coeffs[0])
+            continue;
+
+        ff_svq3_add_idct_c(dest_y + block_offset[blk], coeffs, linesize,
+                           sl->qscale, intra_arg);
+    }
+}
+
+/*
+ * Return the 16-bit value stored at mb[index], read through AV_RN16A
+ * (presumably the aligned native-endian read macro -- confirm against
+ * libavutil/intreadwrite.h).
+ */
+static av_always_inline int dctcoef_get(int16_t *mb, int index)
+{
+    int16_t *const coef = mb + index;
+
+    return AV_RN16A(coef);
+}
+
+/*
+ * Run luma intra prediction for one macroblock into dest_y.
+ *
+ * INTRA4x4: each of the 16 4x4 blocks is predicted with
+ * h->hpc.pred4x4[] using the mode from sl->intra4x4_pred_mode_cache, and
+ * its residual is added with ff_svq3_add_idct_c() when the block's
+ * non_zero_count_cache entry is non-zero.
+ *
+ * Any other macroblock type: the whole 16x16 block is predicted with
+ * h->hpc.pred16x16[] and ff_svq3_luma_dc_dequant_idct_c() is applied to
+ * sl->mb from sl->mb_luma_dc[0].
+ */
+static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h,
+                                                       H264SliceContext *sl,
+                                                       int mb_type,
+                                                       const int *block_offset,
+                                                       int linesize,
+                                                       uint8_t *dest_y)
+{
+    const int qp = sl->qscale;
+    int blk;
+
+    if (!IS_INTRA4x4(mb_type)) {
+        h->hpc.pred16x16[sl->intra16x16_pred_mode](dest_y, linesize);
+        ff_svq3_luma_dc_dequant_idct_c(sl->mb, sl->mb_luma_dc[0], qp);
+        return;
+    }
+
+    for (blk = 0; blk < 16; blk++) {
+        uint8_t *const dst  = dest_y + block_offset[blk];
+        const int pred_mode = sl->intra4x4_pred_mode_cache[scan8[blk]];
+        uint8_t *topright   = NULL;
+        int replicated;
+
+        /* only these two modes are given a top-right sample pointer */
+        if (pred_mode == DIAG_DOWN_LEFT_PRED || pred_mode == VERT_LEFT_PRED) {
+            av_assert2(sl->mb_y || linesize <= block_offset[blk]);
+            if ((sl->topright_samples_available << blk) & 0x8000) {
+                topright = dst + 4 - linesize;
+            } else {
+                /* top-right flagged unavailable: repeat the byte at
+                 * dst[3 - linesize] four times and point at that copy */
+                replicated = dst[3 - linesize] * 0x01010101u;
+                topright   = (uint8_t *)&replicated;
+            }
+        }
+
+        h->hpc.pred4x4[pred_mode](dst, topright, linesize);
+
+        if (sl->non_zero_count_cache[scan8[blk]])
+            ff_svq3_add_idct_c(dst, sl->mb + blk * 16, linesize, qp, 0);
+    }
+}
+
+/*
+ * Reconstruct the macroblock at (sl->mb_x, sl->mb_y) into h->cur_pic.
+ *
+ * Steps, in order:
+ *  - compute the luma/chroma destination pointers and issue two
+ *    h->vdsp.prefetch() calls,
+ *  - record sl->list_count in h->list_counts[] and copy the slice line
+ *    sizes into sl->mb_linesize / sl->mb_uvlinesize,
+ *  - for intra macroblocks, run chroma (pred8x8) and luma prediction,
+ *  - add the luma residual via hl_decode_mb_idct_luma(),
+ *  - when sl->cbp has any bit of 0x30 set, dequantize the chroma DC of
+ *    both planes and add the residual of four 4x4 blocks per plane.
+ */
+static void hl_decode_mb(const H264Context *h, H264SliceContext *sl)
+{
+    const int mb_x          = sl->mb_x;
+    const int mb_y          = sl->mb_y;
+    const int mb_xy         = sl->mb_xy;
+    const int mb_type       = h->cur_pic.mb_type[mb_xy];
+    const int is_intra      = IS_INTRA(mb_type);
+    const int *block_offset = &h->block_offset[0];
+    const int block_h       = 16 >> h->chroma_y_shift;
+    uint8_t *dest_y  = h->cur_pic.f->data[0] + (mb_x     + mb_y * sl->linesize)  * 16;
+    uint8_t *dest_cb = h->cur_pic.f->data[1] +  mb_x * 8 + mb_y * sl->uvlinesize * block_h;
+    uint8_t *dest_cr = h->cur_pic.f->data[2] +  mb_x * 8 + mb_y * sl->uvlinesize * block_h;
+    uint8_t *chroma_dest[2];
+    int linesize, uvlinesize;
+    int plane, blk;
+
+    h->vdsp.prefetch(dest_y  + (sl->mb_x & 3) * 4 * sl->linesize   + 64, sl->linesize,      4);
+    h->vdsp.prefetch(dest_cb + (sl->mb_x & 7)     * sl->uvlinesize + 64, dest_cr - dest_cb, 2);
+
+    h->list_counts[mb_xy] = sl->list_count;
+
+    linesize   = sl->mb_linesize   = sl->linesize;
+    uvlinesize = sl->mb_uvlinesize = sl->uvlinesize;
+
+    if (is_intra) {
+        h->hpc.pred8x8[sl->chroma_pred_mode](dest_cb, uvlinesize);
+        h->hpc.pred8x8[sl->chroma_pred_mode](dest_cr, uvlinesize);
+
+        hl_decode_mb_predict_luma(h, sl, mb_type, block_offset, linesize, dest_y);
+    }
+
+    hl_decode_mb_idct_luma(h, sl, mb_type, block_offset, linesize, dest_y);
+
+    /* no bit of 0x30 set in cbp: nothing to do for chroma */
+    if (!(sl->cbp & 0x30))
+        return;
+
+    chroma_dest[0] = dest_cb;
+    chroma_dest[1] = dest_cr;
+
+    /* DC pass for both planes first; dequant table index is 1/2 for
+     * intra and 4/5 otherwise (first/second chroma plane) */
+    for (plane = 0; plane < 2; plane++)
+        h->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + 16 * 16 * (plane + 1),
+                                               h->dequant4_coeff[(is_intra ? 1 : 4) + plane][sl->chroma_qp[plane]][0]);
+
+    /* then the four 4x4 blocks of each plane: indices 16..19 and 32..35 */
+    for (plane = 0; plane < 2; plane++) {
+        const int first = (plane + 1) * 16;
+
+        for (blk = first; blk < first + 4; blk++) {
+            if (!sl->non_zero_count_cache[scan8[blk]] && !sl->mb[blk * 16])
+                continue;
+
+            ff_svq3_add_idct_c(chroma_dest[plane] + block_offset[blk],
+                               sl->mb + blk * 16, uvlinesize,
+                               ff_h264_chroma_qp[0][sl->qscale + 12] - 12, 2);
+        }
+    }
+}
+
  static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
  {
      H264Context *h = &s->h;
@@ -666,7 +780,7 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
  
          mb_type = MB_TYPE_INTRA4x4;
      } else {                      /* INTRA16x16 */
-        dir = i_mb_type_info[mb_type - 8].pred_mode;
+        dir = ff_h264_i_mb_type_info[mb_type - 8].pred_mode;
          dir = (dir >> 1) ^ 3 * (dir & 1) ^ 1;
  
          if ((sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h, sl, dir, 0)) < 0) {
@@ -674,7 +788,7 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
              return sl->intra16x16_pred_mode;
          }
  
-        cbp     = i_mb_type_info[mb_type - 8].cbp;
+        cbp     = ff_h264_i_mb_type_info[mb_type - 8].cbp;
          mb_type = MB_TYPE_INTRA16x16;
      }
  
@@ -702,8 +816,8 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
              return -1;
          }
  
-        cbp = IS_INTRA(mb_type) ? golomb_to_intra4x4_cbp[vlc]
-                                : golomb_to_inter_cbp[vlc];
+        cbp = IS_INTRA(mb_type) ? ff_h264_golomb_to_intra4x4_cbp[vlc]
+                                : ff_h264_golomb_to_inter_cbp[vlc];
      }
      if (IS_INTRA16x16(mb_type) ||
          (h->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
@@ -787,37 +901,43 @@ static int svq3_decode_slice_header(AVCodecContext *avctx)
      int i, header;
      unsigned slice_id;
  
-    header = get_bits(&h->gb, 8);
+    header = get_bits(&s->gb, 8);
  
      if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
          /* TODO: what? */
          av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
          return -1;
      } else {
+        int slice_bits, slice_bytes, slice_length;
          int length = header >> 5 & 3;
  
-        s->next_slice_index = get_bits_count(&h->gb) +
-                              8 * show_bits(&h->gb, 8 * length) +
-                              8 * length;
+        slice_length = show_bits(&s->gb, 8 * length);
+        slice_bits   = slice_length * 8;
+        slice_bytes  = slice_length + length - 1;
  
-        if (s->next_slice_index > h->gb.size_in_bits) {
+        if (slice_bytes > get_bits_left(&s->gb)) {
              av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
              return -1;
          }
  
-        h->gb.size_in_bits = s->next_slice_index - 8 * (length - 1);
-        skip_bits(&h->gb, 8);
+        skip_bits(&s->gb, 8);
+
+        av_fast_malloc(&s->slice_buf, &s->slice_size, slice_bytes + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (!s->slice_buf)
+            return AVERROR(ENOMEM);
+
+        memcpy(s->slice_buf, s->gb.buffer + s->gb.index / 8, slice_bytes);
+
+        init_get_bits(&h->gb, s->slice_buf, slice_bits);
  
          if (s->watermark_key) {
-            uint32_t header = AV_RL32(&h->gb.buffer[(get_bits_count(&h->gb) >> 3) + 1]);
-            AV_WL32(&h->gb.buffer[(get_bits_count(&h->gb) >> 3) + 1],
-                    header ^ s->watermark_key);
+            uint32_t header = AV_RL32(&h->gb.buffer[1]);
+            AV_WL32(&h->gb.buffer[1], header ^ s->watermark_key);
          }
          if (length > 0) {
-            memmove((uint8_t *) &h->gb.buffer[get_bits_count(&h->gb) >> 3],
-                    &h->gb.buffer[h->gb.size_in_bits >> 3], length - 1);
+            memmove(s->slice_buf, &s->slice_buf[slice_length], length - 1);
          }
-        skip_bits_long(&h->gb, 0);
+        skip_bits_long(&s->gb, slice_bytes * 8);
      }
  
      if ((slice_id = svq3_get_ue_golomb(&h->gb)) >= 3) {
@@ -825,7 +945,7 @@ static int svq3_decode_slice_header(AVCodecContext *avctx)
          return -1;
      }
  
-    sl->slice_type = golomb_to_pict_type[slice_id];
+    sl->slice_type = ff_h264_golomb_to_pict_type[slice_id];
  
      if ((header & 0x9F) == 2) {
          i              = (h->mb_num < 64) ? 6 : (1 + av_log2(h->mb_num - 1));
@@ -1194,7 +1314,9 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
          buf = avpkt->data;
      }
  
-    init_get_bits(&h->gb, buf, 8 * buf_size);
+    ret = init_get_bits(&s->gb, buf, 8 * buf_size);
+    if (ret < 0)
+        return ret;
  
      if (svq3_decode_slice_header(avctx))
          return -1;
@@ -1310,15 +1432,13 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
              unsigned mb_type;
              sl->mb_xy = sl->mb_x + sl->mb_y * h->mb_stride;
  
-            if ((get_bits_count(&h->gb) + 7) >= h->gb.size_in_bits &&
-                ((get_bits_count(&h->gb) & 7) == 0 ||
-                 show_bits(&h->gb, -get_bits_count(&h->gb) & 7) == 0)) {
-                skip_bits(&h->gb, s->next_slice_index - get_bits_count(&h->gb));
-                h->gb.size_in_bits = 8 * buf_size;
-
-                if (svq3_decode_slice_header(avctx))
-                    return -1;
+            if ((get_bits_left(&h->gb)) <= 7) {
+                if (((get_bits_count(&h->gb) & 7) == 0 ||
+                    show_bits(&h->gb, get_bits_left(&h->gb) & 7) == 0)) {
  
+                    if (svq3_decode_slice_header(avctx))
+                        return -1;
+                }
                  /* TODO: support s->mb_skip_run */
              }
  
@@ -1335,7 +1455,7 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
              }
  
              if (mb_type != 0 || sl->cbp)
-                ff_h264_hl_decode_mb(h, &h->slice_ctx[0]);
+                hl_decode_mb(h, &h->slice_ctx[0]);
  
              if (h->pict_type != AV_PICTURE_TYPE_B && !h->low_delay)
                  h->cur_pic.mb_type[sl->mb_x + sl->mb_y * h->mb_stride] =
@@ -1394,6 +1514,7 @@ static av_cold int svq3_decode_end(AVCodecContext *avctx)
      av_freep(&s->cur_pic);
      av_freep(&s->next_pic);
      av_freep(&s->last_pic);
+    av_freep(&s->slice_buf);
  
      memset(&h->cur_pic, 0, sizeof(h->cur_pic));