git.sesse.net Git - ffmpeg/blob - libavcodec/h264.c

   1 /*
   2  * H.26L/H.264/AVC/JVT/14496-10/... decoder
   3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * H.264 / AVC / MPEG4 part10 codec.
  25  * @author Michael Niedermayer <michaelni@gmx.at>
  26  */
  27
  28 #include "libavutil/avassert.h"
  29 #include "libavutil/imgutils.h"
  30 #include "libavutil/stereo3d.h"
  31 #include "libavutil/timer.h"
  32 #include "internal.h"
  33 #include "cabac.h"
  34 #include "cabac_functions.h"
  35 #include "dsputil.h"
  36 #include "error_resilience.h"
  37 #include "avcodec.h"
  38 #include "mpegvideo.h"
  39 #include "h264.h"
  40 #include "h264data.h"
  41 #include "h264chroma.h"
  42 #include "h264_mvpred.h"
  43 #include "golomb.h"
  44 #include "mathops.h"
  45 #include "rectangle.h"
  46 #include "svq3.h"
  47 #include "thread.h"
  48
  49 #include <assert.h>
  50
     /**
      * Four per-macroblock sizes, exported to other translation units.
      * NOTE(review): 256 / 384 / 512 / 768 match a 16x16 luma block alone and
      * with 4:2:0, 4:2:2 and 4:4:4 chroma added, so the index is presumably
      * chroma_format_idc -- confirm against the users of this table.
      */
  51 const uint16_t ff_h264_mb_sizes[4] = { 256, 384, 512, 768 };
  52
  53 static const uint8_t rem6[QP_MAX_NUM + 1] = {
  54     0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
  55     3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
  56     0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
  57 };
  58
  59 static const uint8_t div6[QP_MAX_NUM + 1] = {
  60     0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3,  3,  3,
  61     3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6,  6,  6,
  62     7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
  63 };
  64
     /**
      * 16-entry scan table for a 4x4 block. Every entry is spelled
      * x + y * 4 with x and y in 0..3, and each of the 16 positions occurs
      * exactly once. From the sixth entry on, the order simply runs down each
      * column (x fixed, y = 1..3 or 0..3).
      * NOTE(review): presumably the coefficient scan used for field-coded
      * macroblocks (name-based) -- confirm at the point of use.
      */
  65 static const uint8_t field_scan[16] = {
  66     0 + 0 * 4, 0 + 1 * 4, 1 + 0 * 4, 0 + 2 * 4,
  67     0 + 3 * 4, 1 + 1 * 4, 1 + 2 * 4, 1 + 3 * 4,
  68     2 + 0 * 4, 2 + 1 * 4, 2 + 2 * 4, 2 + 3 * 4,
  69     3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4,
  70 };
  71
     /**
      * 64-entry scan table for an 8x8 block. Every entry is spelled
      * x + y * 8 with x and y in 0..7; the table is a permutation of 0..63.
      * field_scan8x8_cavlc below holds the same entries in a regrouped order.
      * NOTE(review): presumably the 8x8 coefficient scan for field-coded
      * macroblocks (name-based) -- confirm at the point of use.
      */
  72 static const uint8_t field_scan8x8[64] = {
  73     0 + 0 * 8, 0 + 1 * 8, 0 + 2 * 8, 1 + 0 * 8,
  74     1 + 1 * 8, 0 + 3 * 8, 0 + 4 * 8, 1 + 2 * 8,
  75     2 + 0 * 8, 1 + 3 * 8, 0 + 5 * 8, 0 + 6 * 8,
  76     0 + 7 * 8, 1 + 4 * 8, 2 + 1 * 8, 3 + 0 * 8,
  77     2 + 2 * 8, 1 + 5 * 8, 1 + 6 * 8, 1 + 7 * 8,
  78     2 + 3 * 8, 3 + 1 * 8, 4 + 0 * 8, 3 + 2 * 8,
  79     2 + 4 * 8, 2 + 5 * 8, 2 + 6 * 8, 2 + 7 * 8,
  80     3 + 3 * 8, 4 + 1 * 8, 5 + 0 * 8, 4 + 2 * 8,
  81     3 + 4 * 8, 3 + 5 * 8, 3 + 6 * 8, 3 + 7 * 8,
  82     4 + 3 * 8, 5 + 1 * 8, 6 + 0 * 8, 5 + 2 * 8,
  83     4 + 4 * 8, 4 + 5 * 8, 4 + 6 * 8, 4 + 7 * 8,
  84     5 + 3 * 8, 6 + 1 * 8, 6 + 2 * 8, 5 + 4 * 8,
  85     5 + 5 * 8, 5 + 6 * 8, 5 + 7 * 8, 6 + 3 * 8,
  86     7 + 0 * 8, 7 + 1 * 8, 6 + 4 * 8, 6 + 5 * 8,
  87     6 + 6 * 8, 6 + 7 * 8, 7 + 2 * 8, 7 + 3 * 8,
  88     7 + 4 * 8, 7 + 5 * 8, 7 + 6 * 8, 7 + 7 * 8,
  89 };
  90
     /**
      * field_scan8x8 regrouped into four runs of 16 entries: run k holds
      * every fourth entry of field_scan8x8 starting at k, i.e.
      *   field_scan8x8_cavlc[i] == field_scan8x8[4 * (i % 16) + i / 16]
      * NOTE(review): presumably so that each run can be coded as one 4x4
      * block by the CAVLC path (name-based) -- confirm at the point of use.
      */
  91 static const uint8_t field_scan8x8_cavlc[64] = {
  92     0 + 0 * 8, 1 + 1 * 8, 2 + 0 * 8, 0 + 7 * 8,
  93     2 + 2 * 8, 2 + 3 * 8, 2 + 4 * 8, 3 + 3 * 8,
  94     3 + 4 * 8, 4 + 3 * 8, 4 + 4 * 8, 5 + 3 * 8,
  95     5 + 5 * 8, 7 + 0 * 8, 6 + 6 * 8, 7 + 4 * 8,
  96     0 + 1 * 8, 0 + 3 * 8, 1 + 3 * 8, 1 + 4 * 8,
  97     1 + 5 * 8, 3 + 1 * 8, 2 + 5 * 8, 4 + 1 * 8,
  98     3 + 5 * 8, 5 + 1 * 8, 4 + 5 * 8, 6 + 1 * 8,
  99     5 + 6 * 8, 7 + 1 * 8, 6 + 7 * 8, 7 + 5 * 8,
 100     0 + 2 * 8, 0 + 4 * 8, 0 + 5 * 8, 2 + 1 * 8,
 101     1 + 6 * 8, 4 + 0 * 8, 2 + 6 * 8, 5 + 0 * 8,
 102     3 + 6 * 8, 6 + 0 * 8, 4 + 6 * 8, 6 + 2 * 8,
 103     5 + 7 * 8, 6 + 4 * 8, 7 + 2 * 8, 7 + 6 * 8,
 104     1 + 0 * 8, 1 + 2 * 8, 0 + 6 * 8, 3 + 0 * 8,
 105     1 + 7 * 8, 3 + 2 * 8, 2 + 7 * 8, 4 + 2 * 8,
 106     3 + 7 * 8, 5 + 2 * 8, 4 + 7 * 8, 5 + 4 * 8,
 107     6 + 3 * 8, 6 + 5 * 8, 7 + 3 * 8, 7 + 7 * 8,
 108 };
 109
     /*
      * 64-entry scan table for an 8x8 block; every entry is spelled
      * x + y * 8 and the table is a permutation of 0..63.
      * NOTE(review): the formula on the next line looks inverted. The sibling
      * pair in this file satisfies
      *   field_scan8x8_cavlc[(i / 4) + 16 * (i % 4)] == field_scan8x8[i],
      * and the entries here fit the same shape (e.g. entry 16 is 1 + 0 * 8).
      * zigzag_scan8x8 is defined outside this chunk -- confirm before
      * rewording the formula.
      */
 110 // zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)]
 111 static const uint8_t zigzag_scan8x8_cavlc[64] = {
 112     0 + 0 * 8, 1 + 1 * 8, 1 + 2 * 8, 2 + 2 * 8,
 113     4 + 1 * 8, 0 + 5 * 8, 3 + 3 * 8, 7 + 0 * 8,
 114     3 + 4 * 8, 1 + 7 * 8, 5 + 3 * 8, 6 + 3 * 8,
 115     2 + 7 * 8, 6 + 4 * 8, 5 + 6 * 8, 7 + 5 * 8,
 116     1 + 0 * 8, 2 + 0 * 8, 0 + 3 * 8, 3 + 1 * 8,
 117     3 + 2 * 8, 0 + 6 * 8, 4 + 2 * 8, 6 + 1 * 8,
 118     2 + 5 * 8, 2 + 6 * 8, 6 + 2 * 8, 5 + 4 * 8,
 119     3 + 7 * 8, 7 + 3 * 8, 4 + 7 * 8, 7 + 6 * 8,
 120     0 + 1 * 8, 3 + 0 * 8, 0 + 4 * 8, 4 + 0 * 8,
 121     2 + 3 * 8, 1 + 5 * 8, 5 + 1 * 8, 5 + 2 * 8,
 122     1 + 6 * 8, 3 + 5 * 8, 7 + 1 * 8, 4 + 5 * 8,
 123     4 + 6 * 8, 7 + 4 * 8, 5 + 7 * 8, 6 + 7 * 8,
 124     0 + 2 * 8, 2 + 1 * 8, 1 + 3 * 8, 5 + 0 * 8,
 125     1 + 4 * 8, 2 + 4 * 8, 6 + 0 * 8, 4 + 3 * 8,
 126     0 + 7 * 8, 4 + 4 * 8, 7 + 2 * 8, 3 + 6 * 8,
 127     5 + 5 * 8, 6 + 5 * 8, 6 + 6 * 8, 7 + 7 * 8,
 128 };
 129
     /**
      * Seed values for the 4x4 dequantisation tables: 6 rows of 3 values.
      * NOTE(review): presumably one row per (qp % 6) and one column per
      * coefficient-position class -- confirm against the table builder.
      */
 130 static const uint8_t dequant4_coeff_init[6][3] = {
 131     { 10, 13, 16 },
 132     { 11, 14, 18 },
 133     { 13, 16, 20 },
 134     { 14, 18, 23 },
 135     { 16, 20, 25 },
 136     { 18, 23, 29 },
 137 };
 138
     /**
      * 16 selectors, each in the range 0..5.
      * NOTE(review): the range matches the six columns of
      * dequant8_coeff_init, so these presumably pick a column per
      * coefficient position -- confirm against the table builder.
      */
 139 static const uint8_t dequant8_coeff_init_scan[16] = {
 140     0, 3, 4, 3, 3, 1, 5, 1, 4, 5, 2, 5, 3, 1, 5, 1
 141 };
 142
     /**
      * Seed values for the 8x8 dequantisation tables: 6 rows of 6 values.
      * NOTE(review): presumably one row per (qp % 6), with the column chosen
      * through dequant8_coeff_init_scan -- confirm against the table builder.
      */
 143 static const uint8_t dequant8_coeff_init[6][6] = {
 144     { 20, 18, 32, 19, 25, 24 },
 145     { 22, 19, 35, 21, 28, 26 },
 146     { 26, 23, 42, 24, 33, 31 },
 147     { 28, 25, 45, 26, 35, 33 },
 148     { 32, 28, 51, 30, 40, 38 },
 149     { 36, 32, 58, 34, 46, 43 },
 150 };
 151
     /**
      * Pixel format list for 4:2:0 content, terminated by AV_PIX_FMT_NONE.
      * Hardware-accelerated formats come first, each one present only when
      * the matching CONFIG_H264_*_HWACCEL option is non-zero; the software
      * format AV_PIX_FMT_YUV420P is always the last real entry.
      * NOTE(review): presumably handed to the get_format negotiation, where
      * earlier entries are preferred -- confirm at the point of use.
      */
 152 static const enum AVPixelFormat h264_hwaccel_pixfmt_list_420[] = {
 153 #if CONFIG_H264_DXVA2_HWACCEL
 154     AV_PIX_FMT_DXVA2_VLD,
 155 #endif
 156 #if CONFIG_H264_VAAPI_HWACCEL
 157     AV_PIX_FMT_VAAPI_VLD,
 158 #endif
 159 #if CONFIG_H264_VDA_HWACCEL
 160     AV_PIX_FMT_VDA_VLD,
 161 #endif
 162 #if CONFIG_H264_VDPAU_HWACCEL
 163     AV_PIX_FMT_VDPAU,
 164 #endif
 165     AV_PIX_FMT_YUV420P,
 166     AV_PIX_FMT_NONE
 167 };
 168
     /**
      * Same layout as h264_hwaccel_pixfmt_list_420, but the software entry
      * is AV_PIX_FMT_YUVJ420P. The list is terminated by AV_PIX_FMT_NONE and
      * each hardware format is present only when its CONFIG_H264_*_HWACCEL
      * option is non-zero.
      * NOTE(review): presumably selected for full-range ("JPEG") streams --
      * confirm at the point of use.
      */
 169 static const enum AVPixelFormat h264_hwaccel_pixfmt_list_jpeg_420[] = {
 170 #if CONFIG_H264_DXVA2_HWACCEL
 171     AV_PIX_FMT_DXVA2_VLD,
 172 #endif
 173 #if CONFIG_H264_VAAPI_HWACCEL
 174     AV_PIX_FMT_VAAPI_VLD,
 175 #endif
 176 #if CONFIG_H264_VDA_HWACCEL
 177     AV_PIX_FMT_VDA_VLD,
 178 #endif
 179 #if CONFIG_H264_VDPAU_HWACCEL
 180     AV_PIX_FMT_VDPAU,
 181 #endif
 182     AV_PIX_FMT_YUVJ420P,
 183     AV_PIX_FMT_NONE
 184 };
 185
 186 static void h264_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
 187                               int (*mv)[2][4][2],
 188                               int mb_x, int mb_y, int mb_intra, int mb_skipped)
 189 {
 190     H264Context *h = opaque;
 191
 192     h->mb_x  = mb_x;
 193     h->mb_y  = mb_y;
 194     h->mb_xy = mb_x + mb_y * h->mb_stride;
 195     memset(h->non_zero_count_cache, 0, sizeof(h->non_zero_count_cache));
 196     assert(ref >= 0);
 197     /* FIXME: It is possible albeit uncommon that slice references
 198      * differ between slices. We take the easy approach and ignore
 199      * it for now. If this turns out to have any relevance in
 200      * practice then correct remapping should be added. */
 201     if (ref >= h->ref_count[0])
 202         ref = 0;
 203     fill_rectangle(&h->cur_pic.ref_index[0][4 * h->mb_xy],
 204                    2, 2, 2, ref, 1);
 205     fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
 206     fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8,
 207                    pack16to32((*mv)[0][0][0], (*mv)[0][0][1]), 4);
 208     assert(!FRAME_MBAFF(h));
 209     ff_h264_hl_decode_mb(h);
 210 }
 211
 212 void ff_h264_draw_horiz_band(H264Context *h, int y, int height)
 213 {
 214     AVCodecContext *avctx = h->avctx;
 215     AVFrame *cur  = &h->cur_pic.f;
 216     AVFrame *last = h->ref_list[0][0].f.data[0] ? &h->ref_list[0][0].f : NULL;
 217     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
 218     int vshift = desc->log2_chroma_h;
 219     const int field_pic = h->picture_structure != PICT_FRAME;
 220     if (field_pic) {
 221         height <<= 1;
 222         y      <<= 1;
 223     }
 224
 225     height = FFMIN(height, avctx->height - y);
 226
 227     if (field_pic && h->first_field && !(avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD))
 228         return;
 229
 230     if (avctx->draw_horiz_band) {
 231         AVFrame *src;
 232         int offset[AV_NUM_DATA_POINTERS];
 233         int i;
 234
 235         if (cur->pict_type == AV_PICTURE_TYPE_B || h->low_delay ||
 236             (avctx->slice_flags & SLICE_FLAG_CODED_ORDER))
 237             src = cur;
 238         else if (last)
 239             src = last;
 240         else
 241             return;
 242
 243         offset[0] = y * src->linesize[0];
 244         offset[1] =
 245         offset[2] = (y >> vshift) * src->linesize[1];
 246         for (i = 3; i < AV_NUM_DATA_POINTERS; i++)
 247             offset[i] = 0;
 248
 249         emms_c();
 250
 251         avctx->draw_horiz_band(avctx, src, offset,
 252                                y, h->picture_structure, height);
 253     }
 254 }
 255
 256 static void unref_picture(H264Context *h, H264Picture *pic)
 257 {
 258     int off = offsetof(H264Picture, tf) + sizeof(pic->tf);
 259     int i;
 260
 261     if (!pic->f.buf[0])
 262         return;
 263
 264     ff_thread_release_buffer(h->avctx, &pic->tf);
 265     av_buffer_unref(&pic->hwaccel_priv_buf);
 266
 267     av_buffer_unref(&pic->qscale_table_buf);
 268     av_buffer_unref(&pic->mb_type_buf);
 269     for (i = 0; i < 2; i++) {
 270         av_buffer_unref(&pic->motion_val_buf[i]);
 271         av_buffer_unref(&pic->ref_index_buf[i]);
 272     }
 273
 274     memset((uint8_t*)pic + off, 0, sizeof(*pic) - off);
 275 }
 276
 277 static void release_unused_pictures(H264Context *h, int remove_current)
 278 {
 279     int i;
 280
 281     /* release non reference frames */
 282     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
 283         if (h->DPB[i].f.buf[0] && !h->DPB[i].reference &&
 284             (remove_current || &h->DPB[i] != h->cur_pic_ptr)) {
 285             unref_picture(h, &h->DPB[i]);
 286         }
 287     }
 288 }
 289
 290 static int ref_picture(H264Context *h, H264Picture *dst, H264Picture *src)
 291 {
 292     int ret, i;
 293
 294     av_assert0(!dst->f.buf[0]);
 295     av_assert0(src->f.buf[0]);
 296
 297     src->tf.f = &src->f;
 298     dst->tf.f = &dst->f;
 299     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
 300     if (ret < 0)
 301         goto fail;
 302
 303     dst->qscale_table_buf = av_buffer_ref(src->qscale_table_buf);
 304     dst->mb_type_buf      = av_buffer_ref(src->mb_type_buf);
 305     if (!dst->qscale_table_buf || !dst->mb_type_buf)
 306         goto fail;
 307     dst->qscale_table = src->qscale_table;
 308     dst->mb_type      = src->mb_type;
 309
 310     for (i = 0; i < 2; i++) {
 311         dst->motion_val_buf[i] = av_buffer_ref(src->motion_val_buf[i]);
 312         dst->ref_index_buf[i]  = av_buffer_ref(src->ref_index_buf[i]);
 313         if (!dst->motion_val_buf[i] || !dst->ref_index_buf[i])
 314             goto fail;
 315         dst->motion_val[i] = src->motion_val[i];
 316         dst->ref_index[i]  = src->ref_index[i];
 317     }
 318
 319     if (src->hwaccel_picture_private) {
 320         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
 321         if (!dst->hwaccel_priv_buf)
 322             goto fail;
 323         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
 324     }
 325
 326     for (i = 0; i < 2; i++)
 327         dst->field_poc[i] = src->field_poc[i];
 328
 329     memcpy(dst->ref_poc,   src->ref_poc,   sizeof(src->ref_poc));
 330     memcpy(dst->ref_count, src->ref_count, sizeof(src->ref_count));
 331
 332     dst->poc           = src->poc;
 333     dst->frame_num     = src->frame_num;
 334     dst->mmco_reset    = src->mmco_reset;
 335     dst->pic_id        = src->pic_id;
 336     dst->long_ref      = src->long_ref;
 337     dst->mbaff         = src->mbaff;
 338     dst->field_picture = src->field_picture;
 339     dst->needs_realloc = src->needs_realloc;
 340     dst->reference     = src->reference;
 341     dst->recovered     = src->recovered;
 342
 343     return 0;
 344 fail:
 345     unref_picture(h, dst);
 346     return ret;
 347 }
 348
 349 static int alloc_scratch_buffers(H264Context *h, int linesize)
 350 {
 351     int alloc_size = FFALIGN(FFABS(linesize) + 32, 32);
 352
 353     if (h->bipred_scratchpad)
 354         return 0;
 355
 356     h->bipred_scratchpad = av_malloc(16 * 6 * alloc_size);
 357     // edge emu needs blocksize + filter length - 1
 358     // (= 21x21 for  h264)
 359     h->edge_emu_buffer = av_mallocz(alloc_size * 2 * 21);
 360     h->me.scratchpad   = av_mallocz(alloc_size * 2 * 16 * 2);
 361
 362     if (!h->bipred_scratchpad || !h->edge_emu_buffer || !h->me.scratchpad) {
 363         av_freep(&h->bipred_scratchpad);
 364         av_freep(&h->edge_emu_buffer);
 365         av_freep(&h->me.scratchpad);
 366         return AVERROR(ENOMEM);
 367     }
 368
 369     h->me.temp = h->me.scratchpad;
 370
 371     return 0;
 372 }
 373
 374 static int init_table_pools(H264Context *h)
 375 {
 376     const int big_mb_num    = h->mb_stride * (h->mb_height + 1) + 1;
 377     const int mb_array_size = h->mb_stride * h->mb_height;
 378     const int b4_stride     = h->mb_width * 4 + 1;
 379     const int b4_array_size = b4_stride * h->mb_height * 4;
 380
 381     h->qscale_table_pool = av_buffer_pool_init(big_mb_num + h->mb_stride,
 382                                                av_buffer_allocz);
 383     h->mb_type_pool      = av_buffer_pool_init((big_mb_num + h->mb_stride) *
 384                                                sizeof(uint32_t), av_buffer_allocz);
 385     h->motion_val_pool = av_buffer_pool_init(2 * (b4_array_size + 4) *
 386                                              sizeof(int16_t), av_buffer_allocz);
 387     h->ref_index_pool  = av_buffer_pool_init(4 * mb_array_size, av_buffer_allocz);
 388
 389     if (!h->qscale_table_pool || !h->mb_type_pool || !h->motion_val_pool ||
 390         !h->ref_index_pool) {
 391         av_buffer_pool_uninit(&h->qscale_table_pool);
 392         av_buffer_pool_uninit(&h->mb_type_pool);
 393         av_buffer_pool_uninit(&h->motion_val_pool);
 394         av_buffer_pool_uninit(&h->ref_index_pool);
 395         return AVERROR(ENOMEM);
 396     }
 397
 398     return 0;
 399 }
 400
 401 static int alloc_picture(H264Context *h, H264Picture *pic)
 402 {
 403     int i, ret = 0;
 404
 405     av_assert0(!pic->f.data[0]);
 406
 407     pic->tf.f = &pic->f;
 408     ret = ff_thread_get_buffer(h->avctx, &pic->tf, pic->reference ?
 409                                                    AV_GET_BUFFER_FLAG_REF : 0);
 410     if (ret < 0)
 411         goto fail;
 412
 413     h->linesize   = pic->f.linesize[0];
 414     h->uvlinesize = pic->f.linesize[1];
 415
 416     if (h->avctx->hwaccel) {
 417         const AVHWAccel *hwaccel = h->avctx->hwaccel;
 418         av_assert0(!pic->hwaccel_picture_private);
 419         if (hwaccel->priv_data_size) {
 420             pic->hwaccel_priv_buf = av_buffer_allocz(hwaccel->priv_data_size);
 421             if (!pic->hwaccel_priv_buf)
 422                 return AVERROR(ENOMEM);
 423             pic->hwaccel_picture_private = pic->hwaccel_priv_buf->data;
 424         }
 425     }
 426
 427     if (!h->qscale_table_pool) {
 428         ret = init_table_pools(h);
 429         if (ret < 0)
 430             goto fail;
 431     }
 432
 433     pic->qscale_table_buf = av_buffer_pool_get(h->qscale_table_pool);
 434     pic->mb_type_buf      = av_buffer_pool_get(h->mb_type_pool);
 435     if (!pic->qscale_table_buf || !pic->mb_type_buf)
 436         goto fail;
 437
 438     pic->mb_type      = (uint32_t*)pic->mb_type_buf->data + 2 * h->mb_stride + 1;
 439     pic->qscale_table = pic->qscale_table_buf->data + 2 * h->mb_stride + 1;
 440
 441     for (i = 0; i < 2; i++) {
 442         pic->motion_val_buf[i] = av_buffer_pool_get(h->motion_val_pool);
 443         pic->ref_index_buf[i]  = av_buffer_pool_get(h->ref_index_pool);
 444         if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i])
 445             goto fail;
 446
 447         pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
 448         pic->ref_index[i]  = pic->ref_index_buf[i]->data;
 449     }
 450
 451     return 0;
 452 fail:
 453     unref_picture(h, pic);
 454     return (ret < 0) ? ret : AVERROR(ENOMEM);
 455 }
 456
 457 static inline int pic_is_unused(H264Context *h, H264Picture *pic)
 458 {
 459     if (!pic->f.buf[0])
 460         return 1;
 461     if (pic->needs_realloc && !(pic->reference & DELAYED_PIC_REF))
 462         return 1;
 463     return 0;
 464 }
 465
 466 static int find_unused_picture(H264Context *h)
 467 {
 468     int i;
 469
 470     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
 471         if (pic_is_unused(h, &h->DPB[i]))
 472             break;
 473     }
 474     if (i == MAX_PICTURE_COUNT)
 475         return AVERROR_INVALIDDATA;
 476
 477     if (h->DPB[i].needs_realloc) {
 478         h->DPB[i].needs_realloc = 0;
 479         unref_picture(h, &h->DPB[i]);
 480     }
 481
 482     return i;
 483 }
 484
 485 /**
 486  * Check if the top & left blocks are available if needed and
 487  * change the dc mode so it only uses the available blocks.
 488  */
 489 int ff_h264_check_intra4x4_pred_mode(H264Context *h)
 490 {
 491     static const int8_t top[12] = {
 492         -1, 0, LEFT_DC_PRED, -1, -1, -1, -1, -1, 0
 493     };
 494     static const int8_t left[12] = {
 495         0, -1, TOP_DC_PRED, 0, -1, -1, -1, 0, -1, DC_128_PRED
 496     };
 497     int i;
 498
 499     if (!(h->top_samples_available & 0x8000)) {
 500         for (i = 0; i < 4; i++) {
 501             int status = top[h->intra4x4_pred_mode_cache[scan8[0] + i]];
 502             if (status < 0) {
 503                 av_log(h->avctx, AV_LOG_ERROR,
 504                        "top block unavailable for requested intra4x4 mode %d at %d %d\n",
 505                        status, h->mb_x, h->mb_y);
 506                 return AVERROR_INVALIDDATA;
 507             } else if (status) {
 508                 h->intra4x4_pred_mode_cache[scan8[0] + i] = status;
 509             }
 510         }
 511     }
 512
 513     if ((h->left_samples_available & 0x8888) != 0x8888) {
 514         static const int mask[4] = { 0x8000, 0x2000, 0x80, 0x20 };
 515         for (i = 0; i < 4; i++)
 516             if (!(h->left_samples_available & mask[i])) {
 517                 int status = left[h->intra4x4_pred_mode_cache[scan8[0] + 8 * i]];
 518                 if (status < 0) {
 519                     av_log(h->avctx, AV_LOG_ERROR,
 520                            "left block unavailable for requested intra4x4 mode %d at %d %d\n",
 521                            status, h->mb_x, h->mb_y);
 522                     return AVERROR_INVALIDDATA;
 523                 } else if (status) {
 524                     h->intra4x4_pred_mode_cache[scan8[0] + 8 * i] = status;
 525                 }
 526             }
 527     }
 528
 529     return 0;
 530 } // FIXME cleanup like ff_h264_check_intra_pred_mode
 531
 532 /**
 533  * Check if the top & left blocks are available if needed and
 534  * change the dc mode so it only uses the available blocks.
 535  */
 536 int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma)
 537 {
 538     static const int8_t top[4]  = { LEFT_DC_PRED8x8, 1, -1, -1 };
 539     static const int8_t left[5] = { TOP_DC_PRED8x8, -1,  2, -1, DC_128_PRED8x8 };
 540
 541     if (mode > 3U) {
 542         av_log(h->avctx, AV_LOG_ERROR,
 543                "out of range intra chroma pred mode at %d %d\n",
 544                h->mb_x, h->mb_y);
 545         return AVERROR_INVALIDDATA;
 546     }
 547
 548     if (!(h->top_samples_available & 0x8000)) {
 549         mode = top[mode];
 550         if (mode < 0) {
 551             av_log(h->avctx, AV_LOG_ERROR,
 552                    "top block unavailable for requested intra mode at %d %d\n",
 553                    h->mb_x, h->mb_y);
 554             return AVERROR_INVALIDDATA;
 555         }
 556     }
 557
 558     if ((h->left_samples_available & 0x8080) != 0x8080) {
 559         mode = left[mode];
 560         if (is_chroma && (h->left_samples_available & 0x8080)) {
 561             // mad cow disease mode, aka MBAFF + constrained_intra_pred
 562             mode = ALZHEIMER_DC_L0T_PRED8x8 +
 563                    (!(h->left_samples_available & 0x8000)) +
 564                    2 * (mode == DC_128_PRED8x8);
 565         }
 566         if (mode < 0) {
 567             av_log(h->avctx, AV_LOG_ERROR,
 568                    "left block unavailable for requested intra mode at %d %d\n",
 569                    h->mb_x, h->mb_y);
 570             return AVERROR_INVALIDDATA;
 571         }
 572     }
 573
 574     return mode;
 575 }
 576
     /**
      * Parse the one-byte NAL header and return the NAL payload with the
      * escape byte of every 00 00 03 sequence removed.
      *
      * @param h          context; nal_ref_idc and nal_unit_type are set from
      *                   src[0], and one of h->rbsp_buffer[] may be (re)allocated
      * @param src        NAL unit, starting at its header byte
      * @param dst_length set to the number of payload bytes returned
      * @param consumed   set to the number of input bytes used, header included
      * @param length     number of bytes available at src
      * @return src + 1 when no escape had to be removed, otherwise a pointer
      *         into h->rbsp_buffer[] (followed by FF_INPUT_BUFFER_PADDING_SIZE
      *         zero bytes); NULL if that buffer could not be allocated
      */
 577 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src,
 578                                   int *dst_length, int *consumed, int length)
 579 {
 580     int i, si, di;
 581     uint8_t *dst;
 582     int bufidx;
 583
 584     // src[0]&0x80; // forbidden bit
 585     h->nal_ref_idc   = src[0] >> 5;
 586     h->nal_unit_type = src[0] & 0x1F;
 587
     /* skip the header byte; from here on src/length describe the payload */
 588     src++;
 589     length--;
 590
     /* Used with i at a zero byte: if the next two bytes are 00 and 00..03,
      * stop scanning. For anything but 03 (an escape) this is the next
      * start code, so the payload is cut off at i. */
 591 #define STARTCODE_TEST                                                  \
 592     if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {         \
 593         if (src[i + 2] != 3) {                                          \
 594             /* startcode, so we must be past the end */                 \
 595             length = i;                                                 \
 596         }                                                               \
 597         break;                                                          \
 598     }
 599
     /* Fast path: load a whole word per step and skip ahead when the masked
      * test finds no candidate zero byte in it; otherwise move i to the
      * first zero byte and run STARTCODE_TEST. */
 600 #if HAVE_FAST_UNALIGNED
 601 #define FIND_FIRST_ZERO                                                 \
 602     if (i > 0 && !src[i])                                               \
 603         i--;                                                            \
 604     while (src[i])                                                      \
 605         i++
 606
 607 #if HAVE_FAST_64BIT
 608     for (i = 0; i + 1 < length; i += 9) {
 609         if (!((~AV_RN64A(src + i) &
 610                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
 611               0x8000800080008080ULL))
 612             continue;
 613         FIND_FIRST_ZERO;
 614         STARTCODE_TEST;
 615         i -= 7;
 616     }
 617 #else
 618     for (i = 0; i + 1 < length; i += 5) {
 619         if (!((~AV_RN32A(src + i) &
 620                (AV_RN32A(src + i) - 0x01000101U)) &
 621               0x80008080U))
 622             continue;
 623         FIND_FIRST_ZERO;
 624         STARTCODE_TEST;
 625         i -= 3;
 626     }
 627 #endif
 628 #else
     /* Portable path: look at every second byte; on a zero byte step back
      * one if the previous byte is zero as well, then run STARTCODE_TEST. */
 629     for (i = 0; i + 1 < length; i += 2) {
 630         if (src[i])
 631             continue;
 632         if (i > 0 && src[i - 1] == 0)
 633             i--;
 634         STARTCODE_TEST;
 635     }
 636 #endif
 637
     /* the scan ran to the end of the (possibly shortened) payload, so the
      * input can be returned in place */
 638     if (i >= length - 1) { // no escaped 0
 639         *dst_length = length;
 640         *consumed   = length + 1; // +1 for the header
 641         return src;
 642     }
 643
 644     // use second escape buffer for inter data
 645     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0;
 646     av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx],
 647                    length + FF_INPUT_BUFFER_PADDING_SIZE);
 648     dst = h->rbsp_buffer[bufidx];
 649
 650     if (dst == NULL)
 651         return NULL;
 652
     /* the first i bytes were already scanned and need no unescaping */
 653     memcpy(dst, src, i);
 654     si = di = i;
 655     while (si + 2 < length) {
 656         // remove escapes (very rare 1:2^22)
 657         if (src[si + 2] > 3) {
             /* third byte rules out 00 00 0x here: copy two bytes now, the
              * third is copied by the common statement at the loop end */
 658             dst[di++] = src[si++];
 659             dst[di++] = src[si++];
 660         } else if (src[si] == 0 && src[si + 1] == 0) {
 661             if (src[si + 2] == 3) { // escape
 662                 dst[di++]  = 0;
 663                 dst[di++]  = 0;
 664                 si        += 3;
 665                 continue;
 666             } else // next start code
 667                 goto nsc;
 668         }
 669
 670         dst[di++] = src[si++];
 671     }
     /* fewer than three bytes left: they cannot form an escape */
 672     while (si < length)
 673         dst[di++] = src[si++];
 674
 675 nsc:
 676     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
 677
 678     *dst_length = di;
 679     *consumed   = si + 1; // +1 for the header
 680     /* FIXME store exact number of bits in the getbitcontext
 681      * (it is needed for decoding) */
 682     return dst;
 683 }
 684
 685 /**
 686  * Identify the exact end of the bitstream
 687  * @return the length of the trailing, or 0 if damaged
 688  */
 689 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src)
 690 {
 691     int v = *src;
 692     int r;
 693
 694     tprintf(h->avctx, "rbsp trailing %X\n", v);
 695
 696     for (r = 1; r < 9; r++) {
 697         if (v & 1)
 698             return r;
 699         v >>= 1;
 700     }
 701     return 0;
 702 }
 703
 704 static inline int get_lowest_part_list_y(H264Context *h, H264Picture *pic, int n,
 705                                          int height, int y_offset, int list)
 706 {
 707     int raw_my             = h->mv_cache[list][scan8[n]][1];
 708     int filter_height_up   = (raw_my & 3) ? 2 : 0;
 709     int filter_height_down = (raw_my & 3) ? 3 : 0;
 710     int full_my            = (raw_my >> 2) + y_offset;
 711     int top                = full_my - filter_height_up;
 712     int bottom             = full_my + filter_height_down + height;
 713
 714     return FFMAX(abs(top), bottom);
 715 }
 716
 717 static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n,
 718                                      int height, int y_offset, int list0,
 719                                      int list1, int *nrefs)
 720 {
 721     int my;
 722
 723     y_offset += 16 * (h->mb_y >> MB_FIELD(h));
 724
 725     if (list0) {
 726         int ref_n = h->ref_cache[0][scan8[n]];
 727         H264Picture *ref = &h->ref_list[0][ref_n];
 728
 729         // Error resilience puts the current picture in the ref list.
 730         // Don't try to wait on these as it will cause a deadlock.
 731         // Fields can wait on each other, though.
 732         if (ref->tf.progress->data != h->cur_pic.tf.progress->data ||
 733             (ref->reference & 3) != h->picture_structure) {
 734             my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
 735             if (refs[0][ref_n] < 0)
 736                 nrefs[0] += 1;
 737             refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
 738         }
 739     }
 740
 741     if (list1) {
 742         int ref_n    = h->ref_cache[1][scan8[n]];
 743         H264Picture *ref = &h->ref_list[1][ref_n];
 744
 745         if (ref->tf.progress->data != h->cur_pic.tf.progress->data ||
 746             (ref->reference & 3) != h->picture_structure) {
 747             my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
 748             if (refs[1][ref_n] < 0)
 749                 nrefs[1] += 1;
 750             refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
 751         }
 752     }
 753 }
 754
 755 /**
 756  * Wait until all reference frames are available for MC operations.
 757  *
 758  * @param h the H264 context
 759  */
 760 static void await_references(H264Context *h)
 761 {
 762     const int mb_xy   = h->mb_xy;
 763     const int mb_type = h->cur_pic.mb_type[mb_xy];
 764     int refs[2][48];
 765     int nrefs[2] = { 0 };
 766     int ref, list;
 767
 768     memset(refs, -1, sizeof(refs));
 769
 770     if (IS_16X16(mb_type)) {
 771         get_lowest_part_y(h, refs, 0, 16, 0,
 772                           IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
 773     } else if (IS_16X8(mb_type)) {
 774         get_lowest_part_y(h, refs, 0, 8, 0,
 775                           IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
 776         get_lowest_part_y(h, refs, 8, 8, 8,
 777                           IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
 778     } else if (IS_8X16(mb_type)) {
 779         get_lowest_part_y(h, refs, 0, 16, 0,
 780                           IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
 781         get_lowest_part_y(h, refs, 4, 16, 0,
 782                           IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
 783     } else {
 784         int i;
 785
 786         assert(IS_8X8(mb_type));
 787
 788         for (i = 0; i < 4; i++) {
 789             const int sub_mb_type = h->sub_mb_type[i];
 790             const int n           = 4 * i;
 791             int y_offset          = (i & 2) << 2;
 792
 793             if (IS_SUB_8X8(sub_mb_type)) {
 794                 get_lowest_part_y(h, refs, n, 8, y_offset,
 795                                   IS_DIR(sub_mb_type, 0, 0),
 796                                   IS_DIR(sub_mb_type, 0, 1),
 797                                   nrefs);
 798             } else if (IS_SUB_8X4(sub_mb_type)) {
 799                 get_lowest_part_y(h, refs, n, 4, y_offset,
 800                                   IS_DIR(sub_mb_type, 0, 0),
 801                                   IS_DIR(sub_mb_type, 0, 1),
 802                                   nrefs);
 803                 get_lowest_part_y(h, refs, n + 2, 4, y_offset + 4,
 804                                   IS_DIR(sub_mb_type, 0, 0),
 805                                   IS_DIR(sub_mb_type, 0, 1),
 806                                   nrefs);
 807             } else if (IS_SUB_4X8(sub_mb_type)) {
 808                 get_lowest_part_y(h, refs, n, 8, y_offset,
 809                                   IS_DIR(sub_mb_type, 0, 0),
 810                                   IS_DIR(sub_mb_type, 0, 1),
 811                                   nrefs);
 812                 get_lowest_part_y(h, refs, n + 1, 8, y_offset,
 813                                   IS_DIR(sub_mb_type, 0, 0),
 814                                   IS_DIR(sub_mb_type, 0, 1),
 815                                   nrefs);
 816             } else {
 817                 int j;
 818                 assert(IS_SUB_4X4(sub_mb_type));
 819                 for (j = 0; j < 4; j++) {
 820                     int sub_y_offset = y_offset + 2 * (j & 2);
 821                     get_lowest_part_y(h, refs, n + j, 4, sub_y_offset,
 822                                       IS_DIR(sub_mb_type, 0, 0),
 823                                       IS_DIR(sub_mb_type, 0, 1),
 824                                       nrefs);
 825                 }
 826             }
 827         }
 828     }
 829
 830     for (list = h->list_count - 1; list >= 0; list--)
 831         for (ref = 0; ref < 48 && nrefs[list]; ref++) {
 832             int row = refs[list][ref];
 833             if (row >= 0) {
 834                 H264Picture *ref_pic  = &h->ref_list[list][ref];
 835                 int ref_field         = ref_pic->reference - 1;
 836                 int ref_field_picture = ref_pic->field_picture;
 837                 int pic_height        = 16 * h->mb_height >> ref_field_picture;
 838
 839                 row <<= MB_MBAFF(h);
 840                 nrefs[list]--;
 841
 842                 if (!FIELD_PICTURE(h) && ref_field_picture) { // frame referencing two fields
 843                     ff_thread_await_progress(&ref_pic->tf,
 844                                              FFMIN((row >> 1) - !(row & 1),
 845                                                    pic_height - 1),
 846                                              1);
 847                     ff_thread_await_progress(&ref_pic->tf,
 848                                              FFMIN((row >> 1), pic_height - 1),
 849                                              0);
 850                 } else if (FIELD_PICTURE(h) && !ref_field_picture) { // field referencing one field of a frame
 851                     ff_thread_await_progress(&ref_pic->tf,
 852                                              FFMIN(row * 2 + ref_field,
 853                                                    pic_height - 1),
 854                                              0);
 855                 } else if (FIELD_PICTURE(h)) {
 856                     ff_thread_await_progress(&ref_pic->tf,
 857                                              FFMIN(row, pic_height - 1),
 858                                              ref_field);
 859                 } else {
 860                     ff_thread_await_progress(&ref_pic->tf,
 861                                              FFMIN(row, pic_height - 1),
 862                                              0);
 863                 }
 864             }
 865         }
 866 }
 867
 868 static av_always_inline void mc_dir_part(H264Context *h, H264Picture *pic,
 869                                          int n, int square, int height,
 870                                          int delta, int list,
 871                                          uint8_t *dest_y, uint8_t *dest_cb,
 872                                          uint8_t *dest_cr,
 873                                          int src_x_offset, int src_y_offset,
 874                                          qpel_mc_func *qpix_op,
 875                                          h264_chroma_mc_func chroma_op,
 876                                          int pixel_shift, int chroma_idc)
 877 {
 878     const int mx      = h->mv_cache[list][scan8[n]][0] + src_x_offset * 8;
 879     int my            = h->mv_cache[list][scan8[n]][1] + src_y_offset * 8;
 880     const int luma_xy = (mx & 3) + ((my & 3) << 2);
 881     ptrdiff_t offset  = ((mx >> 2) << pixel_shift) + (my >> 2) * h->mb_linesize;
 882     uint8_t *src_y    = pic->f.data[0] + offset;
 883     uint8_t *src_cb, *src_cr;
 884     int extra_width  = 0;
 885     int extra_height = 0;
 886     int emu = 0;
 887     const int full_mx    = mx >> 2;
 888     const int full_my    = my >> 2;
 889     const int pic_width  = 16 * h->mb_width;
 890     const int pic_height = 16 * h->mb_height >> MB_FIELD(h);
 891     int ysh;
 892
 893     if (mx & 7)
 894         extra_width -= 3;
 895     if (my & 7)
 896         extra_height -= 3;
 897
 898     if (full_mx                <          0 - extra_width  ||
 899         full_my                <          0 - extra_height ||
 900         full_mx + 16 /*FIXME*/ > pic_width  + extra_width  ||
 901         full_my + 16 /*FIXME*/ > pic_height + extra_height) {
 902         h->vdsp.emulated_edge_mc(h->edge_emu_buffer,
 903                                  src_y - (2 << pixel_shift) - 2 * h->mb_linesize,
 904                                  h->mb_linesize, h->mb_linesize,
 905                                  16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
 906                                  full_my - 2, pic_width, pic_height);
 907         src_y = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
 908         emu   = 1;
 909     }
 910
 911     qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); // FIXME try variable height perhaps?
 912     if (!square)
 913         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
 914
 915     if (CONFIG_GRAY && h->flags & CODEC_FLAG_GRAY)
 916         return;
 917
 918     if (chroma_idc == 3 /* yuv444 */) {
 919         src_cb = pic->f.data[1] + offset;
 920         if (emu) {
 921             h->vdsp.emulated_edge_mc(h->edge_emu_buffer,
 922                                      src_cb - (2 << pixel_shift) - 2 * h->mb_linesize,
 923                                      h->mb_linesize, h->mb_linesize,
 924                                      16 + 5, 16 + 5 /*FIXME*/,
 925                                      full_mx - 2, full_my - 2,
 926                                      pic_width, pic_height);
 927             src_cb = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
 928         }
 929         qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); // FIXME try variable height perhaps?
 930         if (!square)
 931             qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
 932
 933         src_cr = pic->f.data[2] + offset;
 934         if (emu) {
 935             h->vdsp.emulated_edge_mc(h->edge_emu_buffer,
 936                                      src_cr - (2 << pixel_shift) - 2 * h->mb_linesize,
 937                                      h->mb_linesize, h->mb_linesize,
 938                                      16 + 5, 16 + 5 /*FIXME*/,
 939                                      full_mx - 2, full_my - 2,
 940                                      pic_width, pic_height);
 941             src_cr = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
 942         }
 943         qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); // FIXME try variable height perhaps?
 944         if (!square)
 945             qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
 946         return;
 947     }
 948
 949     ysh = 3 - (chroma_idc == 2 /* yuv422 */);
 950     if (chroma_idc == 1 /* yuv420 */ && MB_FIELD(h)) {
 951         // chroma offset when predicting from a field of opposite parity
 952         my  += 2 * ((h->mb_y & 1) - (pic->reference - 1));
 953         emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
 954     }
 955
 956     src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) +
 957              (my >> ysh) * h->mb_uvlinesize;
 958     src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) +
 959              (my >> ysh) * h->mb_uvlinesize;
 960
 961     if (emu) {
 962         h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cb,
 963                                  h->mb_uvlinesize, h->mb_uvlinesize,
 964                                  9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
 965                                  pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
 966         src_cb = h->edge_emu_buffer;
 967     }
 968     chroma_op(dest_cb, src_cb, h->mb_uvlinesize,
 969               height >> (chroma_idc == 1 /* yuv420 */),
 970               mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
 971
 972     if (emu) {
 973         h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cr,
 974                                  h->mb_uvlinesize, h->mb_uvlinesize,
 975                                  9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
 976                                  pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
 977         src_cr = h->edge_emu_buffer;
 978     }
 979     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
 980               mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
 981 }
 982
 983 static av_always_inline void mc_part_std(H264Context *h, int n, int square,
 984                                          int height, int delta,
 985                                          uint8_t *dest_y, uint8_t *dest_cb,
 986                                          uint8_t *dest_cr,
 987                                          int x_offset, int y_offset,
 988                                          qpel_mc_func *qpix_put,
 989                                          h264_chroma_mc_func chroma_put,
 990                                          qpel_mc_func *qpix_avg,
 991                                          h264_chroma_mc_func chroma_avg,
 992                                          int list0, int list1,
 993                                          int pixel_shift, int chroma_idc)
 994 {
 995     qpel_mc_func *qpix_op         = qpix_put;
 996     h264_chroma_mc_func chroma_op = chroma_put;
 997
 998     dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
 999     if (chroma_idc == 3 /* yuv444 */) {
1000         dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
1001         dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
1002     } else if (chroma_idc == 2 /* yuv422 */) {
1003         dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
1004         dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
1005     } else { /* yuv420 */
1006         dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
1007         dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
1008     }
1009     x_offset += 8 * h->mb_x;
1010     y_offset += 8 * (h->mb_y >> MB_FIELD(h));
1011
1012     if (list0) {
1013         H264Picture *ref = &h->ref_list[0][h->ref_cache[0][scan8[n]]];
1014         mc_dir_part(h, ref, n, square, height, delta, 0,
1015                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
1016                     qpix_op, chroma_op, pixel_shift, chroma_idc);
1017
1018         qpix_op   = qpix_avg;
1019         chroma_op = chroma_avg;
1020     }
1021
1022     if (list1) {
1023         H264Picture *ref = &h->ref_list[1][h->ref_cache[1][scan8[n]]];
1024         mc_dir_part(h, ref, n, square, height, delta, 1,
1025                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
1026                     qpix_op, chroma_op, pixel_shift, chroma_idc);
1027     }
1028 }
1029
1030 static av_always_inline void mc_part_weighted(H264Context *h, int n, int square,
1031                                               int height, int delta,
1032                                               uint8_t *dest_y, uint8_t *dest_cb,
1033                                               uint8_t *dest_cr,
1034                                               int x_offset, int y_offset,
1035                                               qpel_mc_func *qpix_put,
1036                                               h264_chroma_mc_func chroma_put,
1037                                               h264_weight_func luma_weight_op,
1038                                               h264_weight_func chroma_weight_op,
1039                                               h264_biweight_func luma_weight_avg,
1040                                               h264_biweight_func chroma_weight_avg,
1041                                               int list0, int list1,
1042                                               int pixel_shift, int chroma_idc)
1043 {
1044     int chroma_height;
1045
1046     dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
1047     if (chroma_idc == 3 /* yuv444 */) {
1048         chroma_height     = height;
1049         chroma_weight_avg = luma_weight_avg;
1050         chroma_weight_op  = luma_weight_op;
1051         dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
1052         dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
1053     } else if (chroma_idc == 2 /* yuv422 */) {
1054         chroma_height = height;
1055         dest_cb      += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
1056         dest_cr      += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
1057     } else { /* yuv420 */
1058         chroma_height = height >> 1;
1059         dest_cb      += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
1060         dest_cr      += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
1061     }
1062     x_offset += 8 * h->mb_x;
1063     y_offset += 8 * (h->mb_y >> MB_FIELD(h));
1064
1065     if (list0 && list1) {
1066         /* don't optimize for luma-only case, since B-frames usually
1067          * use implicit weights => chroma too. */
1068         uint8_t *tmp_cb = h->bipred_scratchpad;
1069         uint8_t *tmp_cr = h->bipred_scratchpad + (16 << pixel_shift);
1070         uint8_t *tmp_y  = h->bipred_scratchpad + 16 * h->mb_uvlinesize;
1071         int refn0       = h->ref_cache[0][scan8[n]];
1072         int refn1       = h->ref_cache[1][scan8[n]];
1073
1074         mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
1075                     dest_y, dest_cb, dest_cr,
1076                     x_offset, y_offset, qpix_put, chroma_put,
1077                     pixel_shift, chroma_idc);
1078         mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
1079                     tmp_y, tmp_cb, tmp_cr,
1080                     x_offset, y_offset, qpix_put, chroma_put,
1081                     pixel_shift, chroma_idc);
1082
1083         if (h->use_weight == 2) {
1084             int weight0 = h->implicit_weight[refn0][refn1][h->mb_y & 1];
1085             int weight1 = 64 - weight0;
1086             luma_weight_avg(dest_y, tmp_y, h->mb_linesize,
1087                             height, 5, weight0, weight1, 0);
1088             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize,
1089                               chroma_height, 5, weight0, weight1, 0);
1090             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize,
1091                               chroma_height, 5, weight0, weight1, 0);
1092         } else {
1093             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height,
1094                             h->luma_log2_weight_denom,
1095                             h->luma_weight[refn0][0][0],
1096                             h->luma_weight[refn1][1][0],
1097                             h->luma_weight[refn0][0][1] +
1098                             h->luma_weight[refn1][1][1]);
1099             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height,
1100                               h->chroma_log2_weight_denom,
1101                               h->chroma_weight[refn0][0][0][0],
1102                               h->chroma_weight[refn1][1][0][0],
1103                               h->chroma_weight[refn0][0][0][1] +
1104                               h->chroma_weight[refn1][1][0][1]);
1105             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height,
1106                               h->chroma_log2_weight_denom,
1107                               h->chroma_weight[refn0][0][1][0],
1108                               h->chroma_weight[refn1][1][1][0],
1109                               h->chroma_weight[refn0][0][1][1] +
1110                               h->chroma_weight[refn1][1][1][1]);
1111         }
1112     } else {
1113         int list     = list1 ? 1 : 0;
1114         int refn     = h->ref_cache[list][scan8[n]];
1115         H264Picture *ref = &h->ref_list[list][refn];
1116         mc_dir_part(h, ref, n, square, height, delta, list,
1117                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
1118                     qpix_put, chroma_put, pixel_shift, chroma_idc);
1119
1120         luma_weight_op(dest_y, h->mb_linesize, height,
1121                        h->luma_log2_weight_denom,
1122                        h->luma_weight[refn][list][0],
1123                        h->luma_weight[refn][list][1]);
1124         if (h->use_weight_chroma) {
1125             chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height,
1126                              h->chroma_log2_weight_denom,
1127                              h->chroma_weight[refn][list][0][0],
1128                              h->chroma_weight[refn][list][0][1]);
1129             chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height,
1130                              h->chroma_log2_weight_denom,
1131                              h->chroma_weight[refn][list][1][0],
1132                              h->chroma_weight[refn][list][1][1]);
1133         }
1134     }
1135 }
1136
1137 static av_always_inline void prefetch_motion(H264Context *h, int list,
1138                                              int pixel_shift, int chroma_idc)
1139 {
1140     /* fetch pixels for estimated mv 4 macroblocks ahead
1141      * optimized for 64byte cache lines */
1142     const int refn = h->ref_cache[list][scan8[0]];
1143     if (refn >= 0) {
1144         const int mx  = (h->mv_cache[list][scan8[0]][0] >> 2) + 16 * h->mb_x + 8;
1145         const int my  = (h->mv_cache[list][scan8[0]][1] >> 2) + 16 * h->mb_y;
1146         uint8_t **src = h->ref_list[list][refn].f.data;
1147         int off       = (mx << pixel_shift) +
1148                         (my + (h->mb_x & 3) * 4) * h->mb_linesize +
1149                         (64 << pixel_shift);
1150         h->vdsp.prefetch(src[0] + off, h->linesize, 4);
1151         if (chroma_idc == 3 /* yuv444 */) {
1152             h->vdsp.prefetch(src[1] + off, h->linesize, 4);
1153             h->vdsp.prefetch(src[2] + off, h->linesize, 4);
1154         } else {
1155             off = ((mx >> 1) << pixel_shift) +
1156                   ((my >> 1) + (h->mb_x & 7)) * h->uvlinesize +
1157                   (64 << pixel_shift);
1158             h->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1159         }
1160     }
1161 }
1162
1163 static void free_tables(H264Context *h, int free_rbsp)
1164 {
1165     int i;
1166     H264Context *hx;
1167
1168     av_freep(&h->intra4x4_pred_mode);
1169     av_freep(&h->chroma_pred_mode_table);
1170     av_freep(&h->cbp_table);
1171     av_freep(&h->mvd_table[0]);
1172     av_freep(&h->mvd_table[1]);
1173     av_freep(&h->direct_table);
1174     av_freep(&h->non_zero_count);
1175     av_freep(&h->slice_table_base);
1176     h->slice_table = NULL;
1177     av_freep(&h->list_counts);
1178
1179     av_freep(&h->mb2b_xy);
1180     av_freep(&h->mb2br_xy);
1181
1182     av_buffer_pool_uninit(&h->qscale_table_pool);
1183     av_buffer_pool_uninit(&h->mb_type_pool);
1184     av_buffer_pool_uninit(&h->motion_val_pool);
1185     av_buffer_pool_uninit(&h->ref_index_pool);
1186
1187     if (free_rbsp && h->DPB) {
1188         for (i = 0; i < MAX_PICTURE_COUNT; i++)
1189             unref_picture(h, &h->DPB[i]);
1190         av_freep(&h->DPB);
1191     } else if (h->DPB) {
1192         for (i = 0; i < MAX_PICTURE_COUNT; i++)
1193             h->DPB[i].needs_realloc = 1;
1194     }
1195
1196     h->cur_pic_ptr = NULL;
1197
1198     for (i = 0; i < MAX_THREADS; i++) {
1199         hx = h->thread_context[i];
1200         if (!hx)
1201             continue;
1202         av_freep(&hx->top_borders[1]);
1203         av_freep(&hx->top_borders[0]);
1204         av_freep(&hx->bipred_scratchpad);
1205         av_freep(&hx->edge_emu_buffer);
1206         av_freep(&hx->dc_val_base);
1207         av_freep(&hx->me.scratchpad);
1208         av_freep(&hx->er.mb_index2xy);
1209         av_freep(&hx->er.error_status_table);
1210         av_freep(&hx->er.er_temp_buffer);
1211         av_freep(&hx->er.mbintra_table);
1212         av_freep(&hx->er.mbskip_table);
1213
1214         if (free_rbsp) {
1215             av_freep(&hx->rbsp_buffer[1]);
1216             av_freep(&hx->rbsp_buffer[0]);
1217             hx->rbsp_buffer_size[0] = 0;
1218             hx->rbsp_buffer_size[1] = 0;
1219         }
1220         if (i)
1221             av_freep(&h->thread_context[i]);
1222     }
1223 }
1224
1225 static void init_dequant8_coeff_table(H264Context *h)
1226 {
1227     int i, j, q, x;
1228     const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8);
1229
1230     for (i = 0; i < 6; i++) {
1231         h->dequant8_coeff[i] = h->dequant8_buffer[i];
1232         for (j = 0; j < i; j++)
1233             if (!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i],
1234                         64 * sizeof(uint8_t))) {
1235                 h->dequant8_coeff[i] = h->dequant8_buffer[j];
1236                 break;
1237             }
1238         if (j < i)
1239             continue;
1240
1241         for (q = 0; q < max_qp + 1; q++) {
1242             int shift = div6[q];
1243             int idx   = rem6[q];
1244             for (x = 0; x < 64; x++)
1245                 h->dequant8_coeff[i][q][(x >> 3) | ((x & 7) << 3)] =
1246                     ((uint32_t)dequant8_coeff_init[idx][dequant8_coeff_init_scan[((x >> 1) & 12) | (x & 3)]] *
1247                      h->pps.scaling_matrix8[i][x]) << shift;
1248         }
1249     }
1250 }
1251
1252 static void init_dequant4_coeff_table(H264Context *h)
1253 {
1254     int i, j, q, x;
1255     const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8);
1256     for (i = 0; i < 6; i++) {
1257         h->dequant4_coeff[i] = h->dequant4_buffer[i];
1258         for (j = 0; j < i; j++)
1259             if (!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i],
1260                         16 * sizeof(uint8_t))) {
1261                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
1262                 break;
1263             }
1264         if (j < i)
1265             continue;
1266
1267         for (q = 0; q < max_qp + 1; q++) {
1268             int shift = div6[q] + 2;
1269             int idx   = rem6[q];
1270             for (x = 0; x < 16; x++)
1271                 h->dequant4_coeff[i][q][(x >> 2) | ((x << 2) & 0xF)] =
1272                     ((uint32_t)dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] *
1273                      h->pps.scaling_matrix4[i][x]) << shift;
1274         }
1275     }
1276 }
1277
1278 static void init_dequant_tables(H264Context *h)
1279 {
1280     int i, x;
1281     init_dequant4_coeff_table(h);
1282     if (h->pps.transform_8x8_mode)
1283         init_dequant8_coeff_table(h);
1284     if (h->sps.transform_bypass) {
1285         for (i = 0; i < 6; i++)
1286             for (x = 0; x < 16; x++)
1287                 h->dequant4_coeff[i][0][x] = 1 << 6;
1288         if (h->pps.transform_8x8_mode)
1289             for (i = 0; i < 6; i++)
1290                 for (x = 0; x < 64; x++)
1291                     h->dequant8_coeff[i][0][x] = 1 << 6;
1292     }
1293 }
1294
1295 int ff_h264_alloc_tables(H264Context *h)
1296 {
1297     const int big_mb_num = h->mb_stride * (h->mb_height + 1);
1298     const int row_mb_num = h->mb_stride * 2 * h->avctx->thread_count;
1299     int x, y, i;
1300
1301     FF_ALLOCZ_OR_GOTO(h->avctx, h->intra4x4_pred_mode,
1302                       row_mb_num * 8 * sizeof(uint8_t), fail)
1303     FF_ALLOCZ_OR_GOTO(h->avctx, h->non_zero_count,
1304                       big_mb_num * 48 * sizeof(uint8_t), fail)
1305     FF_ALLOCZ_OR_GOTO(h->avctx, h->slice_table_base,
1306                       (big_mb_num + h->mb_stride) * sizeof(*h->slice_table_base), fail)
1307     FF_ALLOCZ_OR_GOTO(h->avctx, h->cbp_table,
1308                       big_mb_num * sizeof(uint16_t), fail)
1309     FF_ALLOCZ_OR_GOTO(h->avctx, h->chroma_pred_mode_table,
1310                       big_mb_num * sizeof(uint8_t), fail)
1311     FF_ALLOCZ_OR_GOTO(h->avctx, h->mvd_table[0],
1312                       16 * row_mb_num * sizeof(uint8_t), fail);
1313     FF_ALLOCZ_OR_GOTO(h->avctx, h->mvd_table[1],
1314                       16 * row_mb_num * sizeof(uint8_t), fail);
1315     FF_ALLOCZ_OR_GOTO(h->avctx, h->direct_table,
1316                       4 * big_mb_num * sizeof(uint8_t), fail);
1317     FF_ALLOCZ_OR_GOTO(h->avctx, h->list_counts,
1318                       big_mb_num * sizeof(uint8_t), fail)
1319
1320     memset(h->slice_table_base, -1,
1321            (big_mb_num + h->mb_stride) * sizeof(*h->slice_table_base));
1322     h->slice_table = h->slice_table_base + h->mb_stride * 2 + 1;
1323
1324     FF_ALLOCZ_OR_GOTO(h->avctx, h->mb2b_xy,
1325                       big_mb_num * sizeof(uint32_t), fail);
1326     FF_ALLOCZ_OR_GOTO(h->avctx, h->mb2br_xy,
1327                       big_mb_num * sizeof(uint32_t), fail);
1328     for (y = 0; y < h->mb_height; y++)
1329         for (x = 0; x < h->mb_width; x++) {
1330             const int mb_xy = x + y * h->mb_stride;
1331             const int b_xy  = 4 * x + 4 * y * h->b_stride;
1332
1333             h->mb2b_xy[mb_xy]  = b_xy;
1334             h->mb2br_xy[mb_xy] = 8 * (FMO ? mb_xy : (mb_xy % (2 * h->mb_stride)));
1335         }
1336
1337     if (!h->dequant4_coeff[0])
1338         init_dequant_tables(h);
1339
1340     if (!h->DPB) {
1341         h->DPB = av_mallocz_array(MAX_PICTURE_COUNT, sizeof(*h->DPB));
1342         if (!h->DPB)
1343             return AVERROR(ENOMEM);
1344         for (i = 0; i < MAX_PICTURE_COUNT; i++)
1345             av_frame_unref(&h->DPB[i].f);
1346         av_frame_unref(&h->cur_pic.f);
1347     }
1348
1349     return 0;
1350
1351 fail:
1352     free_tables(h, 1);
1353     return AVERROR(ENOMEM);
1354 }
1355
1356 /**
1357  * Mimic alloc_tables(), but for every context thread.
1358  */
1359 static void clone_tables(H264Context *dst, H264Context *src, int i)
1360 {
1361     dst->intra4x4_pred_mode     = src->intra4x4_pred_mode + i * 8 * 2 * src->mb_stride;
1362     dst->non_zero_count         = src->non_zero_count;
1363     dst->slice_table            = src->slice_table;
1364     dst->cbp_table              = src->cbp_table;
1365     dst->mb2b_xy                = src->mb2b_xy;
1366     dst->mb2br_xy               = src->mb2br_xy;
1367     dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
1368     dst->mvd_table[0]           = src->mvd_table[0] + i * 8 * 2 * src->mb_stride;
1369     dst->mvd_table[1]           = src->mvd_table[1] + i * 8 * 2 * src->mb_stride;
1370     dst->direct_table           = src->direct_table;
1371     dst->list_counts            = src->list_counts;
1372     dst->DPB                    = src->DPB;
1373     dst->cur_pic_ptr            = src->cur_pic_ptr;
1374     dst->cur_pic                = src->cur_pic;
1375     dst->bipred_scratchpad      = NULL;
1376     dst->edge_emu_buffer        = NULL;
1377     dst->me.scratchpad          = NULL;
1378     ff_h264_pred_init(&dst->hpc, src->avctx->codec_id, src->sps.bit_depth_luma,
1379                       src->sps.chroma_format_idc);
1380 }
1381
1382 /**
1383  * Init context
1384  * Allocate buffers which are not shared amongst multiple threads.
1385  */
1386 static int context_init(H264Context *h)
1387 {
1388     ERContext *er = &h->er;
1389     int mb_array_size = h->mb_height * h->mb_stride;
1390     int y_size  = (2 * h->mb_width + 1) * (2 * h->mb_height + 1);
1391     int c_size  = h->mb_stride * (h->mb_height + 1);
1392     int yc_size = y_size + 2   * c_size;
1393     int x, y, i;
1394
1395     FF_ALLOCZ_OR_GOTO(h->avctx, h->top_borders[0],
1396                       h->mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail)
1397     FF_ALLOCZ_OR_GOTO(h->avctx, h->top_borders[1],
1398                       h->mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail)
1399
1400     h->ref_cache[0][scan8[5]  + 1] =
1401     h->ref_cache[0][scan8[7]  + 1] =
1402     h->ref_cache[0][scan8[13] + 1] =
1403     h->ref_cache[1][scan8[5]  + 1] =
1404     h->ref_cache[1][scan8[7]  + 1] =
1405     h->ref_cache[1][scan8[13] + 1] = PART_NOT_AVAILABLE;
1406
1407     if (CONFIG_ERROR_RESILIENCE) {
1408         /* init ER */
1409         er->avctx          = h->avctx;
1410         er->dsp            = &h->dsp;
1411         er->decode_mb      = h264_er_decode_mb;
1412         er->opaque         = h;
1413         er->quarter_sample = 1;
1414
1415         er->mb_num      = h->mb_num;
1416         er->mb_width    = h->mb_width;
1417         er->mb_height   = h->mb_height;
1418         er->mb_stride   = h->mb_stride;
1419         er->b8_stride   = h->mb_width * 2 + 1;
1420
1421         FF_ALLOCZ_OR_GOTO(h->avctx, er->mb_index2xy, (h->mb_num + 1) * sizeof(int),
1422                           fail); // error ressilience code looks cleaner with this
1423         for (y = 0; y < h->mb_height; y++)
1424             for (x = 0; x < h->mb_width; x++)
1425                 er->mb_index2xy[x + y * h->mb_width] = x + y * h->mb_stride;
1426
1427         er->mb_index2xy[h->mb_height * h->mb_width] = (h->mb_height - 1) *
1428                                                       h->mb_stride + h->mb_width;
1429
1430         FF_ALLOCZ_OR_GOTO(h->avctx, er->error_status_table,
1431                           mb_array_size * sizeof(uint8_t), fail);
1432
1433         FF_ALLOC_OR_GOTO(h->avctx, er->mbintra_table, mb_array_size, fail);
1434         memset(er->mbintra_table, 1, mb_array_size);
1435
1436         FF_ALLOCZ_OR_GOTO(h->avctx, er->mbskip_table, mb_array_size + 2, fail);
1437
1438         FF_ALLOC_OR_GOTO(h->avctx, er->er_temp_buffer, h->mb_height * h->mb_stride,
1439                          fail);
1440
1441         FF_ALLOCZ_OR_GOTO(h->avctx, h->dc_val_base, yc_size * sizeof(int16_t), fail);
1442         er->dc_val[0] = h->dc_val_base + h->mb_width * 2 + 2;
1443         er->dc_val[1] = h->dc_val_base + y_size + h->mb_stride + 1;
1444         er->dc_val[2] = er->dc_val[1] + c_size;
1445         for (i = 0; i < yc_size; i++)
1446             h->dc_val_base[i] = 1024;
1447     }
1448
1449     return 0;
1450
1451 fail:
1452     return AVERROR(ENOMEM); // free_tables will clean up for us
1453 }
1454
1455 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
1456                             int parse_extradata);
1457
1458 int ff_h264_decode_extradata(H264Context *h)
1459 {
1460     AVCodecContext *avctx = h->avctx;
1461     int ret;
1462
1463     if (avctx->extradata[0] == 1) {
1464         int i, cnt, nalsize;
1465         unsigned char *p = avctx->extradata;
1466
1467         h->is_avc = 1;
1468
1469         if (avctx->extradata_size < 7) {
1470             av_log(avctx, AV_LOG_ERROR,
1471                    "avcC %d too short\n", avctx->extradata_size);
1472             return AVERROR_INVALIDDATA;
1473         }
1474         /* sps and pps in the avcC always have length coded with 2 bytes,
1475          * so put a fake nal_length_size = 2 while parsing them */
1476         h->nal_length_size = 2;
1477         // Decode sps from avcC
1478         cnt = *(p + 5) & 0x1f; // Number of sps
1479         p  += 6;
1480         for (i = 0; i < cnt; i++) {
1481             nalsize = AV_RB16(p) + 2;
1482             if (p - avctx->extradata + nalsize > avctx->extradata_size)
1483                 return AVERROR_INVALIDDATA;
1484             ret = decode_nal_units(h, p, nalsize, 1);
1485             if (ret < 0) {
1486                 av_log(avctx, AV_LOG_ERROR,
1487                        "Decoding sps %d from avcC failed\n", i);
1488                 return ret;
1489             }
1490             p += nalsize;
1491         }
1492         // Decode pps from avcC
1493         cnt = *(p++); // Number of pps
1494         for (i = 0; i < cnt; i++) {
1495             nalsize = AV_RB16(p) + 2;
1496             if (p - avctx->extradata + nalsize > avctx->extradata_size)
1497                 return AVERROR_INVALIDDATA;
1498             ret = decode_nal_units(h, p, nalsize, 1);
1499             if (ret < 0) {
1500                 av_log(avctx, AV_LOG_ERROR,
1501                        "Decoding pps %d from avcC failed\n", i);
1502                 return ret;
1503             }
1504             p += nalsize;
1505         }
1506         // Now store right nal length size, that will be used to parse all other nals
1507         h->nal_length_size = (avctx->extradata[4] & 0x03) + 1;
1508     } else {
1509         h->is_avc = 0;
1510         ret = decode_nal_units(h, avctx->extradata, avctx->extradata_size, 1);
1511         if (ret < 0)
1512             return ret;
1513     }
1514     return 0;
1515 }
1516
1517 av_cold int ff_h264_decode_init(AVCodecContext *avctx)
1518 {
1519     H264Context *h = avctx->priv_data;
1520     int i;
1521     int ret;
1522
1523     h->avctx = avctx;
1524
1525     h->bit_depth_luma    = 8;
1526     h->chroma_format_idc = 1;
1527
1528     ff_h264dsp_init(&h->h264dsp, 8, 1);
1529     ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma);
1530     ff_h264qpel_init(&h->h264qpel, 8);
1531     ff_h264_pred_init(&h->hpc, h->avctx->codec_id, 8, 1);
1532
1533     h->dequant_coeff_pps = -1;
1534
1535     /* needed so that IDCT permutation is known early */
1536     if (CONFIG_ERROR_RESILIENCE)
1537         ff_dsputil_init(&h->dsp, h->avctx);
1538     ff_videodsp_init(&h->vdsp, 8);
1539
1540     memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t));
1541     memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t));
1542
1543     h->picture_structure   = PICT_FRAME;
1544     h->slice_context_count = 1;
1545     h->workaround_bugs     = avctx->workaround_bugs;
1546     h->flags               = avctx->flags;
1547
1548     /* set defaults */
1549     // s->decode_mb = ff_h263_decode_mb;
1550     if (!avctx->has_b_frames)
1551         h->low_delay = 1;
1552
1553     avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
1554
1555     ff_h264_decode_init_vlc();
1556
1557     ff_init_cabac_states();
1558
1559     h->pixel_shift        = 0;
1560     h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
1561
1562     h->thread_context[0] = h;
1563     h->outputed_poc      = h->next_outputed_poc = INT_MIN;
1564     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
1565         h->last_pocs[i] = INT_MIN;
1566     h->prev_poc_msb = 1 << 16;
1567     h->x264_build   = -1;
1568     ff_h264_reset_sei(h);
1569     h->recovery_frame = -1;
1570     h->frame_recovered = 0;
1571     if (avctx->codec_id == AV_CODEC_ID_H264) {
1572         if (avctx->ticks_per_frame == 1)
1573             h->avctx->time_base.den *= 2;
1574         avctx->ticks_per_frame = 2;
1575     }
1576
1577     if (avctx->extradata_size > 0 && avctx->extradata) {
1578        ret = ff_h264_decode_extradata(h);
1579        if (ret < 0)
1580            return ret;
1581     }
1582
1583     if (h->sps.bitstream_restriction_flag &&
1584         h->avctx->has_b_frames < h->sps.num_reorder_frames) {
1585         h->avctx->has_b_frames = h->sps.num_reorder_frames;
1586         h->low_delay           = 0;
1587     }
1588
1589     avctx->internal->allocate_progress = 1;
1590
1591     return 0;
1592 }
1593
/* Non-zero when a lies in the half-open range [b, b + size). */
#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b) + (size))))

/*
 * Map a picture pointer that refers to an entry of old_ctx->DPB onto the
 * entry with the same index in new_ctx->DPB. Yields NULL when pic is NULL or
 * does not point into the MAX_PICTURE_COUNT entries of old_ctx->DPB.
 *
 * Any previous definition is dropped first. All arguments are parenthesized
 * so that expressions such as &ctx or a conditional can be passed safely;
 * note that pic and old_ctx are still evaluated more than once.
 */
#undef REBASE_PICTURE
#define REBASE_PICTURE(pic, new_ctx, old_ctx)                     \
    (((pic) && (pic) >= (old_ctx)->DPB &&                         \
      (pic) < (old_ctx)->DPB + MAX_PICTURE_COUNT) ?               \
     &(new_ctx)->DPB[(pic) - (old_ctx)->DPB] : NULL)
1600
1601 static void copy_picture_range(H264Picture **to, H264Picture **from, int count,
1602                                H264Context *new_base,
1603                                H264Context *old_base)
1604 {
1605     int i;
1606
1607     for (i = 0; i < count; i++) {
1608         assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
1609                 IN_RANGE(from[i], old_base->DPB,
1610                          sizeof(H264Picture) * MAX_PICTURE_COUNT) ||
1611                 !from[i]));
1612         to[i] = REBASE_PICTURE(from[i], new_base, old_base);
1613     }
1614 }
1615
/**
 * Mirror an array of heap-allocated parameter sets.
 *
 * For every index: a destination entry whose source is NULL is freed, a
 * missing destination entry is allocated, and a present source entry is
 * copied byte-for-byte into the destination.
 *
 * @param to    destination array of count pointers
 * @param from  source array of count pointers
 * @param count number of entries in both arrays
 * @param size  size in bytes of one parameter set
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
 */
static int copy_parameter_set(void **to, void **from, int count, int size)
{
    int idx;

    for (idx = 0; idx < count; idx++) {
        if (!from[idx]) {
            /* Nothing to copy: drop a stale destination entry, if any. */
            if (to[idx])
                av_freep(&to[idx]);
            continue;
        }

        if (!to[idx]) {
            to[idx] = av_malloc(size);
            if (!to[idx])
                return AVERROR(ENOMEM);
        }

        memcpy(to[idx], from[idx], size);
    }

    return 0;
}
1635
1636 static int decode_init_thread_copy(AVCodecContext *avctx)
1637 {
1638     H264Context *h = avctx->priv_data;
1639
1640     if (!avctx->internal->is_copy)
1641         return 0;
1642     memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
1643     memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
1644
1645     h->context_initialized = 0;
1646
1647     return 0;
1648 }
1649
/*
 * Copy a contiguous run of struct members from *from to *to: everything from
 * start_field up to, but not including, end_field. The byte count is derived
 * from the member addresses inside *to.
 *
 * to and from are parenthesized so that expressions such as &obj can be
 * passed; to is evaluated more than once.
 */
#define copy_fields(to, from, start_field, end_field)                       \
    memcpy(&(to)->start_field, &(from)->start_field,                        \
           (char *)&(to)->end_field - (char *)&(to)->start_field)
1653
1654 static int h264_slice_header_init(H264Context *, int);
1655
1656 static int h264_set_parameter_from_sps(H264Context *h);
1657
1658 static int decode_update_thread_context(AVCodecContext *dst,
1659                                         const AVCodecContext *src)
1660 {
1661     H264Context *h = dst->priv_data, *h1 = src->priv_data;
1662     int inited = h->context_initialized, err = 0;
1663     int context_reinitialized = 0;
1664     int i, ret;
1665
1666     if (dst == src || !h1->context_initialized)
1667         return 0;
1668
1669     if (inited &&
1670         (h->width                 != h1->width                 ||
1671          h->height                != h1->height                ||
1672          h->mb_width              != h1->mb_width              ||
1673          h->mb_height             != h1->mb_height             ||
1674          h->sps.bit_depth_luma    != h1->sps.bit_depth_luma    ||
1675          h->sps.chroma_format_idc != h1->sps.chroma_format_idc ||
1676          h->sps.colorspace        != h1->sps.colorspace)) {
1677
1678         /* set bits_per_raw_sample to the previous value. the check for changed
1679          * bit depth in h264_set_parameter_from_sps() uses it and sets it to
1680          * the current value */
1681         h->avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
1682
1683         av_freep(&h->bipred_scratchpad);
1684
1685         h->width     = h1->width;
1686         h->height    = h1->height;
1687         h->mb_height = h1->mb_height;
1688         h->mb_width  = h1->mb_width;
1689         h->mb_num    = h1->mb_num;
1690         h->mb_stride = h1->mb_stride;
1691         h->b_stride  = h1->b_stride;
1692
1693         if ((err = h264_slice_header_init(h, 1)) < 0) {
1694             av_log(h->avctx, AV_LOG_ERROR, "h264_slice_header_init() failed");
1695             return err;
1696         }
1697         context_reinitialized = 1;
1698
1699         /* update linesize on resize. The decoder doesn't
1700          * necessarily call h264_frame_start in the new thread */
1701         h->linesize   = h1->linesize;
1702         h->uvlinesize = h1->uvlinesize;
1703
1704         /* copy block_offset since frame_start may not be called */
1705         memcpy(h->block_offset, h1->block_offset, sizeof(h->block_offset));
1706     }
1707
1708     if (!inited) {
1709         for (i = 0; i < MAX_SPS_COUNT; i++)
1710             av_freep(h->sps_buffers + i);
1711
1712         for (i = 0; i < MAX_PPS_COUNT; i++)
1713             av_freep(h->pps_buffers + i);
1714
1715         memcpy(h, h1, sizeof(*h1));
1716         memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
1717         memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
1718         memset(&h->er, 0, sizeof(h->er));
1719         memset(&h->me, 0, sizeof(h->me));
1720         memset(&h->mb, 0, sizeof(h->mb));
1721         memset(&h->mb_luma_dc, 0, sizeof(h->mb_luma_dc));
1722         memset(&h->mb_padding, 0, sizeof(h->mb_padding));
1723         h->context_initialized = 0;
1724
1725         memset(&h->cur_pic, 0, sizeof(h->cur_pic));
1726         av_frame_unref(&h->cur_pic.f);
1727         h->cur_pic.tf.f = &h->cur_pic.f;
1728
1729         h->avctx             = dst;
1730         h->DPB               = NULL;
1731         h->qscale_table_pool = NULL;
1732         h->mb_type_pool      = NULL;
1733         h->ref_index_pool    = NULL;
1734         h->motion_val_pool   = NULL;
1735
1736         ret = ff_h264_alloc_tables(h);
1737         if (ret < 0) {
1738             av_log(dst, AV_LOG_ERROR, "Could not allocate memory\n");
1739             return ret;
1740         }
1741         ret = context_init(h);
1742         if (ret < 0) {
1743             av_log(dst, AV_LOG_ERROR, "context_init() failed.\n");
1744             return ret;
1745         }
1746
1747         for (i = 0; i < 2; i++) {
1748             h->rbsp_buffer[i]      = NULL;
1749             h->rbsp_buffer_size[i] = 0;
1750         }
1751         h->bipred_scratchpad = NULL;
1752         h->edge_emu_buffer   = NULL;
1753
1754         h->thread_context[0] = h;
1755
1756         h->context_initialized = 1;
1757     }
1758
1759     h->avctx->coded_height  = h1->avctx->coded_height;
1760     h->avctx->coded_width   = h1->avctx->coded_width;
1761     h->avctx->width         = h1->avctx->width;
1762     h->avctx->height        = h1->avctx->height;
1763     h->coded_picture_number = h1->coded_picture_number;
1764     h->first_field          = h1->first_field;
1765     h->picture_structure    = h1->picture_structure;
1766     h->qscale               = h1->qscale;
1767     h->droppable            = h1->droppable;
1768     h->low_delay            = h1->low_delay;
1769
1770     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1771         unref_picture(h, &h->DPB[i]);
1772         if (h1->DPB[i].f.buf[0] &&
1773             (ret = ref_picture(h, &h->DPB[i], &h1->DPB[i])) < 0)
1774             return ret;
1775     }
1776
1777     h->cur_pic_ptr = REBASE_PICTURE(h1->cur_pic_ptr, h, h1);
1778     unref_picture(h, &h->cur_pic);
1779     if ((ret = ref_picture(h, &h->cur_pic, &h1->cur_pic)) < 0)
1780         return ret;
1781
1782     h->workaround_bugs = h1->workaround_bugs;
1783     h->low_delay       = h1->low_delay;
1784     h->droppable       = h1->droppable;
1785
1786     /* frame_start may not be called for the next thread (if it's decoding
1787      * a bottom field) so this has to be allocated here */
1788     err = alloc_scratch_buffers(h, h1->linesize);
1789     if (err < 0)
1790         return err;
1791
1792     // extradata/NAL handling
1793     h->is_avc = h1->is_avc;
1794
1795     // SPS/PPS
1796     if ((ret = copy_parameter_set((void **)h->sps_buffers,
1797                                   (void **)h1->sps_buffers,
1798                                   MAX_SPS_COUNT, sizeof(SPS))) < 0)
1799         return ret;
1800     h->sps = h1->sps;
1801     if ((ret = copy_parameter_set((void **)h->pps_buffers,
1802                                   (void **)h1->pps_buffers,
1803                                   MAX_PPS_COUNT, sizeof(PPS))) < 0)
1804         return ret;
1805     h->pps = h1->pps;
1806
1807     // Dequantization matrices
1808     // FIXME these are big - can they be only copied when PPS changes?
1809     copy_fields(h, h1, dequant4_buffer, dequant4_coeff);
1810
1811     for (i = 0; i < 6; i++)
1812         h->dequant4_coeff[i] = h->dequant4_buffer[0] +
1813                                (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);
1814
1815     for (i = 0; i < 6; i++)
1816         h->dequant8_coeff[i] = h->dequant8_buffer[0] +
1817                                (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);
1818
1819     h->dequant_coeff_pps = h1->dequant_coeff_pps;
1820
1821     // POC timing
1822     copy_fields(h, h1, poc_lsb, redundant_pic_count);
1823
1824     // reference lists
1825     copy_fields(h, h1, short_ref, cabac_init_idc);
1826
1827     copy_picture_range(h->short_ref, h1->short_ref, 32, h, h1);
1828     copy_picture_range(h->long_ref, h1->long_ref, 32, h, h1);
1829     copy_picture_range(h->delayed_pic, h1->delayed_pic,
1830                        MAX_DELAYED_PIC_COUNT + 2, h, h1);
1831
1832     h->last_slice_type = h1->last_slice_type;
1833
1834     if (context_reinitialized)
1835         h264_set_parameter_from_sps(h);
1836
1837     if (!h->cur_pic_ptr)
1838         return 0;
1839
1840     if (!h->droppable) {
1841         err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
1842         h->prev_poc_msb = h->poc_msb;
1843         h->prev_poc_lsb = h->poc_lsb;
1844     }
1845     h->prev_frame_num_offset = h->frame_num_offset;
1846     h->prev_frame_num        = h->frame_num;
1847     h->outputed_poc          = h->next_outputed_poc;
1848
1849     h->recovery_frame        = h1->recovery_frame;
1850     h->frame_recovered       = h1->frame_recovered;
1851
1852     return err;
1853 }
1854
1855 static int h264_frame_start(H264Context *h)
1856 {
1857     H264Picture *pic;
1858     int i, ret;
1859     const int pixel_shift = h->pixel_shift;
1860
1861     release_unused_pictures(h, 1);
1862     h->cur_pic_ptr = NULL;
1863
1864     i = find_unused_picture(h);
1865     if (i < 0) {
1866         av_log(h->avctx, AV_LOG_ERROR, "no frame buffer available\n");
1867         return i;
1868     }
1869     pic = &h->DPB[i];
1870
1871     pic->reference              = h->droppable ? 0 : h->picture_structure;
1872     pic->f.coded_picture_number = h->coded_picture_number++;
1873     pic->field_picture          = h->picture_structure != PICT_FRAME;
1874     /*
1875      * Zero key_frame here; IDR markings per slice in frame or fields are ORed
1876      * in later.
1877      * See decode_nal_units().
1878      */
1879     pic->f.key_frame = 0;
1880     pic->mmco_reset  = 0;
1881     pic->recovered   = 0;
1882
1883     if ((ret = alloc_picture(h, pic)) < 0)
1884         return ret;
1885
1886     h->cur_pic_ptr = pic;
1887     unref_picture(h, &h->cur_pic);
1888     if ((ret = ref_picture(h, &h->cur_pic, h->cur_pic_ptr)) < 0)
1889         return ret;
1890
1891     if (CONFIG_ERROR_RESILIENCE)
1892         ff_er_frame_start(&h->er);
1893
1894     assert(h->linesize && h->uvlinesize);
1895
1896     for (i = 0; i < 16; i++) {
1897         h->block_offset[i]           = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * h->linesize * ((scan8[i] - scan8[0]) >> 3);
1898         h->block_offset[48 + i]      = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * h->linesize * ((scan8[i] - scan8[0]) >> 3);
1899     }
1900     for (i = 0; i < 16; i++) {
1901         h->block_offset[16 + i]      =
1902         h->block_offset[32 + i]      = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1903         h->block_offset[48 + 16 + i] =
1904         h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1905     }
1906
1907     /* can't be in alloc_tables because linesize isn't known there.
1908      * FIXME: redo bipred weight to not require extra buffer? */
1909     for (i = 0; i < h->slice_context_count; i++)
1910         if (h->thread_context[i]) {
1911             ret = alloc_scratch_buffers(h->thread_context[i], h->linesize);
1912             if (ret < 0)
1913                 return ret;
1914         }
1915
1916     /* Some macroblocks can be accessed before they're available in case
1917      * of lost slices, MBAFF or threading. */
1918     memset(h->slice_table, -1,
1919            (h->mb_height * h->mb_stride - 1) * sizeof(*h->slice_table));
1920
1921     // s->decode = (s->flags & CODEC_FLAG_PSNR) || !s->encoding ||
1922     //             s->current_picture.f.reference /* || h->contains_intra */ || 1;
1923
1924     /* We mark the current picture as non-reference after allocating it, so
1925      * that if we break out due to an error it can be released automatically
1926      * in the next ff_MPV_frame_start().
1927      */
1928     h->cur_pic_ptr->reference = 0;
1929
1930     h->cur_pic_ptr->field_poc[0] = h->cur_pic_ptr->field_poc[1] = INT_MAX;
1931
1932     h->next_output_pic = NULL;
1933
1934     assert(h->cur_pic_ptr->long_ref == 0);
1935
1936     return 0;
1937 }
1938
1939 /**
1940  * Run setup operations that must be run after slice header decoding.
1941  * This includes finding the next displayed frame.
1942  *
1943  * @param h h264 master context
1944  * @param setup_finished enough NALs have been read that we can call
1945  * ff_thread_finish_setup()
1946  */
1947 static void decode_postinit(H264Context *h, int setup_finished)
1948 {
1949     H264Picture *out = h->cur_pic_ptr;
1950     H264Picture *cur = h->cur_pic_ptr;
1951     int i, pics, out_of_order, out_idx;
1952     int invalid = 0, cnt = 0;
1953
1954     h->cur_pic_ptr->f.pict_type = h->pict_type;
1955
1956     if (h->next_output_pic)
1957         return;
1958
1959     if (cur->field_poc[0] == INT_MAX || cur->field_poc[1] == INT_MAX) {
1960         /* FIXME: if we have two PAFF fields in one packet, we can't start
1961          * the next thread here. If we have one field per packet, we can.
1962          * The check in decode_nal_units() is not good enough to find this
1963          * yet, so we assume the worst for now. */
1964         // if (setup_finished)
1965         //    ff_thread_finish_setup(h->avctx);
1966         return;
1967     }
1968
1969     cur->f.interlaced_frame = 0;
1970     cur->f.repeat_pict      = 0;
1971
1972     /* Signal interlacing information externally. */
1973     /* Prioritize picture timing SEI information over used
1974      * decoding process if it exists. */
1975
1976     if (h->sps.pic_struct_present_flag) {
1977         switch (h->sei_pic_struct) {
1978         case SEI_PIC_STRUCT_FRAME:
1979             break;
1980         case SEI_PIC_STRUCT_TOP_FIELD:
1981         case SEI_PIC_STRUCT_BOTTOM_FIELD:
1982             cur->f.interlaced_frame = 1;
1983             break;
1984         case SEI_PIC_STRUCT_TOP_BOTTOM:
1985         case SEI_PIC_STRUCT_BOTTOM_TOP:
1986             if (FIELD_OR_MBAFF_PICTURE(h))
1987                 cur->f.interlaced_frame = 1;
1988             else
1989                 // try to flag soft telecine progressive
1990                 cur->f.interlaced_frame = h->prev_interlaced_frame;
1991             break;
1992         case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
1993         case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
1994             /* Signal the possibility of telecined film externally
1995              * (pic_struct 5,6). From these hints, let the applications
1996              * decide if they apply deinterlacing. */
1997             cur->f.repeat_pict = 1;
1998             break;
1999         case SEI_PIC_STRUCT_FRAME_DOUBLING:
2000             cur->f.repeat_pict = 2;
2001             break;
2002         case SEI_PIC_STRUCT_FRAME_TRIPLING:
2003             cur->f.repeat_pict = 4;
2004             break;
2005         }
2006
2007         if ((h->sei_ct_type & 3) &&
2008             h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
2009             cur->f.interlaced_frame = (h->sei_ct_type & (1 << 1)) != 0;
2010     } else {
2011         /* Derive interlacing flag from used decoding process. */
2012         cur->f.interlaced_frame = FIELD_OR_MBAFF_PICTURE(h);
2013     }
2014     h->prev_interlaced_frame = cur->f.interlaced_frame;
2015
2016     if (cur->field_poc[0] != cur->field_poc[1]) {
2017         /* Derive top_field_first from field pocs. */
2018         cur->f.top_field_first = cur->field_poc[0] < cur->field_poc[1];
2019     } else {
2020         if (cur->f.interlaced_frame || h->sps.pic_struct_present_flag) {
2021             /* Use picture timing SEI information. Even if it is a
2022              * information of a past frame, better than nothing. */
2023             if (h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM ||
2024                 h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
2025                 cur->f.top_field_first = 1;
2026             else
2027                 cur->f.top_field_first = 0;
2028         } else {
2029             /* Most likely progressive */
2030             cur->f.top_field_first = 0;
2031         }
2032     }
2033
2034     if (h->sei_frame_packing_present &&
2035         h->frame_packing_arrangement_type >= 0 &&
2036         h->frame_packing_arrangement_type <= 6 &&
2037         h->content_interpretation_type > 0 &&
2038         h->content_interpretation_type < 3) {
2039         AVStereo3D *stereo = av_stereo3d_create_side_data(&cur->f);
2040         if (!stereo)
2041             return;
2042
2043         switch (h->frame_packing_arrangement_type) {
2044         case 0:
2045             stereo->type = AV_STEREO3D_CHECKERBOARD;
2046             break;
2047         case 1:
2048             stereo->type = AV_STEREO3D_LINES;
2049             break;
2050         case 2:
2051             stereo->type = AV_STEREO3D_COLUMNS;
2052             break;
2053         case 3:
2054             if (h->quincunx_subsampling)
2055                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2056             else
2057                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2058             break;
2059         case 4:
2060             stereo->type = AV_STEREO3D_TOPBOTTOM;
2061             break;
2062         case 5:
2063             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2064             break;
2065         case 6:
2066             stereo->type = AV_STEREO3D_2D;
2067             break;
2068         }
2069
2070         if (h->content_interpretation_type == 2)
2071             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2072     }
2073
2074     // FIXME do something with unavailable reference frames
2075
2076     /* Sort B-frames into display order */
2077
2078     if (h->sps.bitstream_restriction_flag &&
2079         h->avctx->has_b_frames < h->sps.num_reorder_frames) {
2080         h->avctx->has_b_frames = h->sps.num_reorder_frames;
2081         h->low_delay           = 0;
2082     }
2083
2084     if (h->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT &&
2085         !h->sps.bitstream_restriction_flag) {
2086         h->avctx->has_b_frames = MAX_DELAYED_PIC_COUNT - 1;
2087         h->low_delay           = 0;
2088     }
2089
2090     pics = 0;
2091     while (h->delayed_pic[pics])
2092         pics++;
2093
2094     assert(pics <= MAX_DELAYED_PIC_COUNT);
2095
2096     h->delayed_pic[pics++] = cur;
2097     if (cur->reference == 0)
2098         cur->reference = DELAYED_PIC_REF;
2099
2100     /* Frame reordering. This code takes pictures from coding order and sorts
2101      * them by their incremental POC value into display order. It supports POC
2102      * gaps, MMCO reset codes and random resets.
2103      * A "display group" can start either with a IDR frame (f.key_frame = 1),
2104      * and/or can be closed down with a MMCO reset code. In sequences where
2105      * there is no delay, we can't detect that (since the frame was already
2106      * output to the user), so we also set h->mmco_reset to detect the MMCO
2107      * reset code.
2108      * FIXME: if we detect insufficient delays (as per h->avctx->has_b_frames),
2109      * we increase the delay between input and output. All frames affected by
2110      * the lag (e.g. those that should have been output before another frame
2111      * that we already returned to the user) will be dropped. This is a bug
2112      * that we will fix later. */
2113     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
2114         cnt     += out->poc < h->last_pocs[i];
2115         invalid += out->poc == INT_MIN;
2116     }
2117     if (!h->mmco_reset && !cur->f.key_frame &&
2118         cnt + invalid == MAX_DELAYED_PIC_COUNT && cnt > 0) {
2119         h->mmco_reset = 2;
2120         if (pics > 1)
2121             h->delayed_pic[pics - 2]->mmco_reset = 2;
2122     }
2123     if (h->mmco_reset || cur->f.key_frame) {
2124         for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
2125             h->last_pocs[i] = INT_MIN;
2126         cnt     = 0;
2127         invalid = MAX_DELAYED_PIC_COUNT;
2128     }
2129     out     = h->delayed_pic[0];
2130     out_idx = 0;
2131     for (i = 1; i < MAX_DELAYED_PIC_COUNT &&
2132                 h->delayed_pic[i] &&
2133                 !h->delayed_pic[i - 1]->mmco_reset &&
2134                 !h->delayed_pic[i]->f.key_frame;
2135          i++)
2136         if (h->delayed_pic[i]->poc < out->poc) {
2137             out     = h->delayed_pic[i];
2138             out_idx = i;
2139         }
2140     if (h->avctx->has_b_frames == 0 &&
2141         (h->delayed_pic[0]->f.key_frame || h->mmco_reset))
2142         h->next_outputed_poc = INT_MIN;
2143     out_of_order = !out->f.key_frame && !h->mmco_reset &&
2144                    (out->poc < h->next_outputed_poc);
2145
2146     if (h->sps.bitstream_restriction_flag &&
2147         h->avctx->has_b_frames >= h->sps.num_reorder_frames) {
2148     } else if (out_of_order && pics - 1 == h->avctx->has_b_frames &&
2149                h->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) {
2150         if (invalid + cnt < MAX_DELAYED_PIC_COUNT) {
2151             h->avctx->has_b_frames = FFMAX(h->avctx->has_b_frames, cnt);
2152         }
2153         h->low_delay = 0;
2154     } else if (h->low_delay &&
2155                ((h->next_outputed_poc != INT_MIN &&
2156                  out->poc > h->next_outputed_poc + 2) ||
2157                 cur->f.pict_type == AV_PICTURE_TYPE_B)) {
2158         h->low_delay = 0;
2159         h->avctx->has_b_frames++;
2160     }
2161
2162     if (pics > h->avctx->has_b_frames) {
2163         out->reference &= ~DELAYED_PIC_REF;
2164         // for frame threading, the owner must be the second field's thread or
2165         // else the first thread can release the picture and reuse it unsafely
2166         for (i = out_idx; h->delayed_pic[i]; i++)
2167             h->delayed_pic[i] = h->delayed_pic[i + 1];
2168     }
2169     memmove(h->last_pocs, &h->last_pocs[1],
2170             sizeof(*h->last_pocs) * (MAX_DELAYED_PIC_COUNT - 1));
2171     h->last_pocs[MAX_DELAYED_PIC_COUNT - 1] = cur->poc;
2172     if (!out_of_order && pics > h->avctx->has_b_frames) {
2173         h->next_output_pic = out;
2174         if (out->mmco_reset) {
2175             if (out_idx > 0) {
2176                 h->next_outputed_poc                    = out->poc;
2177                 h->delayed_pic[out_idx - 1]->mmco_reset = out->mmco_reset;
2178             } else {
2179                 h->next_outputed_poc = INT_MIN;
2180             }
2181         } else {
2182             if (out_idx == 0 && pics > 1 && h->delayed_pic[0]->f.key_frame) {
2183                 h->next_outputed_poc = INT_MIN;
2184             } else {
2185                 h->next_outputed_poc = out->poc;
2186             }
2187         }
2188         h->mmco_reset = 0;
2189     } else {
2190         av_log(h->avctx, AV_LOG_DEBUG, "no picture\n");
2191     }
2192
2193     if (h->next_output_pic) {
2194         if (h->next_output_pic->recovered) {
2195             // We have reached an recovery point and all frames after it in
2196             // display order are "recovered".
2197             h->frame_recovered |= FRAME_RECOVERED_SEI;
2198         }
2199         h->next_output_pic->recovered |= !!(h->frame_recovered & FRAME_RECOVERED_SEI);
2200     }
2201
2202     if (setup_finished && !h->avctx->hwaccel)
2203         ff_thread_finish_setup(h->avctx);
2204 }
2205
2206 static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y,
2207                                               uint8_t *src_cb, uint8_t *src_cr,
2208                                               int linesize, int uvlinesize,
2209                                               int simple)
2210 {
2211     uint8_t *top_border;
2212     int top_idx = 1;
2213     const int pixel_shift = h->pixel_shift;
2214     int chroma444 = CHROMA444(h);
2215     int chroma422 = CHROMA422(h);
2216
2217     src_y  -= linesize;
2218     src_cb -= uvlinesize;
2219     src_cr -= uvlinesize;
2220
2221     if (!simple && FRAME_MBAFF(h)) {
2222         if (h->mb_y & 1) {
2223             if (!MB_MBAFF(h)) {
2224                 top_border = h->top_borders[0][h->mb_x];
2225                 AV_COPY128(top_border, src_y + 15 * linesize);
2226                 if (pixel_shift)
2227                     AV_COPY128(top_border + 16, src_y + 15 * linesize + 16);
2228                 if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
2229                     if (chroma444) {
2230                         if (pixel_shift) {
2231                             AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize);
2232                             AV_COPY128(top_border + 48, src_cb + 15 * uvlinesize + 16);
2233                             AV_COPY128(top_border + 64, src_cr + 15 * uvlinesize);
2234                             AV_COPY128(top_border + 80, src_cr + 15 * uvlinesize + 16);
2235                         } else {
2236                             AV_COPY128(top_border + 16, src_cb + 15 * uvlinesize);
2237                             AV_COPY128(top_border + 32, src_cr + 15 * uvlinesize);
2238                         }
2239                     } else if (chroma422) {
2240                         if (pixel_shift) {
2241                             AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize);
2242                             AV_COPY128(top_border + 48, src_cr + 15 * uvlinesize);
2243                         } else {
2244                             AV_COPY64(top_border + 16, src_cb + 15 * uvlinesize);
2245                             AV_COPY64(top_border + 24, src_cr + 15 * uvlinesize);
2246                         }
2247                     } else {
2248                         if (pixel_shift) {
2249                             AV_COPY128(top_border + 32, src_cb + 7 * uvlinesize);
2250                             AV_COPY128(top_border + 48, src_cr + 7 * uvlinesize);
2251                         } else {
2252                             AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
2253                             AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
2254                         }
2255                     }
2256                 }
2257             }
2258         } else if (MB_MBAFF(h)) {
2259             top_idx = 0;
2260         } else
2261             return;
2262     }
2263
2264     top_border = h->top_borders[top_idx][h->mb_x];
2265     /* There are two lines saved, the line above the top macroblock
2266      * of a pair, and the line above the bottom macroblock. */
2267     AV_COPY128(top_border, src_y + 16 * linesize);
2268     if (pixel_shift)
2269         AV_COPY128(top_border + 16, src_y + 16 * linesize + 16);
2270
2271     if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
2272         if (chroma444) {
2273             if (pixel_shift) {
2274                 AV_COPY128(top_border + 32, src_cb + 16 * linesize);
2275                 AV_COPY128(top_border + 48, src_cb + 16 * linesize + 16);
2276                 AV_COPY128(top_border + 64, src_cr + 16 * linesize);
2277                 AV_COPY128(top_border + 80, src_cr + 16 * linesize + 16);
2278             } else {
2279                 AV_COPY128(top_border + 16, src_cb + 16 * linesize);
2280                 AV_COPY128(top_border + 32, src_cr + 16 * linesize);
2281             }
2282         } else if (chroma422) {
2283             if (pixel_shift) {
2284                 AV_COPY128(top_border + 32, src_cb + 16 * uvlinesize);
2285                 AV_COPY128(top_border + 48, src_cr + 16 * uvlinesize);
2286             } else {
2287                 AV_COPY64(top_border + 16, src_cb + 16 * uvlinesize);
2288                 AV_COPY64(top_border + 24, src_cr + 16 * uvlinesize);
2289             }
2290         } else {
2291             if (pixel_shift) {
2292                 AV_COPY128(top_border + 32, src_cb + 8 * uvlinesize);
2293                 AV_COPY128(top_border + 48, src_cr + 8 * uvlinesize);
2294             } else {
2295                 AV_COPY64(top_border + 16, src_cb + 8 * uvlinesize);
2296                 AV_COPY64(top_border + 24, src_cr + 8 * uvlinesize);
2297             }
2298         }
2299     }
2300 }
2301
/**
 * Exchange (or copy) the row of samples directly above the current
 * macroblock between the picture planes and the saved lines in
 * h->top_borders.
 *
 * NOTE(review): judging by the deblock_* names this is presumably used so
 * that intra prediction can see the not-yet-deblocked neighbour samples;
 * confirm against the callers, which are not visible here.
 *
 * @param h           decoder context (mb_x, mb_y, mb_xy, slice_table,
 *                    top_type, top_borders, ... are read)
 * @param src_y       pointer to the current macroblock in the luma plane
 * @param src_cb      pointer to the current macroblock in the Cb plane
 * @param src_cr      pointer to the current macroblock in the Cr plane
 * @param linesize    luma stride in bytes
 * @param uvlinesize  chroma stride in bytes
 * @param xchg        nonzero: swap the first 8 samples of each directly-above
 *                    row; zero: only copy them (see XCHG below). All other
 *                    spans are always swapped.
 * @param chroma444   nonzero if the chroma planes use the 16-sample-wide
 *                    layout in top_borders (Cb at 16, Cr at 32)
 * @param simple      nonzero to skip the MBAFF and gray-flag checks
 * @param pixel_shift 0 for one byte per sample, 1 for two bytes per sample
 */
static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                            uint8_t *src_cb, uint8_t *src_cr,
                                            int linesize, int uvlinesize,
                                            int xchg, int chroma444,
                                            int simple, int pixel_shift)
{
    int deblock_topleft;
    int deblock_top;
    int top_idx = 1;
    uint8_t *top_border_m1;
    uint8_t *top_border;

    /* In MBAFF frames: odd macroblock rows are only processed when
     * MB_MBAFF(h) is set; even rows use saved line 0 when MB_MBAFF(h) is
     * set and line 1 otherwise. */
    if (!simple && FRAME_MBAFF(h)) {
        if (h->mb_y & 1) {
            if (!MB_MBAFF(h))
                return;
        } else {
            top_idx = MB_MBAFF(h) ? 0 : 1;
        }
    }

    /* Decide which neighbours take part. With deblocking_filter == 2 the
     * top-left neighbour must belong to the current slice and the top
     * neighbour is taken from h->top_type; otherwise only the position
     * inside the picture matters. */
    if (h->deblocking_filter == 2) {
        deblock_topleft = h->slice_table[h->mb_xy - 1 - h->mb_stride] == h->slice_num;
        deblock_top     = h->top_type;
    } else {
        deblock_topleft = (h->mb_x > 0);
        deblock_top     = (h->mb_y > !!MB_FIELD(h));
    }

    /* Step one row up and (1 + pixel_shift) bytes, i.e. one sample, to the
     * left, so that the "+ (1 << pixel_shift)" offsets below address the
     * first sample directly above the macroblock. */
    src_y  -= linesize   + 1 + pixel_shift;
    src_cb -= uvlinesize + 1 + pixel_shift;
    src_cr -= uvlinesize + 1 + pixel_shift;

    /* Saved lines of the left neighbour column and of the current column.
     * top_border_m1 is only dereferenced when deblock_topleft is set. */
    top_border_m1 = h->top_borders[top_idx][h->mb_x - 1];
    top_border    = h->top_borders[top_idx][h->mb_x];

/* XCHG(a, b, xchg): process one span of 8 samples (8 bytes, or 16 bytes when
 * pixel_shift is set). If xchg is nonzero the contents of a and b are
 * swapped; otherwise AV_COPY*(b, a) is used (presumably copying a into b,
 * destination first -- confirm against the AV_COPY definition).
 * The macro reads pixel_shift from the enclosing scope and is not #undef'd
 * after this function. */
#define XCHG(a, b, xchg)                        \
    if (pixel_shift) {                          \
        if (xchg) {                             \
            AV_SWAP64(b + 0, a + 0);            \
            AV_SWAP64(b + 8, a + 8);            \
        } else {                                \
            AV_COPY128(b, a);                   \
        }                                       \
    } else if (xchg)                            \
        AV_SWAP64(b, a);                        \
    else                                        \
        AV_COPY64(b, a);

    /* Luma: samples 8..15 of the left column's saved line against the 8
     * samples left of the macroblock, the 16 samples directly above, and
     * the first 8 samples of the column to the right. */
    if (deblock_top) {
        if (deblock_topleft) {
            XCHG(top_border_m1 + (8 << pixel_shift),
                 src_y - (7 << pixel_shift), 1);
        }
        XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
        XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
        if (h->mb_x + 1 < h->mb_width) {
            XCHG(h->top_borders[top_idx][h->mb_x + 1],
                 src_y + (17 << pixel_shift), 1);
        }
    }
    /* Chroma is skipped only in the non-simple path when gray decoding is
     * compiled in and requested via CODEC_FLAG_GRAY. */
    if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
        if (chroma444) {
            /* 4:4:4: each chroma plane is handled like luma; Cb lives at
             * sample offset 16 and Cr at 32 within a saved line. */
            if (deblock_top) {
                if (deblock_topleft) {
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                    XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
                }
                XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
                XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
                XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
                XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
                if (h->mb_x + 1 < h->mb_width) {
                    XCHG(h->top_borders[top_idx][h->mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
                    XCHG(h->top_borders[top_idx][h->mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
                }
            }
        } else {
            /* Other chroma formats: 8 samples per plane, Cb at sample
             * offset 16 and Cr at 24; always swapped, no right neighbour. */
            if (deblock_top) {
                if (deblock_topleft) {
                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
                }
                XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1);
                XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1);
            }
        }
    }
}
2391
2392 static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth,
2393                                         int index)
2394 {
2395     if (high_bit_depth) {
2396         return AV_RN32A(((int32_t *)mb) + index);
2397     } else
2398         return AV_RN16A(mb + index);
2399 }
2400
2401 static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth,
2402                                          int index, int value)
2403 {
2404     if (high_bit_depth) {
2405         AV_WN32A(((int32_t *)mb) + index, value);
2406     } else
2407         AV_WN16A(mb + index, value);
2408 }
2409
/**
 * Run intra prediction for one plane of the current macroblock and, for
 * intra 4x4 / 8x8 blocks, add the residual right after each block is
 * predicted. For all other macroblock types handled here the 16x16
 * predictor is run and the luma DC coefficients are prepared.
 *
 * @param h                decoder context
 * @param mb_type          macroblock type flags (tested with IS_INTRA4x4 /
 *                         IS_8x8DCT)
 * @param is_h264          nonzero for H.264; zero selects the SVQ3 code
 *                         paths (only when CONFIG_SVQ3_DECODER)
 * @param simple           not used in this function
 * @param transform_bypass nonzero to add coefficients directly instead of
 *                         running the inverse transform
 * @param pixel_shift      0 for 8-bit samples, 1 for 16-bit samples
 * @param block_offset     per-block offsets into the destination plane;
 *                         advanced by 16 * p inside
 * @param linesize         destination stride in bytes
 * @param dest_y           destination of the macroblock in plane p
 * @param p                plane index; 0 uses h->qscale, other values use
 *                         h->chroma_qp[p - 1]
 */
static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
                                                       int mb_type, int is_h264,
                                                       int simple,
                                                       int transform_bypass,
                                                       int pixel_shift,
                                                       int *block_offset,
                                                       int linesize,
                                                       uint8_t *dest_y, int p)
{
    void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, int16_t *block, int stride);
    int i;
    int qscale = p == 0 ? h->qscale : h->chroma_qp[p - 1];
    block_offset += 16 * p;
    if (IS_INTRA4x4(mb_type)) {
        if (IS_8x8DCT(mb_type)) {
            /* Intra 8x8: four blocks, visited at i = 0, 4, 8, 12. */
            if (transform_bypass) {
                idct_dc_add =
                idct_add    = h->h264dsp.h264_add_pixels8_clear;
            } else {
                idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                idct_add    = h->h264dsp.h264_idct8_add;
            }
            for (i = 0; i < 16; i += 4) {
                uint8_t *const ptr = dest_y + block_offset[i];
                const int dir      = h->intra4x4_pred_mode_cache[scan8[i]];
                /* With transform bypass in profile 244, prediction modes 0
                 * and 1 use the combined predict-and-add functions. */
                if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
                    h->hpc.pred8x8l_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                } else {
                    const int nnz = h->non_zero_count_cache[scan8[i + p * 16]];
                    h->hpc.pred8x8l[dir](ptr, (h->topleft_samples_available << i) & 0x8000,
                                         (h->topright_samples_available << i) & 0x4000, linesize);
                    if (nnz) {
                        /* A single nonzero coefficient that sits in the
                         * first position takes the DC-only path. */
                        if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
                            idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                        else
                            idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                    }
                }
            }
        } else {
            /* Intra 4x4: sixteen blocks. */
            if (transform_bypass) {
                idct_dc_add  =
                idct_add     = h->h264dsp.h264_add_pixels4_clear;
            } else {
                idct_dc_add = h->h264dsp.h264_idct_dc_add;
                idct_add    = h->h264dsp.h264_idct_add;
            }
            for (i = 0; i < 16; i++) {
                uint8_t *const ptr = dest_y + block_offset[i];
                const int dir      = h->intra4x4_pred_mode_cache[scan8[i]];

                if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
                    h->hpc.pred4x4_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                } else {
                    uint8_t *topright;
                    int nnz, tr;
                    uint64_t tr_high;
                    /* Only these two modes are given a top-right pointer;
                     * every other mode gets NULL. */
                    if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
                        const int topright_avail = (h->topright_samples_available << i) & 0x8000;
                        assert(h->mb_y || linesize <= block_offset[i]);
                        if (!topright_avail) {
                            /* Top-right block unavailable: replicate the
                             * fourth sample of the row above four times
                             * into a local and predict from that. */
                            if (pixel_shift) {
                                tr_high  = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
                                topright = (uint8_t *)&tr_high;
                            } else {
                                tr       = ptr[3 - linesize] * 0x01010101u;
                                topright = (uint8_t *)&tr;
                            }
                        } else
                            topright = ptr + (4 << pixel_shift) - linesize;
                    } else
                        topright = NULL;

                    h->hpc.pred4x4[dir](ptr, topright, linesize);
                    nnz = h->non_zero_count_cache[scan8[i + p * 16]];
                    if (nnz) {
                        if (is_h264) {
                            if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
                                idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                            else
                                idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                        } else if (CONFIG_SVQ3_DECODER)
                            /* SVQ3 indexes h->mb without the pixel_shift
                             * scaling. */
                            ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize, qscale, 0);
                    }
                }
            }
        }
    } else {
        /* Not intra 4x4/8x8: predict the whole 16x16 block here; only the
         * DC coefficients are prepared in this function. */
        h->hpc.pred16x16[h->intra16x16_pred_mode](dest_y, linesize);
        if (is_h264) {
            if (h->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) {
                if (!transform_bypass)
                    h->h264dsp.h264_luma_dc_dequant_idct(h->mb + (p * 256 << pixel_shift),
                                                         h->mb_luma_dc[p],
                                                         h->dequant4_coeff[p][qscale][0]);
                else {
                    /* Transform bypass: copy each of the 16 DC values from
                     * h->mb_luma_dc[p] into the first coefficient of its
                     * 4x4 block; dc_mapping gives that coefficient's index
                     * (block number * 16). */
                    static const uint8_t dc_mapping[16] = {
                         0 * 16,  1 * 16,  4 * 16,  5 * 16,
                         2 * 16,  3 * 16,  6 * 16,  7 * 16,
                         8 * 16,  9 * 16, 12 * 16, 13 * 16,
                        10 * 16, 11 * 16, 14 * 16, 15 * 16
                    };
                    for (i = 0; i < 16; i++)
                        dctcoef_set(h->mb + (p * 256 << pixel_shift),
                                    pixel_shift, dc_mapping[i],
                                    dctcoef_get(h->mb_luma_dc[p],
                                                pixel_shift, i));
                }
            }
        } else if (CONFIG_SVQ3_DECODER)
            ff_svq3_luma_dc_dequant_idct_c(h->mb + p * 256,
                                           h->mb_luma_dc[p], qscale);
    }
}
2525
2526 static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
2527                                                     int is_h264, int simple,
2528                                                     int transform_bypass,
2529                                                     int pixel_shift,
2530                                                     int *block_offset,
2531                                                     int linesize,
2532                                                     uint8_t *dest_y, int p)
2533 {
2534     void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
2535     int i;
2536     block_offset += 16 * p;
2537     if (!IS_INTRA4x4(mb_type)) {
2538         if (is_h264) {
2539             if (IS_INTRA16x16(mb_type)) {
2540                 if (transform_bypass) {
2541                     if (h->sps.profile_idc == 244 &&
2542                         (h->intra16x16_pred_mode == VERT_PRED8x8 ||
2543                          h->intra16x16_pred_mode == HOR_PRED8x8)) {
2544                         h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset,
2545                                                                       h->mb + (p * 256 << pixel_shift),
2546                                                                       linesize);
2547                     } else {
2548                         for (i = 0; i < 16; i++)
2549                             if (h->non_zero_count_cache[scan8[i + p * 16]] ||
2550                                 dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
2551                                 h->h264dsp.h264_add_pixels4_clear(dest_y + block_offset[i],
2552                                                                   h->mb + (i * 16 + p * 256 << pixel_shift),
2553                                                                   linesize);
2554                     }
2555                 } else {
2556                     h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
2557                                                     h->mb + (p * 256 << pixel_shift),
2558                                                     linesize,
2559                                                     h->non_zero_count_cache + p * 5 * 8);
2560                 }
2561             } else if (h->cbp & 15) {
2562                 if (transform_bypass) {
2563                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2564                     idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.h264_add_pixels8_clear
2565                                                   : h->h264dsp.h264_add_pixels4_clear;
2566                     for (i = 0; i < 16; i += di)
2567                         if (h->non_zero_count_cache[scan8[i + p * 16]])
2568                             idct_add(dest_y + block_offset[i],
2569                                      h->mb + (i * 16 + p * 256 << pixel_shift),
2570                                      linesize);
2571                 } else {
2572                     if (IS_8x8DCT(mb_type))
2573                         h->h264dsp.h264_idct8_add4(dest_y, block_offset,
2574                                                    h->mb + (p * 256 << pixel_shift),
2575                                                    linesize,
2576                                                    h->non_zero_count_cache + p * 5 * 8);
2577                     else
2578                         h->h264dsp.h264_idct_add16(dest_y, block_offset,
2579                                                    h->mb + (p * 256 << pixel_shift),
2580                                                    linesize,
2581                                                    h->non_zero_count_cache + p * 5 * 8);
2582                 }
2583             }
2584         } else if (CONFIG_SVQ3_DECODER) {
2585             for (i = 0; i < 16; i++)
2586                 if (h->non_zero_count_cache[scan8[i + p * 16]] || h->mb[i * 16 + p * 256]) {
2587                     // FIXME benchmark weird rule, & below
2588                     uint8_t *const ptr = dest_y + block_offset[i];
2589                     ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize,
2590                                        h->qscale, IS_INTRA(mb_type) ? 1 : 0);
2591                 }
2592         }
2593     }
2594 }
2595
/* The macroblock decoding template is included three times, with
 * (BITS, SIMPLE) = (8, 1), (16, 1) and (16, 0).
 * NOTE(review): the template itself is not visible here; presumably these
 * inclusions generate the hl_decode_mb_*simple_8 / _simple_16 / _complex
 * functions called by ff_h264_hl_decode_mb() -- confirm in
 * h264_mb_template.c. */
#define BITS   8
#define SIMPLE 1
#include "h264_mb_template.c"

#undef  BITS
#define BITS   16
#include "h264_mb_template.c"

#undef  SIMPLE
#define SIMPLE 0
#include "h264_mb_template.c"
2607
2608 void ff_h264_hl_decode_mb(H264Context *h)
2609 {
2610     const int mb_xy   = h->mb_xy;
2611     const int mb_type = h->cur_pic.mb_type[mb_xy];
2612     int is_complex    = CONFIG_SMALL || h->is_complex ||
2613                         IS_INTRA_PCM(mb_type) || h->qscale == 0;
2614
2615     if (CHROMA444(h)) {
2616         if (is_complex || h->pixel_shift)
2617             hl_decode_mb_444_complex(h);
2618         else
2619             hl_decode_mb_444_simple_8(h);
2620     } else if (is_complex) {
2621         hl_decode_mb_complex(h);
2622     } else if (h->pixel_shift) {
2623         hl_decode_mb_simple_16(h);
2624     } else
2625         hl_decode_mb_simple_8(h);
2626 }
2627
2628 int ff_pred_weight_table(H264Context *h)
2629 {
2630     int list, i;
2631     int luma_def, chroma_def;
2632
2633     h->use_weight             = 0;
2634     h->use_weight_chroma      = 0;
2635     h->luma_log2_weight_denom = get_ue_golomb(&h->gb);
2636     if (h->sps.chroma_format_idc)
2637         h->chroma_log2_weight_denom = get_ue_golomb(&h->gb);
2638     luma_def   = 1 << h->luma_log2_weight_denom;
2639     chroma_def = 1 << h->chroma_log2_weight_denom;
2640
2641     for (list = 0; list < 2; list++) {
2642         h->luma_weight_flag[list]   = 0;
2643         h->chroma_weight_flag[list] = 0;
2644         for (i = 0; i < h->ref_count[list]; i++) {
2645             int luma_weight_flag, chroma_weight_flag;
2646
2647             luma_weight_flag = get_bits1(&h->gb);
2648             if (luma_weight_flag) {
2649                 h->luma_weight[i][list][0] = get_se_golomb(&h->gb);
2650                 h->luma_weight[i][list][1] = get_se_golomb(&h->gb);
2651                 if (h->luma_weight[i][list][0] != luma_def ||
2652                     h->luma_weight[i][list][1] != 0) {
2653                     h->use_weight             = 1;
2654                     h->luma_weight_flag[list] = 1;
2655                 }
2656             } else {
2657                 h->luma_weight[i][list][0] = luma_def;
2658                 h->luma_weight[i][list][1] = 0;
2659             }
2660
2661             if (h->sps.chroma_format_idc) {
2662                 chroma_weight_flag = get_bits1(&h->gb);
2663                 if (chroma_weight_flag) {
2664                     int j;
2665                     for (j = 0; j < 2; j++) {
2666                         h->chroma_weight[i][list][j][0] = get_se_golomb(&h->gb);
2667                         h->chroma_weight[i][list][j][1] = get_se_golomb(&h->gb);
2668                         if (h->chroma_weight[i][list][j][0] != chroma_def ||
2669                             h->chroma_weight[i][list][j][1] != 0) {
2670                             h->use_weight_chroma        = 1;
2671                             h->chroma_weight_flag[list] = 1;
2672                         }
2673                     }
2674                 } else {
2675                     int j;
2676                     for (j = 0; j < 2; j++) {
2677                         h->chroma_weight[i][list][j][0] = chroma_def;
2678                         h->chroma_weight[i][list][j][1] = 0;
2679                     }
2680                 }
2681             }
2682         }
2683         if (h->slice_type_nos != AV_PICTURE_TYPE_B)
2684             break;
2685     }
2686     h->use_weight = h->use_weight || h->use_weight_chroma;
2687     return 0;
2688 }
2689
2690 /**
2691  * Initialize implicit_weight table.
2692  * @param field  0/1 initialize the weight for interlaced MBAFF
2693  *                -1 initializes the rest
2694  */
2695 static void implicit_weight_table(H264Context *h, int field)
2696 {
2697     int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;
2698
2699     for (i = 0; i < 2; i++) {
2700         h->luma_weight_flag[i]   = 0;
2701         h->chroma_weight_flag[i] = 0;
2702     }
2703
2704     if (field < 0) {
2705         if (h->picture_structure == PICT_FRAME) {
2706             cur_poc = h->cur_pic_ptr->poc;
2707         } else {
2708             cur_poc = h->cur_pic_ptr->field_poc[h->picture_structure - 1];
2709         }
2710         if (h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF(h) &&
2711             h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2 * cur_poc) {
2712             h->use_weight        = 0;
2713             h->use_weight_chroma = 0;
2714             return;
2715         }
2716         ref_start  = 0;
2717         ref_count0 = h->ref_count[0];
2718         ref_count1 = h->ref_count[1];
2719     } else {
2720         cur_poc    = h->cur_pic_ptr->field_poc[field];
2721         ref_start  = 16;
2722         ref_count0 = 16 + 2 * h->ref_count[0];
2723         ref_count1 = 16 + 2 * h->ref_count[1];
2724     }
2725
2726     h->use_weight               = 2;
2727     h->use_weight_chroma        = 2;
2728     h->luma_log2_weight_denom   = 5;
2729     h->chroma_log2_weight_denom = 5;
2730
2731     for (ref0 = ref_start; ref0 < ref_count0; ref0++) {
2732         int poc0 = h->ref_list[0][ref0].poc;
2733         for (ref1 = ref_start; ref1 < ref_count1; ref1++) {
2734             int w = 32;
2735             if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
2736                 int poc1 = h->ref_list[1][ref1].poc;
2737                 int td   = av_clip(poc1 - poc0, -128, 127);
2738                 if (td) {
2739                     int tb = av_clip(cur_poc - poc0, -128, 127);
2740                     int tx = (16384 + (FFABS(td) >> 1)) / td;
2741                     int dist_scale_factor = (tb * tx + 32) >> 8;
2742                     if (dist_scale_factor >= -64 && dist_scale_factor <= 128)
2743                         w = 64 - dist_scale_factor;
2744                 }
2745             }
2746             if (field < 0) {
2747                 h->implicit_weight[ref0][ref1][0] =
2748                 h->implicit_weight[ref0][ref1][1] = w;
2749             } else {
2750                 h->implicit_weight[ref0][ref1][field] = w;
2751             }
2752         }
2753     }
2754 }
2755
2756 /**
2757  * instantaneous decoder refresh.
2758  */
2759 static void idr(H264Context *h)
2760 {
2761     ff_h264_remove_all_refs(h);
2762     h->prev_frame_num        = 0;
2763     h->prev_frame_num_offset = 0;
2764     h->prev_poc_msb          =
2765     h->prev_poc_lsb          = 0;
2766 }
2767
2768 /* forget old pics after a seek */
2769 static void flush_change(H264Context *h)
2770 {
2771     int i;
2772     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
2773         h->last_pocs[i] = INT_MIN;
2774     h->outputed_poc          = h->next_outputed_poc = INT_MIN;
2775     h->prev_interlaced_frame = 1;
2776     idr(h);
2777     if (h->cur_pic_ptr)
2778         h->cur_pic_ptr->reference = 0;
2779     h->first_field = 0;
2780     memset(h->ref_list[0], 0, sizeof(h->ref_list[0]));
2781     memset(h->ref_list[1], 0, sizeof(h->ref_list[1]));
2782     memset(h->default_ref_list[0], 0, sizeof(h->default_ref_list[0]));
2783     memset(h->default_ref_list[1], 0, sizeof(h->default_ref_list[1]));
2784     ff_h264_reset_sei(h);
2785     h->recovery_frame = -1;
2786     h->frame_recovered = 0;
2787 }
2788
2789 /* forget old pics after a seek */
2790 static void flush_dpb(AVCodecContext *avctx)
2791 {
2792     H264Context *h = avctx->priv_data;
2793     int i;
2794
2795     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
2796         if (h->delayed_pic[i])
2797             h->delayed_pic[i]->reference = 0;
2798         h->delayed_pic[i] = NULL;
2799     }
2800
2801     flush_change(h);
2802
2803     if (h->DPB)
2804         for (i = 0; i < MAX_PICTURE_COUNT; i++)
2805             unref_picture(h, &h->DPB[i]);
2806     h->cur_pic_ptr = NULL;
2807     unref_picture(h, &h->cur_pic);
2808
2809     h->mb_x = h->mb_y = 0;
2810
2811     h->parse_context.state             = -1;
2812     h->parse_context.frame_start_found = 0;
2813     h->parse_context.overread          = 0;
2814     h->parse_context.overread_index    = 0;
2815     h->parse_context.index             = 0;
2816     h->parse_context.last_index        = 0;
2817
2818     free_tables(h, 1);
2819     h->context_initialized = 0;
2820 }
2821
2822 int ff_init_poc(H264Context *h, int pic_field_poc[2], int *pic_poc)
2823 {
2824     const int max_frame_num = 1 << h->sps.log2_max_frame_num;
2825     int field_poc[2];
2826
2827     h->frame_num_offset = h->prev_frame_num_offset;
2828     if (h->frame_num < h->prev_frame_num)
2829         h->frame_num_offset += max_frame_num;
2830
2831     if (h->sps.poc_type == 0) {
2832         const int max_poc_lsb = 1 << h->sps.log2_max_poc_lsb;
2833
2834         if (h->poc_lsb < h->prev_poc_lsb &&
2835             h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb / 2)
2836             h->poc_msb = h->prev_poc_msb + max_poc_lsb;
2837         else if (h->poc_lsb > h->prev_poc_lsb &&
2838                  h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb / 2)
2839             h->poc_msb = h->prev_poc_msb - max_poc_lsb;
2840         else
2841             h->poc_msb = h->prev_poc_msb;
2842         field_poc[0] =
2843         field_poc[1] = h->poc_msb + h->poc_lsb;
2844         if (h->picture_structure == PICT_FRAME)
2845             field_poc[1] += h->delta_poc_bottom;
2846     } else if (h->sps.poc_type == 1) {
2847         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
2848         int i;
2849
2850         if (h->sps.poc_cycle_length != 0)
2851             abs_frame_num = h->frame_num_offset + h->frame_num;
2852         else
2853             abs_frame_num = 0;
2854
2855         if (h->nal_ref_idc == 0 && abs_frame_num > 0)
2856             abs_frame_num--;
2857
2858         expected_delta_per_poc_cycle = 0;
2859         for (i = 0; i < h->sps.poc_cycle_length; i++)
2860             // FIXME integrate during sps parse
2861             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[i];
2862
2863         if (abs_frame_num > 0) {
2864             int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
2865             int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
2866
2867             expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
2868             for (i = 0; i <= frame_num_in_poc_cycle; i++)
2869                 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[i];
2870         } else
2871             expectedpoc = 0;
2872
2873         if (h->nal_ref_idc == 0)
2874             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
2875
2876         field_poc[0] = expectedpoc + h->delta_poc[0];
2877         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
2878
2879         if (h->picture_structure == PICT_FRAME)
2880             field_poc[1] += h->delta_poc[1];
2881     } else {
2882         int poc = 2 * (h->frame_num_offset + h->frame_num);
2883
2884         if (!h->nal_ref_idc)
2885             poc--;
2886
2887         field_poc[0] = poc;
2888         field_poc[1] = poc;
2889     }
2890
2891     if (h->picture_structure != PICT_BOTTOM_FIELD)
2892         pic_field_poc[0] = field_poc[0];
2893     if (h->picture_structure != PICT_TOP_FIELD)
2894         pic_field_poc[1] = field_poc[1];
2895     *pic_poc = FFMIN(pic_field_poc[0], pic_field_poc[1]);
2896
2897     return 0;
2898 }
2899
2900 /**
2901  * initialize scan tables
2902  */
2903 static void init_scan_tables(H264Context *h)
2904 {
2905     int i;
2906     for (i = 0; i < 16; i++) {
2907 #define TRANSPOSE(x) (x >> 2) | ((x << 2) & 0xF)
2908         h->zigzag_scan[i] = TRANSPOSE(zigzag_scan[i]);
2909         h->field_scan[i]  = TRANSPOSE(field_scan[i]);
2910 #undef TRANSPOSE
2911     }
2912     for (i = 0; i < 64; i++) {
2913 #define TRANSPOSE(x) (x >> 3) | ((x & 7) << 3)
2914         h->zigzag_scan8x8[i]       = TRANSPOSE(ff_zigzag_direct[i]);
2915         h->zigzag_scan8x8_cavlc[i] = TRANSPOSE(zigzag_scan8x8_cavlc[i]);
2916         h->field_scan8x8[i]        = TRANSPOSE(field_scan8x8[i]);
2917         h->field_scan8x8_cavlc[i]  = TRANSPOSE(field_scan8x8_cavlc[i]);
2918 #undef TRANSPOSE
2919     }
2920     if (h->sps.transform_bypass) { // FIXME same ugly
2921         h->zigzag_scan_q0          = zigzag_scan;
2922         h->zigzag_scan8x8_q0       = ff_zigzag_direct;
2923         h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
2924         h->field_scan_q0           = field_scan;
2925         h->field_scan8x8_q0        = field_scan8x8;
2926         h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
2927     } else {
2928         h->zigzag_scan_q0          = h->zigzag_scan;
2929         h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
2930         h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
2931         h->field_scan_q0           = h->field_scan;
2932         h->field_scan8x8_q0        = h->field_scan8x8;
2933         h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
2934     }
2935 }
2936
#if CONFIG_ERROR_RESILIENCE
/**
 * Point an error-resilience picture at the buffers of an H.264 picture.
 * Only pointers and the field_picture flag are copied; dst is left
 * untouched when src is NULL.
 */
static void h264_set_erpic(ERPicture *dst, H264Picture *src)
{
    if (!src)
        return;

    dst->f  = &src->f;
    dst->tf = &src->tf;

    dst->motion_val[0] = src->motion_val[0];
    dst->ref_index[0]  = src->ref_index[0];
    dst->motion_val[1] = src->motion_val[1];
    dst->ref_index[1]  = src->ref_index[1];

    dst->mb_type       = src->mb_type;
    dst->field_picture = src->field_picture;
}
#endif /* CONFIG_ERROR_RESILIENCE */
2957
2958 static int field_end(H264Context *h, int in_setup)
2959 {
2960     AVCodecContext *const avctx = h->avctx;
2961     int err = 0;
2962     h->mb_y = 0;
2963
2964     if (!in_setup && !h->droppable)
2965         ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
2966                                   h->picture_structure == PICT_BOTTOM_FIELD);
2967
2968     if (in_setup || !(avctx->active_thread_type & FF_THREAD_FRAME)) {
2969         if (!h->droppable) {
2970             err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
2971             h->prev_poc_msb = h->poc_msb;
2972             h->prev_poc_lsb = h->poc_lsb;
2973         }
2974         h->prev_frame_num_offset = h->frame_num_offset;
2975         h->prev_frame_num        = h->frame_num;
2976         h->outputed_poc          = h->next_outputed_poc;
2977     }
2978
2979     if (avctx->hwaccel) {
2980         if (avctx->hwaccel->end_frame(avctx) < 0)
2981             av_log(avctx, AV_LOG_ERROR,
2982                    "hardware accelerator failed to decode picture\n");
2983     }
2984
2985     /*
2986      * FIXME: Error handling code does not seem to support interlaced
2987      * when slices span multiple rows
2988      * The ff_er_add_slice calls don't work right for bottom
2989      * fields; they cause massive erroneous error concealing
2990      * Error marking covers both fields (top and bottom).
2991      * This causes a mismatched s->error_count
2992      * and a bad error table. Further, the error count goes to
2993      * INT_MAX when called for bottom field, because mb_y is
2994      * past end by one (callers fault) and resync_mb_y != 0
2995      * causes problems for the first MB line, too.
2996      */
2997     if (CONFIG_ERROR_RESILIENCE && !FIELD_PICTURE(h)) {
2998         h264_set_erpic(&h->er.cur_pic, h->cur_pic_ptr);
2999         h264_set_erpic(&h->er.last_pic,
3000                        h->ref_count[0] ? &h->ref_list[0][0] : NULL);
3001         h264_set_erpic(&h->er.next_pic,
3002                        h->ref_count[1] ? &h->ref_list[1][0] : NULL);
3003         ff_er_frame_end(&h->er);
3004     }
3005     emms_c();
3006
3007     h->current_slice = 0;
3008
3009     return err;
3010 }
3011
3012 /**
3013  * Replicate H264 "master" context to thread contexts.
3014  */
3015 static int clone_slice(H264Context *dst, H264Context *src)
3016 {
3017     memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3018     dst->cur_pic_ptr = src->cur_pic_ptr;
3019     dst->cur_pic     = src->cur_pic;
3020     dst->linesize    = src->linesize;
3021     dst->uvlinesize  = src->uvlinesize;
3022     dst->first_field = src->first_field;
3023
3024     dst->prev_poc_msb          = src->prev_poc_msb;
3025     dst->prev_poc_lsb          = src->prev_poc_lsb;
3026     dst->prev_frame_num_offset = src->prev_frame_num_offset;
3027     dst->prev_frame_num        = src->prev_frame_num;
3028     dst->short_ref_count       = src->short_ref_count;
3029
3030     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
3031     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
3032     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3033
3034     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
3035     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
3036
3037     return 0;
3038 }
3039
3040 /**
3041  * Compute profile from profile_idc and constraint_set?_flags.
3042  *
3043  * @param sps SPS
3044  *
3045  * @return profile as defined by FF_PROFILE_H264_*
3046  */
3047 int ff_h264_get_profile(SPS *sps)
3048 {
3049     int profile = sps->profile_idc;
3050
3051     switch (sps->profile_idc) {
3052     case FF_PROFILE_H264_BASELINE:
3053         // constraint_set1_flag set to 1
3054         profile |= (sps->constraint_set_flags & 1 << 1) ? FF_PROFILE_H264_CONSTRAINED : 0;
3055         break;
3056     case FF_PROFILE_H264_HIGH_10:
3057     case FF_PROFILE_H264_HIGH_422:
3058     case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
3059         // constraint_set3_flag set to 1
3060         profile |= (sps->constraint_set_flags & 1 << 3) ? FF_PROFILE_H264_INTRA : 0;
3061         break;
3062     }
3063
3064     return profile;
3065 }
3066
3067 static int h264_set_parameter_from_sps(H264Context *h)
3068 {
3069     if (h->flags & CODEC_FLAG_LOW_DELAY ||
3070         (h->sps.bitstream_restriction_flag &&
3071          !h->sps.num_reorder_frames)) {
3072         if (h->avctx->has_b_frames > 1 || h->delayed_pic[0])
3073             av_log(h->avctx, AV_LOG_WARNING, "Delayed frames seen. "
3074                    "Reenabling low delay requires a codec flush.\n");
3075         else
3076             h->low_delay = 1;
3077     }
3078
3079     if (h->avctx->has_b_frames < 2)
3080         h->avctx->has_b_frames = !h->low_delay;
3081
3082     if (h->avctx->bits_per_raw_sample != h->sps.bit_depth_luma ||
3083         h->cur_chroma_format_idc      != h->sps.chroma_format_idc) {
3084         if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
3085             h->avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
3086             h->cur_chroma_format_idc      = h->sps.chroma_format_idc;
3087             h->pixel_shift                = h->sps.bit_depth_luma > 8;
3088
3089             ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma,
3090                             h->sps.chroma_format_idc);
3091             ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma);
3092             ff_h264qpel_init(&h->h264qpel, h->sps.bit_depth_luma);
3093             ff_h264_pred_init(&h->hpc, h->avctx->codec_id, h->sps.bit_depth_luma,
3094                               h->sps.chroma_format_idc);
3095             if (CONFIG_ERROR_RESILIENCE)
3096                 ff_dsputil_init(&h->dsp, h->avctx);
3097             ff_videodsp_init(&h->vdsp, h->sps.bit_depth_luma);
3098         } else {
3099             av_log(h->avctx, AV_LOG_ERROR, "Unsupported bit depth %d\n",
3100                    h->sps.bit_depth_luma);
3101             return AVERROR_INVALIDDATA;
3102         }
3103     }
3104     return 0;
3105 }
3106
3107 static enum AVPixelFormat get_pixel_format(H264Context *h)
3108 {
3109     switch (h->sps.bit_depth_luma) {
3110     case 9:
3111         if (CHROMA444(h)) {
3112             if (h->avctx->colorspace == AVCOL_SPC_RGB) {
3113                 return AV_PIX_FMT_GBRP9;
3114             } else
3115                 return AV_PIX_FMT_YUV444P9;
3116         } else if (CHROMA422(h))
3117             return AV_PIX_FMT_YUV422P9;
3118         else
3119             return AV_PIX_FMT_YUV420P9;
3120         break;
3121     case 10:
3122         if (CHROMA444(h)) {
3123             if (h->avctx->colorspace == AVCOL_SPC_RGB) {
3124                 return AV_PIX_FMT_GBRP10;
3125             } else
3126                 return AV_PIX_FMT_YUV444P10;
3127         } else if (CHROMA422(h))
3128             return AV_PIX_FMT_YUV422P10;
3129         else
3130             return AV_PIX_FMT_YUV420P10;
3131         break;
3132     case 8:
3133         if (CHROMA444(h)) {
3134             if (h->avctx->colorspace == AVCOL_SPC_RGB) {
3135                 return AV_PIX_FMT_GBRP;
3136             } else
3137                 return h->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ444P
3138                                                                  : AV_PIX_FMT_YUV444P;
3139         } else if (CHROMA422(h)) {
3140             return h->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ422P
3141                                                              : AV_PIX_FMT_YUV422P;
3142         } else {
3143             return h->avctx->get_format(h->avctx, h->avctx->codec->pix_fmts ?
3144                                         h->avctx->codec->pix_fmts :
3145                                         h->avctx->color_range == AVCOL_RANGE_JPEG ?
3146                                         h264_hwaccel_pixfmt_list_jpeg_420 :
3147                                         h264_hwaccel_pixfmt_list_420);
3148         }
3149         break;
3150     default:
3151         av_log(h->avctx, AV_LOG_ERROR,
3152                "Unsupported bit depth %d\n", h->sps.bit_depth_luma);
3153         return AVERROR_INVALIDDATA;
3154     }
3155 }
3156
3157 /* export coded and cropped frame dimensions to AVCodecContext */
3158 static int init_dimensions(H264Context *h)
3159 {
3160     int width  = h->width  - (h->sps.crop_right + h->sps.crop_left);
3161     int height = h->height - (h->sps.crop_top   + h->sps.crop_bottom);
3162
3163     /* handle container cropping */
3164     if (!h->sps.crop &&
3165         FFALIGN(h->avctx->width,  16) == h->width &&
3166         FFALIGN(h->avctx->height, 16) == h->height) {
3167         width  = h->avctx->width;
3168         height = h->avctx->height;
3169     }
3170
3171     if (width <= 0 || height <= 0) {
3172         av_log(h->avctx, AV_LOG_ERROR, "Invalid cropped dimensions: %dx%d.\n",
3173                width, height);
3174         if (h->avctx->err_recognition & AV_EF_EXPLODE)
3175             return AVERROR_INVALIDDATA;
3176
3177         av_log(h->avctx, AV_LOG_WARNING, "Ignoring cropping information.\n");
3178         h->sps.crop_bottom = h->sps.crop_top = h->sps.crop_right = h->sps.crop_left = 0;
3179         h->sps.crop        = 0;
3180
3181         width  = h->width;
3182         height = h->height;
3183     }
3184
3185     h->avctx->coded_width  = h->width;
3186     h->avctx->coded_height = h->height;
3187     h->avctx->width        = width;
3188     h->avctx->height       = height;
3189
3190     return 0;
3191 }
3192
3193 static int h264_slice_header_init(H264Context *h, int reinit)
3194 {
3195     int nb_slices = (HAVE_THREADS &&
3196                      h->avctx->active_thread_type & FF_THREAD_SLICE) ?
3197                     h->avctx->thread_count : 1;
3198     int i, ret;
3199
3200     h->avctx->sample_aspect_ratio = h->sps.sar;
3201     av_assert0(h->avctx->sample_aspect_ratio.den);
3202     av_pix_fmt_get_chroma_sub_sample(h->avctx->pix_fmt,
3203                                      &h->chroma_x_shift, &h->chroma_y_shift);
3204
3205     if (h->sps.timing_info_present_flag) {
3206         int64_t den = h->sps.time_scale;
3207         if (h->x264_build < 44U)
3208             den *= 2;
3209         av_reduce(&h->avctx->time_base.num, &h->avctx->time_base.den,
3210                   h->sps.num_units_in_tick, den, 1 << 30);
3211     }
3212
3213     h->avctx->hwaccel = ff_find_hwaccel(h->avctx);
3214
3215     if (reinit)
3216         free_tables(h, 0);
3217     h->first_field           = 0;
3218     h->prev_interlaced_frame = 1;
3219
3220     init_scan_tables(h);
3221     ret = ff_h264_alloc_tables(h);
3222     if (ret < 0) {
3223         av_log(h->avctx, AV_LOG_ERROR, "Could not allocate memory\n");
3224         return ret;
3225     }
3226
3227     if (nb_slices > MAX_THREADS || (nb_slices > h->mb_height && h->mb_height)) {
3228         int max_slices;
3229         if (h->mb_height)
3230             max_slices = FFMIN(MAX_THREADS, h->mb_height);
3231         else
3232             max_slices = MAX_THREADS;
3233         av_log(h->avctx, AV_LOG_WARNING, "too many threads/slices %d,"
3234                " reducing to %d\n", nb_slices, max_slices);
3235         nb_slices = max_slices;
3236     }
3237     h->slice_context_count = nb_slices;
3238
3239     if (!HAVE_THREADS || !(h->avctx->active_thread_type & FF_THREAD_SLICE)) {
3240         ret = context_init(h);
3241         if (ret < 0) {
3242             av_log(h->avctx, AV_LOG_ERROR, "context_init() failed.\n");
3243             return ret;
3244         }
3245     } else {
3246         for (i = 1; i < h->slice_context_count; i++) {
3247             H264Context *c;
3248             c                    = h->thread_context[i] = av_mallocz(sizeof(H264Context));
3249             if (!c)
3250                 return AVERROR(ENOMEM);
3251             c->avctx             = h->avctx;
3252             c->dsp               = h->dsp;
3253             c->vdsp              = h->vdsp;
3254             c->h264dsp           = h->h264dsp;
3255             c->h264qpel          = h->h264qpel;
3256             c->h264chroma        = h->h264chroma;
3257             c->sps               = h->sps;
3258             c->pps               = h->pps;
3259             c->pixel_shift       = h->pixel_shift;
3260             c->width             = h->width;
3261             c->height            = h->height;
3262             c->linesize          = h->linesize;
3263             c->uvlinesize        = h->uvlinesize;
3264             c->chroma_x_shift    = h->chroma_x_shift;
3265             c->chroma_y_shift    = h->chroma_y_shift;
3266             c->qscale            = h->qscale;
3267             c->droppable         = h->droppable;
3268             c->data_partitioning = h->data_partitioning;
3269             c->low_delay         = h->low_delay;
3270             c->mb_width          = h->mb_width;
3271             c->mb_height         = h->mb_height;
3272             c->mb_stride         = h->mb_stride;
3273             c->mb_num            = h->mb_num;
3274             c->flags             = h->flags;
3275             c->workaround_bugs   = h->workaround_bugs;
3276             c->pict_type         = h->pict_type;
3277
3278             init_scan_tables(c);
3279             clone_tables(c, h, i);
3280             c->context_initialized = 1;
3281         }
3282
3283         for (i = 0; i < h->slice_context_count; i++)
3284             if ((ret = context_init(h->thread_context[i])) < 0) {
3285                 av_log(h->avctx, AV_LOG_ERROR, "context_init() failed.\n");
3286                 return ret;
3287             }
3288     }
3289
3290     h->context_initialized = 1;
3291
3292     return 0;
3293 }
3294
3295 int ff_set_ref_count(H264Context *h)
3296 {
3297     int ref_count[2], list_count;
3298     int num_ref_idx_active_override_flag, max_refs;
3299
3300     // set defaults, might be overridden a few lines later
3301     ref_count[0] = h->pps.ref_count[0];
3302     ref_count[1] = h->pps.ref_count[1];
3303
3304     if (h->slice_type_nos != AV_PICTURE_TYPE_I) {
3305         if (h->slice_type_nos == AV_PICTURE_TYPE_B)
3306             h->direct_spatial_mv_pred = get_bits1(&h->gb);
3307         num_ref_idx_active_override_flag = get_bits1(&h->gb);
3308
3309         if (num_ref_idx_active_override_flag) {
3310             ref_count[0] = get_ue_golomb(&h->gb) + 1;
3311             if (ref_count[0] < 1)
3312                 return AVERROR_INVALIDDATA;
3313             if (h->slice_type_nos == AV_PICTURE_TYPE_B) {
3314                 ref_count[1] = get_ue_golomb(&h->gb) + 1;
3315                 if (ref_count[1] < 1)
3316                     return AVERROR_INVALIDDATA;
3317             }
3318         }
3319
3320         if (h->slice_type_nos == AV_PICTURE_TYPE_B)
3321             list_count = 2;
3322         else
3323             list_count = 1;
3324     } else {
3325         list_count   = 0;
3326         ref_count[0] = ref_count[1] = 0;
3327     }
3328
3329     max_refs = h->picture_structure == PICT_FRAME ? 16 : 32;
3330
3331     if (ref_count[0] > max_refs || ref_count[1] > max_refs) {
3332         av_log(h->avctx, AV_LOG_ERROR, "reference overflow\n");
3333         h->ref_count[0] = h->ref_count[1] = 0;
3334         return AVERROR_INVALIDDATA;
3335     }
3336
3337     if (list_count != h->list_count ||
3338         ref_count[0] != h->ref_count[0] ||
3339         ref_count[1] != h->ref_count[1]) {
3340         h->ref_count[0] = ref_count[0];
3341         h->ref_count[1] = ref_count[1];
3342         h->list_count   = list_count;
3343         return 1;
3344     }
3345
3346     return 0;
3347 }
3348
3349 /**
3350  * Decode a slice header.
3351  * This will (re)initialize the decoder and call h264_frame_start() as needed.
3352  *
3353  * @param h h264context
3354  * @param h0 h264 master context (differs from 'h' when doing slice-based
3355  *           parallel decoding)
3356  *
3357  * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3358  */
3359 static int decode_slice_header(H264Context *h, H264Context *h0)
3360 {
3361     unsigned int first_mb_in_slice;
3362     unsigned int pps_id;
3363     int ret;
3364     unsigned int slice_type, tmp, i, j;
3365     int default_ref_list_done = 0;
3366     int last_pic_structure, last_pic_droppable;
3367     int needs_reinit = 0;
3368     int field_pic_flag, bottom_field_flag;
3369
3370     h->me.qpel_put = h->h264qpel.put_h264_qpel_pixels_tab;
3371     h->me.qpel_avg = h->h264qpel.avg_h264_qpel_pixels_tab;
3372
3373     first_mb_in_slice = get_ue_golomb(&h->gb);
3374
3375     if (first_mb_in_slice == 0) { // FIXME better field boundary detection
3376         if (h0->current_slice && h->cur_pic_ptr && FIELD_PICTURE(h)) {
3377             field_end(h, 1);
3378         }
3379
3380         h0->current_slice = 0;
3381         if (!h0->first_field) {
3382             if (h->cur_pic_ptr && !h->droppable) {
3383                 ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
3384                                           h->picture_structure == PICT_BOTTOM_FIELD);
3385             }
3386             h->cur_pic_ptr = NULL;
3387         }
3388     }
3389
3390     slice_type = get_ue_golomb_31(&h->gb);
3391     if (slice_type > 9) {
3392         av_log(h->avctx, AV_LOG_ERROR,
3393                "slice type %d too large at %d %d\n",
3394                slice_type, h->mb_x, h->mb_y);
3395         return AVERROR_INVALIDDATA;
3396     }
3397     if (slice_type > 4) {
3398         slice_type -= 5;
3399         h->slice_type_fixed = 1;
3400     } else
3401         h->slice_type_fixed = 0;
3402
3403     slice_type = golomb_to_pict_type[slice_type];
3404     if (slice_type == AV_PICTURE_TYPE_I ||
3405         (h0->current_slice != 0 && slice_type == h0->last_slice_type)) {
3406         default_ref_list_done = 1;
3407     }
3408     h->slice_type     = slice_type;
3409     h->slice_type_nos = slice_type & 3;
3410
3411     if (h->nal_unit_type  == NAL_IDR_SLICE &&
3412         h->slice_type_nos != AV_PICTURE_TYPE_I) {
3413         av_log(h->avctx, AV_LOG_ERROR, "A non-intra slice in an IDR NAL unit.\n");
3414         return AVERROR_INVALIDDATA;
3415     }
3416
3417     // to make a few old functions happy, it's wrong though
3418     h->pict_type = h->slice_type;
3419
3420     pps_id = get_ue_golomb(&h->gb);
3421     if (pps_id >= MAX_PPS_COUNT) {
3422         av_log(h->avctx, AV_LOG_ERROR, "pps_id %u out of range\n", pps_id);
3423         return AVERROR_INVALIDDATA;
3424     }
3425     if (!h0->pps_buffers[pps_id]) {
3426         av_log(h->avctx, AV_LOG_ERROR,
3427                "non-existing PPS %u referenced\n",
3428                pps_id);
3429         return AVERROR_INVALIDDATA;
3430     }
3431     h->pps = *h0->pps_buffers[pps_id];
3432
3433     if (!h0->sps_buffers[h->pps.sps_id]) {
3434         av_log(h->avctx, AV_LOG_ERROR,
3435                "non-existing SPS %u referenced\n",
3436                h->pps.sps_id);
3437         return AVERROR_INVALIDDATA;
3438     }
3439
3440     if (h->pps.sps_id != h->sps.sps_id ||
3441         h0->sps_buffers[h->pps.sps_id]->new) {
3442         h0->sps_buffers[h->pps.sps_id]->new = 0;
3443
3444         h->sps = *h0->sps_buffers[h->pps.sps_id];
3445
3446         if (h->bit_depth_luma    != h->sps.bit_depth_luma ||
3447             h->chroma_format_idc != h->sps.chroma_format_idc) {
3448             h->bit_depth_luma    = h->sps.bit_depth_luma;
3449             h->chroma_format_idc = h->sps.chroma_format_idc;
3450             needs_reinit         = 1;
3451         }
3452         if ((ret = h264_set_parameter_from_sps(h)) < 0)
3453             return ret;
3454     }
3455
3456     h->avctx->profile = ff_h264_get_profile(&h->sps);
3457     h->avctx->level   = h->sps.level_idc;
3458     h->avctx->refs    = h->sps.ref_frame_count;
3459
3460     if (h->mb_width  != h->sps.mb_width ||
3461         h->mb_height != h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag))
3462         needs_reinit = 1;
3463
3464     h->mb_width  = h->sps.mb_width;
3465     h->mb_height = h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3466     h->mb_num    = h->mb_width * h->mb_height;
3467     h->mb_stride = h->mb_width + 1;
3468
3469     h->b_stride = h->mb_width * 4;
3470
3471     h->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p
3472
3473     h->width  = 16 * h->mb_width;
3474     h->height = 16 * h->mb_height;
3475
3476     ret = init_dimensions(h);
3477     if (ret < 0)
3478         return ret;
3479
3480     if (h->sps.video_signal_type_present_flag) {
3481         h->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG
3482                                                   : AVCOL_RANGE_MPEG;
3483         if (h->sps.colour_description_present_flag) {
3484             if (h->avctx->colorspace != h->sps.colorspace)
3485                 needs_reinit = 1;
3486             h->avctx->color_primaries = h->sps.color_primaries;
3487             h->avctx->color_trc       = h->sps.color_trc;
3488             h->avctx->colorspace      = h->sps.colorspace;
3489         }
3490     }
3491
3492     if (h->context_initialized &&
3493         (h->width  != h->avctx->coded_width   ||
3494          h->height != h->avctx->coded_height  ||
3495          needs_reinit)) {
3496         if (h != h0) {
3497             av_log(h->avctx, AV_LOG_ERROR,
3498                    "changing width %d -> %d / height %d -> %d on "
3499                    "slice %d\n",
3500                    h->width, h->avctx->coded_width,
3501                    h->height, h->avctx->coded_height,
3502                    h0->current_slice + 1);
3503             return AVERROR_INVALIDDATA;
3504         }
3505
3506         flush_change(h);
3507
3508         if ((ret = get_pixel_format(h)) < 0)
3509             return ret;
3510         h->avctx->pix_fmt = ret;
3511
3512         av_log(h->avctx, AV_LOG_INFO, "Reinit context to %dx%d, "
3513                "pix_fmt: %d\n", h->width, h->height, h->avctx->pix_fmt);
3514
3515         if ((ret = h264_slice_header_init(h, 1)) < 0) {
3516             av_log(h->avctx, AV_LOG_ERROR,
3517                    "h264_slice_header_init() failed\n");
3518             return ret;
3519         }
3520     }
3521     if (!h->context_initialized) {
3522         if (h != h0) {
3523             av_log(h->avctx, AV_LOG_ERROR,
3524                    "Cannot (re-)initialize context during parallel decoding.\n");
3525             return AVERROR_PATCHWELCOME;
3526         }
3527
3528         if ((ret = get_pixel_format(h)) < 0)
3529             return ret;
3530         h->avctx->pix_fmt = ret;
3531
3532         if ((ret = h264_slice_header_init(h, 0)) < 0) {
3533             av_log(h->avctx, AV_LOG_ERROR,
3534                    "h264_slice_header_init() failed\n");
3535             return ret;
3536         }
3537     }
3538
3539     if (h == h0 && h->dequant_coeff_pps != pps_id) {
3540         h->dequant_coeff_pps = pps_id;
3541         init_dequant_tables(h);
3542     }
3543
3544     h->frame_num = get_bits(&h->gb, h->sps.log2_max_frame_num);
3545
3546     h->mb_mbaff        = 0;
3547     h->mb_aff_frame    = 0;
3548     last_pic_structure = h0->picture_structure;
3549     last_pic_droppable = h0->droppable;
3550     h->droppable       = h->nal_ref_idc == 0;
3551     if (h->sps.frame_mbs_only_flag) {
3552         h->picture_structure = PICT_FRAME;
3553     } else {
3554         field_pic_flag = get_bits1(&h->gb);
3555         if (field_pic_flag) {
3556             bottom_field_flag = get_bits1(&h->gb);
3557             h->picture_structure = PICT_TOP_FIELD + bottom_field_flag;
3558         } else {
3559             h->picture_structure = PICT_FRAME;
3560             h->mb_aff_frame      = h->sps.mb_aff;
3561         }
3562     }
3563     h->mb_field_decoding_flag = h->picture_structure != PICT_FRAME;
3564
3565     if (h0->current_slice != 0) {
3566         if (last_pic_structure != h->picture_structure ||
3567             last_pic_droppable != h->droppable) {
3568             av_log(h->avctx, AV_LOG_ERROR,
3569                    "Changing field mode (%d -> %d) between slices is not allowed\n",
3570                    last_pic_structure, h->picture_structure);
3571             h->picture_structure = last_pic_structure;
3572             h->droppable         = last_pic_droppable;
3573             return AVERROR_INVALIDDATA;
3574         } else if (!h0->cur_pic_ptr) {
3575             av_log(h->avctx, AV_LOG_ERROR,
3576                    "unset cur_pic_ptr on slice %d\n",
3577                    h0->current_slice + 1);
3578             return AVERROR_INVALIDDATA;
3579         }
3580     } else {
3581         /* Shorten frame num gaps so we don't have to allocate reference
3582          * frames just to throw them away */
3583         if (h->frame_num != h->prev_frame_num) {
3584             int unwrap_prev_frame_num = h->prev_frame_num;
3585             int max_frame_num         = 1 << h->sps.log2_max_frame_num;
3586
3587             if (unwrap_prev_frame_num > h->frame_num)
3588                 unwrap_prev_frame_num -= max_frame_num;
3589
3590             if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {
3591                 unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;
3592                 if (unwrap_prev_frame_num < 0)
3593                     unwrap_prev_frame_num += max_frame_num;
3594
3595                 h->prev_frame_num = unwrap_prev_frame_num;
3596             }
3597         }
3598
3599         /* See if we have a decoded first field looking for a pair...
3600          * Here, we're using that to see if we should mark previously
3601          * decode frames as "finished".
3602          * We have to do that before the "dummy" in-between frame allocation,
3603          * since that can modify s->current_picture_ptr. */
3604         if (h0->first_field) {
3605             assert(h0->cur_pic_ptr);
3606             assert(h0->cur_pic_ptr->f.buf[0]);
3607             assert(h0->cur_pic_ptr->reference != DELAYED_PIC_REF);
3608
3609             /* figure out if we have a complementary field pair */
3610             if (!FIELD_PICTURE(h) || h->picture_structure == last_pic_structure) {
3611                 /* Previous field is unmatched. Don't display it, but let it
3612                  * remain for reference if marked as such. */
3613                 if (!last_pic_droppable && last_pic_structure != PICT_FRAME) {
3614                     ff_thread_report_progress(&h0->cur_pic_ptr->tf, INT_MAX,
3615                                               last_pic_structure == PICT_TOP_FIELD);
3616                 }
3617             } else {
3618                 if (h0->cur_pic_ptr->frame_num != h->frame_num) {
3619                     /* This and previous field were reference, but had
3620                      * different frame_nums. Consider this field first in
3621                      * pair. Throw away previous field except for reference
3622                      * purposes. */
3623                     if (!last_pic_droppable && last_pic_structure != PICT_FRAME) {
3624                         ff_thread_report_progress(&h0->cur_pic_ptr->tf, INT_MAX,
3625                                                   last_pic_structure == PICT_TOP_FIELD);
3626                     }
3627                 } else {
3628                     /* Second field in complementary pair */
3629                     if (!((last_pic_structure   == PICT_TOP_FIELD &&
3630                            h->picture_structure == PICT_BOTTOM_FIELD) ||
3631                           (last_pic_structure   == PICT_BOTTOM_FIELD &&
3632                            h->picture_structure == PICT_TOP_FIELD))) {
3633                         av_log(h->avctx, AV_LOG_ERROR,
3634                                "Invalid field mode combination %d/%d\n",
3635                                last_pic_structure, h->picture_structure);
3636                         h->picture_structure = last_pic_structure;
3637                         h->droppable         = last_pic_droppable;
3638                         return AVERROR_INVALIDDATA;
3639                     } else if (last_pic_droppable != h->droppable) {
3640                         avpriv_request_sample(h->avctx,
3641                                               "Found reference and non-reference fields in the same frame, which");
3642                         h->picture_structure = last_pic_structure;
3643                         h->droppable         = last_pic_droppable;
3644                         return AVERROR_PATCHWELCOME;
3645                     }
3646                 }
3647             }
3648         }
3649
3650         while (h->frame_num != h->prev_frame_num &&
3651                h->frame_num != (h->prev_frame_num + 1) % (1 << h->sps.log2_max_frame_num)) {
3652             H264Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
3653             av_log(h->avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n",
3654                    h->frame_num, h->prev_frame_num);
3655             ret = h264_frame_start(h);
3656             if (ret < 0) {
3657                 h0->first_field = 0;
3658                 return ret;
3659             }
3660
3661             h->prev_frame_num++;
3662             h->prev_frame_num        %= 1 << h->sps.log2_max_frame_num;
3663             h->cur_pic_ptr->frame_num = h->prev_frame_num;
3664             ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 0);
3665             ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 1);
3666             ret = ff_generate_sliding_window_mmcos(h, 1);
3667             if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE))
3668                 return ret;
3669             ret = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
3670             if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE))
3671                 return ret;
3672             /* Error concealment: If a ref is missing, copy the previous ref
3673              * in its place.
3674              * FIXME: Avoiding a memcpy would be nice, but ref handling makes
3675              * many assumptions about there being no actual duplicates.
3676              * FIXME: This does not copy padding for out-of-frame motion
3677              * vectors.  Given we are concealing a lost frame, this probably
3678              * is not noticeable by comparison, but it should be fixed. */
3679             if (h->short_ref_count) {
3680                 if (prev) {
3681                     av_image_copy(h->short_ref[0]->f.data,
3682                                   h->short_ref[0]->f.linesize,
3683                                   (const uint8_t **)prev->f.data,
3684                                   prev->f.linesize,
3685                                   h->avctx->pix_fmt,
3686                                   h->mb_width  * 16,
3687                                   h->mb_height * 16);
3688                     h->short_ref[0]->poc = prev->poc + 2;
3689                 }
3690                 h->short_ref[0]->frame_num = h->prev_frame_num;
3691             }
3692         }
3693
3694         /* See if we have a decoded first field looking for a pair...
3695          * We're using that to see whether to continue decoding in that
3696          * frame, or to allocate a new one. */
3697         if (h0->first_field) {
3698             assert(h0->cur_pic_ptr);
3699             assert(h0->cur_pic_ptr->f.buf[0]);
3700             assert(h0->cur_pic_ptr->reference != DELAYED_PIC_REF);
3701
3702             /* figure out if we have a complementary field pair */
3703             if (!FIELD_PICTURE(h) || h->picture_structure == last_pic_structure) {
3704                 /* Previous field is unmatched. Don't display it, but let it
3705                  * remain for reference if marked as such. */
3706                 h0->cur_pic_ptr = NULL;
3707                 h0->first_field = FIELD_PICTURE(h);
3708             } else {
3709                 if (h0->cur_pic_ptr->frame_num != h->frame_num) {
3710                     /* This and the previous field had different frame_nums.
3711                      * Consider this field first in pair. Throw away previous
3712                      * one except for reference purposes. */
3713                     h0->first_field = 1;
3714                     h0->cur_pic_ptr = NULL;
3715                 } else {
3716                     /* Second field in complementary pair */
3717                     h0->first_field = 0;
3718                 }
3719             }
3720         } else {
3721             /* Frame or first field in a potentially complementary pair */
3722             h0->first_field = FIELD_PICTURE(h);
3723         }
3724
3725         if (!FIELD_PICTURE(h) || h0->first_field) {
3726             if (h264_frame_start(h) < 0) {
3727                 h0->first_field = 0;
3728                 return AVERROR_INVALIDDATA;
3729             }
3730         } else {
3731             release_unused_pictures(h, 0);
3732         }
3733     }
3734     if (h != h0 && (ret = clone_slice(h, h0)) < 0)
3735         return ret;
3736
3737     h->cur_pic_ptr->frame_num = h->frame_num; // FIXME frame_num cleanup
3738
3739     assert(h->mb_num == h->mb_width * h->mb_height);
3740     if (first_mb_in_slice << FIELD_OR_MBAFF_PICTURE(h) >= h->mb_num ||
3741         first_mb_in_slice >= h->mb_num) {
3742         av_log(h->avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3743         return AVERROR_INVALIDDATA;
3744     }
3745     h->resync_mb_x = h->mb_x =  first_mb_in_slice % h->mb_width;
3746     h->resync_mb_y = h->mb_y = (first_mb_in_slice / h->mb_width) <<
3747                                FIELD_OR_MBAFF_PICTURE(h);
3748     if (h->picture_structure == PICT_BOTTOM_FIELD)
3749         h->resync_mb_y = h->mb_y = h->mb_y + 1;
3750     assert(h->mb_y < h->mb_height);
3751
3752     if (h->picture_structure == PICT_FRAME) {
3753         h->curr_pic_num = h->frame_num;
3754         h->max_pic_num  = 1 << h->sps.log2_max_frame_num;
3755     } else {
3756         h->curr_pic_num = 2 * h->frame_num + 1;
3757         h->max_pic_num  = 1 << (h->sps.log2_max_frame_num + 1);
3758     }
3759
3760     if (h->nal_unit_type == NAL_IDR_SLICE)
3761         get_ue_golomb(&h->gb); /* idr_pic_id */
3762
3763     if (h->sps.poc_type == 0) {
3764         h->poc_lsb = get_bits(&h->gb, h->sps.log2_max_poc_lsb);
3765
3766         if (h->pps.pic_order_present == 1 && h->picture_structure == PICT_FRAME)
3767             h->delta_poc_bottom = get_se_golomb(&h->gb);
3768     }
3769
3770     if (h->sps.poc_type == 1 && !h->sps.delta_pic_order_always_zero_flag) {
3771         h->delta_poc[0] = get_se_golomb(&h->gb);
3772
3773         if (h->pps.pic_order_present == 1 && h->picture_structure == PICT_FRAME)
3774             h->delta_poc[1] = get_se_golomb(&h->gb);
3775     }
3776
3777     ff_init_poc(h, h->cur_pic_ptr->field_poc, &h->cur_pic_ptr->poc);
3778
3779     if (h->pps.redundant_pic_cnt_present)
3780         h->redundant_pic_count = get_ue_golomb(&h->gb);
3781
3782     ret = ff_set_ref_count(h);
3783     if (ret < 0)
3784         return ret;
3785     else if (ret == 1)
3786         default_ref_list_done = 0;
3787
3788     if (!default_ref_list_done)
3789         ff_h264_fill_default_ref_list(h);
3790
3791     if (h->slice_type_nos != AV_PICTURE_TYPE_I) {
3792        ret = ff_h264_decode_ref_pic_list_reordering(h);
3793        if (ret < 0) {
3794            h->ref_count[1] = h->ref_count[0] = 0;
3795            return ret;
3796        }
3797     }
3798
3799     if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) ||
3800         (h->pps.weighted_bipred_idc == 1 &&
3801          h->slice_type_nos == AV_PICTURE_TYPE_B))
3802         ff_pred_weight_table(h);
3803     else if (h->pps.weighted_bipred_idc == 2 &&
3804              h->slice_type_nos == AV_PICTURE_TYPE_B) {
3805         implicit_weight_table(h, -1);
3806     } else {
3807         h->use_weight = 0;
3808         for (i = 0; i < 2; i++) {
3809             h->luma_weight_flag[i]   = 0;
3810             h->chroma_weight_flag[i] = 0;
3811         }
3812     }
3813
3814     // If frame-mt is enabled, only update mmco tables for the first slice
3815     // in a field. Subsequent slices can temporarily clobber h->mmco_index
3816     // or h->mmco, which will cause ref list mix-ups and decoding errors
3817     // further down the line. This may break decoding if the first slice is
3818     // corrupt, thus we only do this if frame-mt is enabled.
3819     if (h->nal_ref_idc) {
3820         ret = ff_h264_decode_ref_pic_marking(h0, &h->gb,
3821                                              !(h->avctx->active_thread_type & FF_THREAD_FRAME) ||
3822                                              h0->current_slice == 0);
3823         if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE))
3824             return AVERROR_INVALIDDATA;
3825     }
3826
3827     if (FRAME_MBAFF(h)) {
3828         ff_h264_fill_mbaff_ref_list(h);
3829
3830         if (h->pps.weighted_bipred_idc == 2 && h->slice_type_nos == AV_PICTURE_TYPE_B) {
3831             implicit_weight_table(h, 0);
3832             implicit_weight_table(h, 1);
3833         }
3834     }
3835
3836     if (h->slice_type_nos == AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred)
3837         ff_h264_direct_dist_scale_factor(h);
3838     ff_h264_direct_ref_list_init(h);
3839
3840     if (h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac) {
3841         tmp = get_ue_golomb_31(&h->gb);
3842         if (tmp > 2) {
3843             av_log(h->avctx, AV_LOG_ERROR, "cabac_init_idc %u overflow\n", tmp);
3844             return AVERROR_INVALIDDATA;
3845         }
3846         h->cabac_init_idc = tmp;
3847     }
3848
3849     h->last_qscale_diff = 0;
3850     tmp = h->pps.init_qp + get_se_golomb(&h->gb);
3851     if (tmp > 51 + 6 * (h->sps.bit_depth_luma - 8)) {
3852         av_log(h->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3853         return AVERROR_INVALIDDATA;
3854     }
3855     h->qscale       = tmp;
3856     h->chroma_qp[0] = get_chroma_qp(h, 0, h->qscale);
3857     h->chroma_qp[1] = get_chroma_qp(h, 1, h->qscale);
3858     // FIXME qscale / qp ... stuff
3859     if (h->slice_type == AV_PICTURE_TYPE_SP)
3860         get_bits1(&h->gb); /* sp_for_switch_flag */
3861     if (h->slice_type == AV_PICTURE_TYPE_SP ||
3862         h->slice_type == AV_PICTURE_TYPE_SI)
3863         get_se_golomb(&h->gb); /* slice_qs_delta */
3864
3865     h->deblocking_filter     = 1;
3866     h->slice_alpha_c0_offset = 0;
3867     h->slice_beta_offset     = 0;
3868     if (h->pps.deblocking_filter_parameters_present) {
3869         tmp = get_ue_golomb_31(&h->gb);
3870         if (tmp > 2) {
3871             av_log(h->avctx, AV_LOG_ERROR,
3872                    "deblocking_filter_idc %u out of range\n", tmp);
3873             return AVERROR_INVALIDDATA;
3874         }
3875         h->deblocking_filter = tmp;
3876         if (h->deblocking_filter < 2)
3877             h->deblocking_filter ^= 1;  // 1<->0
3878
3879         if (h->deblocking_filter) {
3880             h->slice_alpha_c0_offset = get_se_golomb(&h->gb) * 2;
3881             h->slice_beta_offset     = get_se_golomb(&h->gb) * 2;
3882             if (h->slice_alpha_c0_offset >  12 ||
3883                 h->slice_alpha_c0_offset < -12 ||
3884                 h->slice_beta_offset >  12     ||
3885                 h->slice_beta_offset < -12) {
3886                 av_log(h->avctx, AV_LOG_ERROR,
3887                        "deblocking filter parameters %d %d out of range\n",
3888                        h->slice_alpha_c0_offset, h->slice_beta_offset);
3889                 return AVERROR_INVALIDDATA;
3890             }
3891         }
3892     }
3893
3894     if (h->avctx->skip_loop_filter >= AVDISCARD_ALL ||
3895         (h->avctx->skip_loop_filter >= AVDISCARD_NONKEY &&
3896          h->slice_type_nos != AV_PICTURE_TYPE_I) ||
3897         (h->avctx->skip_loop_filter >= AVDISCARD_BIDIR  &&
3898          h->slice_type_nos == AV_PICTURE_TYPE_B) ||
3899         (h->avctx->skip_loop_filter >= AVDISCARD_NONREF &&
3900          h->nal_ref_idc == 0))
3901         h->deblocking_filter = 0;
3902
3903     if (h->deblocking_filter == 1 && h0->max_contexts > 1) {
3904         if (h->avctx->flags2 & CODEC_FLAG2_FAST) {
3905             /* Cheat slightly for speed:
3906              * Do not bother to deblock across slices. */
3907             h->deblocking_filter = 2;
3908         } else {
3909             h0->max_contexts = 1;
3910             if (!h0->single_decode_warning) {
3911                 av_log(h->avctx, AV_LOG_INFO,
3912                        "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3913                 h0->single_decode_warning = 1;
3914             }
3915             if (h != h0) {
3916                 av_log(h->avctx, AV_LOG_ERROR,
3917                        "Deblocking switched inside frame.\n");
3918                 return 1;
3919             }
3920         }
3921     }
3922     h->qp_thresh = 15 -
3923                    FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) -
3924                    FFMAX3(0,
3925                           h->pps.chroma_qp_index_offset[0],
3926                           h->pps.chroma_qp_index_offset[1]) +
3927                    6 * (h->sps.bit_depth_luma - 8);
3928
3929     h0->last_slice_type = slice_type;
3930     h->slice_num        = ++h0->current_slice;
3931     if (h->slice_num >= MAX_SLICES) {
3932         av_log(h->avctx, AV_LOG_ERROR,
3933                "Too many slices, increase MAX_SLICES and recompile\n");
3934     }
3935
3936     for (j = 0; j < 2; j++) {
3937         int id_list[16];
3938         int *ref2frm = h->ref2frm[h->slice_num & (MAX_SLICES - 1)][j];
3939         for (i = 0; i < 16; i++) {
3940             id_list[i] = 60;
3941             if (j < h->list_count && i < h->ref_count[j] &&
3942                 h->ref_list[j][i].f.buf[0]) {
3943                 int k;
3944                 AVBuffer *buf = h->ref_list[j][i].f.buf[0]->buffer;
3945                 for (k = 0; k < h->short_ref_count; k++)
3946                     if (h->short_ref[k]->f.buf[0]->buffer == buf) {
3947                         id_list[i] = k;
3948                         break;
3949                     }
3950                 for (k = 0; k < h->long_ref_count; k++)
3951                     if (h->long_ref[k] && h->long_ref[k]->f.buf[0]->buffer == buf) {
3952                         id_list[i] = h->short_ref_count + k;
3953                         break;
3954                     }
3955             }
3956         }
3957
3958         ref2frm[0] =
3959         ref2frm[1] = -1;
3960         for (i = 0; i < 16; i++)
3961             ref2frm[i + 2] = 4 * id_list[i] + (h->ref_list[j][i].reference & 3);
3962         ref2frm[18 + 0] =
3963         ref2frm[18 + 1] = -1;
3964         for (i = 16; i < 48; i++)
3965             ref2frm[i + 4] = 4 * id_list[(i - 16) >> 1] +
3966                              (h->ref_list[j][i].reference & 3);
3967     }
3968
3969     if (h->avctx->debug & FF_DEBUG_PICT_INFO) {
3970         av_log(h->avctx, AV_LOG_DEBUG,
3971                "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
3972                h->slice_num,
3973                (h->picture_structure == PICT_FRAME ? "F" : h->picture_structure == PICT_TOP_FIELD ? "T" : "B"),
3974                first_mb_in_slice,
3975                av_get_picture_type_char(h->slice_type),
3976                h->slice_type_fixed ? " fix" : "",
3977                h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
3978                pps_id, h->frame_num,
3979                h->cur_pic_ptr->field_poc[0],
3980                h->cur_pic_ptr->field_poc[1],
3981                h->ref_count[0], h->ref_count[1],
3982                h->qscale,
3983                h->deblocking_filter,
3984                h->slice_alpha_c0_offset, h->slice_beta_offset,
3985                h->use_weight,
3986                h->use_weight == 1 && h->use_weight_chroma ? "c" : "",
3987                h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : "");
3988     }
3989
3990     return 0;
3991 }
3992
3993 int ff_h264_get_slice_type(const H264Context *h)
3994 {
3995     switch (h->slice_type) {
3996     case AV_PICTURE_TYPE_P:
3997         return 0;
3998     case AV_PICTURE_TYPE_B:
3999         return 1;
4000     case AV_PICTURE_TYPE_I:
4001         return 2;
4002     case AV_PICTURE_TYPE_SP:
4003         return 3;
4004     case AV_PICTURE_TYPE_SI:
4005         return 4;
4006     default:
4007         return AVERROR_INVALIDDATA;
4008     }
4009 }
4010
4011 static av_always_inline void fill_filter_caches_inter(H264Context *h,
4012                                                       int mb_type, int top_xy,
4013                                                       int left_xy[LEFT_MBS],
4014                                                       int top_type,
4015                                                       int left_type[LEFT_MBS],
4016                                                       int mb_xy, int list)
4017 {
4018     int b_stride = h->b_stride;
4019     int16_t(*mv_dst)[2] = &h->mv_cache[list][scan8[0]];
4020     int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
4021     if (IS_INTER(mb_type) || IS_DIRECT(mb_type)) {
4022         if (USES_LIST(top_type, list)) {
4023             const int b_xy  = h->mb2b_xy[top_xy] + 3 * b_stride;
4024             const int b8_xy = 4 * top_xy + 2;
4025             int (*ref2frm)[64] = h->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2);
4026             AV_COPY128(mv_dst - 1 * 8, h->cur_pic.motion_val[list][b_xy + 0]);
4027             ref_cache[0 - 1 * 8] =
4028             ref_cache[1 - 1 * 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 0]];
4029             ref_cache[2 - 1 * 8] =
4030             ref_cache[3 - 1 * 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 1]];
4031         } else {
4032             AV_ZERO128(mv_dst - 1 * 8);
4033             AV_WN32A(&ref_cache[0 - 1 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
4034         }
4035
4036         if (!IS_INTERLACED(mb_type ^ left_type[LTOP])) {
4037             if (USES_LIST(left_type[LTOP], list)) {
4038                 const int b_xy  = h->mb2b_xy[left_xy[LTOP]] + 3;
4039                 const int b8_xy = 4 * left_xy[LTOP] + 1;
4040                 int (*ref2frm)[64] = h->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2);
4041                 AV_COPY32(mv_dst - 1 +  0, h->cur_pic.motion_val[list][b_xy + b_stride * 0]);
4042                 AV_COPY32(mv_dst - 1 +  8, h->cur_pic.motion_val[list][b_xy + b_stride * 1]);
4043                 AV_COPY32(mv_dst - 1 + 16, h->cur_pic.motion_val[list][b_xy + b_stride * 2]);
4044                 AV_COPY32(mv_dst - 1 + 24, h->cur_pic.motion_val[list][b_xy + b_stride * 3]);
4045                 ref_cache[-1 +  0] =
4046                 ref_cache[-1 +  8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 2 * 0]];
4047                 ref_cache[-1 + 16] =
4048                 ref_cache[-1 + 24] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 2 * 1]];
4049             } else {
4050                 AV_ZERO32(mv_dst - 1 +  0);
4051                 AV_ZERO32(mv_dst - 1 +  8);
4052                 AV_ZERO32(mv_dst - 1 + 16);
4053                 AV_ZERO32(mv_dst - 1 + 24);
4054                 ref_cache[-1 +  0] =
4055                 ref_cache[-1 +  8] =
4056                 ref_cache[-1 + 16] =
4057                 ref_cache[-1 + 24] = LIST_NOT_USED;
4058             }
4059         }
4060     }
4061
4062     if (!USES_LIST(mb_type, list)) {
4063         fill_rectangle(mv_dst, 4, 4, 8, pack16to32(0, 0), 4);
4064         AV_WN32A(&ref_cache[0 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
4065         AV_WN32A(&ref_cache[1 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
4066         AV_WN32A(&ref_cache[2 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
4067         AV_WN32A(&ref_cache[3 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
4068         return;
4069     }
4070
4071     {
4072         int8_t *ref = &h->cur_pic.ref_index[list][4 * mb_xy];
4073         int (*ref2frm)[64] = h->ref2frm[h->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2);
4074         uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]], ref2frm[list][ref[1]]) & 0x00FF00FF) * 0x0101;
4075         uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]], ref2frm[list][ref[3]]) & 0x00FF00FF) * 0x0101;
4076         AV_WN32A(&ref_cache[0 * 8], ref01);
4077         AV_WN32A(&ref_cache[1 * 8], ref01);
4078         AV_WN32A(&ref_cache[2 * 8], ref23);
4079         AV_WN32A(&ref_cache[3 * 8], ref23);
4080     }
4081
4082     {
4083         int16_t(*mv_src)[2] = &h->cur_pic.motion_val[list][4 * h->mb_x + 4 * h->mb_y * b_stride];
4084         AV_COPY128(mv_dst + 8 * 0, mv_src + 0 * b_stride);
4085         AV_COPY128(mv_dst + 8 * 1, mv_src + 1 * b_stride);
4086         AV_COPY128(mv_dst + 8 * 2, mv_src + 2 * b_stride);
4087         AV_COPY128(mv_dst + 8 * 3, mv_src + 3 * b_stride);
4088     }
4089 }
4090
4091 /**
 * Locate the top and left neighbours of the current macroblock and load
 * the per-macroblock caches used by the loop filter: neighbour positions
 * and types, motion vectors and references (via
 * fill_filter_caches_inter()), non-zero counts and cbp.
 *
 * @param h       decoder context; h->mb_xy selects the macroblock
 * @param mb_type type of the macroblock at h->mb_xy
 * @return non zero if the loop filter can be skipped
 */
4095 static int fill_filter_caches(H264Context *h, int mb_type)
4096 {
4097     const int mb_xy = h->mb_xy;
4098     int top_xy, left_xy[LEFT_MBS];
4099     int top_type, left_type[LEFT_MBS];
4100     uint8_t *nnz;
4101     uint8_t *nnz_cache;
4102
    /* Top neighbour: mb_stride entries back, shifted left by MB_FIELD(h). */
4103     top_xy = mb_xy - (h->mb_stride << MB_FIELD(h));
4104
4105     /* Wow, what a mess, why didn't they simplify the interlacing & intra
4106      * stuff, I can't imagine that these complex rules are worth it. */
4107
4108     left_xy[LBOT] = left_xy[LTOP] = mb_xy - 1;
    /* In MBAFF frames the neighbour positions are adjusted, depending on
     * the parity of mb_y, when the current and the left macroblock differ
     * in their interlaced flag. */
4109     if (FRAME_MBAFF(h)) {
4110         const int left_mb_field_flag = IS_INTERLACED(h->cur_pic.mb_type[mb_xy - 1]);
4111         const int curr_mb_field_flag = IS_INTERLACED(mb_type);
4112         if (h->mb_y & 1) {
4113             if (left_mb_field_flag != curr_mb_field_flag)
4114                 left_xy[LTOP] -= h->mb_stride;
4115         } else {
            /* Branch-free: adds mb_stride when bit 7 of the top
             * neighbour's type is clear, and 0 when it is set. */
4116             if (curr_mb_field_flag)
4117                 top_xy += h->mb_stride &
4118                           (((h->cur_pic.mb_type[top_xy] >> 7) & 1) - 1);
4119             if (left_mb_field_flag != curr_mb_field_flag)
4120                 left_xy[LBOT] += h->mb_stride;
4121         }
4122     }
4123
    /* Store the neighbour positions in the context. */
4124     h->top_mb_xy        = top_xy;
4125     h->left_mb_xy[LTOP] = left_xy[LTOP];
4126     h->left_mb_xy[LBOT] = left_xy[LBOT];
4127     {
4128         /* For sufficiently low qp, filtering wouldn't do anything.
4129          * This is a conservative estimate: could also check beta_offset
4130          * and more accurate chroma_qp. */
4131         int qp_thresh = h->qp_thresh; // FIXME strictly we should store qp_thresh for each mb of a slice
4132         int qp        = h->cur_pic.qscale_table[mb_xy];
4133         if (qp <= qp_thresh &&
4134             (left_xy[LTOP] < 0 ||
4135              ((qp + h->cur_pic.qscale_table[left_xy[LTOP]] + 1) >> 1) <= qp_thresh) &&
4136             (top_xy < 0 ||
4137              ((qp + h->cur_pic.qscale_table[top_xy] + 1) >> 1) <= qp_thresh)) {
4138             if (!FRAME_MBAFF(h))
4139                 return 1;
            /* MBAFF: additionally check the LBOT left neighbour and the
             * macroblock one mb_stride before the top neighbour. */
4140             if ((left_xy[LTOP] < 0 ||
4141                  ((qp + h->cur_pic.qscale_table[left_xy[LBOT]] + 1) >> 1) <= qp_thresh) &&
4142                 (top_xy < h->mb_stride ||
4143                  ((qp + h->cur_pic.qscale_table[top_xy - h->mb_stride] + 1) >> 1) <= qp_thresh))
4144                 return 1;
4145         }
4146     }
4147
    /* Neighbour types. A neighbour's type is forced to 0 when, with
     * deblocking_filter == 2, it lies in a different slice than the current
     * macroblock, or otherwise when its slice_table entry is 0xFFFF.
     * Both left types are decided by the LBOT entry. */
4148     top_type        = h->cur_pic.mb_type[top_xy];
4149     left_type[LTOP] = h->cur_pic.mb_type[left_xy[LTOP]];
4150     left_type[LBOT] = h->cur_pic.mb_type[left_xy[LBOT]];
4151     if (h->deblocking_filter == 2) {
4152         if (h->slice_table[top_xy] != h->slice_num)
4153             top_type = 0;
4154         if (h->slice_table[left_xy[LBOT]] != h->slice_num)
4155             left_type[LTOP] = left_type[LBOT] = 0;
4156     } else {
4157         if (h->slice_table[top_xy] == 0xFFFF)
4158             top_type = 0;
4159         if (h->slice_table[left_xy[LBOT]] == 0xFFFF)
4160             left_type[LTOP] = left_type[LBOT] = 0;
4161     }
4162     h->top_type        = top_type;
4163     h->left_type[LTOP] = left_type[LTOP];
4164     h->left_type[LBOT] = left_type[LBOT];
4165
    /* For intra macroblocks nothing below is loaded. */
4166     if (IS_INTRA(mb_type))
4167         return 0;
4168
    /* Motion vector / reference caches for list 0, and for list 1 when the
     * macroblock has two lists. */
4169     fill_filter_caches_inter(h, mb_type, top_xy, left_xy,
4170                              top_type, left_type, mb_xy, 0);
4171     if (h->list_count == 2)
4172         fill_filter_caches_inter(h, mb_type, top_xy, left_xy,
4173                                  top_type, left_type, mb_xy, 1);
4174
    /* Entries 0..15 of the current macroblock's non-zero counts go into
     * cache rows 1..4, starting at column 4. */
4175     nnz       = h->non_zero_count[mb_xy];
4176     nnz_cache = h->non_zero_count_cache;
4177     AV_COPY32(&nnz_cache[4 + 8 * 1], &nnz[0]);
4178     AV_COPY32(&nnz_cache[4 + 8 * 2], &nnz[4]);
4179     AV_COPY32(&nnz_cache[4 + 8 * 3], &nnz[8]);
4180     AV_COPY32(&nnz_cache[4 + 8 * 4], &nnz[12]);
4181     h->cbp = h->cbp_table[mb_xy];
4182
    /* Entries 12..15 of the top neighbour go into cache row 0. */
4183     if (top_type) {
4184         nnz = h->non_zero_count[top_xy];
4185         AV_COPY32(&nnz_cache[4 + 8 * 0], &nnz[3 * 4]);
4186     }
4187
    /* Entries 3, 7, 11 and 15 of the LTOP left neighbour go into cache
     * column 3. */
4188     if (left_type[LTOP]) {
4189         nnz = h->non_zero_count[left_xy[LTOP]];
4190         nnz_cache[3 + 8 * 1] = nnz[3 + 0 * 4];
4191         nnz_cache[3 + 8 * 2] = nnz[3 + 1 * 4];
4192         nnz_cache[3 + 8 * 3] = nnz[3 + 2 * 4];
4193         nnz_cache[3 + 8 * 4] = nnz[3 + 3 * 4];
4194     }
4195
4196     /* CAVLC 8x8dct requires NNZ values for residual decoding that differ
4197      * from what the loop filter needs */
4198     if (!CABAC(h) && h->pps.transform_8x8_mode) {
        /* For 8x8DCT macroblocks the cache entries are overwritten with
         * values derived from bits 12..15 of the corresponding cbp. */
4199         if (IS_8x8DCT(top_type)) {
4200             nnz_cache[4 + 8 * 0] =
4201             nnz_cache[5 + 8 * 0] = (h->cbp_table[top_xy] & 0x4000) >> 12;
4202             nnz_cache[6 + 8 * 0] =
4203             nnz_cache[7 + 8 * 0] = (h->cbp_table[top_xy] & 0x8000) >> 12;
4204         }
4205         if (IS_8x8DCT(left_type[LTOP])) {
4206             nnz_cache[3 + 8 * 1] =
4207             nnz_cache[3 + 8 * 2] = (h->cbp_table[left_xy[LTOP]] & 0x2000) >> 12; // FIXME check MBAFF
4208         }
4209         if (IS_8x8DCT(left_type[LBOT])) {
4210             nnz_cache[3 + 8 * 3] =
4211             nnz_cache[3 + 8 * 4] = (h->cbp_table[left_xy[LBOT]] & 0x8000) >> 12; // FIXME check MBAFF
4212         }
4213
4214         if (IS_8x8DCT(mb_type)) {
4215             nnz_cache[scan8[0]] =
4216             nnz_cache[scan8[1]] =
4217             nnz_cache[scan8[2]] =
4218             nnz_cache[scan8[3]] = (h->cbp & 0x1000) >> 12;
4219
4220             nnz_cache[scan8[0 + 4]] =
4221             nnz_cache[scan8[1 + 4]] =
4222             nnz_cache[scan8[2 + 4]] =
4223             nnz_cache[scan8[3 + 4]] = (h->cbp & 0x2000) >> 12;
4224
4225             nnz_cache[scan8[0 + 8]] =
4226             nnz_cache[scan8[1 + 8]] =
4227             nnz_cache[scan8[2 + 8]] =
4228             nnz_cache[scan8[3 + 8]] = (h->cbp & 0x4000) >> 12;
4229
4230             nnz_cache[scan8[0 + 12]] =
4231             nnz_cache[scan8[1 + 12]] =
4232             nnz_cache[scan8[2 + 12]] =
4233             nnz_cache[scan8[3 + 12]] = (h->cbp & 0x8000) >> 12;
4234         }
4235     }
4236
4237     return 0;
4238 }
4239
4240 static void loop_filter(H264Context *h, int start_x, int end_x)
4241 {
4242     uint8_t *dest_y, *dest_cb, *dest_cr;
4243     int linesize, uvlinesize, mb_x, mb_y;
4244     const int end_mb_y       = h->mb_y + FRAME_MBAFF(h);
4245     const int old_slice_type = h->slice_type;
4246     const int pixel_shift    = h->pixel_shift;
4247     const int block_h        = 16 >> h->chroma_y_shift;
4248
4249     if (h->deblocking_filter) {
4250         for (mb_x = start_x; mb_x < end_x; mb_x++)
4251             for (mb_y = end_mb_y - FRAME_MBAFF(h); mb_y <= end_mb_y; mb_y++) {
4252                 int mb_xy, mb_type;
4253                 mb_xy         = h->mb_xy = mb_x + mb_y * h->mb_stride;
4254                 h->slice_num  = h->slice_table[mb_xy];
4255                 mb_type       = h->cur_pic.mb_type[mb_xy];
4256                 h->list_count = h->list_counts[mb_xy];
4257
4258                 if (FRAME_MBAFF(h))
4259                     h->mb_mbaff               =
4260                     h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);
4261
4262                 h->mb_x = mb_x;
4263                 h->mb_y = mb_y;
4264                 dest_y  = h->cur_pic.f.data[0] +
4265                           ((mb_x << pixel_shift) + mb_y * h->linesize) * 16;
4266                 dest_cb = h->cur_pic.f.data[1] +
4267                           (mb_x << pixel_shift) * (8 << CHROMA444(h)) +
4268                           mb_y * h->uvlinesize * block_h;
4269                 dest_cr = h->cur_pic.f.data[2] +
4270                           (mb_x << pixel_shift) * (8 << CHROMA444(h)) +
4271                           mb_y * h->uvlinesize * block_h;
4272                 // FIXME simplify above
4273
4274                 if (MB_FIELD(h)) {
4275                     linesize   = h->mb_linesize   = h->linesize   * 2;
4276                     uvlinesize = h->mb_uvlinesize = h->uvlinesize * 2;
4277                     if (mb_y & 1) { // FIXME move out of this function?
4278                         dest_y  -= h->linesize   * 15;
4279                         dest_cb -= h->uvlinesize * (block_h - 1);
4280                         dest_cr -= h->uvlinesize * (block_h - 1);
4281                     }
4282                 } else {
4283                     linesize   = h->mb_linesize   = h->linesize;
4284                     uvlinesize = h->mb_uvlinesize = h->uvlinesize;
4285                 }
4286                 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize,
4287                                  uvlinesize, 0);
4288                 if (fill_filter_caches(h, mb_type))
4289                     continue;
4290                 h->chroma_qp[0] = get_chroma_qp(h, 0, h->cur_pic.qscale_table[mb_xy]);
4291                 h->chroma_qp[1] = get_chroma_qp(h, 1, h->cur_pic.qscale_table[mb_xy]);
4292
4293                 if (FRAME_MBAFF(h)) {
4294                     ff_h264_filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr,
4295                                       linesize, uvlinesize);
4296                 } else {
4297                     ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb,
4298                                            dest_cr, linesize, uvlinesize);
4299                 }
4300             }
4301     }
4302     h->slice_type   = old_slice_type;
4303     h->mb_x         = end_x;
4304     h->mb_y         = end_mb_y - FRAME_MBAFF(h);
4305     h->chroma_qp[0] = get_chroma_qp(h, 0, h->qscale);
4306     h->chroma_qp[1] = get_chroma_qp(h, 1, h->qscale);
4307 }
4308
4309 static void predict_field_decoding_flag(H264Context *h)
4310 {
4311     const int mb_xy = h->mb_x + h->mb_y * h->mb_stride;
4312     int mb_type     = (h->slice_table[mb_xy - 1] == h->slice_num) ?
4313                       h->cur_pic.mb_type[mb_xy - 1] :
4314                       (h->slice_table[mb_xy - h->mb_stride] == h->slice_num) ?
4315                       h->cur_pic.mb_type[mb_xy - h->mb_stride] : 0;
4316     h->mb_mbaff     = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4317 }
4318
4319 /**
4320  * Draw edges and report progress for the last MB row.
4321  */
4322 static void decode_finish_row(H264Context *h)
4323 {
4324     int top            = 16 * (h->mb_y      >> FIELD_PICTURE(h));
4325     int pic_height     = 16 *  h->mb_height >> FIELD_PICTURE(h);
4326     int height         =  16      << FRAME_MBAFF(h);
4327     int deblock_border = (16 + 4) << FRAME_MBAFF(h);
4328
4329     if (h->deblocking_filter) {
4330         if ((top + height) >= pic_height)
4331             height += deblock_border;
4332         top -= deblock_border;
4333     }
4334
4335     if (top >= pic_height || (top + height) < 0)
4336         return;
4337
4338     height = FFMIN(height, pic_height - top);
4339     if (top < 0) {
4340         height = top + height;
4341         top    = 0;
4342     }
4343
4344     ff_h264_draw_horiz_band(h, top, height);
4345
4346     if (h->droppable)
4347         return;
4348
4349     ff_thread_report_progress(&h->cur_pic_ptr->tf, top + height - 1,
4350                               h->picture_structure == PICT_BOTTOM_FIELD);
4351 }
4352
4353 static void er_add_slice(H264Context *h, int startx, int starty,
4354                          int endx, int endy, int status)
4355 {
4356 #if CONFIG_ERROR_RESILIENCE
4357     ERContext *er = &h->er;
4358
4359     er->ref_count = h->ref_count[0];
4360     ff_er_add_slice(er, startx, starty, endx, endy, status);
4361 #endif
4362 }
4363
/**
 * Decode the macroblocks of one slice, starting at the position stored in
 * the context (h->mb_x, h->mb_y), using the CABAC or the CAVLC macroblock
 * decoder depending on h->pps.cabac. Each finished span of a macroblock
 * row is passed to loop_filter(), and the decoded region is registered
 * through er_add_slice().
 *
 * @param avctx codec context (only its codec_id is read directly here)
 * @param arg   pointer to a pointer to the H264Context to decode with
 * @return 0 when the slice ends normally, an error code otherwise
 */
4364 static int decode_slice(struct AVCodecContext *avctx, void *arg)
4365 {
4366     H264Context *h = *(void **)arg;
    /* First column of the current row that has not been loop filtered. */
4367     int lf_x_start = h->mb_x;
4368
4369     h->mb_skip_run = -1;
4370
4371     h->is_complex = FRAME_MBAFF(h) || h->picture_structure != PICT_FRAME ||
4372                     avctx->codec_id != AV_CODEC_ID_H264 ||
4373                     (CONFIG_GRAY && (h->flags & CODEC_FLAG_GRAY));
4374
4375     if (h->pps.cabac) {
4376         /* realign */
4377         align_get_bits(&h->gb);
4378
4379         /* init cabac */
4380         ff_init_cabac_decoder(&h->cabac,
4381                               h->gb.buffer + get_bits_count(&h->gb) / 8,
4382                               (get_bits_left(&h->gb) + 7) / 8);
4383
4384         ff_h264_init_cabac_states(h);
4385
4386         for (;;) {
4387             // START_TIMER
4388             int ret = ff_h264_decode_mb_cabac(h);
4389             int eos;
4390             // STOP_TIMER("decode_mb_cabac")
4391
4392             if (ret >= 0)
4393                 ff_h264_hl_decode_mb(h);
4394
            /* MBAFF: also decode the macroblock in the row below, then
             * step back to the original row. */
4395             // FIXME optimal? or let mb_decode decode 16x32 ?
4396             if (ret >= 0 && FRAME_MBAFF(h)) {
4397                 h->mb_y++;
4398
4399                 ret = ff_h264_decode_mb_cabac(h);
4400
4401                 if (ret >= 0)
4402                     ff_h264_hl_decode_mb(h);
4403                 h->mb_y--;
4404             }
4405             eos = get_cabac_terminate(&h->cabac);
4406
            /* With the FF_BUG_TRUNCATED workaround, reading more than 2
             * bytes past the end of the bytestream ends the slice with
             * return value 0 instead of an error. */
4407             if ((h->workaround_bugs & FF_BUG_TRUNCATED) &&
4408                 h->cabac.bytestream > h->cabac.bytestream_end + 2) {
4409                 er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x - 1,
4410                              h->mb_y, ER_MB_END);
4411                 if (h->mb_x >= lf_x_start)
4412                     loop_filter(h, lf_x_start, h->mb_x + 1);
4413                 return 0;
4414             }
            /* Macroblock decoding failed or the bytestream was overread. */
4415             if (ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
4416                 av_log(h->avctx, AV_LOG_ERROR,
4417                        "error while decoding MB %d %d, bytestream %td\n",
4418                        h->mb_x, h->mb_y,
4419                        h->cabac.bytestream_end - h->cabac.bytestream);
4420                 er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x,
4421                              h->mb_y, ER_MB_ERROR);
4422                 return AVERROR_INVALIDDATA;
4423             }
4424
            /* End of the macroblock row: filter the span decoded since
             * lf_x_start, finish the row and advance mb_y (by two for
             * field and MBAFF pictures). */
4425             if (++h->mb_x >= h->mb_width) {
4426                 loop_filter(h, lf_x_start, h->mb_x);
4427                 h->mb_x = lf_x_start = 0;
4428                 decode_finish_row(h);
4429                 ++h->mb_y;
4430                 if (FIELD_OR_MBAFF_PICTURE(h)) {
4431                     ++h->mb_y;
4432                     if (FRAME_MBAFF(h) && h->mb_y < h->mb_height)
4433                         predict_field_decoding_flag(h);
4434                 }
4435             }
4436
            /* The slice ends on the CABAC terminate flag or after the last
             * macroblock row; filter what is left of a partial row. */
4437             if (eos || h->mb_y >= h->mb_height) {
4438                 tprintf(h->avctx, "slice end %d %d\n",
4439                         get_bits_count(&h->gb), h->gb.size_in_bits);
4440                 er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x - 1,
4441                              h->mb_y, ER_MB_END);
4442                 if (h->mb_x > lf_x_start)
4443                     loop_filter(h, lf_x_start, h->mb_x);
4444                 return 0;
4445             }
4446         }
4447     } else {
4448         for (;;) {
4449             int ret = ff_h264_decode_mb_cavlc(h);
4450
4451             if (ret >= 0)
4452                 ff_h264_hl_decode_mb(h);
4453
            /* MBAFF: also decode the macroblock in the row below, then
             * step back to the original row. */
4454             // FIXME optimal? or let mb_decode decode 16x32 ?
4455             if (ret >= 0 && FRAME_MBAFF(h)) {
4456                 h->mb_y++;
4457                 ret = ff_h264_decode_mb_cavlc(h);
4458
4459                 if (ret >= 0)
4460                     ff_h264_hl_decode_mb(h);
4461                 h->mb_y--;
4462             }
4463
4464             if (ret < 0) {
4465                 av_log(h->avctx, AV_LOG_ERROR,
4466                        "error while decoding MB %d %d\n", h->mb_x, h->mb_y);
4467                 er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x,
4468                              h->mb_y, ER_MB_ERROR);
4469                 return ret;
4470             }
4471
            /* End of the macroblock row: same handling as in the CABAC
             * loop above. */
4472             if (++h->mb_x >= h->mb_width) {
4473                 loop_filter(h, lf_x_start, h->mb_x);
4474                 h->mb_x = lf_x_start = 0;
4475                 decode_finish_row(h);
4476                 ++h->mb_y;
4477                 if (FIELD_OR_MBAFF_PICTURE(h)) {
4478                     ++h->mb_y;
4479                     if (FRAME_MBAFF(h) && h->mb_y < h->mb_height)
4480                         predict_field_decoding_flag(h);
4481                 }
                /* Past the last macroblock row: success only if the
                 * bitstream was consumed exactly. Both branches register
                 * the region with ER_MB_END. */
4482                 if (h->mb_y >= h->mb_height) {
4483                     tprintf(h->avctx, "slice end %d %d\n",
4484                             get_bits_count(&h->gb), h->gb.size_in_bits);
4485
4486                     if (get_bits_left(&h->gb) == 0) {
4487                         er_add_slice(h, h->resync_mb_x, h->resync_mb_y,
4488                                      h->mb_x - 1, h->mb_y,
4489                                      ER_MB_END);
4490
4491                         return 0;
4492                     } else {
4493                         er_add_slice(h, h->resync_mb_x, h->resync_mb_y,
4494                                      h->mb_x - 1, h->mb_y,
4495                                      ER_MB_END);
4496
4497                         return AVERROR_INVALIDDATA;
4498                     }
4499                 }
4500             }
4501
            /* No bits left and no skip run pending: the slice ends here.
             * Having read past the end (negative bits left) is an error. */
4502             if (get_bits_left(&h->gb) <= 0 && h->mb_skip_run <= 0) {
4503                 tprintf(h->avctx, "slice end %d %d\n",
4504                         get_bits_count(&h->gb), h->gb.size_in_bits);
4505
4506                 if (get_bits_left(&h->gb) == 0) {
4507                     er_add_slice(h, h->resync_mb_x, h->resync_mb_y,
4508                                  h->mb_x - 1, h->mb_y,
4509                                  ER_MB_END);
4510                     if (h->mb_x > lf_x_start)
4511                         loop_filter(h, lf_x_start, h->mb_x);
4512
4513                     return 0;
4514                 } else {
4515                     er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x,
4516                                  h->mb_y, ER_MB_ERROR);
4517
4518                     return AVERROR_INVALIDDATA;
4519                 }
4520             }
4521         }
4522     }
4523 }
4524
4525 /**
4526  * Call decode_slice() for each context.
4527  *
4528  * @param h h264 master context
4529  * @param context_count number of contexts to execute
4530  */
4531 static int execute_decode_slices(H264Context *h, unsigned context_count)
4532 {
4533     AVCodecContext *const avctx = h->avctx;
4534     H264Context *hx;
4535     int i;
4536
4537     if (h->mb_y >= h->mb_height) {
4538         av_log(h->avctx, AV_LOG_ERROR,
4539                "Input contains more MB rows than the frame height.\n");
4540         return AVERROR_INVALIDDATA;
4541     }
4542
4543     if (h->avctx->hwaccel)
4544         return 0;
4545     if (context_count == 1) {
4546         return decode_slice(avctx, &h);
4547     } else {
4548         for (i = 1; i < context_count; i++) {
4549             hx                 = h->thread_context[i];
4550             hx->er.error_count = 0;
4551         }
4552
4553         avctx->execute(avctx, decode_slice, h->thread_context,
4554                        NULL, context_count, sizeof(void *));
4555
4556         /* pull back stuff from slices to master context */
4557         hx                   = h->thread_context[context_count - 1];
4558         h->mb_x              = hx->mb_x;
4559         h->mb_y              = hx->mb_y;
4560         h->droppable         = hx->droppable;
4561         h->picture_structure = hx->picture_structure;
4562         for (i = 1; i < context_count; i++)
4563             h->er.error_count += h->thread_context[i]->er.error_count;
4564     }
4565
4566     return 0;
4567 }
4568
4569 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
4570                             int parse_extradata)
4571 {
4572     AVCodecContext *const avctx = h->avctx;
4573     H264Context *hx; ///< thread context
4574     int buf_index;
4575     unsigned context_count;
4576     int next_avc;
4577     int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
4578     int nals_needed = 0; ///< number of NALs that need decoding before the next frame thread starts
4579     int nal_index;
4580     int ret = 0;
4581
4582     h->max_contexts = h->slice_context_count;
4583     if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS)) {
4584         h->current_slice = 0;
4585         if (!h->first_field)
4586             h->cur_pic_ptr = NULL;
4587         ff_h264_reset_sei(h);
4588     }
4589
4590     for (; pass <= 1; pass++) {
4591         buf_index     = 0;
4592         context_count = 0;
4593         next_avc      = h->is_avc ? 0 : buf_size;
4594         nal_index     = 0;
4595         for (;;) {
4596             int consumed;
4597             int dst_length;
4598             int bit_length;
4599             const uint8_t *ptr;
4600             int i, nalsize = 0;
4601             int err;
4602
4603             if (buf_index >= next_avc) {
4604                 if (buf_index >= buf_size - h->nal_length_size)
4605                     break;
4606                 nalsize = 0;
4607                 for (i = 0; i < h->nal_length_size; i++)
4608                     nalsize = (nalsize << 8) | buf[buf_index++];
4609                 if (nalsize <= 0 || nalsize > buf_size - buf_index) {
4610                     av_log(h->avctx, AV_LOG_ERROR,
4611                            "AVC: nal size %d\n", nalsize);
4612                     break;
4613                 }
4614                 next_avc = buf_index + nalsize;
4615             } else {
4616                 // start code prefix search
4617                 for (; buf_index + 3 < next_avc; buf_index++)
4618                     // This should always succeed in the first iteration.
4619                     if (buf[buf_index]     == 0 &&
4620                         buf[buf_index + 1] == 0 &&
4621                         buf[buf_index + 2] == 1)
4622                         break;
4623
4624                 if (buf_index + 3 >= buf_size) {
4625                     buf_index = buf_size;
4626                     break;
4627                 }
4628
4629                 buf_index += 3;
4630                 if (buf_index >= next_avc)
4631                     continue;
4632             }
4633
4634             hx = h->thread_context[context_count];
4635
4636             ptr = ff_h264_decode_nal(hx, buf + buf_index, &dst_length,
4637                                      &consumed, next_avc - buf_index);
4638             if (ptr == NULL || dst_length < 0) {
4639                 ret = -1;
4640                 goto end;
4641             }
4642             i = buf_index + consumed;
4643             if ((h->workaround_bugs & FF_BUG_AUTODETECT) && i + 3 < next_avc &&
4644                 buf[i]     == 0x00 && buf[i + 1] == 0x00 &&
4645                 buf[i + 2] == 0x01 && buf[i + 3] == 0xE0)
4646                 h->workaround_bugs |= FF_BUG_TRUNCATED;
4647
4648             if (!(h->workaround_bugs & FF_BUG_TRUNCATED))
4649                 while (dst_length > 0 && ptr[dst_length - 1] == 0)
4650                     dst_length--;
4651             bit_length = !dst_length ? 0
4652                                      : (8 * dst_length -
4653                                         decode_rbsp_trailing(h, ptr + dst_length - 1));
4654
4655             if (h->avctx->debug & FF_DEBUG_STARTCODE)
4656                 av_log(h->avctx, AV_LOG_DEBUG,
4657                        "NAL %d at %d/%d length %d\n",
4658                        hx->nal_unit_type, buf_index, buf_size, dst_length);
4659
4660             if (h->is_avc && (nalsize != consumed) && nalsize)
4661                 av_log(h->avctx, AV_LOG_DEBUG,
4662                        "AVC: Consumed only %d bytes instead of %d\n",
4663                        consumed, nalsize);
4664
4665             buf_index += consumed;
4666             nal_index++;
4667
4668             if (pass == 0) {
4669                 /* packets can sometimes contain multiple PPS/SPS,
4670                  * e.g. two PAFF field pictures in one packet, or a demuxer
4671                  * which splits NALs strangely if so, when frame threading we
4672                  * can't start the next thread until we've read all of them */
4673                 switch (hx->nal_unit_type) {
4674                 case NAL_SPS:
4675                 case NAL_PPS:
4676                     nals_needed = nal_index;
4677                     break;
4678                 case NAL_DPA:
4679                 case NAL_IDR_SLICE:
4680                 case NAL_SLICE:
4681                     init_get_bits(&hx->gb, ptr, bit_length);
4682                     if (!get_ue_golomb(&hx->gb))
4683                         nals_needed = nal_index;
4684                 }
4685                 continue;
4686             }
4687
4688             if (avctx->skip_frame >= AVDISCARD_NONREF &&
4689                 h->nal_ref_idc == 0 &&
4690                 h->nal_unit_type != NAL_SEI)
4691                 continue;
4692
4693 again:
4694             /* Ignore every NAL unit type except PPS and SPS during extradata
4695              * parsing. Decoding slices is not possible in codec init
4696              * with frame-mt */
4697             if (parse_extradata && HAVE_THREADS &&
4698                 (h->avctx->active_thread_type & FF_THREAD_FRAME) &&
4699                 (hx->nal_unit_type != NAL_PPS &&
4700                  hx->nal_unit_type != NAL_SPS)) {
4701                 if (hx->nal_unit_type < NAL_AUD ||
4702                     hx->nal_unit_type > NAL_AUXILIARY_SLICE)
4703                     av_log(avctx, AV_LOG_INFO,
4704                            "Ignoring NAL unit %d during extradata parsing\n",
4705                            hx->nal_unit_type);
4706                 hx->nal_unit_type = NAL_FF_IGNORE;
4707             }
4708             err = 0;
4709             switch (hx->nal_unit_type) {
4710             case NAL_IDR_SLICE:
4711                 if (h->nal_unit_type != NAL_IDR_SLICE) {
4712                     av_log(h->avctx, AV_LOG_ERROR,
4713                            "Invalid mix of idr and non-idr slices\n");
4714                     ret = -1;
4715                     goto end;
4716                 }
4717                 idr(h); // FIXME ensure we don't lose some frames if there is reordering
4718             case NAL_SLICE:
4719                 init_get_bits(&hx->gb, ptr, bit_length);
4720                 hx->intra_gb_ptr      =
4721                 hx->inter_gb_ptr      = &hx->gb;
4722                 hx->data_partitioning = 0;
4723
4724                 if ((err = decode_slice_header(hx, h)))
4725                     break;
4726
4727                 if (h->sei_recovery_frame_cnt >= 0 && h->recovery_frame < 0) {
4728                     h->recovery_frame = (h->frame_num + h->sei_recovery_frame_cnt) &
4729                                         ((1 << h->sps.log2_max_frame_num) - 1);
4730                 }
4731
4732                 h->cur_pic_ptr->f.key_frame |=
4733                     (hx->nal_unit_type == NAL_IDR_SLICE) ||
4734                     (h->sei_recovery_frame_cnt >= 0);
4735
4736                 if (hx->nal_unit_type == NAL_IDR_SLICE ||
4737                     h->recovery_frame == h->frame_num) {
4738                     h->recovery_frame         = -1;
4739                     h->cur_pic_ptr->recovered = 1;
4740                 }
4741                 // If we have an IDR, all frames after it in decoded order are
4742                 // "recovered".
4743                 if (hx->nal_unit_type == NAL_IDR_SLICE)
4744                     h->frame_recovered |= FRAME_RECOVERED_IDR;
4745                 h->cur_pic_ptr->recovered |= !!(h->frame_recovered & FRAME_RECOVERED_IDR);
4746
4747                 if (h->current_slice == 1) {
4748                     if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS))
4749                         decode_postinit(h, nal_index >= nals_needed);
4750
4751                     if (h->avctx->hwaccel &&
4752                         (ret = h->avctx->hwaccel->start_frame(h->avctx, NULL, 0)) < 0)
4753                         return ret;
4754                 }
4755
4756                 if (hx->redundant_pic_count == 0 &&
4757                     (avctx->skip_frame < AVDISCARD_NONREF ||
4758                      hx->nal_ref_idc) &&
4759                     (avctx->skip_frame < AVDISCARD_BIDIR  ||
4760                      hx->slice_type_nos != AV_PICTURE_TYPE_B) &&
4761                     (avctx->skip_frame < AVDISCARD_NONKEY ||
4762                      hx->slice_type_nos == AV_PICTURE_TYPE_I) &&
4763                     avctx->skip_frame < AVDISCARD_ALL) {
4764                     if (avctx->hwaccel) {
4765                         ret = avctx->hwaccel->decode_slice(avctx,
4766                                                            &buf[buf_index - consumed],
4767                                                            consumed);
4768                         if (ret < 0)
4769                             return ret;
4770                     } else
4771                         context_count++;
4772                 }
4773                 break;
4774             case NAL_DPA:
4775                 if (h->avctx->flags & CODEC_FLAG2_CHUNKS) {
4776                     av_log(h->avctx, AV_LOG_ERROR,
4777                            "Decoding in chunks is not supported for "
4778                            "partitioned slices.\n");
4779                     return AVERROR(ENOSYS);
4780                 }
4781
4782                 init_get_bits(&hx->gb, ptr, bit_length);
4783                 hx->intra_gb_ptr =
4784                 hx->inter_gb_ptr = NULL;
4785
4786                 if ((err = decode_slice_header(hx, h)) < 0) {
4787                     /* make sure data_partitioning is cleared if it was set
4788                      * before, so we don't try decoding a slice without a valid
4789                      * slice header later */
4790                     h->data_partitioning = 0;
4791                     break;
4792                 }
4793
4794                 hx->data_partitioning = 1;
4795                 break;
4796             case NAL_DPB:
4797                 init_get_bits(&hx->intra_gb, ptr, bit_length);
4798                 hx->intra_gb_ptr = &hx->intra_gb;
4799                 break;
4800             case NAL_DPC:
4801                 init_get_bits(&hx->inter_gb, ptr, bit_length);
4802                 hx->inter_gb_ptr = &hx->inter_gb;
4803
4804                 if (hx->redundant_pic_count == 0 &&
4805                     hx->intra_gb_ptr &&
4806                     hx->data_partitioning &&
4807                     h->cur_pic_ptr && h->context_initialized &&
4808                     (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) &&
4809                     (avctx->skip_frame < AVDISCARD_BIDIR  ||
4810                      hx->slice_type_nos != AV_PICTURE_TYPE_B) &&
4811                     (avctx->skip_frame < AVDISCARD_NONKEY ||
4812                      hx->slice_type_nos == AV_PICTURE_TYPE_I) &&
4813                     avctx->skip_frame < AVDISCARD_ALL)
4814                     context_count++;
4815                 break;
4816             case NAL_SEI:
4817                 init_get_bits(&h->gb, ptr, bit_length);
4818                 ff_h264_decode_sei(h);
4819                 break;
4820             case NAL_SPS:
4821                 init_get_bits(&h->gb, ptr, bit_length);
4822                 ret = ff_h264_decode_seq_parameter_set(h);
4823                 if (ret < 0 && h->is_avc && (nalsize != consumed) && nalsize) {
4824                     av_log(h->avctx, AV_LOG_DEBUG,
4825                            "SPS decoding failure, trying again with the complete NAL\n");
4826                     init_get_bits(&h->gb, buf + buf_index + 1 - consumed,
4827                                   8 * (nalsize - 1));
4828                     ff_h264_decode_seq_parameter_set(h);
4829                 }
4830
4831                 ret = h264_set_parameter_from_sps(h);
4832                 if (ret < 0)
4833                     goto end;
4834
4835                 break;
4836             case NAL_PPS:
4837                 init_get_bits(&h->gb, ptr, bit_length);
4838                 ff_h264_decode_picture_parameter_set(h, bit_length);
4839                 break;
4840             case NAL_AUD:
4841             case NAL_END_SEQUENCE:
4842             case NAL_END_STREAM:
4843             case NAL_FILLER_DATA:
4844             case NAL_SPS_EXT:
4845             case NAL_AUXILIARY_SLICE:
4846                 break;
4847             case NAL_FF_IGNORE:
4848                 break;
4849             default:
4850                 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n",
4851                        hx->nal_unit_type, bit_length);
4852             }
4853
4854             if (context_count == h->max_contexts) {
4855                 execute_decode_slices(h, context_count);
4856                 context_count = 0;
4857             }
4858
4859             if (err < 0) {
4860                 av_log(h->avctx, AV_LOG_ERROR, "decode_slice_header error\n");
4861                 h->ref_count[0] = h->ref_count[1] = h->list_count = 0;
4862             } else if (err == 1) {
4863                 /* Slice could not be decoded in parallel mode, copy down
4864                  * NAL unit stuff to context 0 and restart. Note that
4865                  * rbsp_buffer is not transferred, but since we no longer
4866                  * run in parallel mode this should not be an issue. */
4867                 h->nal_unit_type = hx->nal_unit_type;
4868                 h->nal_ref_idc   = hx->nal_ref_idc;
4869                 hx               = h;
4870                 goto again;
4871             }
4872         }
4873     }
4874     if (context_count)
4875         execute_decode_slices(h, context_count);
4876
4877 end:
4878     /* clean up */
4879     if (h->cur_pic_ptr && !h->droppable) {
4880         ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
4881                                   h->picture_structure == PICT_BOTTOM_FIELD);
4882     }
4883
4884     return (ret < 0) ? ret : buf_index;
4885 }
4886
/**
 * Return the number of bytes consumed for building the current frame.
 *
 * @param pos      position reached in the input buffer
 * @param buf_size total size of the input buffer
 * @return pos, raised to 1 when it is 0, or buf_size when fewer than 10
 *         bytes would otherwise remain after that position
 */
static int get_consumed_bytes(int pos, int buf_size)
{
    /* never report zero progress, so a caller cannot loop forever */
    const int consumed = pos ? pos : 1;

    /* a tail shorter than 10 bytes is swallowed together with the frame */
    return (consumed + 10 > buf_size) ? buf_size : consumed;
}
4899
4900 static int output_frame(H264Context *h, AVFrame *dst, AVFrame *src)
4901 {
4902     int i;
4903     int ret = av_frame_ref(dst, src);
4904     if (ret < 0)
4905         return ret;
4906
4907     if (!h->sps.crop)
4908         return 0;
4909
4910     for (i = 0; i < 3; i++) {
4911         int hshift = (i > 0) ? h->chroma_x_shift : 0;
4912         int vshift = (i > 0) ? h->chroma_y_shift : 0;
4913         int off    = ((h->sps.crop_left >> hshift) << h->pixel_shift) +
4914                      (h->sps.crop_top >> vshift) * dst->linesize[i];
4915         dst->data[i] += off;
4916     }
4917     return 0;
4918 }
4919
4920 static int h264_decode_frame(AVCodecContext *avctx, void *data,
4921                              int *got_frame, AVPacket *avpkt)
4922 {
4923     const uint8_t *buf = avpkt->data;
4924     int buf_size       = avpkt->size;
4925     H264Context *h     = avctx->priv_data;
4926     AVFrame *pict      = data;
4927     int buf_index      = 0;
4928     int ret;
4929
4930     h->flags = avctx->flags;
4931     /* reset data partitioning here, to ensure GetBitContexts from previous
4932      * packets do not get used. */
4933     h->data_partitioning = 0;
4934
4935     /* end of stream, output what is still in the buffers */
4936 out:
4937     if (buf_size == 0) {
4938         H264Picture *out;
4939         int i, out_idx;
4940
4941         h->cur_pic_ptr = NULL;
4942
4943         // FIXME factorize this with the output code below
4944         out     = h->delayed_pic[0];
4945         out_idx = 0;
4946         for (i = 1;
4947              h->delayed_pic[i] &&
4948              !h->delayed_pic[i]->f.key_frame &&
4949              !h->delayed_pic[i]->mmco_reset;
4950              i++)
4951             if (h->delayed_pic[i]->poc < out->poc) {
4952                 out     = h->delayed_pic[i];
4953                 out_idx = i;
4954             }
4955
4956         for (i = out_idx; h->delayed_pic[i]; i++)
4957             h->delayed_pic[i] = h->delayed_pic[i + 1];
4958
4959         if (out) {
4960             ret = output_frame(h, pict, &out->f);
4961             if (ret < 0)
4962                 return ret;
4963             *got_frame = 1;
4964         }
4965
4966         return buf_index;
4967     }
4968
4969     buf_index = decode_nal_units(h, buf, buf_size, 0);
4970     if (buf_index < 0)
4971         return AVERROR_INVALIDDATA;
4972
4973     if (!h->cur_pic_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
4974         buf_size = 0;
4975         goto out;
4976     }
4977
4978     if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS) && !h->cur_pic_ptr) {
4979         if (avctx->skip_frame >= AVDISCARD_NONREF)
4980             return 0;
4981         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
4982         return AVERROR_INVALIDDATA;
4983     }
4984
4985     if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS) ||
4986         (h->mb_y >= h->mb_height && h->mb_height)) {
4987         if (avctx->flags2 & CODEC_FLAG2_CHUNKS)
4988             decode_postinit(h, 1);
4989
4990         field_end(h, 0);
4991
4992         *got_frame = 0;
4993         if (h->next_output_pic && ((avctx->flags & CODEC_FLAG_OUTPUT_CORRUPT) ||
4994                                    h->next_output_pic->recovered)) {
4995             if (!h->next_output_pic->recovered)
4996                 h->next_output_pic->f.flags |= AV_FRAME_FLAG_CORRUPT;
4997
4998             ret = output_frame(h, pict, &h->next_output_pic->f);
4999             if (ret < 0)
5000                 return ret;
5001             *got_frame = 1;
5002         }
5003     }
5004
5005     assert(pict->buf[0] || !*got_frame);
5006
5007     return get_consumed_bytes(buf_index, buf_size);
5008 }
5009
5010 av_cold void ff_h264_free_context(H264Context *h)
5011 {
5012     int i;
5013
5014     free_tables(h, 1); // FIXME cleanup init stuff perhaps
5015
5016     for (i = 0; i < MAX_SPS_COUNT; i++)
5017         av_freep(h->sps_buffers + i);
5018
5019     for (i = 0; i < MAX_PPS_COUNT; i++)
5020         av_freep(h->pps_buffers + i);
5021 }
5022
5023 static av_cold int h264_decode_end(AVCodecContext *avctx)
5024 {
5025     H264Context *h = avctx->priv_data;
5026
5027     ff_h264_free_context(h);
5028
5029     unref_picture(h, &h->cur_pic);
5030
5031     return 0;
5032 }
5033
5034 static const AVProfile profiles[] = {
5035     { FF_PROFILE_H264_BASELINE,             "Baseline"              },
5036     { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
5037     { FF_PROFILE_H264_MAIN,                 "Main"                  },
5038     { FF_PROFILE_H264_EXTENDED,             "Extended"              },
5039     { FF_PROFILE_H264_HIGH,                 "High"                  },
5040     { FF_PROFILE_H264_HIGH_10,              "High 10"               },
5041     { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
5042     { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
5043     { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
5044     { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
5045     { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
5046     { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
5047     { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
5048     { FF_PROFILE_UNKNOWN },
5049 };
5050
5051 AVCodec ff_h264_decoder = {
5052     .name                  = "h264",
5053     .long_name             = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
5054     .type                  = AVMEDIA_TYPE_VIDEO,
5055     .id                    = AV_CODEC_ID_H264,
5056     .priv_data_size        = sizeof(H264Context),
5057     .init                  = ff_h264_decode_init,
5058     .close                 = h264_decode_end,
5059     .decode                = h264_decode_frame,
5060     .capabilities          = /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 |
5061                              CODEC_CAP_DELAY | CODEC_CAP_SLICE_THREADS |
5062                              CODEC_CAP_FRAME_THREADS,
5063     .flush                 = flush_dpb,
5064     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
5065     .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
5066     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
5067 };