git.sesse.net Git - ffmpeg/blob - libavcodec/h264.c

   1 /*
   2  * H.26L/H.264/AVC/JVT/14496-10/... decoder
   3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * H.264 / AVC / MPEG4 part10 codec.
  25  * @author Michael Niedermayer <michaelni@gmx.at>
  26  */
  27
  28 #include "libavutil/imgutils.h"
  29 #include "internal.h"
  30 #include "cabac.h"
  31 #include "cabac_functions.h"
  32 #include "dsputil.h"
  33 #include "avcodec.h"
  34 #include "mpegvideo.h"
  35 #include "h264.h"
  36 #include "h264data.h"
  37 #include "h264chroma.h"
  38 #include "h264_mvpred.h"
  39 #include "golomb.h"
  40 #include "mathops.h"
  41 #include "rectangle.h"
  42 #include "svq3.h"
  43 #include "thread.h"
  44 #include "vdpau_internal.h"
  45 #include "libavutil/avassert.h"
  46
  47 // #undef NDEBUG
  48 #include <assert.h>
  49
  /* Four per-macroblock sizes; presumably indexed by chroma_format_idc
   * (0 = monochrome ... 3 = 4:4:4) -- TODO confirm against the users. */
  const uint16_t ff_h264_mb_sizes[4] = {
      [0] = 256,
      [1] = 384,
      [2] = 512,
      [3] = 768,
  };
  51
  52 static const uint8_t rem6[QP_MAX_NUM + 1] = {
  53     0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
  54     3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
  55     0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
  56 };
  57
  58 static const uint8_t div6[QP_MAX_NUM + 1] = {
  59     0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3,  3,  3,
  60     3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6,  6,  6,
  61     7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
  62 };
  63
  64 static const enum AVPixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
  65 #if CONFIG_H264_DXVA2_HWACCEL
  66     AV_PIX_FMT_DXVA2_VLD,
  67 #endif
  68 #if CONFIG_H264_VAAPI_HWACCEL
  69     AV_PIX_FMT_VAAPI_VLD,
  70 #endif
  71 #if CONFIG_H264_VDA_HWACCEL
  72     AV_PIX_FMT_VDA_VLD,
  73 #endif
  74 #if CONFIG_H264_VDPAU_HWACCEL
  75     AV_PIX_FMT_VDPAU,
  76 #endif
  77     AV_PIX_FMT_YUVJ420P,
  78     AV_PIX_FMT_NONE
  79 };
  80
  81 static void h264_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
  82                               int (*mv)[2][4][2],
  83                               int mb_x, int mb_y, int mb_intra, int mb_skipped)
  84 {
  85     H264Context    *h = opaque;
  86     MpegEncContext *s = &h->s;
  87
  88     s->mb_x  = mb_x;
  89     s->mb_y  = mb_y;
  90     h->mb_xy = s->mb_x + s->mb_y * s->mb_stride;
  91     memset(h->non_zero_count_cache, 0, sizeof(h->non_zero_count_cache));
  92     assert(ref >= 0);
  93     /* FIXME: It is possible albeit uncommon that slice references
  94      * differ between slices. We take the easy approach and ignore
  95      * it for now. If this turns out to have any relevance in
  96      * practice then correct remapping should be added. */
  97     if (ref >= h->ref_count[0])
  98         ref = 0;
  99     fill_rectangle(&s->current_picture.f.ref_index[0][4 * h->mb_xy],
 100                    2, 2, 2, ref, 1);
 101     fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
 102     fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8,
 103                    pack16to32(s->mv[0][0][0], s->mv[0][0][1]), 4);
 104     assert(!FRAME_MBAFF);
 105     ff_h264_hl_decode_mb(h);
 106 }
 107
 108 /**
 109  * Check if the top & left blocks are available if needed and
 110  * change the dc mode so it only uses the available blocks.
 111  */
 112 int ff_h264_check_intra4x4_pred_mode(H264Context *h)
 113 {
 114     MpegEncContext *const s     = &h->s;
 115     static const int8_t top[12] = {
 116         -1, 0, LEFT_DC_PRED, -1, -1, -1, -1, -1, 0
 117     };
 118     static const int8_t left[12] = {
 119         0, -1, TOP_DC_PRED, 0, -1, -1, -1, 0, -1, DC_128_PRED
 120     };
 121     int i;
 122
 123     if (!(h->top_samples_available & 0x8000)) {
 124         for (i = 0; i < 4; i++) {
 125             int status = top[h->intra4x4_pred_mode_cache[scan8[0] + i]];
 126             if (status < 0) {
 127                 av_log(h->s.avctx, AV_LOG_ERROR,
 128                        "top block unavailable for requested intra4x4 mode %d at %d %d\n",
 129                        status, s->mb_x, s->mb_y);
 130                 return -1;
 131             } else if (status) {
 132                 h->intra4x4_pred_mode_cache[scan8[0] + i] = status;
 133             }
 134         }
 135     }
 136
 137     if ((h->left_samples_available & 0x8888) != 0x8888) {
 138         static const int mask[4] = { 0x8000, 0x2000, 0x80, 0x20 };
 139         for (i = 0; i < 4; i++)
 140             if (!(h->left_samples_available & mask[i])) {
 141                 int status = left[h->intra4x4_pred_mode_cache[scan8[0] + 8 * i]];
 142                 if (status < 0) {
 143                     av_log(h->s.avctx, AV_LOG_ERROR,
 144                            "left block unavailable for requested intra4x4 mode %d at %d %d\n",
 145                            status, s->mb_x, s->mb_y);
 146                     return -1;
 147                 } else if (status) {
 148                     h->intra4x4_pred_mode_cache[scan8[0] + 8 * i] = status;
 149                 }
 150             }
 151     }
 152
 153     return 0;
 154 } // FIXME cleanup like ff_h264_check_intra_pred_mode
 155
 156 /**
 157  * Check if the top & left blocks are available if needed and
 158  * change the dc mode so it only uses the available blocks.
 159  */
 160 int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma)
 161 {
 162     MpegEncContext *const s     = &h->s;
 163     static const int8_t top[7]  = { LEFT_DC_PRED8x8, 1, -1, -1 };
 164     static const int8_t left[7] = { TOP_DC_PRED8x8, -1, 2, -1, DC_128_PRED8x8 };
 165
 166     if (mode > 6U) {
 167         av_log(h->s.avctx, AV_LOG_ERROR,
 168                "out of range intra chroma pred mode at %d %d\n",
 169                s->mb_x, s->mb_y);
 170         return -1;
 171     }
 172
 173     if (!(h->top_samples_available & 0x8000)) {
 174         mode = top[mode];
 175         if (mode < 0) {
 176             av_log(h->s.avctx, AV_LOG_ERROR,
 177                    "top block unavailable for requested intra mode at %d %d\n",
 178                    s->mb_x, s->mb_y);
 179             return -1;
 180         }
 181     }
 182
 183     if ((h->left_samples_available & 0x8080) != 0x8080) {
 184         mode = left[mode];
 185         if (is_chroma && (h->left_samples_available & 0x8080)) {
 186             // mad cow disease mode, aka MBAFF + constrained_intra_pred
 187             mode = ALZHEIMER_DC_L0T_PRED8x8 +
 188                    (!(h->left_samples_available & 0x8000)) +
 189                    2 * (mode == DC_128_PRED8x8);
 190         }
 191         if (mode < 0) {
 192             av_log(h->s.avctx, AV_LOG_ERROR,
 193                    "left block unavailable for requested intra mode at %d %d\n",
 194                    s->mb_x, s->mb_y);
 195             return -1;
 196         }
 197     }
 198
 199     return mode;
 200 }
 201
 202 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src,
 203                                   int *dst_length, int *consumed, int length)
 204 {
 205     int i, si, di;
 206     uint8_t *dst;
 207     int bufidx;
 208
 209     // src[0]&0x80; // forbidden bit
 210     h->nal_ref_idc   = src[0] >> 5;
 211     h->nal_unit_type = src[0] & 0x1F;
 212
 213     src++;
 214     length--;
 215
 216 #define STARTCODE_TEST                                                  \
 217         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
 218             if (src[i + 2] != 3) {                                      \
 219                 /* startcode, so we must be past the end */             \
 220                 length = i;                                             \
 221             }                                                           \
 222             break;                                                      \
 223         }
 224 #if HAVE_FAST_UNALIGNED
 225 #define FIND_FIRST_ZERO                                                 \
 226         if (i > 0 && !src[i])                                           \
 227             i--;                                                        \
 228         while (src[i])                                                  \
 229             i++
 230 #if HAVE_FAST_64BIT
 231     for (i = 0; i + 1 < length; i += 9) {
 232         if (!((~AV_RN64A(src + i) &
 233                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
 234               0x8000800080008080ULL))
 235             continue;
 236         FIND_FIRST_ZERO;
 237         STARTCODE_TEST;
 238         i -= 7;
 239     }
 240 #else
 241     for (i = 0; i + 1 < length; i += 5) {
 242         if (!((~AV_RN32A(src + i) &
 243                (AV_RN32A(src + i) - 0x01000101U)) &
 244               0x80008080U))
 245             continue;
 246         FIND_FIRST_ZERO;
 247         STARTCODE_TEST;
 248         i -= 3;
 249     }
 250 #endif
 251 #else
 252     for (i = 0; i + 1 < length; i += 2) {
 253         if (src[i])
 254             continue;
 255         if (i > 0 && src[i - 1] == 0)
 256             i--;
 257         STARTCODE_TEST;
 258     }
 259 #endif
 260
 261     if (i >= length - 1) { // no escaped 0
 262         *dst_length = length;
 263         *consumed   = length + 1; // +1 for the header
 264         return src;
 265     }
 266
 267     // use second escape buffer for inter data
 268     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0;
 269     av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx],
 270                    length + FF_INPUT_BUFFER_PADDING_SIZE);
 271     dst = h->rbsp_buffer[bufidx];
 272
 273     if (dst == NULL)
 274         return NULL;
 275
 276     memcpy(dst, src, i);
 277     si = di = i;
 278     while (si + 2 < length) {
 279         // remove escapes (very rare 1:2^22)
 280         if (src[si + 2] > 3) {
 281             dst[di++] = src[si++];
 282             dst[di++] = src[si++];
 283         } else if (src[si] == 0 && src[si + 1] == 0) {
 284             if (src[si + 2] == 3) { // escape
 285                 dst[di++]  = 0;
 286                 dst[di++]  = 0;
 287                 si        += 3;
 288                 continue;
 289             } else // next start code
 290                 goto nsc;
 291         }
 292
 293         dst[di++] = src[si++];
 294     }
 295     while (si < length)
 296         dst[di++] = src[si++];
 297 nsc:
 298
 299     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
 300
 301     *dst_length = di;
 302     *consumed   = si + 1; // +1 for the header
 303     /* FIXME store exact number of bits in the getbitcontext
 304      * (it is needed for decoding) */
 305     return dst;
 306 }
 307
 308 /**
 309  * Identify the exact end of the bitstream
 310  * @return the length of the trailing, or 0 if damaged
 311  */
 312 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src)
 313 {
 314     int v = *src;
 315     int r;
 316
 317     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
 318
 319     for (r = 1; r < 9; r++) {
 320         if (v & 1)
 321             return r;
 322         v >>= 1;
 323     }
 324     return 0;
 325 }
 326
 327 static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n,
 328                                          int height, int y_offset, int list)
 329 {
 330     int raw_my        = h->mv_cache[list][scan8[n]][1];
 331     int filter_height_up   = (raw_my & 3) ? 2 : 0;
 332     int filter_height_down = (raw_my & 3) ? 3 : 0;
 333     int full_my       = (raw_my >> 2) + y_offset;
 334     int top           = full_my - filter_height_up;
 335     int bottom        = full_my + filter_height_down + height;
 336
 337     return FFMAX(abs(top), bottom);
 338 }
 339
 340 static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n,
 341                                      int height, int y_offset, int list0,
 342                                      int list1, int *nrefs)
 343 {
 344     MpegEncContext *const s = &h->s;
 345     int my;
 346
 347     y_offset += 16 * (s->mb_y >> MB_FIELD);
 348
 349     if (list0) {
 350         int ref_n    = h->ref_cache[0][scan8[n]];
 351         Picture *ref = &h->ref_list[0][ref_n];
 352
 353         // Error resilience puts the current picture in the ref list.
 354         // Don't try to wait on these as it will cause a deadlock.
 355         // Fields can wait on each other, though.
 356         if (ref->f.thread_opaque   != s->current_picture.f.thread_opaque ||
 357             (ref->f.reference & 3) != s->picture_structure) {
 358             my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
 359             if (refs[0][ref_n] < 0)
 360                 nrefs[0] += 1;
 361             refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
 362         }
 363     }
 364
 365     if (list1) {
 366         int ref_n    = h->ref_cache[1][scan8[n]];
 367         Picture *ref = &h->ref_list[1][ref_n];
 368
 369         if (ref->f.thread_opaque   != s->current_picture.f.thread_opaque ||
 370             (ref->f.reference & 3) != s->picture_structure) {
 371             my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
 372             if (refs[1][ref_n] < 0)
 373                 nrefs[1] += 1;
 374             refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
 375         }
 376     }
 377 }
 378
 379 /**
 380  * Wait until all reference frames are available for MC operations.
 381  *
 382  * @param h the H264 context
 383  */
 384 static void await_references(H264Context *h)
 385 {
 386     MpegEncContext *const s = &h->s;
 387     const int mb_xy   = h->mb_xy;
 388     const int mb_type = s->current_picture.f.mb_type[mb_xy];
 389     int refs[2][48];
 390     int nrefs[2] = { 0 };
 391     int ref, list;
 392
 393     memset(refs, -1, sizeof(refs));
 394
 395     if (IS_16X16(mb_type)) {
 396         get_lowest_part_y(h, refs, 0, 16, 0,
 397                           IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
 398     } else if (IS_16X8(mb_type)) {
 399         get_lowest_part_y(h, refs, 0, 8, 0,
 400                           IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
 401         get_lowest_part_y(h, refs, 8, 8, 8,
 402                           IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
 403     } else if (IS_8X16(mb_type)) {
 404         get_lowest_part_y(h, refs, 0, 16, 0,
 405                           IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
 406         get_lowest_part_y(h, refs, 4, 16, 0,
 407                           IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
 408     } else {
 409         int i;
 410
 411         assert(IS_8X8(mb_type));
 412
 413         for (i = 0; i < 4; i++) {
 414             const int sub_mb_type = h->sub_mb_type[i];
 415             const int n           = 4 * i;
 416             int y_offset          = (i & 2) << 2;
 417
 418             if (IS_SUB_8X8(sub_mb_type)) {
 419                 get_lowest_part_y(h, refs, n, 8, y_offset,
 420                                   IS_DIR(sub_mb_type, 0, 0),
 421                                   IS_DIR(sub_mb_type, 0, 1),
 422                                   nrefs);
 423             } else if (IS_SUB_8X4(sub_mb_type)) {
 424                 get_lowest_part_y(h, refs, n, 4, y_offset,
 425                                   IS_DIR(sub_mb_type, 0, 0),
 426                                   IS_DIR(sub_mb_type, 0, 1),
 427                                   nrefs);
 428                 get_lowest_part_y(h, refs, n + 2, 4, y_offset + 4,
 429                                   IS_DIR(sub_mb_type, 0, 0),
 430                                   IS_DIR(sub_mb_type, 0, 1),
 431                                   nrefs);
 432             } else if (IS_SUB_4X8(sub_mb_type)) {
 433                 get_lowest_part_y(h, refs, n, 8, y_offset,
 434                                   IS_DIR(sub_mb_type, 0, 0),
 435                                   IS_DIR(sub_mb_type, 0, 1),
 436                                   nrefs);
 437                 get_lowest_part_y(h, refs, n + 1, 8, y_offset,
 438                                   IS_DIR(sub_mb_type, 0, 0),
 439                                   IS_DIR(sub_mb_type, 0, 1),
 440                                   nrefs);
 441             } else {
 442                 int j;
 443                 assert(IS_SUB_4X4(sub_mb_type));
 444                 for (j = 0; j < 4; j++) {
 445                     int sub_y_offset = y_offset + 2 * (j & 2);
 446                     get_lowest_part_y(h, refs, n + j, 4, sub_y_offset,
 447                                       IS_DIR(sub_mb_type, 0, 0),
 448                                       IS_DIR(sub_mb_type, 0, 1),
 449                                       nrefs);
 450                 }
 451             }
 452         }
 453     }
 454
 455     for (list = h->list_count - 1; list >= 0; list--)
 456         for (ref = 0; ref < 48 && nrefs[list]; ref++) {
 457             int row = refs[list][ref];
 458             if (row >= 0) {
 459                 Picture *ref_pic      = &h->ref_list[list][ref];
 460                 int ref_field         = ref_pic->f.reference - 1;
 461                 int ref_field_picture = ref_pic->field_picture;
 462                 int pic_height        = 16 * s->mb_height >> ref_field_picture;
 463
 464                 row <<= MB_MBAFF;
 465                 nrefs[list]--;
 466
 467                 if (!FIELD_PICTURE && ref_field_picture) { // frame referencing two fields
 468                     ff_thread_await_progress(&ref_pic->f,
 469                                              FFMIN((row >> 1) - !(row & 1),
 470                                                    pic_height - 1),
 471                                              1);
 472                     ff_thread_await_progress(&ref_pic->f,
 473                                              FFMIN((row >> 1), pic_height - 1),
 474                                              0);
 475                 } else if (FIELD_PICTURE && !ref_field_picture) { // field referencing one field of a frame
 476                     ff_thread_await_progress(&ref_pic->f,
 477                                              FFMIN(row * 2 + ref_field,
 478                                                    pic_height - 1),
 479                                              0);
 480                 } else if (FIELD_PICTURE) {
 481                     ff_thread_await_progress(&ref_pic->f,
 482                                              FFMIN(row, pic_height - 1),
 483                                              ref_field);
 484                 } else {
 485                     ff_thread_await_progress(&ref_pic->f,
 486                                              FFMIN(row, pic_height - 1),
 487                                              0);
 488                 }
 489             }
 490         }
 491 }
 492
 493 static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
 494                                          int n, int square, int height,
 495                                          int delta, int list,
 496                                          uint8_t *dest_y, uint8_t *dest_cb,
 497                                          uint8_t *dest_cr,
 498                                          int src_x_offset, int src_y_offset,
 499                                          qpel_mc_func *qpix_op,
 500                                          h264_chroma_mc_func chroma_op,
 501                                          int pixel_shift, int chroma_idc)
 502 {
 503     MpegEncContext *const s = &h->s;
 504     const int mx      = h->mv_cache[list][scan8[n]][0] + src_x_offset * 8;
 505     int my            = h->mv_cache[list][scan8[n]][1] + src_y_offset * 8;
 506     const int luma_xy = (mx & 3) + ((my & 3) << 2);
 507     int offset        = ((mx >> 2) << pixel_shift) + (my >> 2) * h->mb_linesize;
 508     uint8_t *src_y    = pic->f.data[0] + offset;
 509     uint8_t *src_cb, *src_cr;
 510     int extra_width  = h->emu_edge_width;
 511     int extra_height = h->emu_edge_height;
 512     int emu = 0;
 513     const int full_mx    = mx >> 2;
 514     const int full_my    = my >> 2;
 515     const int pic_width  = 16 * s->mb_width;
 516     const int pic_height = 16 * s->mb_height >> MB_FIELD;
 517     int ysh;
 518
 519     if (mx & 7)
 520         extra_width -= 3;
 521     if (my & 7)
 522         extra_height -= 3;
 523
 524     if (full_mx                <          0 - extra_width  ||
 525         full_my                <          0 - extra_height ||
 526         full_mx + 16 /*FIXME*/ > pic_width  + extra_width  ||
 527         full_my + 16 /*FIXME*/ > pic_height + extra_height) {
 528         s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
 529                                  src_y - (2 << pixel_shift) - 2 * h->mb_linesize,
 530                                  h->mb_linesize,
 531                                  16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
 532                                  full_my - 2, pic_width, pic_height);
 533         src_y = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
 534         emu   = 1;
 535     }
 536
 537     qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); // FIXME try variable height perhaps?
 538     if (!square)
 539         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
 540
 541     if (CONFIG_GRAY && s->flags & CODEC_FLAG_GRAY)
 542         return;
 543
 544     if (chroma_idc == 3 /* yuv444 */) {
 545         src_cb = pic->f.data[1] + offset;
 546         if (emu) {
 547             s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
 548                                      src_cb - (2 << pixel_shift) - 2 * h->mb_linesize,
 549                                      h->mb_linesize,
 550                                      16 + 5, 16 + 5 /*FIXME*/,
 551                                      full_mx - 2, full_my - 2,
 552                                      pic_width, pic_height);
 553             src_cb = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
 554         }
 555         qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); // FIXME try variable height perhaps?
 556         if (!square)
 557             qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
 558
 559         src_cr = pic->f.data[2] + offset;
 560         if (emu) {
 561             s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
 562                                      src_cr - (2 << pixel_shift) - 2 * h->mb_linesize,
 563                                      h->mb_linesize,
 564                                      16 + 5, 16 + 5 /*FIXME*/,
 565                                      full_mx - 2, full_my - 2,
 566                                      pic_width, pic_height);
 567             src_cr = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
 568         }
 569         qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); // FIXME try variable height perhaps?
 570         if (!square)
 571             qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
 572         return;
 573     }
 574
 575     ysh = 3 - (chroma_idc == 2 /* yuv422 */);
 576     if (chroma_idc == 1 /* yuv420 */ && MB_FIELD) {
 577         // chroma offset when predicting from a field of opposite parity
 578         my  += 2 * ((s->mb_y & 1) - (pic->f.reference - 1));
 579         emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
 580     }
 581
 582     src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) +
 583              (my >> ysh) * h->mb_uvlinesize;
 584     src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) +
 585              (my >> ysh) * h->mb_uvlinesize;
 586
 587     if (emu) {
 588         s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize,
 589                                  9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
 590                                  pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
 591         src_cb = s->edge_emu_buffer;
 592     }
 593     chroma_op(dest_cb, src_cb, h->mb_uvlinesize,
 594               height >> (chroma_idc == 1 /* yuv420 */),
 595               mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
 596
 597     if (emu) {
 598         s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize,
 599                                  9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
 600                                  pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
 601         src_cr = s->edge_emu_buffer;
 602     }
 603     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
 604               mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
 605 }
 606
 607 static av_always_inline void mc_part_std(H264Context *h, int n, int square,
 608                                          int height, int delta,
 609                                          uint8_t *dest_y, uint8_t *dest_cb,
 610                                          uint8_t *dest_cr,
 611                                          int x_offset, int y_offset,
 612                                          qpel_mc_func *qpix_put,
 613                                          h264_chroma_mc_func chroma_put,
 614                                          qpel_mc_func *qpix_avg,
 615                                          h264_chroma_mc_func chroma_avg,
 616                                          int list0, int list1,
 617                                          int pixel_shift, int chroma_idc)
 618 {
 619     MpegEncContext *const s       = &h->s;
 620     qpel_mc_func *qpix_op         = qpix_put;
 621     h264_chroma_mc_func chroma_op = chroma_put;
 622
 623     dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
 624     if (chroma_idc == 3 /* yuv444 */) {
 625         dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
 626         dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
 627     } else if (chroma_idc == 2 /* yuv422 */) {
 628         dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
 629         dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
 630     } else { /* yuv420 */
 631         dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
 632         dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
 633     }
 634     x_offset += 8 * s->mb_x;
 635     y_offset += 8 * (s->mb_y >> MB_FIELD);
 636
 637     if (list0) {
 638         Picture *ref = &h->ref_list[0][h->ref_cache[0][scan8[n]]];
 639         mc_dir_part(h, ref, n, square, height, delta, 0,
 640                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
 641                     qpix_op, chroma_op, pixel_shift, chroma_idc);
 642
 643         qpix_op   = qpix_avg;
 644         chroma_op = chroma_avg;
 645     }
 646
 647     if (list1) {
 648         Picture *ref = &h->ref_list[1][h->ref_cache[1][scan8[n]]];
 649         mc_dir_part(h, ref, n, square, height, delta, 1,
 650                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
 651                     qpix_op, chroma_op, pixel_shift, chroma_idc);
 652     }
 653 }
 654
 655 static av_always_inline void mc_part_weighted(H264Context *h, int n, int square,
 656                                               int height, int delta,
 657                                               uint8_t *dest_y, uint8_t *dest_cb,
 658                                               uint8_t *dest_cr,
 659                                               int x_offset, int y_offset,
 660                                               qpel_mc_func *qpix_put,
 661                                               h264_chroma_mc_func chroma_put,
 662                                               h264_weight_func luma_weight_op,
 663                                               h264_weight_func chroma_weight_op,
 664                                               h264_biweight_func luma_weight_avg,
 665                                               h264_biweight_func chroma_weight_avg,
 666                                               int list0, int list1,
 667                                               int pixel_shift, int chroma_idc)
 668 {
 669     MpegEncContext *const s = &h->s;
 670     int chroma_height;
 671
 672     dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
 673     if (chroma_idc == 3 /* yuv444 */) {
 674         chroma_height     = height;
 675         chroma_weight_avg = luma_weight_avg;
 676         chroma_weight_op  = luma_weight_op;
 677         dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
 678         dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
 679     } else if (chroma_idc == 2 /* yuv422 */) {
 680         chroma_height = height;
 681         dest_cb      += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
 682         dest_cr      += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
 683     } else { /* yuv420 */
 684         chroma_height = height >> 1;
 685         dest_cb      += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
 686         dest_cr      += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
 687     }
 688     x_offset += 8 * s->mb_x;
 689     y_offset += 8 * (s->mb_y >> MB_FIELD);
 690
 691     if (list0 && list1) {
 692         /* don't optimize for luma-only case, since B-frames usually
 693          * use implicit weights => chroma too. */
 694         uint8_t *tmp_cb = h->bipred_scratchpad;
 695         uint8_t *tmp_cr = h->bipred_scratchpad + (16 << pixel_shift);
 696         uint8_t *tmp_y  = h->bipred_scratchpad + 16 * h->mb_uvlinesize;
 697         int refn0       = h->ref_cache[0][scan8[n]];
 698         int refn1       = h->ref_cache[1][scan8[n]];
 699
 700         mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
 701                     dest_y, dest_cb, dest_cr,
 702                     x_offset, y_offset, qpix_put, chroma_put,
 703                     pixel_shift, chroma_idc);
 704         mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
 705                     tmp_y, tmp_cb, tmp_cr,
 706                     x_offset, y_offset, qpix_put, chroma_put,
 707                     pixel_shift, chroma_idc);
 708
 709         if (h->use_weight == 2) {
 710             int weight0 = h->implicit_weight[refn0][refn1][s->mb_y & 1];
 711             int weight1 = 64 - weight0;
 712             luma_weight_avg(dest_y, tmp_y, h->mb_linesize,
 713                             height, 5, weight0, weight1, 0);
 714             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize,
 715                               chroma_height, 5, weight0, weight1, 0);
 716             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize,
 717                               chroma_height, 5, weight0, weight1, 0);
 718         } else {
 719             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height,
 720                             h->luma_log2_weight_denom,
 721                             h->luma_weight[refn0][0][0],
 722                             h->luma_weight[refn1][1][0],
 723                             h->luma_weight[refn0][0][1] +
 724                             h->luma_weight[refn1][1][1]);
 725             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height,
 726                               h->chroma_log2_weight_denom,
 727                               h->chroma_weight[refn0][0][0][0],
 728                               h->chroma_weight[refn1][1][0][0],
 729                               h->chroma_weight[refn0][0][0][1] +
 730                               h->chroma_weight[refn1][1][0][1]);
 731             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height,
 732                               h->chroma_log2_weight_denom,
 733                               h->chroma_weight[refn0][0][1][0],
 734                               h->chroma_weight[refn1][1][1][0],
 735                               h->chroma_weight[refn0][0][1][1] +
 736                               h->chroma_weight[refn1][1][1][1]);
 737         }
 738     } else {
 739         int list     = list1 ? 1 : 0;
 740         int refn     = h->ref_cache[list][scan8[n]];
 741         Picture *ref = &h->ref_list[list][refn];
 742         mc_dir_part(h, ref, n, square, height, delta, list,
 743                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
 744                     qpix_put, chroma_put, pixel_shift, chroma_idc);
 745
 746         luma_weight_op(dest_y, h->mb_linesize, height,
 747                        h->luma_log2_weight_denom,
 748                        h->luma_weight[refn][list][0],
 749                        h->luma_weight[refn][list][1]);
 750         if (h->use_weight_chroma) {
 751             chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height,
 752                              h->chroma_log2_weight_denom,
 753                              h->chroma_weight[refn][list][0][0],
 754                              h->chroma_weight[refn][list][0][1]);
 755             chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height,
 756                              h->chroma_log2_weight_denom,
 757                              h->chroma_weight[refn][list][1][0],
 758                              h->chroma_weight[refn][list][1][1]);
 759         }
 760     }
 761 }
 762
 763 static av_always_inline void prefetch_motion(H264Context *h, int list,
 764                                              int pixel_shift, int chroma_idc)
 765 {
 766     /* fetch pixels for estimated mv 4 macroblocks ahead
 767      * optimized for 64byte cache lines */
 768     MpegEncContext *const s = &h->s;
 769     const int refn = h->ref_cache[list][scan8[0]];
 770     if (refn >= 0) {
 771         const int mx  = (h->mv_cache[list][scan8[0]][0] >> 2) + 16 * s->mb_x + 8;
 772         const int my  = (h->mv_cache[list][scan8[0]][1] >> 2) + 16 * s->mb_y;
 773         uint8_t **src = h->ref_list[list][refn].f.data;
 774         int off       = (mx << pixel_shift) +
 775                         (my + (s->mb_x & 3) * 4) * h->mb_linesize +
 776                         (64 << pixel_shift);
 777         s->vdsp.prefetch(src[0] + off, s->linesize, 4);
 778         if (chroma_idc == 3 /* yuv444 */) {
 779             s->vdsp.prefetch(src[1] + off, s->linesize, 4);
 780             s->vdsp.prefetch(src[2] + off, s->linesize, 4);
 781         } else {
 782             off = ((mx >> 1) << pixel_shift) +
 783                   ((my >> 1) + (s->mb_x & 7)) * s->uvlinesize +
 784                   (64 << pixel_shift);
 785             s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
 786         }
 787     }
 788 }
 789
 790 static void free_tables(H264Context *h, int free_rbsp)
 791 {
 792     int i;
 793     H264Context *hx;
 794
 795     av_freep(&h->intra4x4_pred_mode);
 796     av_freep(&h->chroma_pred_mode_table);
 797     av_freep(&h->cbp_table);
 798     av_freep(&h->mvd_table[0]);
 799     av_freep(&h->mvd_table[1]);
 800     av_freep(&h->direct_table);
 801     av_freep(&h->non_zero_count);
 802     av_freep(&h->slice_table_base);
 803     h->slice_table = NULL;
 804     av_freep(&h->list_counts);
 805
 806     av_freep(&h->mb2b_xy);
 807     av_freep(&h->mb2br_xy);
 808
 809     for (i = 0; i < MAX_THREADS; i++) {
 810         hx = h->thread_context[i];
 811         if (!hx)
 812             continue;
 813         av_freep(&hx->top_borders[1]);
 814         av_freep(&hx->top_borders[0]);
 815         av_freep(&hx->bipred_scratchpad);
 816         if (free_rbsp) {
 817             av_freep(&hx->rbsp_buffer[1]);
 818             av_freep(&hx->rbsp_buffer[0]);
 819             hx->rbsp_buffer_size[0] = 0;
 820             hx->rbsp_buffer_size[1] = 0;
 821         }
 822         if (i)
 823             av_freep(&h->thread_context[i]);
 824     }
 825 }
 826
 827 static void init_dequant8_coeff_table(H264Context *h)
 828 {
 829     int i, j, q, x;
 830     const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8);
 831
 832     for (i = 0; i < 6; i++) {
 833         h->dequant8_coeff[i] = h->dequant8_buffer[i];
 834         for (j = 0; j < i; j++)
 835             if (!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i],
 836                         64 * sizeof(uint8_t))) {
 837                 h->dequant8_coeff[i] = h->dequant8_buffer[j];
 838                 break;
 839             }
 840         if (j < i)
 841             continue;
 842
 843         for (q = 0; q < max_qp + 1; q++) {
 844             int shift = div6[q];
 845             int idx   = rem6[q];
 846             for (x = 0; x < 64; x++)
 847                 h->dequant8_coeff[i][q][(x >> 3) | ((x & 7) << 3)] =
 848                     ((uint32_t)dequant8_coeff_init[idx][dequant8_coeff_init_scan[((x >> 1) & 12) | (x & 3)]] *
 849                      h->pps.scaling_matrix8[i][x]) << shift;
 850         }
 851     }
 852 }
 853
 854 static void init_dequant4_coeff_table(H264Context *h)
 855 {
 856     int i, j, q, x;
 857     const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8);
 858     for (i = 0; i < 6; i++) {
 859         h->dequant4_coeff[i] = h->dequant4_buffer[i];
 860         for (j = 0; j < i; j++)
 861             if (!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i],
 862                         16 * sizeof(uint8_t))) {
 863                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
 864                 break;
 865             }
 866         if (j < i)
 867             continue;
 868
 869         for (q = 0; q < max_qp + 1; q++) {
 870             int shift = div6[q] + 2;
 871             int idx   = rem6[q];
 872             for (x = 0; x < 16; x++)
 873                 h->dequant4_coeff[i][q][(x >> 2) | ((x << 2) & 0xF)] =
 874                     ((uint32_t)dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] *
 875                      h->pps.scaling_matrix4[i][x]) << shift;
 876         }
 877     }
 878 }
 879
 880 static void init_dequant_tables(H264Context *h)
 881 {
 882     int i, x;
 883     init_dequant4_coeff_table(h);
 884     if (h->pps.transform_8x8_mode)
 885         init_dequant8_coeff_table(h);
 886     if (h->sps.transform_bypass) {
 887         for (i = 0; i < 6; i++)
 888             for (x = 0; x < 16; x++)
 889                 h->dequant4_coeff[i][0][x] = 1 << 6;
 890         if (h->pps.transform_8x8_mode)
 891             for (i = 0; i < 6; i++)
 892                 for (x = 0; x < 64; x++)
 893                     h->dequant8_coeff[i][0][x] = 1 << 6;
 894     }
 895 }
 896
 897 int ff_h264_alloc_tables(H264Context *h)
 898 {
 899     MpegEncContext *const s = &h->s;
 900     const int big_mb_num    = s->mb_stride * (s->mb_height + 1);
 901     const int row_mb_num    = s->mb_stride * 2 * s->avctx->thread_count;
 902     int x, y;
 903
 904     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode,
 905                       row_mb_num * 8 * sizeof(uint8_t), fail)
 906     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count,
 907                       big_mb_num * 48 * sizeof(uint8_t), fail)
 908     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base,
 909                       (big_mb_num + s->mb_stride) * sizeof(*h->slice_table_base), fail)
 910     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table,
 911                       big_mb_num * sizeof(uint16_t), fail)
 912     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table,
 913                       big_mb_num * sizeof(uint8_t), fail)
 914     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0],
 915                       16 * row_mb_num * sizeof(uint8_t), fail);
 916     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1],
 917                       16 * row_mb_num * sizeof(uint8_t), fail);
 918     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table,
 919                       4 * big_mb_num * sizeof(uint8_t), fail);
 920     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts,
 921                       big_mb_num * sizeof(uint8_t), fail)
 922
 923     memset(h->slice_table_base, -1,
 924            (big_mb_num + s->mb_stride) * sizeof(*h->slice_table_base));
 925     h->slice_table = h->slice_table_base + s->mb_stride * 2 + 1;
 926
 927     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy,
 928                       big_mb_num * sizeof(uint32_t), fail);
 929     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy,
 930                       big_mb_num * sizeof(uint32_t), fail);
 931     for (y = 0; y < s->mb_height; y++)
 932         for (x = 0; x < s->mb_width; x++) {
 933             const int mb_xy = x + y * s->mb_stride;
 934             const int b_xy  = 4 * x + 4 * y * h->b_stride;
 935
 936             h->mb2b_xy[mb_xy]  = b_xy;
 937             h->mb2br_xy[mb_xy] = 8 * (FMO ? mb_xy : (mb_xy % (2 * s->mb_stride)));
 938         }
 939
 940     if (!h->dequant4_coeff[0])
 941         init_dequant_tables(h);
 942
 943     return 0;
 944
 945 fail:
 946     free_tables(h, 1);
 947     return -1;
 948 }
 949
 950 /**
 951  * Mimic alloc_tables(), but for every context thread.
 952  */
 953 static void clone_tables(H264Context *dst, H264Context *src, int i)
 954 {
 955     MpegEncContext *const s     = &src->s;
 956     dst->intra4x4_pred_mode     = src->intra4x4_pred_mode + i * 8 * 2 * s->mb_stride;
 957     dst->non_zero_count         = src->non_zero_count;
 958     dst->slice_table            = src->slice_table;
 959     dst->cbp_table              = src->cbp_table;
 960     dst->mb2b_xy                = src->mb2b_xy;
 961     dst->mb2br_xy               = src->mb2br_xy;
 962     dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
 963     dst->mvd_table[0]           = src->mvd_table[0] + i * 8 * 2 * s->mb_stride;
 964     dst->mvd_table[1]           = src->mvd_table[1] + i * 8 * 2 * s->mb_stride;
 965     dst->direct_table           = src->direct_table;
 966     dst->list_counts            = src->list_counts;
 967     dst->bipred_scratchpad      = NULL;
 968     ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma,
 969                       src->sps.chroma_format_idc);
 970 }
 971
 972 /**
 973  * Init context
 974  * Allocate buffers which are not shared amongst multiple threads.
 975  */
 976 static int context_init(H264Context *h)
 977 {
 978     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0],
 979                       h->s.mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail)
 980     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1],
 981                       h->s.mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail)
 982
 983     h->ref_cache[0][scan8[5]  + 1] =
 984     h->ref_cache[0][scan8[7]  + 1] =
 985     h->ref_cache[0][scan8[13] + 1] =
 986     h->ref_cache[1][scan8[5]  + 1] =
 987     h->ref_cache[1][scan8[7]  + 1] =
 988     h->ref_cache[1][scan8[13] + 1] = PART_NOT_AVAILABLE;
 989
 990     h->s.er.decode_mb = h264_er_decode_mb;
 991     h->s.er.opaque    = h;
 992
 993     return 0;
 994
 995 fail:
 996     return -1; // free_tables will clean up for us
 997 }
 998
 999 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
1000                             int parse_extradata);
1001
1002 static av_cold void common_init(H264Context *h)
1003 {
1004     MpegEncContext *const s = &h->s;
1005
1006     s->width    = s->avctx->width;
1007     s->height   = s->avctx->height;
1008     s->codec_id = s->avctx->codec->id;
1009
1010     ff_h264dsp_init(&h->h264dsp, 8, 1);
1011     ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma);
1012     ff_h264qpel_init(&h->h264qpel, 8);
1013     ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1);
1014
1015     h->dequant_coeff_pps = -1;
1016     s->unrestricted_mv   = 1;
1017
1018     /* needed so that IDCT permutation is known early */
1019     ff_dsputil_init(&s->dsp, s->avctx);
1020     ff_videodsp_init(&s->vdsp, 8);
1021
1022     memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t));
1023     memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t));
1024 }
1025
1026 int ff_h264_decode_extradata(H264Context *h)
1027 {
1028     AVCodecContext *avctx = h->s.avctx;
1029
1030     if (avctx->extradata[0] == 1) {
1031         int i, cnt, nalsize;
1032         unsigned char *p = avctx->extradata;
1033
1034         h->is_avc = 1;
1035
1036         if (avctx->extradata_size < 7) {
1037             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
1038             return -1;
1039         }
1040         /* sps and pps in the avcC always have length coded with 2 bytes,
1041          * so put a fake nal_length_size = 2 while parsing them */
1042         h->nal_length_size = 2;
1043         // Decode sps from avcC
1044         cnt = *(p + 5) & 0x1f; // Number of sps
1045         p  += 6;
1046         for (i = 0; i < cnt; i++) {
1047             nalsize = AV_RB16(p) + 2;
1048             if (p - avctx->extradata + nalsize > avctx->extradata_size)
1049                 return -1;
1050             if (decode_nal_units(h, p, nalsize, 1) < 0) {
1051                 av_log(avctx, AV_LOG_ERROR,
1052                        "Decoding sps %d from avcC failed\n", i);
1053                 return -1;
1054             }
1055             p += nalsize;
1056         }
1057         // Decode pps from avcC
1058         cnt = *(p++); // Number of pps
1059         for (i = 0; i < cnt; i++) {
1060             nalsize = AV_RB16(p) + 2;
1061             if (p - avctx->extradata + nalsize > avctx->extradata_size)
1062                 return -1;
1063             if (decode_nal_units(h, p, nalsize, 1) < 0) {
1064                 av_log(avctx, AV_LOG_ERROR,
1065                        "Decoding pps %d from avcC failed\n", i);
1066                 return -1;
1067             }
1068             p += nalsize;
1069         }
1070         // Now store right nal length size, that will be used to parse all other nals
1071         h->nal_length_size = (avctx->extradata[4] & 0x03) + 1;
1072     } else {
1073         h->is_avc = 0;
1074         if (decode_nal_units(h, avctx->extradata, avctx->extradata_size, 1) < 0)
1075             return -1;
1076     }
1077     return 0;
1078 }
1079
1080 av_cold int ff_h264_decode_init(AVCodecContext *avctx)
1081 {
1082     H264Context *h = avctx->priv_data;
1083     MpegEncContext *const s = &h->s;
1084     int i;
1085
1086     ff_MPV_decode_defaults(s);
1087
1088     s->avctx = avctx;
1089     common_init(h);
1090
1091     s->out_format      = FMT_H264;
1092     s->workaround_bugs = avctx->workaround_bugs;
1093
1094     /* set defaults */
1095     // s->decode_mb = ff_h263_decode_mb;
1096     s->quarter_sample = 1;
1097     if (!avctx->has_b_frames)
1098         s->low_delay = 1;
1099
1100     avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
1101
1102     ff_h264_decode_init_vlc();
1103
1104     h->pixel_shift = 0;
1105     h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
1106
1107     h->thread_context[0] = h;
1108     h->outputed_poc      = h->next_outputed_poc = INT_MIN;
1109     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
1110         h->last_pocs[i] = INT_MIN;
1111     h->prev_poc_msb = 1 << 16;
1112     h->x264_build   = -1;
1113     ff_h264_reset_sei(h);
1114     if (avctx->codec_id == AV_CODEC_ID_H264) {
1115         if (avctx->ticks_per_frame == 1)
1116             s->avctx->time_base.den *= 2;
1117         avctx->ticks_per_frame = 2;
1118     }
1119
1120     if (avctx->extradata_size > 0 && avctx->extradata &&
1121         ff_h264_decode_extradata(h))
1122         return -1;
1123
1124     if (h->sps.bitstream_restriction_flag &&
1125         s->avctx->has_b_frames < h->sps.num_reorder_frames) {
1126         s->avctx->has_b_frames = h->sps.num_reorder_frames;
1127         s->low_delay           = 0;
1128     }
1129
1130     return 0;
1131 }
1132
/* True when a lies in the half-open interval [b, b + size). */
#define IN_RANGE(a, b, size) ((b) <= (a) && (a) < (b) + (size))
1134
1135 static void copy_picture_range(Picture **to, Picture **from, int count,
1136                                MpegEncContext *new_base,
1137                                MpegEncContext *old_base)
1138 {
1139     int i;
1140
1141     for (i = 0; i < count; i++) {
1142         assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
1143                 IN_RANGE(from[i], old_base->picture,
1144                          sizeof(Picture) * old_base->picture_count) ||
1145                 !from[i]));
1146         to[i] = REBASE_PICTURE(from[i], new_base, old_base);
1147     }
1148 }
1149
/**
 * Synchronise an array of heap-allocated parameter sets with another one.
 *
 * For every index: an entry present in to but absent in from is freed, an
 * entry present in from but absent in to is allocated, and whenever from
 * has an entry its size bytes are copied into to.
 *
 * @param to    destination array of count pointers
 * @param from  source array of count pointers
 * @param count number of entries in both arrays
 * @param size  size in bytes of one parameter set
 * @return 0 on success, AVERROR(ENOMEM) if an entry could not be allocated
 *         (entries after the failing one are left untouched)
 */
static int copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i = 0; i < count; i++) {
        if (to[i] && !from[i]) {
            av_freep(&to[i]);
        } else if (from[i] && !to[i]) {
            to[i] = av_malloc(size);
            /* never memcpy() into a failed allocation */
            if (!to[i])
                return AVERROR(ENOMEM);
        }

        if (from[i])
            memcpy(to[i], from[i], size);
    }

    return 0;
}
1164
1165 static int decode_init_thread_copy(AVCodecContext *avctx)
1166 {
1167     H264Context *h = avctx->priv_data;
1168
1169     if (!avctx->internal->is_copy)
1170         return 0;
1171     memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
1172     memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
1173
1174     h->s.context_initialized = 0;
1175
1176     return 0;
1177 }
1178
/*
 * Copy the bytes of the members from start_field up to, but not including,
 * end_field from the struct *from into the struct *to.
 * The pointer arguments are parenthesized so that expressions such as &obj
 * can be passed.
 */
#define copy_fields(to, from, start_field, end_field)                        \
    memcpy(&(to)->start_field, &(from)->start_field,                         \
           (char *)&(to)->end_field - (char *)&(to)->start_field)
1182
1183 static int h264_slice_header_init(H264Context *, int);
1184
1185 static int h264_set_parameter_from_sps(H264Context *h);
1186
1187 static int decode_update_thread_context(AVCodecContext *dst,
1188                                         const AVCodecContext *src)
1189 {
1190     H264Context *h = dst->priv_data, *h1 = src->priv_data;
1191     MpegEncContext *const s = &h->s, *const s1 = &h1->s;
1192     int inited = s->context_initialized, err;
1193     int i;
1194
1195     if (dst == src || !s1->context_initialized)
1196         return 0;
1197
1198     if (inited &&
1199         (s->width      != s1->width      ||
1200          s->height     != s1->height     ||
1201          s->mb_width   != s1->mb_width   ||
1202          s->mb_height  != s1->mb_height  ||
1203          h->sps.bit_depth_luma    != h1->sps.bit_depth_luma    ||
1204          h->sps.chroma_format_idc != h1->sps.chroma_format_idc ||
1205          h->sps.colorspace        != h1->sps.colorspace)) {
1206
1207         av_freep(&h->bipred_scratchpad);
1208
1209         s->width     = s1->width;
1210         s->height    = s1->height;
1211         s->mb_height = s1->mb_height;
1212         h->b_stride  = h1->b_stride;
1213
1214         if ((err = h264_slice_header_init(h, 1)) < 0) {
1215             av_log(h->s.avctx, AV_LOG_ERROR, "h264_slice_header_init() failed");
1216             return err;
1217         }
1218         h->context_reinitialized = 1;
1219
1220         /* update linesize on resize for h264. The h264 decoder doesn't
1221          * necessarily call ff_MPV_frame_start in the new thread */
1222         s->linesize   = s1->linesize;
1223         s->uvlinesize = s1->uvlinesize;
1224
1225         /* copy block_offset since frame_start may not be called */
1226         memcpy(h->block_offset, h1->block_offset, sizeof(h->block_offset));
1227         h264_set_parameter_from_sps(h);
1228     }
1229
1230     err = ff_mpeg_update_thread_context(dst, src);
1231     if (err)
1232         return err;
1233
1234     if (!inited) {
1235         for (i = 0; i < MAX_SPS_COUNT; i++)
1236             av_freep(h->sps_buffers + i);
1237
1238         for (i = 0; i < MAX_PPS_COUNT; i++)
1239             av_freep(h->pps_buffers + i);
1240
1241         // copy all fields after MpegEnc
1242         memcpy(&h->s + 1, &h1->s + 1,
1243                sizeof(H264Context) - sizeof(MpegEncContext));
1244         memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
1245         memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
1246         if (ff_h264_alloc_tables(h) < 0) {
1247             av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n");
1248             return AVERROR(ENOMEM);
1249         }
1250         context_init(h);
1251
1252         for (i = 0; i < 2; i++) {
1253             h->rbsp_buffer[i]      = NULL;
1254             h->rbsp_buffer_size[i] = 0;
1255         }
1256         h->bipred_scratchpad = NULL;
1257
1258         h->thread_context[0] = h;
1259
1260         s->dsp.clear_blocks(h->mb);
1261         s->dsp.clear_blocks(h->mb + (24 * 16 << h->pixel_shift));
1262     }
1263
1264     /* frame_start may not be called for the next thread (if it's decoding
1265      * a bottom field) so this has to be allocated here */
1266     if (!h->bipred_scratchpad)
1267         h->bipred_scratchpad = av_malloc(16 * 6 * s->linesize);
1268
1269     // extradata/NAL handling
1270     h->is_avc = h1->is_avc;
1271
1272     // SPS/PPS
1273     copy_parameter_set((void **)h->sps_buffers, (void **)h1->sps_buffers,
1274                        MAX_SPS_COUNT, sizeof(SPS));
1275     h->sps = h1->sps;
1276     copy_parameter_set((void **)h->pps_buffers, (void **)h1->pps_buffers,
1277                        MAX_PPS_COUNT, sizeof(PPS));
1278     h->pps = h1->pps;
1279
1280     // Dequantization matrices
1281     // FIXME these are big - can they be only copied when PPS changes?
1282     copy_fields(h, h1, dequant4_buffer, dequant4_coeff);
1283
1284     for (i = 0; i < 6; i++)
1285         h->dequant4_coeff[i] = h->dequant4_buffer[0] +
1286                                (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);
1287
1288     for (i = 0; i < 6; i++)
1289         h->dequant8_coeff[i] = h->dequant8_buffer[0] +
1290                                (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);
1291
1292     h->dequant_coeff_pps = h1->dequant_coeff_pps;
1293
1294     // POC timing
1295     copy_fields(h, h1, poc_lsb, redundant_pic_count);
1296
1297     // reference lists
1298     copy_fields(h, h1, ref_count, list_count);
1299     copy_fields(h, h1, ref2frm, intra_gb);
1300     copy_fields(h, h1, short_ref, cabac_init_idc);
1301
1302     copy_picture_range(h->short_ref, h1->short_ref, 32, s, s1);
1303     copy_picture_range(h->long_ref, h1->long_ref, 32, s, s1);
1304     copy_picture_range(h->delayed_pic, h1->delayed_pic,
1305                        MAX_DELAYED_PIC_COUNT + 2, s, s1);
1306
1307     h->last_slice_type = h1->last_slice_type;
1308
1309     if (!s->current_picture_ptr)
1310         return 0;
1311
1312     if (!s->droppable) {
1313         err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
1314         h->prev_poc_msb = h->poc_msb;
1315         h->prev_poc_lsb = h->poc_lsb;
1316     }
1317     h->prev_frame_num_offset = h->frame_num_offset;
1318     h->prev_frame_num        = h->frame_num;
1319     h->outputed_poc          = h->next_outputed_poc;
1320
1321     return err;
1322 }
1323
1324 int ff_h264_frame_start(H264Context *h)
1325 {
1326     MpegEncContext *const s = &h->s;
1327     int i;
1328     const int pixel_shift = h->pixel_shift;
1329
1330     if (ff_MPV_frame_start(s, s->avctx) < 0)
1331         return -1;
1332     ff_mpeg_er_frame_start(s);
1333     /*
1334      * ff_MPV_frame_start uses pict_type to derive key_frame.
1335      * This is incorrect for H.264; IDR markings must be used.
1336      * Zero here; IDR markings per slice in frame or fields are ORed in later.
1337      * See decode_nal_units().
1338      */
1339     s->current_picture_ptr->f.key_frame = 0;
1340     s->current_picture_ptr->mmco_reset  = 0;
1341
1342     assert(s->linesize && s->uvlinesize);
1343
1344     for (i = 0; i < 16; i++) {
1345         h->block_offset[i]           = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * s->linesize * ((scan8[i] - scan8[0]) >> 3);
1346         h->block_offset[48 + i]      = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * s->linesize * ((scan8[i] - scan8[0]) >> 3);
1347     }
1348     for (i = 0; i < 16; i++) {
1349         h->block_offset[16 + i]      =
1350         h->block_offset[32 + i]      = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * s->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1351         h->block_offset[48 + 16 + i] =
1352         h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * s->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1353     }
1354
1355     /* can't be in alloc_tables because linesize isn't known there.
1356      * FIXME: redo bipred weight to not require extra buffer? */
1357     for (i = 0; i < s->slice_context_count; i++)
1358         if (h->thread_context[i] && !h->thread_context[i]->bipred_scratchpad)
1359             h->thread_context[i]->bipred_scratchpad = av_malloc(16 * 6 * s->linesize);
1360
1361     /* Some macroblocks can be accessed before they're available in case
1362      * of lost slices, MBAFF or threading. */
1363     memset(h->slice_table, -1,
1364            (s->mb_height * s->mb_stride - 1) * sizeof(*h->slice_table));
1365
1366     // s->decode = (s->flags & CODEC_FLAG_PSNR) || !s->encoding ||
1367     //             s->current_picture.f.reference /* || h->contains_intra */ || 1;
1368
1369     /* We mark the current picture as non-reference after allocating it, so
1370      * that if we break out due to an error it can be released automatically
1371      * in the next ff_MPV_frame_start().
1372      * SVQ3 as well as most other codecs have only last/next/current and thus
1373      * get released even with set reference, besides SVQ3 and others do not
1374      * mark frames as reference later "naturally". */
1375     if (s->codec_id != AV_CODEC_ID_SVQ3)
1376         s->current_picture_ptr->f.reference = 0;
1377
1378     s->current_picture_ptr->field_poc[0]     =
1379         s->current_picture_ptr->field_poc[1] = INT_MAX;
1380
1381     h->next_output_pic = NULL;
1382
1383     assert(s->current_picture_ptr->long_ref == 0);
1384
1385     return 0;
1386 }
1387
1388 /**
1389  * Run setup operations that must be run after slice header decoding.
1390  * This includes finding the next displayed frame.
1391  *
1392  * @param h h264 master context
1393  * @param setup_finished enough NALs have been read that we can call
1394  * ff_thread_finish_setup()
1395  */
1396 static void decode_postinit(H264Context *h, int setup_finished)
1397 {
1398     MpegEncContext *const s = &h->s;
1399     Picture *out = s->current_picture_ptr;
1400     Picture *cur = s->current_picture_ptr;
1401     int i, pics, out_of_order, out_idx;
1402     int invalid = 0, cnt = 0;
1403
1404     s->current_picture_ptr->f.qscale_type = FF_QSCALE_TYPE_H264;
1405     s->current_picture_ptr->f.pict_type   = s->pict_type;
1406
1407     if (h->next_output_pic)
1408         return;
1409
1410     if (cur->field_poc[0] == INT_MAX || cur->field_poc[1] == INT_MAX) {
1411         /* FIXME: if we have two PAFF fields in one packet, we can't start
1412          * the next thread here. If we have one field per packet, we can.
1413          * The check in decode_nal_units() is not good enough to find this
1414          * yet, so we assume the worst for now. */
1415         // if (setup_finished)
1416         //    ff_thread_finish_setup(s->avctx);
1417         return;
1418     }
1419
1420     cur->f.interlaced_frame = 0;
1421     cur->f.repeat_pict      = 0;
1422
1423     /* Signal interlacing information externally. */
1424     /* Prioritize picture timing SEI information over used
1425      * decoding process if it exists. */
1426
1427     if (h->sps.pic_struct_present_flag) {
1428         switch (h->sei_pic_struct) {
1429         case SEI_PIC_STRUCT_FRAME:
1430             break;
1431         case SEI_PIC_STRUCT_TOP_FIELD:
1432         case SEI_PIC_STRUCT_BOTTOM_FIELD:
1433             cur->f.interlaced_frame = 1;
1434             break;
1435         case SEI_PIC_STRUCT_TOP_BOTTOM:
1436         case SEI_PIC_STRUCT_BOTTOM_TOP:
1437             if (FIELD_OR_MBAFF_PICTURE)
1438                 cur->f.interlaced_frame = 1;
1439             else
1440                 // try to flag soft telecine progressive
1441                 cur->f.interlaced_frame = h->prev_interlaced_frame;
1442             break;
1443         case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
1444         case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
1445             /* Signal the possibility of telecined film externally
1446              * (pic_struct 5,6). From these hints, let the applications
1447              * decide if they apply deinterlacing. */
1448             cur->f.repeat_pict = 1;
1449             break;
1450         case SEI_PIC_STRUCT_FRAME_DOUBLING:
1451             cur->f.repeat_pict = 2;
1452             break;
1453         case SEI_PIC_STRUCT_FRAME_TRIPLING:
1454             cur->f.repeat_pict = 4;
1455             break;
1456         }
1457
1458         if ((h->sei_ct_type & 3) &&
1459             h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
1460             cur->f.interlaced_frame = (h->sei_ct_type & (1 << 1)) != 0;
1461     } else {
1462         /* Derive interlacing flag from used decoding process. */
1463         cur->f.interlaced_frame = FIELD_OR_MBAFF_PICTURE;
1464     }
1465     h->prev_interlaced_frame = cur->f.interlaced_frame;
1466
1467     if (cur->field_poc[0] != cur->field_poc[1]) {
1468         /* Derive top_field_first from field pocs. */
1469         cur->f.top_field_first = cur->field_poc[0] < cur->field_poc[1];
1470     } else {
1471         if (cur->f.interlaced_frame || h->sps.pic_struct_present_flag) {
1472             /* Use picture timing SEI information. Even if it is a
1473              * information of a past frame, better than nothing. */
1474             if (h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM ||
1475                 h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
1476                 cur->f.top_field_first = 1;
1477             else
1478                 cur->f.top_field_first = 0;
1479         } else {
1480             /* Most likely progressive */
1481             cur->f.top_field_first = 0;
1482         }
1483     }
1484
1485     // FIXME do something with unavailable reference frames
1486
1487     /* Sort B-frames into display order */
1488
1489     if (h->sps.bitstream_restriction_flag &&
1490         s->avctx->has_b_frames < h->sps.num_reorder_frames) {
1491         s->avctx->has_b_frames = h->sps.num_reorder_frames;
1492         s->low_delay           = 0;
1493     }
1494
1495     if (s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT &&
1496         !h->sps.bitstream_restriction_flag) {
1497         s->avctx->has_b_frames = MAX_DELAYED_PIC_COUNT - 1;
1498         s->low_delay           = 0;
1499     }
1500
1501     pics = 0;
1502     while (h->delayed_pic[pics])
1503         pics++;
1504
1505     assert(pics <= MAX_DELAYED_PIC_COUNT);
1506
1507     h->delayed_pic[pics++] = cur;
1508     if (cur->f.reference == 0)
1509         cur->f.reference = DELAYED_PIC_REF;
1510
1511     /* Frame reordering. This code takes pictures from coding order and sorts
1512      * them by their incremental POC value into display order. It supports POC
1513      * gaps, MMCO reset codes and random resets.
1514      * A "display group" can start either with a IDR frame (f.key_frame = 1),
1515      * and/or can be closed down with a MMCO reset code. In sequences where
1516      * there is no delay, we can't detect that (since the frame was already
1517      * output to the user), so we also set h->mmco_reset to detect the MMCO
1518      * reset code.
1519      * FIXME: if we detect insufficient delays (as per s->avctx->has_b_frames),
1520      * we increase the delay between input and output. All frames affected by
1521      * the lag (e.g. those that should have been output before another frame
1522      * that we already returned to the user) will be dropped. This is a bug
1523      * that we will fix later. */
1524     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
1525         cnt     += out->poc < h->last_pocs[i];
1526         invalid += out->poc == INT_MIN;
1527     }
1528     if (!h->mmco_reset && !cur->f.key_frame &&
1529         cnt + invalid == MAX_DELAYED_PIC_COUNT && cnt > 0) {
1530         h->mmco_reset = 2;
1531         if (pics > 1)
1532             h->delayed_pic[pics - 2]->mmco_reset = 2;
1533     }
1534     if (h->mmco_reset || cur->f.key_frame) {
1535         for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
1536             h->last_pocs[i] = INT_MIN;
1537         cnt     = 0;
1538         invalid = MAX_DELAYED_PIC_COUNT;
1539     }
1540     out     = h->delayed_pic[0];
1541     out_idx = 0;
1542     for (i = 1; i < MAX_DELAYED_PIC_COUNT &&
1543                 h->delayed_pic[i] &&
1544                 !h->delayed_pic[i - 1]->mmco_reset &&
1545                 !h->delayed_pic[i]->f.key_frame;
1546          i++)
1547         if (h->delayed_pic[i]->poc < out->poc) {
1548             out     = h->delayed_pic[i];
1549             out_idx = i;
1550         }
1551     if (s->avctx->has_b_frames == 0 &&
1552         (h->delayed_pic[0]->f.key_frame || h->mmco_reset))
1553         h->next_outputed_poc = INT_MIN;
1554     out_of_order = !out->f.key_frame && !h->mmco_reset &&
1555                    (out->poc < h->next_outputed_poc);
1556
1557     if (h->sps.bitstream_restriction_flag &&
1558         s->avctx->has_b_frames >= h->sps.num_reorder_frames) {
1559     } else if (out_of_order && pics - 1 == s->avctx->has_b_frames &&
1560                s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) {
1561         if (invalid + cnt < MAX_DELAYED_PIC_COUNT) {
1562             s->avctx->has_b_frames = FFMAX(s->avctx->has_b_frames, cnt);
1563         }
1564         s->low_delay = 0;
1565     } else if (s->low_delay &&
1566                ((h->next_outputed_poc != INT_MIN &&
1567                  out->poc > h->next_outputed_poc + 2) ||
1568                 cur->f.pict_type == AV_PICTURE_TYPE_B)) {
1569         s->low_delay = 0;
1570         s->avctx->has_b_frames++;
1571     }
1572
1573     if (pics > s->avctx->has_b_frames) {
1574         out->f.reference &= ~DELAYED_PIC_REF;
1575         // for frame threading, the owner must be the second field's thread or
1576         // else the first thread can release the picture and reuse it unsafely
1577         out->owner2       = s;
1578         for (i = out_idx; h->delayed_pic[i]; i++)
1579             h->delayed_pic[i] = h->delayed_pic[i + 1];
1580     }
1581     memmove(h->last_pocs, &h->last_pocs[1],
1582             sizeof(*h->last_pocs) * (MAX_DELAYED_PIC_COUNT - 1));
1583     h->last_pocs[MAX_DELAYED_PIC_COUNT - 1] = cur->poc;
1584     if (!out_of_order && pics > s->avctx->has_b_frames) {
1585         h->next_output_pic = out;
1586         if (out->mmco_reset) {
1587             if (out_idx > 0) {
1588                 h->next_outputed_poc                    = out->poc;
1589                 h->delayed_pic[out_idx - 1]->mmco_reset = out->mmco_reset;
1590             } else {
1591                 h->next_outputed_poc = INT_MIN;
1592             }
1593         } else {
1594             if (out_idx == 0 && pics > 1 && h->delayed_pic[0]->f.key_frame) {
1595                 h->next_outputed_poc = INT_MIN;
1596             } else {
1597                 h->next_outputed_poc = out->poc;
1598             }
1599         }
1600         h->mmco_reset = 0;
1601     } else {
1602         av_log(s->avctx, AV_LOG_DEBUG, "no picture\n");
1603     }
1604
1605     if (setup_finished)
1606         ff_thread_finish_setup(s->avctx);
1607 }
1608
1609 static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y,
1610                                               uint8_t *src_cb, uint8_t *src_cr,
1611                                               int linesize, int uvlinesize,
1612                                               int simple)
1613 {
1614     MpegEncContext *const s = &h->s;
1615     uint8_t *top_border;
1616     int top_idx = 1;
1617     const int pixel_shift = h->pixel_shift;
1618     int chroma444 = CHROMA444;
1619     int chroma422 = CHROMA422;
1620
1621     src_y  -= linesize;
1622     src_cb -= uvlinesize;
1623     src_cr -= uvlinesize;
1624
1625     if (!simple && FRAME_MBAFF) {
1626         if (s->mb_y & 1) {
1627             if (!MB_MBAFF) {
1628                 top_border = h->top_borders[0][s->mb_x];
1629                 AV_COPY128(top_border, src_y + 15 * linesize);
1630                 if (pixel_shift)
1631                     AV_COPY128(top_border + 16, src_y + 15 * linesize + 16);
1632                 if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
1633                     if (chroma444) {
1634                         if (pixel_shift) {
1635                             AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize);
1636                             AV_COPY128(top_border + 48, src_cb + 15 * uvlinesize + 16);
1637                             AV_COPY128(top_border + 64, src_cr + 15 * uvlinesize);
1638                             AV_COPY128(top_border + 80, src_cr + 15 * uvlinesize + 16);
1639                         } else {
1640                             AV_COPY128(top_border + 16, src_cb + 15 * uvlinesize);
1641                             AV_COPY128(top_border + 32, src_cr + 15 * uvlinesize);
1642                         }
1643                     } else if (chroma422) {
1644                         if (pixel_shift) {
1645                             AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize);
1646                             AV_COPY128(top_border + 48, src_cr + 15 * uvlinesize);
1647                         } else {
1648                             AV_COPY64(top_border + 16, src_cb + 15 * uvlinesize);
1649                             AV_COPY64(top_border + 24, src_cr + 15 * uvlinesize);
1650                         }
1651                     } else {
1652                         if (pixel_shift) {
1653                             AV_COPY128(top_border + 32, src_cb + 7 * uvlinesize);
1654                             AV_COPY128(top_border + 48, src_cr + 7 * uvlinesize);
1655                         } else {
1656                             AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1657                             AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1658                         }
1659                     }
1660                 }
1661             }
1662         } else if (MB_MBAFF) {
1663             top_idx = 0;
1664         } else
1665             return;
1666     }
1667
1668     top_border = h->top_borders[top_idx][s->mb_x];
1669     /* There are two lines saved, the line above the top macroblock
1670      * of a pair, and the line above the bottom macroblock. */
1671     AV_COPY128(top_border, src_y + 16 * linesize);
1672     if (pixel_shift)
1673         AV_COPY128(top_border + 16, src_y + 16 * linesize + 16);
1674
1675     if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
1676         if (chroma444) {
1677             if (pixel_shift) {
1678                 AV_COPY128(top_border + 32, src_cb + 16 * linesize);
1679                 AV_COPY128(top_border + 48, src_cb + 16 * linesize + 16);
1680                 AV_COPY128(top_border + 64, src_cr + 16 * linesize);
1681                 AV_COPY128(top_border + 80, src_cr + 16 * linesize + 16);
1682             } else {
1683                 AV_COPY128(top_border + 16, src_cb + 16 * linesize);
1684                 AV_COPY128(top_border + 32, src_cr + 16 * linesize);
1685             }
1686         } else if (chroma422) {
1687             if (pixel_shift) {
1688                 AV_COPY128(top_border + 32, src_cb + 16 * uvlinesize);
1689                 AV_COPY128(top_border + 48, src_cr + 16 * uvlinesize);
1690             } else {
1691                 AV_COPY64(top_border + 16, src_cb + 16 * uvlinesize);
1692                 AV_COPY64(top_border + 24, src_cr + 16 * uvlinesize);
1693             }
1694         } else {
1695             if (pixel_shift) {
1696                 AV_COPY128(top_border + 32, src_cb + 8 * uvlinesize);
1697                 AV_COPY128(top_border + 48, src_cr + 8 * uvlinesize);
1698             } else {
1699                 AV_COPY64(top_border + 16, src_cb + 8 * uvlinesize);
1700                 AV_COPY64(top_border + 24, src_cr + 8 * uvlinesize);
1701             }
1702         }
1703     }
1704 }
1705
1706 static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
1707                                             uint8_t *src_cb, uint8_t *src_cr,
1708                                             int linesize, int uvlinesize,
1709                                             int xchg, int chroma444,
1710                                             int simple, int pixel_shift)
1711 {
1712     MpegEncContext *const s = &h->s;
1713     int deblock_topleft;
1714     int deblock_top;
1715     int top_idx = 1;
1716     uint8_t *top_border_m1;
1717     uint8_t *top_border;
1718
1719     if (!simple && FRAME_MBAFF) {
1720         if (s->mb_y & 1) {
1721             if (!MB_MBAFF)
1722                 return;
1723         } else {
1724             top_idx = MB_MBAFF ? 0 : 1;
1725         }
1726     }
1727
1728     if (h->deblocking_filter == 2) {
1729         deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
1730         deblock_top     = h->top_type;
1731     } else {
1732         deblock_topleft = (s->mb_x > 0);
1733         deblock_top     = (s->mb_y > !!MB_FIELD);
1734     }
1735
1736     src_y  -= linesize   + 1 + pixel_shift;
1737     src_cb -= uvlinesize + 1 + pixel_shift;
1738     src_cr -= uvlinesize + 1 + pixel_shift;
1739
1740     top_border_m1 = h->top_borders[top_idx][s->mb_x - 1];
1741     top_border    = h->top_borders[top_idx][s->mb_x];
1742
1743 #define XCHG(a, b, xchg)                        \
1744     if (pixel_shift) {                          \
1745         if (xchg) {                             \
1746             AV_SWAP64(b + 0, a + 0);            \
1747             AV_SWAP64(b + 8, a + 8);            \
1748         } else {                                \
1749             AV_COPY128(b, a);                   \
1750         }                                       \
1751     } else if (xchg)                            \
1752         AV_SWAP64(b, a);                        \
1753     else                                        \
1754         AV_COPY64(b, a);
1755
1756     if (deblock_top) {
1757         if (deblock_topleft) {
1758             XCHG(top_border_m1 + (8 << pixel_shift),
1759                  src_y - (7 << pixel_shift), 1);
1760         }
1761         XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
1762         XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
1763         if (s->mb_x + 1 < s->mb_width) {
1764             XCHG(h->top_borders[top_idx][s->mb_x + 1],
1765                  src_y + (17 << pixel_shift), 1);
1766         }
1767     }
1768     if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
1769         if (chroma444) {
1770             if (deblock_topleft) {
1771                 XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
1772                 XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
1773             }
1774             XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
1775             XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
1776             XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
1777             XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
1778             if (s->mb_x + 1 < s->mb_width) {
1779                 XCHG(h->top_borders[top_idx][s->mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
1780                 XCHG(h->top_borders[top_idx][s->mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
1781             }
1782         } else {
1783             if (deblock_top) {
1784                 if (deblock_topleft) {
1785                     XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
1786                     XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
1787                 }
1788                 XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1);
1789                 XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1);
1790             }
1791         }
1792     }
1793 }
1794
1795 static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth,
1796                                         int index)
1797 {
1798     if (high_bit_depth) {
1799         return AV_RN32A(((int32_t *)mb) + index);
1800     } else
1801         return AV_RN16A(mb + index);
1802 }
1803
1804 static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth,
1805                                          int index, int value)
1806 {
1807     if (high_bit_depth) {
1808         AV_WN32A(((int32_t *)mb) + index, value);
1809     } else
1810         AV_WN16A(mb + index, value);
1811 }
1812
1813 static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
1814                                                        int mb_type, int is_h264,
1815                                                        int simple,
1816                                                        int transform_bypass,
1817                                                        int pixel_shift,
1818                                                        int *block_offset,
1819                                                        int linesize,
1820                                                        uint8_t *dest_y, int p)
1821 {
1822     MpegEncContext *const s = &h->s;
1823     void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
1824     void (*idct_dc_add)(uint8_t *dst, int16_t *block, int stride);
1825     int i;
1826     int qscale = p == 0 ? s->qscale : h->chroma_qp[p - 1];
1827     block_offset += 16 * p;
1828     if (IS_INTRA4x4(mb_type)) {
1829         if (IS_8x8DCT(mb_type)) {
1830             if (transform_bypass) {
1831                 idct_dc_add  =
1832                 idct_add     = s->dsp.add_pixels8;
1833             } else {
1834                 idct_dc_add = h->h264dsp.h264_idct8_dc_add;
1835                 idct_add    = h->h264dsp.h264_idct8_add;
1836             }
1837             for (i = 0; i < 16; i += 4) {
1838                 uint8_t *const ptr = dest_y + block_offset[i];
1839                 const int dir      = h->intra4x4_pred_mode_cache[scan8[i]];
1840                 if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
1841                     h->hpc.pred8x8l_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
1842                 } else {
1843                     const int nnz = h->non_zero_count_cache[scan8[i + p * 16]];
1844                     h->hpc.pred8x8l[dir](ptr, (h->topleft_samples_available << i) & 0x8000,
1845                                          (h->topright_samples_available << i) & 0x4000, linesize);
1846                     if (nnz) {
1847                         if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
1848                             idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
1849                         else
1850                             idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
1851                     }
1852                 }
1853             }
1854         } else {
1855             if (transform_bypass) {
1856                 idct_dc_add  =
1857                     idct_add = s->dsp.add_pixels4;
1858             } else {
1859                 idct_dc_add = h->h264dsp.h264_idct_dc_add;
1860                 idct_add    = h->h264dsp.h264_idct_add;
1861             }
1862             for (i = 0; i < 16; i++) {
1863                 uint8_t *const ptr = dest_y + block_offset[i];
1864                 const int dir      = h->intra4x4_pred_mode_cache[scan8[i]];
1865
1866                 if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
1867                     h->hpc.pred4x4_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
1868                 } else {
1869                     uint8_t *topright;
1870                     int nnz, tr;
1871                     uint64_t tr_high;
1872                     if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
1873                         const int topright_avail = (h->topright_samples_available << i) & 0x8000;
1874                         assert(s->mb_y || linesize <= block_offset[i]);
1875                         if (!topright_avail) {
1876                             if (pixel_shift) {
1877                                 tr_high  = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
1878                                 topright = (uint8_t *)&tr_high;
1879                             } else {
1880                                 tr       = ptr[3 - linesize] * 0x01010101u;
1881                                 topright = (uint8_t *)&tr;
1882                             }
1883                         } else
1884                             topright = ptr + (4 << pixel_shift) - linesize;
1885                     } else
1886                         topright = NULL;
1887
1888                     h->hpc.pred4x4[dir](ptr, topright, linesize);
1889                     nnz = h->non_zero_count_cache[scan8[i + p * 16]];
1890                     if (nnz) {
1891                         if (is_h264) {
1892                             if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
1893                                 idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
1894                             else
1895                                 idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
1896                         } else if (CONFIG_SVQ3_DECODER)
1897                             ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize, qscale, 0);
1898                     }
1899                 }
1900             }
1901         }
1902     } else {
1903         h->hpc.pred16x16[h->intra16x16_pred_mode](dest_y, linesize);
1904         if (is_h264) {
1905             if (h->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) {
1906                 if (!transform_bypass)
1907                     h->h264dsp.h264_luma_dc_dequant_idct(h->mb + (p * 256 << pixel_shift),
1908                                                          h->mb_luma_dc[p],
1909                                                          h->dequant4_coeff[p][qscale][0]);
1910                 else {
1911                     static const uint8_t dc_mapping[16] = {
1912                          0 * 16,  1 * 16,  4 * 16,  5 * 16,
1913                          2 * 16,  3 * 16,  6 * 16,  7 * 16,
1914                          8 * 16,  9 * 16, 12 * 16, 13 * 16,
1915                         10 * 16, 11 * 16, 14 * 16, 15 * 16 };
1916                     for (i = 0; i < 16; i++)
1917                         dctcoef_set(h->mb + (p * 256 << pixel_shift),
1918                                     pixel_shift, dc_mapping[i],
1919                                     dctcoef_get(h->mb_luma_dc[p],
1920                                                 pixel_shift, i));
1921                 }
1922             }
1923         } else if (CONFIG_SVQ3_DECODER)
1924             ff_svq3_luma_dc_dequant_idct_c(h->mb + p * 256,
1925                                            h->mb_luma_dc[p], qscale);
1926     }
1927 }
1928
1929 static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
1930                                                     int is_h264, int simple,
1931                                                     int transform_bypass,
1932                                                     int pixel_shift,
1933                                                     int *block_offset,
1934                                                     int linesize,
1935                                                     uint8_t *dest_y, int p)
1936 {
1937     MpegEncContext *const s = &h->s;
1938     void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
1939     int i;
1940     block_offset += 16 * p;
1941     if (!IS_INTRA4x4(mb_type)) {
1942         if (is_h264) {
1943             if (IS_INTRA16x16(mb_type)) {
1944                 if (transform_bypass) {
1945                     if (h->sps.profile_idc == 244 &&
1946                         (h->intra16x16_pred_mode == VERT_PRED8x8 ||
1947                          h->intra16x16_pred_mode == HOR_PRED8x8)) {
1948                         h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset,
1949                                                                       h->mb + (p * 256 << pixel_shift),
1950                                                                       linesize);
1951                     } else {
1952                         for (i = 0; i < 16; i++)
1953                             if (h->non_zero_count_cache[scan8[i + p * 16]] ||
1954                                 dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
1955                                 s->dsp.add_pixels4(dest_y + block_offset[i],
1956                                                    h->mb + (i * 16 + p * 256 << pixel_shift),
1957                                                    linesize);
1958                     }
1959                 } else {
1960                     h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
1961                                                     h->mb + (p * 256 << pixel_shift),
1962                                                     linesize,
1963                                                     h->non_zero_count_cache + p * 5 * 8);
1964                 }
1965             } else if (h->cbp & 15) {
1966                 if (transform_bypass) {
1967                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1968                     idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8
1969                                                   : s->dsp.add_pixels4;
1970                     for (i = 0; i < 16; i += di)
1971                         if (h->non_zero_count_cache[scan8[i + p * 16]])
1972                             idct_add(dest_y + block_offset[i],
1973                                      h->mb + (i * 16 + p * 256 << pixel_shift),
1974                                      linesize);
1975                 } else {
1976                     if (IS_8x8DCT(mb_type))
1977                         h->h264dsp.h264_idct8_add4(dest_y, block_offset,
1978                                                    h->mb + (p * 256 << pixel_shift),
1979                                                    linesize,
1980                                                    h->non_zero_count_cache + p * 5 * 8);
1981                     else
1982                         h->h264dsp.h264_idct_add16(dest_y, block_offset,
1983                                                    h->mb + (p * 256 << pixel_shift),
1984                                                    linesize,
1985                                                    h->non_zero_count_cache + p * 5 * 8);
1986                 }
1987             }
1988         } else if (CONFIG_SVQ3_DECODER) {
1989             for (i = 0; i < 16; i++)
1990                 if (h->non_zero_count_cache[scan8[i + p * 16]] || h->mb[i * 16 + p * 256]) {
1991                     // FIXME benchmark weird rule, & below
1992                     uint8_t *const ptr = dest_y + block_offset[i];
1993                     ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize,
1994                                        s->qscale, IS_INTRA(mb_type) ? 1 : 0);
1995                 }
1996         }
1997     }
1998 }
1999
2000 #define BITS   8
2001 #define SIMPLE 1
2002 #include "h264_mb_template.c"
2003
2004 #undef  BITS
2005 #define BITS   16
2006 #include "h264_mb_template.c"
2007
2008 #undef  SIMPLE
2009 #define SIMPLE 0
2010 #include "h264_mb_template.c"
2011
2012 void ff_h264_hl_decode_mb(H264Context *h)
2013 {
2014     MpegEncContext *const s = &h->s;
2015     const int mb_xy   = h->mb_xy;
2016     const int mb_type = s->current_picture.f.mb_type[mb_xy];
2017     int is_complex    = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2018
2019     if (CHROMA444) {
2020         if (is_complex || h->pixel_shift)
2021             hl_decode_mb_444_complex(h);
2022         else
2023             hl_decode_mb_444_simple_8(h);
2024     } else if (is_complex) {
2025         hl_decode_mb_complex(h);
2026     } else if (h->pixel_shift) {
2027         hl_decode_mb_simple_16(h);
2028     } else
2029         hl_decode_mb_simple_8(h);
2030 }
2031
2032 static int pred_weight_table(H264Context *h)
2033 {
2034     MpegEncContext *const s = &h->s;
2035     int list, i;
2036     int luma_def, chroma_def;
2037
2038     h->use_weight             = 0;
2039     h->use_weight_chroma      = 0;
2040     h->luma_log2_weight_denom = get_ue_golomb(&s->gb);
2041     if (h->sps.chroma_format_idc)
2042         h->chroma_log2_weight_denom = get_ue_golomb(&s->gb);
2043     luma_def   = 1 << h->luma_log2_weight_denom;
2044     chroma_def = 1 << h->chroma_log2_weight_denom;
2045
2046     for (list = 0; list < 2; list++) {
2047         h->luma_weight_flag[list]   = 0;
2048         h->chroma_weight_flag[list] = 0;
2049         for (i = 0; i < h->ref_count[list]; i++) {
2050             int luma_weight_flag, chroma_weight_flag;
2051
2052             luma_weight_flag = get_bits1(&s->gb);
2053             if (luma_weight_flag) {
2054                 h->luma_weight[i][list][0] = get_se_golomb(&s->gb);
2055                 h->luma_weight[i][list][1] = get_se_golomb(&s->gb);
2056                 if (h->luma_weight[i][list][0] != luma_def ||
2057                     h->luma_weight[i][list][1] != 0) {
2058                     h->use_weight             = 1;
2059                     h->luma_weight_flag[list] = 1;
2060                 }
2061             } else {
2062                 h->luma_weight[i][list][0] = luma_def;
2063                 h->luma_weight[i][list][1] = 0;
2064             }
2065
2066             if (h->sps.chroma_format_idc) {
2067                 chroma_weight_flag = get_bits1(&s->gb);
2068                 if (chroma_weight_flag) {
2069                     int j;
2070                     for (j = 0; j < 2; j++) {
2071                         h->chroma_weight[i][list][j][0] = get_se_golomb(&s->gb);
2072                         h->chroma_weight[i][list][j][1] = get_se_golomb(&s->gb);
2073                         if (h->chroma_weight[i][list][j][0] != chroma_def ||
2074                             h->chroma_weight[i][list][j][1] != 0) {
2075                             h->use_weight_chroma = 1;
2076                             h->chroma_weight_flag[list] = 1;
2077                         }
2078                     }
2079                 } else {
2080                     int j;
2081                     for (j = 0; j < 2; j++) {
2082                         h->chroma_weight[i][list][j][0] = chroma_def;
2083                         h->chroma_weight[i][list][j][1] = 0;
2084                     }
2085                 }
2086             }
2087         }
2088         if (h->slice_type_nos != AV_PICTURE_TYPE_B)
2089             break;
2090     }
2091     h->use_weight = h->use_weight || h->use_weight_chroma;
2092     return 0;
2093 }
2094
2095 /**
2096  * Initialize implicit_weight table.
2097  * @param field  0/1 initialize the weight for interlaced MBAFF
2098  *                -1 initializes the rest
2099  */
2100 static void implicit_weight_table(H264Context *h, int field)
2101 {
2102     MpegEncContext *const s = &h->s;
2103     int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;
2104
2105     for (i = 0; i < 2; i++) {
2106         h->luma_weight_flag[i]   = 0;
2107         h->chroma_weight_flag[i] = 0;
2108     }
2109
2110     if (field < 0) {
2111         if (s->picture_structure == PICT_FRAME) {
2112             cur_poc = s->current_picture_ptr->poc;
2113         } else {
2114             cur_poc = s->current_picture_ptr->field_poc[s->picture_structure - 1];
2115         }
2116         if (h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF &&
2117             h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2 * cur_poc) {
2118             h->use_weight = 0;
2119             h->use_weight_chroma = 0;
2120             return;
2121         }
2122         ref_start  = 0;
2123         ref_count0 = h->ref_count[0];
2124         ref_count1 = h->ref_count[1];
2125     } else {
2126         cur_poc    = s->current_picture_ptr->field_poc[field];
2127         ref_start  = 16;
2128         ref_count0 = 16 + 2 * h->ref_count[0];
2129         ref_count1 = 16 + 2 * h->ref_count[1];
2130     }
2131
2132     h->use_weight               = 2;
2133     h->use_weight_chroma        = 2;
2134     h->luma_log2_weight_denom   = 5;
2135     h->chroma_log2_weight_denom = 5;
2136
2137     for (ref0 = ref_start; ref0 < ref_count0; ref0++) {
2138         int poc0 = h->ref_list[0][ref0].poc;
2139         for (ref1 = ref_start; ref1 < ref_count1; ref1++) {
2140             int w = 32;
2141             if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
2142                 int poc1 = h->ref_list[1][ref1].poc;
2143                 int td   = av_clip(poc1 - poc0, -128, 127);
2144                 if (td) {
2145                     int tb = av_clip(cur_poc - poc0, -128, 127);
2146                     int tx = (16384 + (FFABS(td) >> 1)) / td;
2147                     int dist_scale_factor = (tb * tx + 32) >> 8;
2148                     if (dist_scale_factor >= -64 && dist_scale_factor <= 128)
2149                         w = 64 - dist_scale_factor;
2150                 }
2151             }
2152             if (field < 0) {
2153                 h->implicit_weight[ref0][ref1][0] =
2154                 h->implicit_weight[ref0][ref1][1] = w;
2155             } else {
2156                 h->implicit_weight[ref0][ref1][field] = w;
2157             }
2158         }
2159     }
2160 }
2161
2162 /**
2163  * instantaneous decoder refresh.
2164  */
2165 static void idr(H264Context *h)
2166 {
2167     ff_h264_remove_all_refs(h);
2168     h->prev_frame_num        = 0;
2169     h->prev_frame_num_offset = 0;
2170     h->prev_poc_msb          =
2171     h->prev_poc_lsb          = 0;
2172 }
2173
2174 /* forget old pics after a seek */
2175 static void flush_change(H264Context *h)
2176 {
2177     int i;
2178     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
2179         h->last_pocs[i] = INT_MIN;
2180     h->outputed_poc = h->next_outputed_poc = INT_MIN;
2181     h->prev_interlaced_frame = 1;
2182     idr(h);
2183     if (h->s.current_picture_ptr)
2184         h->s.current_picture_ptr->f.reference = 0;
2185     h->s.first_field = 0;
2186     memset(h->ref_list[0], 0, sizeof(h->ref_list[0]));
2187     memset(h->ref_list[1], 0, sizeof(h->ref_list[1]));
2188     memset(h->default_ref_list[0], 0, sizeof(h->default_ref_list[0]));
2189     memset(h->default_ref_list[1], 0, sizeof(h->default_ref_list[1]));
2190     ff_h264_reset_sei(h);
2191 }
2192
2193 /* forget old pics after a seek */
2194 static void flush_dpb(AVCodecContext *avctx)
2195 {
2196     H264Context *h = avctx->priv_data;
2197     int i;
2198
2199     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
2200         if (h->delayed_pic[i])
2201             h->delayed_pic[i]->f.reference = 0;
2202         h->delayed_pic[i] = NULL;
2203     }
2204
2205     flush_change(h);
2206     ff_mpeg_flush(avctx);
2207 }
2208
2209 static int init_poc(H264Context *h)
2210 {
2211     MpegEncContext *const s = &h->s;
2212     const int max_frame_num = 1 << h->sps.log2_max_frame_num;
2213     int field_poc[2];
2214     Picture *cur = s->current_picture_ptr;
2215
2216     h->frame_num_offset = h->prev_frame_num_offset;
2217     if (h->frame_num < h->prev_frame_num)
2218         h->frame_num_offset += max_frame_num;
2219
2220     if (h->sps.poc_type == 0) {
2221         const int max_poc_lsb = 1 << h->sps.log2_max_poc_lsb;
2222
2223         if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb / 2)
2224             h->poc_msb = h->prev_poc_msb + max_poc_lsb;
2225         else if (h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb / 2)
2226             h->poc_msb = h->prev_poc_msb - max_poc_lsb;
2227         else
2228             h->poc_msb = h->prev_poc_msb;
2229         field_poc[0] =
2230         field_poc[1] = h->poc_msb + h->poc_lsb;
2231         if (s->picture_structure == PICT_FRAME)
2232             field_poc[1] += h->delta_poc_bottom;
2233     } else if (h->sps.poc_type == 1) {
2234         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
2235         int i;
2236
2237         if (h->sps.poc_cycle_length != 0)
2238             abs_frame_num = h->frame_num_offset + h->frame_num;
2239         else
2240             abs_frame_num = 0;
2241
2242         if (h->nal_ref_idc == 0 && abs_frame_num > 0)
2243             abs_frame_num--;
2244
2245         expected_delta_per_poc_cycle = 0;
2246         for (i = 0; i < h->sps.poc_cycle_length; i++)
2247             // FIXME integrate during sps parse
2248             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[i];
2249
2250         if (abs_frame_num > 0) {
2251             int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
2252             int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
2253
2254             expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
2255             for (i = 0; i <= frame_num_in_poc_cycle; i++)
2256                 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[i];
2257         } else
2258             expectedpoc = 0;
2259
2260         if (h->nal_ref_idc == 0)
2261             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
2262
2263         field_poc[0] = expectedpoc + h->delta_poc[0];
2264         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
2265
2266         if (s->picture_structure == PICT_FRAME)
2267             field_poc[1] += h->delta_poc[1];
2268     } else {
2269         int poc = 2 * (h->frame_num_offset + h->frame_num);
2270
2271         if (!h->nal_ref_idc)
2272             poc--;
2273
2274         field_poc[0] = poc;
2275         field_poc[1] = poc;
2276     }
2277
2278     if (s->picture_structure != PICT_BOTTOM_FIELD)
2279         s->current_picture_ptr->field_poc[0] = field_poc[0];
2280     if (s->picture_structure != PICT_TOP_FIELD)
2281         s->current_picture_ptr->field_poc[1] = field_poc[1];
2282     cur->poc = FFMIN(cur->field_poc[0], cur->field_poc[1]);
2283
2284     return 0;
2285 }
2286
2287 /**
2288  * initialize scan tables
2289  */
2290 static void init_scan_tables(H264Context *h)
2291 {
2292     int i;
2293     for (i = 0; i < 16; i++) {
2294 #define T(x) (x >> 2) | ((x << 2) & 0xF)
2295         h->zigzag_scan[i] = T(zigzag_scan[i]);
2296         h->field_scan[i]  = T(field_scan[i]);
2297 #undef T
2298     }
2299     for (i = 0; i < 64; i++) {
2300 #define T(x) (x >> 3) | ((x & 7) << 3)
2301         h->zigzag_scan8x8[i]       = T(ff_zigzag_direct[i]);
2302         h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
2303         h->field_scan8x8[i]        = T(field_scan8x8[i]);
2304         h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
2305 #undef T
2306     }
2307     if (h->sps.transform_bypass) { // FIXME same ugly
2308         h->zigzag_scan_q0          = zigzag_scan;
2309         h->zigzag_scan8x8_q0       = ff_zigzag_direct;
2310         h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
2311         h->field_scan_q0           = field_scan;
2312         h->field_scan8x8_q0        = field_scan8x8;
2313         h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
2314     } else {
2315         h->zigzag_scan_q0          = h->zigzag_scan;
2316         h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
2317         h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
2318         h->field_scan_q0           = h->field_scan;
2319         h->field_scan8x8_q0        = h->field_scan8x8;
2320         h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
2321     }
2322 }
2323
2324 static int field_end(H264Context *h, int in_setup)
2325 {
2326     MpegEncContext *const s     = &h->s;
2327     AVCodecContext *const avctx = s->avctx;
2328     int err = 0;
2329     s->mb_y = 0;
2330
2331     if (!in_setup && !s->droppable)
2332         ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
2333                                   s->picture_structure == PICT_BOTTOM_FIELD);
2334
2335     if (CONFIG_H264_VDPAU_DECODER &&
2336         s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
2337         ff_vdpau_h264_set_reference_frames(s);
2338
2339     if (in_setup || !(avctx->active_thread_type & FF_THREAD_FRAME)) {
2340         if (!s->droppable) {
2341             err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
2342             h->prev_poc_msb = h->poc_msb;
2343             h->prev_poc_lsb = h->poc_lsb;
2344         }
2345         h->prev_frame_num_offset = h->frame_num_offset;
2346         h->prev_frame_num        = h->frame_num;
2347         h->outputed_poc          = h->next_outputed_poc;
2348     }
2349
2350     if (avctx->hwaccel) {
2351         if (avctx->hwaccel->end_frame(avctx) < 0)
2352             av_log(avctx, AV_LOG_ERROR,
2353                    "hardware accelerator failed to decode picture\n");
2354     }
2355
2356     if (CONFIG_H264_VDPAU_DECODER &&
2357         s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
2358         ff_vdpau_h264_picture_complete(s);
2359
2360     /*
2361      * FIXME: Error handling code does not seem to support interlaced
2362      * when slices span multiple rows
2363      * The ff_er_add_slice calls don't work right for bottom
2364      * fields; they cause massive erroneous error concealing
2365      * Error marking covers both fields (top and bottom).
2366      * This causes a mismatched s->error_count
2367      * and a bad error table. Further, the error count goes to
2368      * INT_MAX when called for bottom field, because mb_y is
2369      * past end by one (callers fault) and resync_mb_y != 0
2370      * causes problems for the first MB line, too.
2371      */
2372     if (!FIELD_PICTURE)
2373         ff_er_frame_end(&s->er);
2374
2375     ff_MPV_frame_end(s);
2376
2377     h->current_slice = 0;
2378
2379     return err;
2380 }
2381
2382 /**
2383  * Replicate H264 "master" context to thread contexts.
2384  */
2385 static int clone_slice(H264Context *dst, H264Context *src)
2386 {
2387     int ret;
2388
2389     memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
2390     dst->s.current_picture_ptr = src->s.current_picture_ptr;
2391     dst->s.current_picture     = src->s.current_picture;
2392     dst->s.linesize            = src->s.linesize;
2393     dst->s.uvlinesize          = src->s.uvlinesize;
2394     dst->s.first_field         = src->s.first_field;
2395
2396     if (!dst->s.edge_emu_buffer &&
2397         (ret = ff_mpv_frame_size_alloc(&dst->s, dst->s.linesize))) {
2398         av_log(dst->s.avctx, AV_LOG_ERROR,
2399                "Failed to allocate scratch buffers\n");
2400         return ret;
2401     }
2402
2403     dst->prev_poc_msb          = src->prev_poc_msb;
2404     dst->prev_poc_lsb          = src->prev_poc_lsb;
2405     dst->prev_frame_num_offset = src->prev_frame_num_offset;
2406     dst->prev_frame_num        = src->prev_frame_num;
2407     dst->short_ref_count       = src->short_ref_count;
2408
2409     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
2410     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
2411     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
2412
2413     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
2414     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
2415
2416     return 0;
2417 }
2418
2419 /**
2420  * Compute profile from profile_idc and constraint_set?_flags.
2421  *
2422  * @param sps SPS
2423  *
2424  * @return profile as defined by FF_PROFILE_H264_*
2425  */
2426 int ff_h264_get_profile(SPS *sps)
2427 {
2428     int profile = sps->profile_idc;
2429
2430     switch (sps->profile_idc) {
2431     case FF_PROFILE_H264_BASELINE:
2432         // constraint_set1_flag set to 1
2433         profile |= (sps->constraint_set_flags & 1 << 1) ? FF_PROFILE_H264_CONSTRAINED : 0;
2434         break;
2435     case FF_PROFILE_H264_HIGH_10:
2436     case FF_PROFILE_H264_HIGH_422:
2437     case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
2438         // constraint_set3_flag set to 1
2439         profile |= (sps->constraint_set_flags & 1 << 3) ? FF_PROFILE_H264_INTRA : 0;
2440         break;
2441     }
2442
2443     return profile;
2444 }
2445
2446 static int h264_set_parameter_from_sps(H264Context *h)
2447 {
2448     MpegEncContext *s = &h->s;
2449
2450     if (s->flags & CODEC_FLAG_LOW_DELAY ||
2451         (h->sps.bitstream_restriction_flag &&
2452          !h->sps.num_reorder_frames)) {
2453         if (s->avctx->has_b_frames > 1 || h->delayed_pic[0])
2454             av_log(h->s.avctx, AV_LOG_WARNING, "Delayed frames seen. "
2455                    "Reenabling low delay requires a codec flush.\n");
2456         else
2457             s->low_delay = 1;
2458     }
2459
2460     if (s->avctx->has_b_frames < 2)
2461         s->avctx->has_b_frames = !s->low_delay;
2462
2463     if (s->avctx->bits_per_raw_sample != h->sps.bit_depth_luma ||
2464         h->cur_chroma_format_idc      != h->sps.chroma_format_idc) {
2465         if (s->avctx->codec &&
2466             s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU &&
2467             (h->sps.bit_depth_luma != 8 || h->sps.chroma_format_idc > 1)) {
2468             av_log(s->avctx, AV_LOG_ERROR,
2469                    "VDPAU decoding does not support video colorspace.\n");
2470             return AVERROR_INVALIDDATA;
2471         }
2472         if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
2473             s->avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
2474             h->cur_chroma_format_idc      = h->sps.chroma_format_idc;
2475             h->pixel_shift                = h->sps.bit_depth_luma > 8;
2476
2477             ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma,
2478                             h->sps.chroma_format_idc);
2479             ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma);
2480             ff_h264qpel_init(&h->h264qpel, h->sps.bit_depth_luma);
2481             ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma,
2482                               h->sps.chroma_format_idc);
2483             s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
2484             ff_dsputil_init(&s->dsp, s->avctx);
2485             ff_videodsp_init(&s->vdsp, h->sps.bit_depth_luma);
2486         } else {
2487             av_log(s->avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n",
2488                    h->sps.bit_depth_luma);
2489             return AVERROR_INVALIDDATA;
2490         }
2491     }
2492     return 0;
2493 }
2494
2495 static enum PixelFormat get_pixel_format(H264Context *h)
2496 {
2497     MpegEncContext *const s  = &h->s;
2498     switch (h->sps.bit_depth_luma) {
2499     case 9:
2500         if (CHROMA444) {
2501             if (s->avctx->colorspace == AVCOL_SPC_RGB) {
2502                 return AV_PIX_FMT_GBRP9;
2503             } else
2504                 return AV_PIX_FMT_YUV444P9;
2505         } else if (CHROMA422)
2506             return AV_PIX_FMT_YUV422P9;
2507         else
2508             return AV_PIX_FMT_YUV420P9;
2509         break;
2510     case 10:
2511         if (CHROMA444) {
2512             if (s->avctx->colorspace == AVCOL_SPC_RGB) {
2513                 return AV_PIX_FMT_GBRP10;
2514             } else
2515                 return AV_PIX_FMT_YUV444P10;
2516         } else if (CHROMA422)
2517             return AV_PIX_FMT_YUV422P10;
2518         else
2519             return AV_PIX_FMT_YUV420P10;
2520         break;
2521     case 8:
2522         if (CHROMA444) {
2523             if (s->avctx->colorspace == AVCOL_SPC_RGB) {
2524                 return AV_PIX_FMT_GBRP;
2525             } else
2526                 return s->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ444P
2527                                                                  : AV_PIX_FMT_YUV444P;
2528         } else if (CHROMA422) {
2529             return s->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ422P
2530                                                              : AV_PIX_FMT_YUV422P;
2531         } else {
2532             return s->avctx->get_format(s->avctx, s->avctx->codec->pix_fmts ?
2533                                         s->avctx->codec->pix_fmts :
2534                                         s->avctx->color_range == AVCOL_RANGE_JPEG ?
2535                                         hwaccel_pixfmt_list_h264_jpeg_420 :
2536                                         ff_hwaccel_pixfmt_list_420);
2537         }
2538         break;
2539     default:
2540         av_log(s->avctx, AV_LOG_ERROR,
2541                "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
2542         return AVERROR_INVALIDDATA;
2543     }
2544 }
2545
2546 static int h264_slice_header_init(H264Context *h, int reinit)
2547 {
2548     MpegEncContext *const s  = &h->s;
2549     int i, ret;
2550
2551     avcodec_set_dimensions(s->avctx, s->width, s->height);
2552     s->avctx->sample_aspect_ratio = h->sps.sar;
2553     av_assert0(s->avctx->sample_aspect_ratio.den);
2554
2555     if (h->sps.timing_info_present_flag) {
2556         int64_t den = h->sps.time_scale;
2557         if (h->x264_build < 44U)
2558             den *= 2;
2559         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
2560                   h->sps.num_units_in_tick, den, 1 << 30);
2561     }
2562
2563     s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
2564
2565     if (reinit) {
2566         free_tables(h, 0);
2567         if ((ret = ff_MPV_common_frame_size_change(s)) < 0) {
2568             av_log(h->s.avctx, AV_LOG_ERROR, "ff_MPV_common_frame_size_change() failed.\n");
2569             return ret;
2570         }
2571     } else {
2572         if ((ret = ff_MPV_common_init(s)) < 0) {
2573             av_log(h->s.avctx, AV_LOG_ERROR, "ff_MPV_common_init() failed.\n");
2574             return ret;
2575         }
2576     }
2577     s->first_field = 0;
2578     h->prev_interlaced_frame = 1;
2579
2580     init_scan_tables(h);
2581     if (ff_h264_alloc_tables(h) < 0) {
2582         av_log(h->s.avctx, AV_LOG_ERROR,
2583                "Could not allocate memory for h264\n");
2584         return AVERROR(ENOMEM);
2585     }
2586
2587     if (!HAVE_THREADS || !(s->avctx->active_thread_type & FF_THREAD_SLICE)) {
2588         if (context_init(h) < 0) {
2589             av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
2590             return -1;
2591         }
2592     } else {
2593         for (i = 1; i < s->slice_context_count; i++) {
2594             H264Context *c;
2595             c = h->thread_context[i] = av_malloc(sizeof(H264Context));
2596             memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
2597             memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
2598             c->h264dsp     = h->h264dsp;
2599             c->h264qpel    = h->h264qpel;
2600             c->h264chroma  = h->h264chroma;
2601             c->sps         = h->sps;
2602             c->pps         = h->pps;
2603             c->pixel_shift = h->pixel_shift;
2604             init_scan_tables(c);
2605             clone_tables(c, h, i);
2606         }
2607
2608         for (i = 0; i < s->slice_context_count; i++)
2609             if (context_init(h->thread_context[i]) < 0) {
2610                 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
2611                 return -1;
2612             }
2613     }
2614
2615     return 0;
2616 }
2617
2618 /**
2619  * Decode a slice header.
2620  * This will also call ff_MPV_common_init() and frame_start() as needed.
2621  *
2622  * @param h h264context
2623  * @param h0 h264 master context (differs from 'h' when doing sliced based
2624  *           parallel decoding)
2625  *
2626  * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
2627  */
2628 static int decode_slice_header(H264Context *h, H264Context *h0)
2629 {
2630     MpegEncContext *const s  = &h->s;
2631     MpegEncContext *const s0 = &h0->s;
2632     unsigned int first_mb_in_slice;
2633     unsigned int pps_id;
2634     int num_ref_idx_active_override_flag, max_refs, ret;
2635     unsigned int slice_type, tmp, i, j;
2636     int default_ref_list_done = 0;
2637     int last_pic_structure, last_pic_droppable;
2638     int needs_reinit = 0;
2639
2640     s->me.qpel_put = h->h264qpel.put_h264_qpel_pixels_tab;
2641     s->me.qpel_avg = h->h264qpel.avg_h264_qpel_pixels_tab;
2642
2643     first_mb_in_slice = get_ue_golomb(&s->gb);
2644
2645     if (first_mb_in_slice == 0) { // FIXME better field boundary detection
2646         if (h0->current_slice && FIELD_PICTURE) {
2647             field_end(h, 1);
2648         }
2649
2650         h0->current_slice = 0;
2651         if (!s0->first_field) {
2652             if (s->current_picture_ptr && !s->droppable &&
2653                 s->current_picture_ptr->owner2 == s) {
2654                 ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
2655                                           s->picture_structure == PICT_BOTTOM_FIELD);
2656             }
2657             s->current_picture_ptr = NULL;
2658         }
2659     }
2660
2661     slice_type = get_ue_golomb_31(&s->gb);
2662     if (slice_type > 9) {
2663         av_log(h->s.avctx, AV_LOG_ERROR,
2664                "slice type too large (%d) at %d %d\n",
2665                h->slice_type, s->mb_x, s->mb_y);
2666         return -1;
2667     }
2668     if (slice_type > 4) {
2669         slice_type -= 5;
2670         h->slice_type_fixed = 1;
2671     } else
2672         h->slice_type_fixed = 0;
2673
2674     slice_type = golomb_to_pict_type[slice_type];
2675     if (slice_type == AV_PICTURE_TYPE_I ||
2676         (h0->current_slice != 0 && slice_type == h0->last_slice_type)) {
2677         default_ref_list_done = 1;
2678     }
2679     h->slice_type     = slice_type;
2680     h->slice_type_nos = slice_type & 3;
2681
2682     // to make a few old functions happy, it's wrong though
2683     s->pict_type = h->slice_type;
2684
2685     pps_id = get_ue_golomb(&s->gb);
2686     if (pps_id >= MAX_PPS_COUNT) {
2687         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
2688         return -1;
2689     }
2690     if (!h0->pps_buffers[pps_id]) {
2691         av_log(h->s.avctx, AV_LOG_ERROR,
2692                "non-existing PPS %u referenced\n",
2693                pps_id);
2694         return -1;
2695     }
2696     h->pps = *h0->pps_buffers[pps_id];
2697
2698     if (!h0->sps_buffers[h->pps.sps_id]) {
2699         av_log(h->s.avctx, AV_LOG_ERROR,
2700                "non-existing SPS %u referenced\n",
2701                h->pps.sps_id);
2702         return -1;
2703     }
2704
2705     if (h->pps.sps_id != h->current_sps_id ||
2706         h->context_reinitialized           ||
2707         h0->sps_buffers[h->pps.sps_id]->new) {
2708         SPS *new_sps = h0->sps_buffers[h->pps.sps_id];
2709
2710         h0->sps_buffers[h->pps.sps_id]->new = 0;
2711
2712         if (h->sps.chroma_format_idc != new_sps->chroma_format_idc ||
2713             h->sps.bit_depth_luma    != new_sps->bit_depth_luma)
2714             needs_reinit = 1;
2715
2716         h->current_sps_id = h->pps.sps_id;
2717         h->sps            = *h0->sps_buffers[h->pps.sps_id];
2718
2719         if ((ret = h264_set_parameter_from_sps(h)) < 0)
2720             return ret;
2721     }
2722
2723     s->avctx->profile = ff_h264_get_profile(&h->sps);
2724     s->avctx->level   = h->sps.level_idc;
2725     s->avctx->refs    = h->sps.ref_frame_count;
2726
2727     if (s->mb_width  != h->sps.mb_width ||
2728         s->mb_height != h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag))
2729         needs_reinit = 1;
2730
2731     s->mb_width  = h->sps.mb_width;
2732     s->mb_height = h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
2733
2734     h->b_stride = s->mb_width * 4;
2735
2736     s->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p
2737
2738     s->width = 16 * s->mb_width - (2 >> CHROMA444) * FFMIN(h->sps.crop_right, (8 << CHROMA444) - 1);
2739     if (h->sps.frame_mbs_only_flag)
2740         s->height = 16 * s->mb_height - (1 << s->chroma_y_shift) * FFMIN(h->sps.crop_bottom, (16 >> s->chroma_y_shift) - 1);
2741     else
2742         s->height = 16 * s->mb_height - (2 << s->chroma_y_shift) * FFMIN(h->sps.crop_bottom, (16 >> s->chroma_y_shift) - 1);
2743
2744     if (FFALIGN(s->avctx->width,  16) == s->width &&
2745         FFALIGN(s->avctx->height, 16) == s->height) {
2746         s->width  = s->avctx->width;
2747         s->height = s->avctx->height;
2748     }
2749
2750     if (h->sps.video_signal_type_present_flag) {
2751         s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG
2752                                                   : AVCOL_RANGE_MPEG;
2753         if (h->sps.colour_description_present_flag) {
2754             if (s->avctx->colorspace != h->sps.colorspace)
2755                 needs_reinit = 1;
2756             s->avctx->color_primaries = h->sps.color_primaries;
2757             s->avctx->color_trc       = h->sps.color_trc;
2758             s->avctx->colorspace      = h->sps.colorspace;
2759         }
2760     }
2761
2762     if (s->context_initialized &&
2763         (s->width  != s->avctx->width   ||
2764          s->height != s->avctx->height  ||
2765          needs_reinit                   ||
2766          av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
2767
2768         if (h != h0) {
2769             av_log(s->avctx, AV_LOG_ERROR, "changing width/height on "
2770                    "slice %d\n", h0->current_slice + 1);
2771             return AVERROR_INVALIDDATA;
2772         }
2773
2774         flush_change(h);
2775
2776         if ((ret = get_pixel_format(h)) < 0)
2777             return ret;
2778         s->avctx->pix_fmt = ret;
2779
2780         av_log(h->s.avctx, AV_LOG_INFO, "Reinit context to %dx%d, "
2781                "pix_fmt: %d\n", s->width, s->height, s->avctx->pix_fmt);
2782
2783         if ((ret = h264_slice_header_init(h, 1)) < 0) {
2784             av_log(h->s.avctx, AV_LOG_ERROR,
2785                    "h264_slice_header_init() failed\n");
2786             return ret;
2787         }
2788         h->context_reinitialized = 1;
2789     }
2790     if (!s->context_initialized) {
2791         if (h != h0) {
2792             av_log(h->s.avctx, AV_LOG_ERROR,
2793                    "Cannot (re-)initialize context during parallel decoding.\n");
2794             return -1;
2795         }
2796
2797         if ((ret = get_pixel_format(h)) < 0)
2798             return ret;
2799         s->avctx->pix_fmt = ret;
2800
2801         if ((ret = h264_slice_header_init(h, 0)) < 0) {
2802             av_log(h->s.avctx, AV_LOG_ERROR,
2803                    "h264_slice_header_init() failed\n");
2804             return ret;
2805         }
2806     }
2807
2808     if (h == h0 && h->dequant_coeff_pps != pps_id) {
2809         h->dequant_coeff_pps = pps_id;
2810         init_dequant_tables(h);
2811     }
2812
2813     h->frame_num = get_bits(&s->gb, h->sps.log2_max_frame_num);
2814
2815     h->mb_mbaff        = 0;
2816     h->mb_aff_frame    = 0;
2817     last_pic_structure = s0->picture_structure;
2818     last_pic_droppable = s0->droppable;
2819     s->droppable       = h->nal_ref_idc == 0;
2820     if (h->sps.frame_mbs_only_flag) {
2821         s->picture_structure = PICT_FRAME;
2822     } else {
2823         if (get_bits1(&s->gb)) { // field_pic_flag
2824             s->picture_structure = PICT_TOP_FIELD + get_bits1(&s->gb); // bottom_field_flag
2825         } else {
2826             s->picture_structure = PICT_FRAME;
2827             h->mb_aff_frame      = h->sps.mb_aff;
2828         }
2829     }
2830     h->mb_field_decoding_flag = s->picture_structure != PICT_FRAME;
2831
2832     if (h0->current_slice != 0) {
2833         if (last_pic_structure != s->picture_structure ||
2834             last_pic_droppable != s->droppable) {
2835             av_log(h->s.avctx, AV_LOG_ERROR,
2836                    "Changing field mode (%d -> %d) between slices is not allowed\n",
2837                    last_pic_structure, s->picture_structure);
2838             s->picture_structure = last_pic_structure;
2839             s->droppable         = last_pic_droppable;
2840             return AVERROR_INVALIDDATA;
2841         } else if (!s0->current_picture_ptr) {
2842             av_log(s->avctx, AV_LOG_ERROR,
2843                    "unset current_picture_ptr on %d. slice\n",
2844                    h0->current_slice + 1);
2845             return AVERROR_INVALIDDATA;
2846         }
2847     } else {
2848         /* Shorten frame num gaps so we don't have to allocate reference
2849          * frames just to throw them away */
2850         if (h->frame_num != h->prev_frame_num) {
2851             int unwrap_prev_frame_num = h->prev_frame_num;
2852             int max_frame_num         = 1 << h->sps.log2_max_frame_num;
2853
2854             if (unwrap_prev_frame_num > h->frame_num)
2855                 unwrap_prev_frame_num -= max_frame_num;
2856
2857             if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {
2858                 unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;
2859                 if (unwrap_prev_frame_num < 0)
2860                     unwrap_prev_frame_num += max_frame_num;
2861
2862                 h->prev_frame_num = unwrap_prev_frame_num;
2863             }
2864         }
2865
2866         /* See if we have a decoded first field looking for a pair...
2867          * Here, we're using that to see if we should mark previously
2868          * decode frames as "finished".
2869          * We have to do that before the "dummy" in-between frame allocation,
2870          * since that can modify s->current_picture_ptr. */
2871         if (s0->first_field) {
2872             assert(s0->current_picture_ptr);
2873             assert(s0->current_picture_ptr->f.data[0]);
2874             assert(s0->current_picture_ptr->f.reference != DELAYED_PIC_REF);
2875
2876             /* Mark old field/frame as completed */
2877             if (!last_pic_droppable && s0->current_picture_ptr->owner2 == s0) {
2878                 ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
2879                                           last_pic_structure == PICT_BOTTOM_FIELD);
2880             }
2881
2882             /* figure out if we have a complementary field pair */
2883             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
2884                 /* Previous field is unmatched. Don't display it, but let it
2885                  * remain for reference if marked as such. */
2886                 if (!last_pic_droppable && last_pic_structure != PICT_FRAME) {
2887                     ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
2888                                               last_pic_structure == PICT_TOP_FIELD);
2889                 }
2890             } else {
2891                 if (s0->current_picture_ptr->frame_num != h->frame_num) {
2892                     /* This and previous field were reference, but had
2893                      * different frame_nums. Consider this field first in
2894                      * pair. Throw away previous field except for reference
2895                      * purposes. */
2896                     if (!last_pic_droppable && last_pic_structure != PICT_FRAME) {
2897                         ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
2898                                                   last_pic_structure == PICT_TOP_FIELD);
2899                     }
2900                 } else {
2901                     /* Second field in complementary pair */
2902                     if (!((last_pic_structure   == PICT_TOP_FIELD &&
2903                            s->picture_structure == PICT_BOTTOM_FIELD) ||
2904                           (last_pic_structure   == PICT_BOTTOM_FIELD &&
2905                            s->picture_structure == PICT_TOP_FIELD))) {
2906                         av_log(s->avctx, AV_LOG_ERROR,
2907                                "Invalid field mode combination %d/%d\n",
2908                                last_pic_structure, s->picture_structure);
2909                         s->picture_structure = last_pic_structure;
2910                         s->droppable         = last_pic_droppable;
2911                         return AVERROR_INVALIDDATA;
2912                     } else if (last_pic_droppable != s->droppable) {
2913                         av_log(s->avctx, AV_LOG_ERROR,
2914                                "Cannot combine reference and non-reference fields in the same frame\n");
2915                         av_log_ask_for_sample(s->avctx, NULL);
2916                         s->picture_structure = last_pic_structure;
2917                         s->droppable         = last_pic_droppable;
2918                         return AVERROR_PATCHWELCOME;
2919                     }
2920
2921                     /* Take ownership of this buffer. Note that if another thread owned
2922                      * the first field of this buffer, we're not operating on that pointer,
2923                      * so the original thread is still responsible for reporting progress
2924                      * on that first field (or if that was us, we just did that above).
2925                      * By taking ownership, we assign responsibility to ourselves to
2926                      * report progress on the second field. */
2927                     s0->current_picture_ptr->owner2 = s0;
2928                 }
2929             }
2930         }
2931
2932         while (h->frame_num != h->prev_frame_num &&
2933                h->frame_num != (h->prev_frame_num + 1) % (1 << h->sps.log2_max_frame_num)) {
2934             Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
2935             av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n",
2936                    h->frame_num, h->prev_frame_num);
2937             if (ff_h264_frame_start(h) < 0)
2938                 return -1;
2939             h->prev_frame_num++;
2940             h->prev_frame_num %= 1 << h->sps.log2_max_frame_num;
2941             s->current_picture_ptr->frame_num = h->prev_frame_num;
2942             ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, 0);
2943             ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, 1);
2944             if ((ret = ff_generate_sliding_window_mmcos(h, 1)) < 0 &&
2945                 s->avctx->err_recognition & AV_EF_EXPLODE)
2946                 return ret;
2947             if (ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index) < 0 &&
2948                 (s->avctx->err_recognition & AV_EF_EXPLODE))
2949                 return AVERROR_INVALIDDATA;
2950             /* Error concealment: if a ref is missing, copy the previous ref in its place.
2951              * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
2952              * about there being no actual duplicates.
2953              * FIXME: this doesn't copy padding for out-of-frame motion vectors.  Given we're
2954              * concealing a lost frame, this probably isn't noticeable by comparison, but it should
2955              * be fixed. */
2956             if (h->short_ref_count) {
2957                 if (prev) {
2958                     av_image_copy(h->short_ref[0]->f.data, h->short_ref[0]->f.linesize,
2959                                   (const uint8_t **)prev->f.data, prev->f.linesize,
2960                                   s->avctx->pix_fmt, s->mb_width * 16, s->mb_height * 16);
2961                     h->short_ref[0]->poc = prev->poc + 2;
2962                 }
2963                 h->short_ref[0]->frame_num = h->prev_frame_num;
2964             }
2965         }
2966
2967         /* See if we have a decoded first field looking for a pair...
2968          * We're using that to see whether to continue decoding in that
2969          * frame, or to allocate a new one. */
2970         if (s0->first_field) {
2971             assert(s0->current_picture_ptr);
2972             assert(s0->current_picture_ptr->f.data[0]);
2973             assert(s0->current_picture_ptr->f.reference != DELAYED_PIC_REF);
2974
2975             /* figure out if we have a complementary field pair */
2976             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
2977                 /* Previous field is unmatched. Don't display it, but let it
2978                  * remain for reference if marked as such. */
2979                 s0->current_picture_ptr = NULL;
2980                 s0->first_field         = FIELD_PICTURE;
2981             } else {
2982                 if (s0->current_picture_ptr->frame_num != h->frame_num) {
2983                     /* This and the previous field had different frame_nums.
2984                      * Consider this field first in pair. Throw away previous
2985                      * one except for reference purposes. */
2986                     s0->first_field         = 1;
2987                     s0->current_picture_ptr = NULL;
2988                 } else {
2989                     /* Second field in complementary pair */
2990                     s0->first_field = 0;
2991                 }
2992             }
2993         } else {
2994             /* Frame or first field in a potentially complementary pair */
2995             s0->first_field = FIELD_PICTURE;
2996         }
2997
2998         if (!FIELD_PICTURE || s0->first_field) {
2999             if (ff_h264_frame_start(h) < 0) {
3000                 s0->first_field = 0;
3001                 return -1;
3002             }
3003         } else {
3004             ff_release_unused_pictures(s, 0);
3005         }
3006     }
3007     if (h != h0 && (ret = clone_slice(h, h0)) < 0)
3008         return ret;
3009
3010     s->current_picture_ptr->frame_num = h->frame_num; // FIXME frame_num cleanup
3011
3012     assert(s->mb_num == s->mb_width * s->mb_height);
3013     if (first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3014         first_mb_in_slice >= s->mb_num) {
3015         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3016         return -1;
3017     }
3018     s->resync_mb_x = s->mb_x =  first_mb_in_slice % s->mb_width;
3019     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3020     if (s->picture_structure == PICT_BOTTOM_FIELD)
3021         s->resync_mb_y = s->mb_y = s->mb_y + 1;
3022     assert(s->mb_y < s->mb_height);
3023
3024     if (s->picture_structure == PICT_FRAME) {
3025         h->curr_pic_num = h->frame_num;
3026         h->max_pic_num  = 1 << h->sps.log2_max_frame_num;
3027     } else {
3028         h->curr_pic_num = 2 * h->frame_num + 1;
3029         h->max_pic_num  = 1 << (h->sps.log2_max_frame_num + 1);
3030     }
3031
3032     if (h->nal_unit_type == NAL_IDR_SLICE)
3033         get_ue_golomb(&s->gb); /* idr_pic_id */
3034
3035     if (h->sps.poc_type == 0) {
3036         h->poc_lsb = get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3037
3038         if (h->pps.pic_order_present == 1 && s->picture_structure == PICT_FRAME)
3039             h->delta_poc_bottom = get_se_golomb(&s->gb);
3040     }
3041
3042     if (h->sps.poc_type == 1 && !h->sps.delta_pic_order_always_zero_flag) {
3043         h->delta_poc[0] = get_se_golomb(&s->gb);
3044
3045         if (h->pps.pic_order_present == 1 && s->picture_structure == PICT_FRAME)
3046             h->delta_poc[1] = get_se_golomb(&s->gb);
3047     }
3048
3049     init_poc(h);
3050
3051     if (h->pps.redundant_pic_cnt_present)
3052         h->redundant_pic_count = get_ue_golomb(&s->gb);
3053
3054     // set defaults, might be overridden a few lines later
3055     h->ref_count[0] = h->pps.ref_count[0];
3056     h->ref_count[1] = h->pps.ref_count[1];
3057
3058     if (h->slice_type_nos != AV_PICTURE_TYPE_I) {
3059         if (h->slice_type_nos == AV_PICTURE_TYPE_B)
3060             h->direct_spatial_mv_pred = get_bits1(&s->gb);
3061         num_ref_idx_active_override_flag = get_bits1(&s->gb);
3062
3063         if (num_ref_idx_active_override_flag) {
3064             h->ref_count[0] = get_ue_golomb(&s->gb) + 1;
3065             if (h->ref_count[0] < 1)
3066                 return AVERROR_INVALIDDATA;
3067             if (h->slice_type_nos == AV_PICTURE_TYPE_B) {
3068                 h->ref_count[1] = get_ue_golomb(&s->gb) + 1;
3069                 if (h->ref_count[1] < 1)
3070                     return AVERROR_INVALIDDATA;
3071             }
3072         }
3073
3074         if (h->slice_type_nos == AV_PICTURE_TYPE_B)
3075             h->list_count = 2;
3076         else
3077             h->list_count = 1;
3078     } else
3079         h->list_count = 0;
3080
3081     max_refs = s->picture_structure == PICT_FRAME ? 16 : 32;
3082
3083     if (h->ref_count[0] > max_refs || h->ref_count[1] > max_refs) {
3084         av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3085         h->ref_count[0] = h->ref_count[1] = 1;
3086         return AVERROR_INVALIDDATA;
3087     }
3088
3089     if (!default_ref_list_done)
3090         ff_h264_fill_default_ref_list(h);
3091
3092     if (h->slice_type_nos != AV_PICTURE_TYPE_I &&
3093         ff_h264_decode_ref_pic_list_reordering(h) < 0) {
3094         h->ref_count[1] = h->ref_count[0] = 0;
3095         return -1;
3096     }
3097
3098     if (h->slice_type_nos != AV_PICTURE_TYPE_I) {
3099         s->last_picture_ptr = &h->ref_list[0][0];
3100         s->last_picture_ptr->owner2 = s;
3101         s->er.last_pic = s->last_picture_ptr;
3102         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3103     }
3104     if (h->slice_type_nos == AV_PICTURE_TYPE_B) {
3105         s->next_picture_ptr = &h->ref_list[1][0];
3106         s->next_picture_ptr->owner2 = s;
3107         s->er.next_pic = s->next_picture_ptr;
3108         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3109     }
3110
3111     if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) ||
3112         (h->pps.weighted_bipred_idc == 1 &&
3113          h->slice_type_nos == AV_PICTURE_TYPE_B))
3114         pred_weight_table(h);
3115     else if (h->pps.weighted_bipred_idc == 2 &&
3116              h->slice_type_nos == AV_PICTURE_TYPE_B) {
3117         implicit_weight_table(h, -1);
3118     } else {
3119         h->use_weight = 0;
3120         for (i = 0; i < 2; i++) {
3121             h->luma_weight_flag[i]   = 0;
3122             h->chroma_weight_flag[i] = 0;
3123         }
3124     }
3125
3126     // If frame-mt is enabled, only update mmco tables for the first slice
3127     // in a field. Subsequent slices can temporarily clobber h->mmco_index
3128     // or h->mmco, which will cause ref list mix-ups and decoding errors
3129     // further down the line. This may break decoding if the first slice is
3130     // corrupt, thus we only do this if frame-mt is enabled.
3131     if (h->nal_ref_idc &&
3132         ff_h264_decode_ref_pic_marking(h0, &s->gb,
3133                             !(s->avctx->active_thread_type & FF_THREAD_FRAME) ||
3134                             h0->current_slice == 0) < 0 &&
3135         (s->avctx->err_recognition & AV_EF_EXPLODE))
3136         return AVERROR_INVALIDDATA;
3137
3138     if (FRAME_MBAFF) {
3139         ff_h264_fill_mbaff_ref_list(h);
3140
3141         if (h->pps.weighted_bipred_idc == 2 && h->slice_type_nos == AV_PICTURE_TYPE_B) {
3142             implicit_weight_table(h, 0);
3143             implicit_weight_table(h, 1);
3144         }
3145     }
3146
3147     if (h->slice_type_nos == AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred)
3148         ff_h264_direct_dist_scale_factor(h);
3149     ff_h264_direct_ref_list_init(h);
3150
3151     if (h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac) {
3152         tmp = get_ue_golomb_31(&s->gb);
3153         if (tmp > 2) {
3154             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3155             return -1;
3156         }
3157         h->cabac_init_idc = tmp;
3158     }
3159
3160     h->last_qscale_diff = 0;
3161     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3162     if (tmp > 51 + 6 * (h->sps.bit_depth_luma - 8)) {
3163         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3164         return -1;
3165     }
3166     s->qscale       = tmp;
3167     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3168     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3169     // FIXME qscale / qp ... stuff
3170     if (h->slice_type == AV_PICTURE_TYPE_SP)
3171         get_bits1(&s->gb); /* sp_for_switch_flag */
3172     if (h->slice_type == AV_PICTURE_TYPE_SP ||
3173         h->slice_type == AV_PICTURE_TYPE_SI)
3174         get_se_golomb(&s->gb); /* slice_qs_delta */
3175
3176     h->deblocking_filter     = 1;
3177     h->slice_alpha_c0_offset = 52;
3178     h->slice_beta_offset     = 52;
3179     if (h->pps.deblocking_filter_parameters_present) {
3180         tmp = get_ue_golomb_31(&s->gb);
3181         if (tmp > 2) {
3182             av_log(s->avctx, AV_LOG_ERROR,
3183                    "deblocking_filter_idc %u out of range\n", tmp);
3184             return -1;
3185         }
3186         h->deblocking_filter = tmp;
3187         if (h->deblocking_filter < 2)
3188             h->deblocking_filter ^= 1;  // 1<->0
3189
3190         if (h->deblocking_filter) {
3191             h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
3192             h->slice_beta_offset     += get_se_golomb(&s->gb) << 1;
3193             if (h->slice_alpha_c0_offset > 104U ||
3194                 h->slice_beta_offset     > 104U) {
3195                 av_log(s->avctx, AV_LOG_ERROR,
3196                        "deblocking filter parameters %d %d out of range\n",
3197                        h->slice_alpha_c0_offset, h->slice_beta_offset);
3198                 return -1;
3199             }
3200         }
3201     }
3202
3203     if (s->avctx->skip_loop_filter >= AVDISCARD_ALL ||
3204         (s->avctx->skip_loop_filter >= AVDISCARD_NONKEY &&
3205          h->slice_type_nos != AV_PICTURE_TYPE_I) ||
3206         (s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  &&
3207          h->slice_type_nos == AV_PICTURE_TYPE_B) ||
3208         (s->avctx->skip_loop_filter >= AVDISCARD_NONREF &&
3209          h->nal_ref_idc == 0))
3210         h->deblocking_filter = 0;
3211
3212     if (h->deblocking_filter == 1 && h0->max_contexts > 1) {
3213         if (s->avctx->flags2 & CODEC_FLAG2_FAST) {
3214             /* Cheat slightly for speed:
3215              * Do not bother to deblock across slices. */
3216             h->deblocking_filter = 2;
3217         } else {
3218             h0->max_contexts = 1;
3219             if (!h0->single_decode_warning) {
3220                 av_log(s->avctx, AV_LOG_INFO,
3221                        "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3222                 h0->single_decode_warning = 1;
3223             }
3224             if (h != h0) {
3225                 av_log(h->s.avctx, AV_LOG_ERROR,
3226                        "Deblocking switched inside frame.\n");
3227                 return 1;
3228             }
3229         }
3230     }
3231     h->qp_thresh = 15 + 52 -
3232                    FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) -
3233                    FFMAX3(0,
3234                           h->pps.chroma_qp_index_offset[0],
3235                           h->pps.chroma_qp_index_offset[1]) +
3236                    6 * (h->sps.bit_depth_luma - 8);
3237
3238     h0->last_slice_type = slice_type;
3239     h->slice_num = ++h0->current_slice;
3240     if (h->slice_num >= MAX_SLICES) {
3241         av_log(s->avctx, AV_LOG_ERROR,
3242                "Too many slices, increase MAX_SLICES and recompile\n");
3243     }
3244
3245     for (j = 0; j < 2; j++) {
3246         int id_list[16];
3247         int *ref2frm = h->ref2frm[h->slice_num & (MAX_SLICES - 1)][j];
3248         for (i = 0; i < 16; i++) {
3249             id_list[i] = 60;
3250             if (h->ref_list[j][i].f.data[0]) {
3251                 int k;
3252                 uint8_t *base = h->ref_list[j][i].f.base[0];
3253                 for (k = 0; k < h->short_ref_count; k++)
3254                     if (h->short_ref[k]->f.base[0] == base) {
3255                         id_list[i] = k;
3256                         break;
3257                     }
3258                 for (k = 0; k < h->long_ref_count; k++)
3259                     if (h->long_ref[k] && h->long_ref[k]->f.base[0] == base) {
3260                         id_list[i] = h->short_ref_count + k;
3261                         break;
3262                     }
3263             }
3264         }
3265
3266         ref2frm[0]     =
3267             ref2frm[1] = -1;
3268         for (i = 0; i < 16; i++)
3269             ref2frm[i + 2] = 4 * id_list[i] +
3270                              (h->ref_list[j][i].f.reference & 3);
3271         ref2frm[18 + 0]     =
3272             ref2frm[18 + 1] = -1;
3273         for (i = 16; i < 48; i++)
3274             ref2frm[i + 4] = 4 * id_list[(i - 16) >> 1] +
3275                              (h->ref_list[j][i].f.reference & 3);
3276     }
3277
3278     // FIXME: fix draw_edges + PAFF + frame threads
3279     h->emu_edge_width  = (s->flags & CODEC_FLAG_EMU_EDGE ||
3280                           (!h->sps.frame_mbs_only_flag &&
3281                            s->avctx->active_thread_type))
3282                          ? 0 : 16;
3283     h->emu_edge_height = (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
3284
3285     if (s->avctx->debug & FF_DEBUG_PICT_INFO) {
3286         av_log(h->s.avctx, AV_LOG_DEBUG,
3287                "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
3288                h->slice_num,
3289                (s->picture_structure == PICT_FRAME ? "F" : s->picture_structure == PICT_TOP_FIELD ? "T" : "B"),
3290                first_mb_in_slice,
3291                av_get_picture_type_char(h->slice_type),
3292                h->slice_type_fixed ? " fix" : "",
3293                h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
3294                pps_id, h->frame_num,
3295                s->current_picture_ptr->field_poc[0],
3296                s->current_picture_ptr->field_poc[1],
3297                h->ref_count[0], h->ref_count[1],
3298                s->qscale,
3299                h->deblocking_filter,
3300                h->slice_alpha_c0_offset / 2 - 26, h->slice_beta_offset / 2 - 26,
3301                h->use_weight,
3302                h->use_weight == 1 && h->use_weight_chroma ? "c" : "",
3303                h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : "");
3304     }
3305
3306     return 0;
3307 }
3308
3309 int ff_h264_get_slice_type(const H264Context *h)
3310 {
3311     switch (h->slice_type) {
3312     case AV_PICTURE_TYPE_P:
3313         return 0;
3314     case AV_PICTURE_TYPE_B:
3315         return 1;
3316     case AV_PICTURE_TYPE_I:
3317         return 2;
3318     case AV_PICTURE_TYPE_SP:
3319         return 3;
3320     case AV_PICTURE_TYPE_SI:
3321         return 4;
3322     default:
3323         return -1;
3324     }
3325 }
3326
3327 static av_always_inline void fill_filter_caches_inter(H264Context *h,
3328                                                       MpegEncContext *const s,
3329                                                       int mb_type, int top_xy,
3330                                                       int left_xy[LEFT_MBS],
3331                                                       int top_type,
3332                                                       int left_type[LEFT_MBS],
3333                                                       int mb_xy, int list)
3334 {
3335     int b_stride = h->b_stride;
3336     int16_t(*mv_dst)[2] = &h->mv_cache[list][scan8[0]];
3337     int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
3338     if (IS_INTER(mb_type) || IS_DIRECT(mb_type)) {
3339         if (USES_LIST(top_type, list)) {
3340             const int b_xy  = h->mb2b_xy[top_xy] + 3 * b_stride;
3341             const int b8_xy = 4 * top_xy + 2;
3342             int (*ref2frm)[64] = h->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF ? 20 : 2);
3343             AV_COPY128(mv_dst - 1 * 8, s->current_picture.f.motion_val[list][b_xy + 0]);
3344             ref_cache[0 - 1 * 8] =
3345             ref_cache[1 - 1 * 8] = ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 0]];
3346             ref_cache[2 - 1 * 8] =
3347             ref_cache[3 - 1 * 8] = ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 1]];
3348         } else {
3349             AV_ZERO128(mv_dst - 1 * 8);
3350             AV_WN32A(&ref_cache[0 - 1 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
3351         }
3352
3353         if (!IS_INTERLACED(mb_type ^ left_type[LTOP])) {
3354             if (USES_LIST(left_type[LTOP], list)) {
3355                 const int b_xy  = h->mb2b_xy[left_xy[LTOP]] + 3;
3356                 const int b8_xy = 4 * left_xy[LTOP] + 1;
3357                 int (*ref2frm)[64] = h->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF ? 20 : 2);
3358                 AV_COPY32(mv_dst - 1 +  0, s->current_picture.f.motion_val[list][b_xy + b_stride * 0]);
3359                 AV_COPY32(mv_dst - 1 +  8, s->current_picture.f.motion_val[list][b_xy + b_stride * 1]);
3360                 AV_COPY32(mv_dst - 1 + 16, s->current_picture.f.motion_val[list][b_xy + b_stride * 2]);
3361                 AV_COPY32(mv_dst - 1 + 24, s->current_picture.f.motion_val[list][b_xy + b_stride * 3]);
3362                 ref_cache[-1 +  0] =
3363                 ref_cache[-1 +  8] = ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 2 * 0]];
3364                 ref_cache[-1 + 16] =
3365                 ref_cache[-1 + 24] = ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 2 * 1]];
3366             } else {
3367                 AV_ZERO32(mv_dst - 1 +  0);
3368                 AV_ZERO32(mv_dst - 1 +  8);
3369                 AV_ZERO32(mv_dst - 1 + 16);
3370                 AV_ZERO32(mv_dst - 1 + 24);
3371                 ref_cache[-1 +  0] =
3372                 ref_cache[-1 +  8] =
3373                 ref_cache[-1 + 16] =
3374                 ref_cache[-1 + 24] = LIST_NOT_USED;
3375             }
3376         }
3377     }
3378
3379     if (!USES_LIST(mb_type, list)) {
3380         fill_rectangle(mv_dst, 4, 4, 8, pack16to32(0, 0), 4);
3381         AV_WN32A(&ref_cache[0 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
3382         AV_WN32A(&ref_cache[1 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
3383         AV_WN32A(&ref_cache[2 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
3384         AV_WN32A(&ref_cache[3 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
3385         return;
3386     }
3387
3388     {
3389         int8_t *ref = &s->current_picture.f.ref_index[list][4 * mb_xy];
3390         int (*ref2frm)[64] = h->ref2frm[h->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF ? 20 : 2);
3391         uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]], ref2frm[list][ref[1]]) & 0x00FF00FF) * 0x0101;
3392         uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]], ref2frm[list][ref[3]]) & 0x00FF00FF) * 0x0101;
3393         AV_WN32A(&ref_cache[0 * 8], ref01);
3394         AV_WN32A(&ref_cache[1 * 8], ref01);
3395         AV_WN32A(&ref_cache[2 * 8], ref23);
3396         AV_WN32A(&ref_cache[3 * 8], ref23);
3397     }
3398
3399     {
3400         int16_t(*mv_src)[2] = &s->current_picture.f.motion_val[list][4 * s->mb_x + 4 * s->mb_y * b_stride];
3401         AV_COPY128(mv_dst + 8 * 0, mv_src + 0 * b_stride);
3402         AV_COPY128(mv_dst + 8 * 1, mv_src + 1 * b_stride);
3403         AV_COPY128(mv_dst + 8 * 2, mv_src + 2 * b_stride);
3404         AV_COPY128(mv_dst + 8 * 3, mv_src + 3 * b_stride);
3405     }
3406 }
3407
3408 /**
      * Prepare the per-macroblock state the loop filter needs for the
      * macroblock at h->mb_xy.
      *
      * Determines the top and left neighbour positions and types (stored in
      * h->top_mb_xy, h->left_mb_xy[], h->top_type, h->left_type[]) and, for
      * non-intra macroblocks, fills the motion vector / reference caches,
      * h->non_zero_count_cache and h->cbp.
3409  *
      * @param h       decoder context; h->mb_xy selects the macroblock
      * @param mb_type type of the macroblock at h->mb_xy
3410  * @return non zero if the loop filter can be skipped
3411  */
3412 static int fill_filter_caches(H264Context *h, int mb_type)
3413 {
3414     MpegEncContext *const s = &h->s;
3415     const int mb_xy = h->mb_xy;
3416     int top_xy, left_xy[LEFT_MBS];
3417     int top_type, left_type[LEFT_MBS];
3418     uint8_t *nnz;
3419     uint8_t *nnz_cache;
3420
         /* Top neighbour: one macroblock row up, or two rows up when
          * MB_FIELD is 1 (the stride is shifted left by MB_FIELD). */
3421     top_xy = mb_xy - (s->mb_stride << MB_FIELD);
3422
3423     /* Wow, what a mess, why didn't they simplify the interlacing & intra
3424      * stuff, I can't imagine that these complex rules are worth it. */
3425
3426     left_xy[LBOT] = left_xy[LTOP] = mb_xy - 1;
3427     if (FRAME_MBAFF) {
             /* The neighbour positions depend on whether the current and the
              * left macroblock are interlaced, and on the parity of mb_y. */
3428         const int left_mb_field_flag = IS_INTERLACED(s->current_picture.f.mb_type[mb_xy - 1]);
3429         const int curr_mb_field_flag = IS_INTERLACED(mb_type);
3430         if (s->mb_y & 1) {
3431             if (left_mb_field_flag != curr_mb_field_flag)
3432                 left_xy[LTOP] -= s->mb_stride;
3433         } else {
                 /* Branch-free: the mask is all ones when bit 7 of the top
                  * macroblock's type is clear and zero when it is set, so
                  * mb_stride is added only in the former case (bit 7 is
                  * presumably MB_TYPE_INTERLACED -- confirm). */
3434             if (curr_mb_field_flag)
3435                 top_xy += s->mb_stride &
3436                     (((s->current_picture.f.mb_type[top_xy] >> 7) & 1) - 1);
3437             if (left_mb_field_flag != curr_mb_field_flag)
3438                 left_xy[LBOT] += s->mb_stride;
3439         }
3440     }
3441
3442     h->top_mb_xy        = top_xy;
3443     h->left_mb_xy[LTOP] = left_xy[LTOP];
3444     h->left_mb_xy[LBOT] = left_xy[LBOT];
3445     {
3446         /* For sufficiently low qp, filtering wouldn't do anything.
3447          * This is a conservative estimate: could also check beta_offset
3448          * and more accurate chroma_qp. */
3449         int qp_thresh = h->qp_thresh; // FIXME strictly we should store qp_thresh for each mb of a slice
3450         int qp        = s->current_picture.f.qscale_table[mb_xy];
             /* Skip only if this macroblock's qp and its rounded average with
              * each existing neighbour's qp are all at or below the threshold;
              * a negative neighbour index counts as "no neighbour". */
3451         if (qp <= qp_thresh &&
3452             (left_xy[LTOP] < 0 ||
3453              ((qp + s->current_picture.f.qscale_table[left_xy[LTOP]] + 1) >> 1) <= qp_thresh) &&
3454             (top_xy < 0 ||
3455              ((qp + s->current_picture.f.qscale_table[top_xy] + 1) >> 1) <= qp_thresh)) {
3456             if (!FRAME_MBAFF)
3457                 return 1;
                 /* With FRAME_MBAFF the second left neighbour and the
                  * macroblock one row above top_xy are checked as well. */
3458             if ((left_xy[LTOP] < 0 ||
3459                  ((qp + s->current_picture.f.qscale_table[left_xy[LBOT]] + 1) >> 1) <= qp_thresh) &&
3460                 (top_xy < s->mb_stride ||
3461                  ((qp + s->current_picture.f.qscale_table[top_xy - s->mb_stride] + 1) >> 1) <= qp_thresh))
3462                 return 1;
3463         }
3464     }
3465
3466     top_type        = s->current_picture.f.mb_type[top_xy];
3467     left_type[LTOP] = s->current_picture.f.mb_type[left_xy[LTOP]];
3468     left_type[LBOT] = s->current_picture.f.mb_type[left_xy[LBOT]];
         /* A neighbour type of 0 makes the code below ignore that neighbour.
          * With deblocking_filter == 2 neighbours from another slice are
          * ignored; otherwise only neighbours whose slice_table entry is
          * 0xFFFF (presumably "not decoded" -- confirm). */
3469     if (h->deblocking_filter == 2) {
3470         if (h->slice_table[top_xy] != h->slice_num)
3471             top_type = 0;
3472         if (h->slice_table[left_xy[LBOT]] != h->slice_num)
3473             left_type[LTOP] = left_type[LBOT] = 0;
3474     } else {
3475         if (h->slice_table[top_xy] == 0xFFFF)
3476             top_type = 0;
3477         if (h->slice_table[left_xy[LBOT]] == 0xFFFF)
3478             left_type[LTOP] = left_type[LBOT] = 0;
3479     }
3480     h->top_type        = top_type;
3481     h->left_type[LTOP] = left_type[LTOP];
3482     h->left_type[LBOT] = left_type[LBOT];
3483
         /* Intra macroblocks return here; none of the caches below are filled. */
3484     if (IS_INTRA(mb_type))
3485         return 0;
3486
         /* Motion vector / reference caches for list 0, and for list 1 when
          * the macroblock has two lists. */
3487     fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy,
3488                              top_type, left_type, mb_xy, 0);
3489     if (h->list_count == 2)
3490         fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy,
3491                                  top_type, left_type, mb_xy, 1);
3492
         /* Copy entries 0..15 of this macroblock's non-zero counts, four at a
          * time, into cache rows 1..4 starting at column 4 (rows are 8 apart). */
3493     nnz       = h->non_zero_count[mb_xy];
3494     nnz_cache = h->non_zero_count_cache;
3495     AV_COPY32(&nnz_cache[4 + 8 * 1], &nnz[0]);
3496     AV_COPY32(&nnz_cache[4 + 8 * 2], &nnz[4]);
3497     AV_COPY32(&nnz_cache[4 + 8 * 3], &nnz[8]);
3498     AV_COPY32(&nnz_cache[4 + 8 * 4], &nnz[12]);
3499     h->cbp = h->cbp_table[mb_xy];
3500
         /* Top neighbour: its entries 12..15 go into cache row 0. */
3501     if (top_type) {
3502         nnz = h->non_zero_count[top_xy];
3503         AV_COPY32(&nnz_cache[4 + 8 * 0], &nnz[3 * 4]);
3504     }
3505
         /* Left neighbour: its entries 3, 7, 11, 15 go into cache column 3. */
3506     if (left_type[LTOP]) {
3507         nnz = h->non_zero_count[left_xy[LTOP]];
3508         nnz_cache[3 + 8 * 1] = nnz[3 + 0 * 4];
3509         nnz_cache[3 + 8 * 2] = nnz[3 + 1 * 4];
3510         nnz_cache[3 + 8 * 3] = nnz[3 + 2 * 4];
3511         nnz_cache[3 + 8 * 4] = nnz[3 + 3 * 4];
3512     }
3513
3514     /* CAVLC 8x8dct requires NNZ values for residual decoding that differ
3515      * from what the loop filter needs */
3516     if (!CABAC && h->pps.transform_8x8_mode) {
             /* For 8x8-transform macroblocks the cached values are replaced by
              * values derived from cbp bits 12..15 (masks 0x1000..0x8000). */
3517         if (IS_8x8DCT(top_type)) {
3518             nnz_cache[4 + 8 * 0]     =
3519                 nnz_cache[5 + 8 * 0] = (h->cbp_table[top_xy] & 0x4000) >> 12;
3520             nnz_cache[6 + 8 * 0]     =
3521                 nnz_cache[7 + 8 * 0] = (h->cbp_table[top_xy] & 0x8000) >> 12;
3522         }
3523         if (IS_8x8DCT(left_type[LTOP])) {
3524             nnz_cache[3 + 8 * 1]     =
3525                 nnz_cache[3 + 8 * 2] = (h->cbp_table[left_xy[LTOP]] & 0x2000) >> 12; // FIXME check MBAFF
3526         }
3527         if (IS_8x8DCT(left_type[LBOT])) {
3528             nnz_cache[3 + 8 * 3]     =
3529                 nnz_cache[3 + 8 * 4] = (h->cbp_table[left_xy[LBOT]] & 0x8000) >> 12; // FIXME check MBAFF
3530         }
3531
3532         if (IS_8x8DCT(mb_type)) {
                 /* Current macroblock: each group of four scan8 positions
                  * takes its value from one cbp bit. */
3533             nnz_cache[scan8[0]] =
3534             nnz_cache[scan8[1]] =
3535             nnz_cache[scan8[2]] =
3536             nnz_cache[scan8[3]] = (h->cbp & 0x1000) >> 12;
3537
3538             nnz_cache[scan8[0 + 4]] =
3539             nnz_cache[scan8[1 + 4]] =
3540             nnz_cache[scan8[2 + 4]] =
3541             nnz_cache[scan8[3 + 4]] = (h->cbp & 0x2000) >> 12;
3542
3543             nnz_cache[scan8[0 + 8]] =
3544             nnz_cache[scan8[1 + 8]] =
3545             nnz_cache[scan8[2 + 8]] =
3546             nnz_cache[scan8[3 + 8]] = (h->cbp & 0x4000) >> 12;
3547
3548             nnz_cache[scan8[0 + 12]] =
3549             nnz_cache[scan8[1 + 12]] =
3550             nnz_cache[scan8[2 + 12]] =
3551             nnz_cache[scan8[3 + 12]] = (h->cbp & 0x8000) >> 12;
3552         }
3553     }
3554
3555     return 0;
3556 }
3557
/**
 * Run the deblocking filter over macroblock columns [start_x, end_x) of
 * the macroblock row s->mb_y, plus the following row when FRAME_MBAFF is 1.
 *
 * Nothing is filtered when h->deblocking_filter is 0. In all cases
 * h->slice_type, s->mb_x, s->mb_y and h->chroma_qp[] are (re)set on exit;
 * other per-macroblock fields written in the loop (h->mb_xy, h->slice_num,
 * h->list_count, ...) are left at the values of the last macroblock visited.
 *
 * @param h       decoder context
 * @param start_x first macroblock column to filter
 * @param end_x   one past the last macroblock column to filter
 */
3558 static void loop_filter(H264Context *h, int start_x, int end_x)
3559 {
3560     MpegEncContext *const s = &h->s;
3561     uint8_t *dest_y, *dest_cb, *dest_cr;
3562     int linesize, uvlinesize, mb_x, mb_y;
3563     const int end_mb_y       = s->mb_y + FRAME_MBAFF;
3564     const int old_slice_type = h->slice_type;
3565     const int pixel_shift    = h->pixel_shift;
         /* Chroma height of one macroblock in lines. */
3566     const int block_h        = 16 >> s->chroma_y_shift;
3567
3568     if (h->deblocking_filter) {
3569         for (mb_x = start_x; mb_x < end_x; mb_x++)
3570             for (mb_y = end_mb_y - FRAME_MBAFF; mb_y <= end_mb_y; mb_y++) {
3571                 int mb_xy, mb_type;
                     /* Load this macroblock's stored slice number, type and
                      * list count into the context for the calls below. */
3572                 mb_xy         = h->mb_xy = mb_x + mb_y * s->mb_stride;
3573                 h->slice_num  = h->slice_table[mb_xy];
3574                 mb_type       = s->current_picture.f.mb_type[mb_xy];
3575                 h->list_count = h->list_counts[mb_xy];
3576
3577                 if (FRAME_MBAFF)
3578                     h->mb_mbaff               =
3579                     h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);
3580
3581                 s->mb_x = mb_x;
3582                 s->mb_y = mb_y;
                     /* Top-left sample of the macroblock in each plane: luma
                      * advances 16 samples per macroblock in both directions,
                      * chroma 8 << CHROMA444 horizontally and block_h lines
                      * vertically. */
3583                 dest_y  = s->current_picture.f.data[0] +
3584                           ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
3585                 dest_cb = s->current_picture.f.data[1] +
3586                           (mb_x << pixel_shift) * (8 << CHROMA444) +
3587                           mb_y * s->uvlinesize * block_h;
3588                 dest_cr = s->current_picture.f.data[2] +
3589                           (mb_x << pixel_shift) * (8 << CHROMA444) +
3590                           mb_y * s->uvlinesize * block_h;
3591                 // FIXME simplify above
3592
3593                 if (MB_FIELD) {
                         /* Field macroblock: use doubled line strides. For an
                          * odd mb_y the pointers are moved back so that they
                          * land on line 1 of the macroblock row above
                          * (mb_y * 16 - 15 == (mb_y - 1) * 16 + 1 for luma). */
3594                     linesize   = h->mb_linesize   = s->linesize   * 2;
3595                     uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3596                     if (mb_y & 1) { // FIXME move out of this function?
3597                         dest_y  -= s->linesize   * 15;
3598                         dest_cb -= s->uvlinesize * (block_h - 1);
3599                         dest_cr -= s->uvlinesize * (block_h - 1);
3600                     }
3601                 } else {
3602                     linesize   = h->mb_linesize   = s->linesize;
3603                     uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3604                 }
3605                 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize,
3606                                  uvlinesize, 0);
                     /* A non-zero return means filtering can be skipped for
                      * this macroblock. */
3607                 if (fill_filter_caches(h, mb_type))
3608                     continue;
                     /* Chroma qp is derived from this macroblock's stored
                      * qscale, not from the current s->qscale. */
3609                 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.f.qscale_table[mb_xy]);
3610                 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.f.qscale_table[mb_xy]);
3611
3612                 if (FRAME_MBAFF) {
3613                     ff_h264_filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr,
3614                                       linesize, uvlinesize);
3615                 } else {
3616                     ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb,
3617                                            dest_cr, linesize, uvlinesize);
3618                 }
3619             }
3620     }
         /* Reset the state overwritten above: s->mb_x is set to end_x,
          * s->mb_y back to its value on entry, and chroma qp is recomputed
          * from s->qscale. */
3621     h->slice_type   = old_slice_type;
3622     s->mb_x         = end_x;
3623     s->mb_y         = end_mb_y - FRAME_MBAFF;
3624     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3625     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3626 }
3627
3628 static void predict_field_decoding_flag(H264Context *h)
3629 {
3630     MpegEncContext *const s = &h->s;
3631     const int mb_xy = s->mb_x + s->mb_y * s->mb_stride;
3632     int mb_type     = (h->slice_table[mb_xy - 1] == h->slice_num) ?
3633                       s->current_picture.f.mb_type[mb_xy - 1] :
3634                       (h->slice_table[mb_xy - s->mb_stride] == h->slice_num) ?
3635                       s->current_picture.f.mb_type[mb_xy - s->mb_stride] : 0;
3636     h->mb_mbaff     = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
3637 }
3638
3639 /**
3640  * Draw edges and report progress for the last MB row.
3641  */
3642 static void decode_finish_row(H264Context *h)
3643 {
3644     MpegEncContext *const s = &h->s;
3645     int top            = 16 * (s->mb_y      >> FIELD_PICTURE);
3646     int pic_height     = 16 *  s->mb_height >> FIELD_PICTURE;
3647     int height         =  16      << FRAME_MBAFF;
3648     int deblock_border = (16 + 4) << FRAME_MBAFF;
3649
3650     if (h->deblocking_filter) {
3651         if ((top + height) >= pic_height)
3652             height += deblock_border;
3653         top -= deblock_border;
3654     }
3655
3656     if (top >= pic_height || (top + height) < h->emu_edge_height)
3657         return;
3658
3659     height = FFMIN(height, pic_height - top);
3660     if (top < h->emu_edge_height) {
3661         height = top + height;
3662         top    = 0;
3663     }
3664
3665     ff_mpeg_draw_horiz_band(s, top, height);
3666
3667     if (s->droppable)
3668         return;
3669
3670     ff_thread_report_progress(&s->current_picture_ptr->f, top + height - 1,
3671                               s->picture_structure == PICT_BOTTOM_FIELD);
3672 }
3673
3674 static void er_add_slice(H264Context *h, int startx, int starty,
3675                          int endx, int endy, int status)
3676 {
3677     ERContext *er = &h->s.er;
3678
3679     er->ref_count = h->ref_count[0];
3680     ff_er_add_slice(er, startx, starty, endx, endy, status);
3681 }
3682
3683 static int decode_slice(struct AVCodecContext *avctx, void *arg)
3684 {
3685     H264Context *h = *(void **)arg;
3686     MpegEncContext *const s = &h->s;
3687     int lf_x_start = s->mb_x;
3688
3689     s->mb_skip_run = -1;
3690
3691     h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME ||
3692                     s->codec_id != AV_CODEC_ID_H264 ||
3693                     (CONFIG_GRAY && (s->flags & CODEC_FLAG_GRAY));
3694
3695     if (h->pps.cabac) {
3696         /* realign */
3697         align_get_bits(&s->gb);
3698
3699         /* init cabac */
3700         ff_init_cabac_states(&h->cabac);
3701         ff_init_cabac_decoder(&h->cabac,
3702                               s->gb.buffer + get_bits_count(&s->gb) / 8,
3703                               (get_bits_left(&s->gb) + 7) / 8);
3704
3705         ff_h264_init_cabac_states(h);
3706
3707         for (;;) {
3708             // START_TIMER
3709             int ret = ff_h264_decode_mb_cabac(h);
3710             int eos;
3711             // STOP_TIMER("decode_mb_cabac")
3712
3713             if (ret >= 0)
3714                 ff_h264_hl_decode_mb(h);
3715
3716             // FIXME optimal? or let mb_decode decode 16x32 ?
3717             if (ret >= 0 && FRAME_MBAFF) {
3718                 s->mb_y++;
3719
3720                 ret = ff_h264_decode_mb_cabac(h);
3721
3722                 if (ret >= 0)
3723                     ff_h264_hl_decode_mb(h);
3724                 s->mb_y--;
3725             }
3726             eos = get_cabac_terminate(&h->cabac);
3727
3728             if ((s->workaround_bugs & FF_BUG_TRUNCATED) &&
3729                 h->cabac.bytestream > h->cabac.bytestream_end + 2) {
3730                 er_add_slice(h, s->resync_mb_x, s->resync_mb_y, s->mb_x - 1,
3731                                 s->mb_y, ER_MB_END);
3732                 if (s->mb_x >= lf_x_start)
3733                     loop_filter(h, lf_x_start, s->mb_x + 1);
3734                 return 0;
3735             }
3736             if (ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
3737                 av_log(h->s.avctx, AV_LOG_ERROR,
3738                        "error while decoding MB %d %d, bytestream (%td)\n",
3739                        s->mb_x, s->mb_y,
3740                        h->cabac.bytestream_end - h->cabac.bytestream);
3741                 er_add_slice(h, s->resync_mb_x, s->resync_mb_y, s->mb_x,
3742                                 s->mb_y, ER_MB_ERROR);
3743                 return -1;
3744             }
3745
3746             if (++s->mb_x >= s->mb_width) {
3747                 loop_filter(h, lf_x_start, s->mb_x);
3748                 s->mb_x = lf_x_start = 0;
3749                 decode_finish_row(h);
3750                 ++s->mb_y;
3751                 if (FIELD_OR_MBAFF_PICTURE) {
3752                     ++s->mb_y;
3753                     if (FRAME_MBAFF && s->mb_y < s->mb_height)
3754                         predict_field_decoding_flag(h);
3755                 }
3756             }
3757
3758             if (eos || s->mb_y >= s->mb_height) {
3759                 tprintf(s->avctx, "slice end %d %d\n",
3760                         get_bits_count(&s->gb), s->gb.size_in_bits);
3761                 er_add_slice(h, s->resync_mb_x, s->resync_mb_y, s->mb_x - 1,
3762                                 s->mb_y, ER_MB_END);
3763                 if (s->mb_x > lf_x_start)
3764                     loop_filter(h, lf_x_start, s->mb_x);
3765                 return 0;
3766             }
3767         }
3768     } else {
3769         for (;;) {
3770             int ret = ff_h264_decode_mb_cavlc(h);
3771
3772             if (ret >= 0)
3773                 ff_h264_hl_decode_mb(h);
3774
3775             // FIXME optimal? or let mb_decode decode 16x32 ?
3776             if (ret >= 0 && FRAME_MBAFF) {
3777                 s->mb_y++;
3778                 ret = ff_h264_decode_mb_cavlc(h);
3779
3780                 if (ret >= 0)
3781                     ff_h264_hl_decode_mb(h);
3782                 s->mb_y--;
3783             }
3784
3785             if (ret < 0) {
3786                 av_log(h->s.avctx, AV_LOG_ERROR,
3787                        "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
3788                 er_add_slice(h, s->resync_mb_x, s->resync_mb_y, s->mb_x,
3789                                 s->mb_y, ER_MB_ERROR);
3790                 return -1;
3791             }
3792
3793             if (++s->mb_x >= s->mb_width) {
3794                 loop_filter(h, lf_x_start, s->mb_x);
3795                 s->mb_x = lf_x_start = 0;
3796                 decode_finish_row(h);
3797                 ++s->mb_y;
3798                 if (FIELD_OR_MBAFF_PICTURE) {
3799                     ++s->mb_y;
3800                     if (FRAME_MBAFF && s->mb_y < s->mb_height)
3801                         predict_field_decoding_flag(h);
3802                 }
3803                 if (s->mb_y >= s->mb_height) {
3804                     tprintf(s->avctx, "slice end %d %d\n",
3805                             get_bits_count(&s->gb), s->gb.size_in_bits);
3806
3807                     if (get_bits_left(&s->gb) == 0) {
3808                         er_add_slice(h, s->resync_mb_x, s->resync_mb_y,
3809                                         s->mb_x - 1, s->mb_y,
3810                                         ER_MB_END);
3811
3812                         return 0;
3813                     } else {
3814                         er_add_slice(h, s->resync_mb_x, s->resync_mb_y,
3815                                         s->mb_x - 1, s->mb_y,
3816                                         ER_MB_END);
3817
3818                         return -1;
3819                     }
3820                 }
3821             }
3822
3823             if (get_bits_left(&s->gb) <= 0 && s->mb_skip_run <= 0) {
3824                 tprintf(s->avctx, "slice end %d %d\n",
3825                         get_bits_count(&s->gb), s->gb.size_in_bits);
3826                 if (get_bits_left(&s->gb) == 0) {
3827                     er_add_slice(h, s->resync_mb_x, s->resync_mb_y,
3828                                     s->mb_x - 1, s->mb_y,
3829                                     ER_MB_END);
3830                     if (s->mb_x > lf_x_start)
3831                         loop_filter(h, lf_x_start, s->mb_x);
3832
3833                     return 0;
3834                 } else {
3835                     er_add_slice(h, s->resync_mb_x, s->resync_mb_y, s->mb_x,
3836                                     s->mb_y, ER_MB_ERROR);
3837
3838                     return -1;
3839                 }
3840             }
3841         }
3842     }
3843 }
3844
3845 /**
3846  * Call decode_slice() for each context.
3847  *
3848  * @param h h264 master context
3849  * @param context_count number of contexts to execute
3850  */
3851 static int execute_decode_slices(H264Context *h, int context_count)
3852 {
3853     MpegEncContext *const s     = &h->s;
3854     AVCodecContext *const avctx = s->avctx;
3855     H264Context *hx;
3856     int i;
3857
3858     if (s->avctx->hwaccel ||
3859         s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
3860         return 0;
3861     if (context_count == 1) {
3862         return decode_slice(avctx, &h);
3863     } else {
3864         for (i = 1; i < context_count; i++) {
3865             hx                    = h->thread_context[i];
3866             hx->s.err_recognition = avctx->err_recognition;
3867             hx->s.er.error_count  = 0;
3868         }
3869
3870         avctx->execute(avctx, decode_slice, h->thread_context,
3871                        NULL, context_count, sizeof(void *));
3872
3873         /* pull back stuff from slices to master context */
3874         hx                   = h->thread_context[context_count - 1];
3875         s->mb_x              = hx->s.mb_x;
3876         s->mb_y              = hx->s.mb_y;
3877         s->droppable         = hx->s.droppable;
3878         s->picture_structure = hx->s.picture_structure;
3879         for (i = 1; i < context_count; i++)
3880             h->s.er.error_count += h->thread_context[i]->s.er.error_count;
3881     }
3882
3883     return 0;
3884 }
3885
3886 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
3887                             int parse_extradata)
3888 {
3889     MpegEncContext *const s     = &h->s;
3890     AVCodecContext *const avctx = s->avctx;
3891     H264Context *hx; ///< thread context
3892     int buf_index;
3893     int context_count;
3894     int next_avc;
3895     int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
3896     int nals_needed = 0; ///< number of NALs that need decoding before the next frame thread starts
3897     int nal_index;
3898
3899     h->max_contexts = s->slice_context_count;
3900     if (!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
3901         h->current_slice = 0;
3902         if (!s->first_field)
3903             s->current_picture_ptr = NULL;
3904         ff_h264_reset_sei(h);
3905     }
3906
3907     for (; pass <= 1; pass++) {
3908         buf_index     = 0;
3909         context_count = 0;
3910         next_avc      = h->is_avc ? 0 : buf_size;
3911         nal_index     = 0;
3912         for (;;) {
3913             int consumed;
3914             int dst_length;
3915             int bit_length;
3916             const uint8_t *ptr;
3917             int i, nalsize = 0;
3918             int err;
3919
3920             if (buf_index >= next_avc) {
3921                 if (buf_index >= buf_size - h->nal_length_size)
3922                     break;
3923                 nalsize = 0;
3924                 for (i = 0; i < h->nal_length_size; i++)
3925                     nalsize = (nalsize << 8) | buf[buf_index++];
3926                 if (nalsize <= 0 || nalsize > buf_size - buf_index) {
3927                     av_log(h->s.avctx, AV_LOG_ERROR,
3928                            "AVC: nal size %d\n", nalsize);
3929                     break;
3930                 }
3931                 next_avc = buf_index + nalsize;
3932             } else {
3933                 // start code prefix search
3934                 for (; buf_index + 3 < next_avc; buf_index++)
3935                     // This should always succeed in the first iteration.
3936                     if (buf[buf_index]     == 0 &&
3937                         buf[buf_index + 1] == 0 &&
3938                         buf[buf_index + 2] == 1)
3939                         break;
3940
3941                 if (buf_index + 3 >= buf_size) {
3942                     buf_index = buf_size;
3943                     break;
3944                 }
3945
3946                 buf_index += 3;
3947                 if (buf_index >= next_avc)
3948                     continue;
3949             }
3950
3951             hx = h->thread_context[context_count];
3952
3953             ptr = ff_h264_decode_nal(hx, buf + buf_index, &dst_length,
3954                                      &consumed, next_avc - buf_index);
3955             if (ptr == NULL || dst_length < 0) {
3956                 buf_index = -1;
3957                 goto end;
3958             }
3959             i = buf_index + consumed;
3960             if ((s->workaround_bugs & FF_BUG_AUTODETECT) && i + 3 < next_avc &&
3961                 buf[i]     == 0x00 && buf[i + 1] == 0x00 &&
3962                 buf[i + 2] == 0x01 && buf[i + 3] == 0xE0)
3963                 s->workaround_bugs |= FF_BUG_TRUNCATED;
3964
3965             if (!(s->workaround_bugs & FF_BUG_TRUNCATED))
3966                 while (ptr[dst_length - 1] == 0 && dst_length > 0)
3967                     dst_length--;
3968             bit_length = !dst_length ? 0
3969                                      : (8 * dst_length -
3970                                         decode_rbsp_trailing(h, ptr + dst_length - 1));
3971
3972             if (s->avctx->debug & FF_DEBUG_STARTCODE)
3973                 av_log(h->s.avctx, AV_LOG_DEBUG,
3974                        "NAL %d at %d/%d length %d\n",
3975                        hx->nal_unit_type, buf_index, buf_size, dst_length);
3976
3977             if (h->is_avc && (nalsize != consumed) && nalsize)
3978                 av_log(h->s.avctx, AV_LOG_DEBUG,
3979                        "AVC: Consumed only %d bytes instead of %d\n",
3980                        consumed, nalsize);
3981
3982             buf_index += consumed;
3983             nal_index++;
3984
3985             if (pass == 0) {
3986                 /* packets can sometimes contain multiple PPS/SPS,
3987                  * e.g. two PAFF field pictures in one packet, or a demuxer
3988                  * which splits NALs strangely if so, when frame threading we
3989                  * can't start the next thread until we've read all of them */
3990                 switch (hx->nal_unit_type) {
3991                 case NAL_SPS:
3992                 case NAL_PPS:
3993                     nals_needed = nal_index;
3994                     break;
3995                 case NAL_DPA:
3996                 case NAL_IDR_SLICE:
3997                 case NAL_SLICE:
3998                     init_get_bits(&hx->s.gb, ptr, bit_length);
3999                     if (!get_ue_golomb(&hx->s.gb))
4000                         nals_needed = nal_index;
4001                 }
4002                 continue;
4003             }
4004
4005             // FIXME do not discard SEI id
4006             if (avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)
4007                 continue;
4008
4009 again:
4010             /* Ignore every NAL unit type except PPS and SPS during extradata
4011              * parsing. Decoding slices is not possible in codec init
4012              * with frame-mt */
4013             if (parse_extradata && HAVE_THREADS &&
4014                 (s->avctx->active_thread_type & FF_THREAD_FRAME) &&
4015                 (hx->nal_unit_type != NAL_PPS &&
4016                  hx->nal_unit_type != NAL_SPS)) {
4017                 av_log(avctx, AV_LOG_INFO, "Ignoring NAL unit %d during "
4018                        "extradata parsing\n", hx->nal_unit_type);
4019                 hx->nal_unit_type = NAL_FF_IGNORE;
4020             }
4021             err = 0;
4022             switch (hx->nal_unit_type) {
4023             case NAL_IDR_SLICE:
4024                 if (h->nal_unit_type != NAL_IDR_SLICE) {
4025                     av_log(h->s.avctx, AV_LOG_ERROR,
4026                            "Invalid mix of idr and non-idr slices\n");
4027                     buf_index = -1;
4028                     goto end;
4029                 }
4030                 idr(h); // FIXME ensure we don't lose some frames if there is reordering
4031             case NAL_SLICE:
4032                 init_get_bits(&hx->s.gb, ptr, bit_length);
4033                 hx->intra_gb_ptr        =
4034                     hx->inter_gb_ptr    = &hx->s.gb;
4035                 hx->s.data_partitioning = 0;
4036
4037                 if ((err = decode_slice_header(hx, h)))
4038                     break;
4039
4040                 s->current_picture_ptr->f.key_frame |=
4041                     (hx->nal_unit_type == NAL_IDR_SLICE) ||
4042                     (h->sei_recovery_frame_cnt >= 0);
4043
4044                 if (h->current_slice == 1) {
4045                     if (!(s->flags2 & CODEC_FLAG2_CHUNKS))
4046                         decode_postinit(h, nal_index >= nals_needed);
4047
4048                     if (s->avctx->hwaccel &&
4049                         s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
4050                         return -1;
4051                     if (CONFIG_H264_VDPAU_DECODER &&
4052                         s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
4053                         ff_vdpau_h264_picture_start(s);
4054                 }
4055
4056                 if (hx->redundant_pic_count == 0 &&
4057                     (avctx->skip_frame < AVDISCARD_NONREF ||
4058                      hx->nal_ref_idc) &&
4059                     (avctx->skip_frame < AVDISCARD_BIDIR  ||
4060                      hx->slice_type_nos != AV_PICTURE_TYPE_B) &&
4061                     (avctx->skip_frame < AVDISCARD_NONKEY ||
4062                      hx->slice_type_nos == AV_PICTURE_TYPE_I) &&
4063                     avctx->skip_frame < AVDISCARD_ALL) {
4064                     if (avctx->hwaccel) {
4065                         if (avctx->hwaccel->decode_slice(avctx,
4066                                                          &buf[buf_index - consumed],
4067                                                          consumed) < 0)
4068                             return -1;
4069                     } else if (CONFIG_H264_VDPAU_DECODER &&
4070                                s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) {
4071                         static const uint8_t start_code[] = {
4072                             0x00, 0x00, 0x01 };
4073                         ff_vdpau_add_data_chunk(s, start_code,
4074                                                 sizeof(start_code));
4075                         ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed],
4076                                                 consumed);
4077                     } else
4078                         context_count++;
4079                 }
4080                 break;
4081             case NAL_DPA:
4082                 init_get_bits(&hx->s.gb, ptr, bit_length);
4083                 hx->intra_gb_ptr =
4084                 hx->inter_gb_ptr = NULL;
4085
4086                 if ((err = decode_slice_header(hx, h)) < 0)
4087                     break;
4088
4089                 hx->s.data_partitioning = 1;
4090                 break;
4091             case NAL_DPB:
4092                 init_get_bits(&hx->intra_gb, ptr, bit_length);
4093                 hx->intra_gb_ptr = &hx->intra_gb;
4094                 break;
4095             case NAL_DPC:
4096                 init_get_bits(&hx->inter_gb, ptr, bit_length);
4097                 hx->inter_gb_ptr = &hx->inter_gb;
4098
4099                 if (hx->redundant_pic_count == 0 &&
4100                     hx->intra_gb_ptr &&
4101                     hx->s.data_partitioning &&
4102                     s->current_picture_ptr &&
4103                     s->context_initialized &&
4104                     (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) &&
4105                     (avctx->skip_frame < AVDISCARD_BIDIR  ||
4106                      hx->slice_type_nos != AV_PICTURE_TYPE_B) &&
4107                     (avctx->skip_frame < AVDISCARD_NONKEY ||
4108                      hx->slice_type_nos == AV_PICTURE_TYPE_I) &&
4109                     avctx->skip_frame < AVDISCARD_ALL)
4110                     context_count++;
4111                 break;
4112             case NAL_SEI:
4113                 init_get_bits(&s->gb, ptr, bit_length);
4114                 ff_h264_decode_sei(h);
4115                 break;
4116             case NAL_SPS:
4117                 init_get_bits(&s->gb, ptr, bit_length);
4118                 if (ff_h264_decode_seq_parameter_set(h) < 0 &&
4119                     h->is_avc && (nalsize != consumed) && nalsize) {
4120                     av_log(h->s.avctx, AV_LOG_DEBUG,
4121                            "SPS decoding failure, trying again with the complete NAL\n");
4122                     init_get_bits(&s->gb, buf + buf_index + 1 - consumed,
4123                                   8 * (nalsize - 1));
4124                     ff_h264_decode_seq_parameter_set(h);
4125                 }
4126
4127                 if (h264_set_parameter_from_sps(h) < 0) {
4128                     buf_index = -1;
4129                     goto end;
4130                 }
4131                 break;
4132             case NAL_PPS:
4133                 init_get_bits(&s->gb, ptr, bit_length);
4134                 ff_h264_decode_picture_parameter_set(h, bit_length);
4135                 break;
4136             case NAL_AUD:
4137             case NAL_END_SEQUENCE:
4138             case NAL_END_STREAM:
4139             case NAL_FILLER_DATA:
4140             case NAL_SPS_EXT:
4141             case NAL_AUXILIARY_SLICE:
4142                 break;
4143             case NAL_FF_IGNORE:
4144                 break;
4145             default:
4146                 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n",
4147                        hx->nal_unit_type, bit_length);
4148             }
4149
4150             if (context_count == h->max_contexts) {
4151                 execute_decode_slices(h, context_count);
4152                 context_count = 0;
4153             }
4154
4155             if (err < 0)
4156                 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
4157             else if (err == 1) {
4158                 /* Slice could not be decoded in parallel mode, copy down
4159                  * NAL unit stuff to context 0 and restart. Note that
4160                  * rbsp_buffer is not transferred, but since we no longer
4161                  * run in parallel mode this should not be an issue. */
4162                 h->nal_unit_type = hx->nal_unit_type;
4163                 h->nal_ref_idc   = hx->nal_ref_idc;
4164                 hx               = h;
4165                 goto again;
4166             }
4167         }
4168     }
4169     if (context_count)
4170         execute_decode_slices(h, context_count);
4171
4172 end:
4173     /* clean up */
4174     if (s->current_picture_ptr && s->current_picture_ptr->owner2 == s &&
4175         !s->droppable) {
4176         ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
4177                                   s->picture_structure == PICT_BOTTOM_FIELD);
4178     }
4179
4180     return buf_index;
4181 }
4182
4183 /**
4184  * Return the number of bytes consumed for building the current frame.
4185  */
4186 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size)
4187 {
4188     if (pos == 0)
4189         pos = 1;          // avoid infinite loops (i doubt that is needed but ...)
4190     if (pos + 10 > buf_size)
4191         pos = buf_size;                   // oops ;)
4192
4193     return pos;
4194 }
4195
4196 static int decode_frame(AVCodecContext *avctx, void *data,
4197                         int *got_frame, AVPacket *avpkt)
4198 {
4199     const uint8_t *buf = avpkt->data;
4200     int buf_size       = avpkt->size;
4201     H264Context *h     = avctx->priv_data;
4202     MpegEncContext *s  = &h->s;
4203     AVFrame *pict      = data;
4204     int buf_index      = 0;
4205
4206     s->flags  = avctx->flags;
4207     s->flags2 = avctx->flags2;
4208
4209     /* end of stream, output what is still in the buffers */
4210 out:
4211     if (buf_size == 0) {
4212         Picture *out;
4213         int i, out_idx;
4214
4215         s->current_picture_ptr = NULL;
4216
4217         // FIXME factorize this with the output code below
4218         out     = h->delayed_pic[0];
4219         out_idx = 0;
4220         for (i = 1;
4221              h->delayed_pic[i] &&
4222              !h->delayed_pic[i]->f.key_frame &&
4223              !h->delayed_pic[i]->mmco_reset;
4224              i++)
4225             if (h->delayed_pic[i]->poc < out->poc) {
4226                 out     = h->delayed_pic[i];
4227                 out_idx = i;
4228             }
4229
4230         for (i = out_idx; h->delayed_pic[i]; i++)
4231             h->delayed_pic[i] = h->delayed_pic[i + 1];
4232
4233         if (out) {
4234             *got_frame = 1;
4235             *pict      = out->f;
4236         }
4237
4238         return buf_index;
4239     }
4240
4241     buf_index = decode_nal_units(h, buf, buf_size, 0);
4242     if (buf_index < 0)
4243         return -1;
4244
4245     if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
4246         buf_size = 0;
4247         goto out;
4248     }
4249
4250     if (!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr) {
4251         if (avctx->skip_frame >= AVDISCARD_NONREF)
4252             return 0;
4253         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
4254         return -1;
4255     }
4256
4257     if (!(s->flags2 & CODEC_FLAG2_CHUNKS) ||
4258         (s->mb_y >= s->mb_height && s->mb_height)) {
4259         if (s->flags2 & CODEC_FLAG2_CHUNKS)
4260             decode_postinit(h, 1);
4261
4262         field_end(h, 0);
4263         h->context_reinitialized = 0;
4264
4265         if (!h->next_output_pic) {
4266             /* Wait for second field. */
4267             *got_frame = 0;
4268         } else {
4269             *got_frame = 1;
4270             *pict      = h->next_output_pic->f;
4271         }
4272     }
4273
4274     assert(pict->data[0] || !*got_frame);
4275     ff_print_debug_info(s, pict);
4276
4277     return get_consumed_bytes(s, buf_index, buf_size);
4278 }
4279
4280 av_cold void ff_h264_free_context(H264Context *h)
4281 {
4282     int i;
4283
4284     free_tables(h, 1); // FIXME cleanup init stuff perhaps
4285
4286     for (i = 0; i < MAX_SPS_COUNT; i++)
4287         av_freep(h->sps_buffers + i);
4288
4289     for (i = 0; i < MAX_PPS_COUNT; i++)
4290         av_freep(h->pps_buffers + i);
4291 }
4292
4293 static av_cold int h264_decode_end(AVCodecContext *avctx)
4294 {
4295     H264Context *h    = avctx->priv_data;
4296     MpegEncContext *s = &h->s;
4297
4298     ff_h264_free_context(h);
4299
4300     ff_MPV_common_end(s);
4301
4302     // memset(h, 0, sizeof(H264Context));
4303
4304     return 0;
4305 }
4306
4307 static const AVProfile profiles[] = {
4308     { FF_PROFILE_H264_BASELINE,             "Baseline"              },
4309     { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
4310     { FF_PROFILE_H264_MAIN,                 "Main"                  },
4311     { FF_PROFILE_H264_EXTENDED,             "Extended"              },
4312     { FF_PROFILE_H264_HIGH,                 "High"                  },
4313     { FF_PROFILE_H264_HIGH_10,              "High 10"               },
4314     { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
4315     { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
4316     { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
4317     { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
4318     { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
4319     { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
4320     { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
4321     { FF_PROFILE_UNKNOWN },
4322 };
4323
4324 AVCodec ff_h264_decoder = {
4325     .name                  = "h264",
4326     .type                  = AVMEDIA_TYPE_VIDEO,
4327     .id                    = AV_CODEC_ID_H264,
4328     .priv_data_size        = sizeof(H264Context),
4329     .init                  = ff_h264_decode_init,
4330     .close                 = h264_decode_end,
4331     .decode                = decode_frame,
4332     .capabilities          = /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 |
4333                              CODEC_CAP_DELAY | CODEC_CAP_SLICE_THREADS |
4334                              CODEC_CAP_FRAME_THREADS,
4335     .flush                 = flush_dpb,
4336     .long_name             = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
4337     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
4338     .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
4339     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
4340 };
4341
#if CONFIG_H264_VDPAU_DECODER
/* H.264 decoder registration for the VDPAU-accelerated variant; it shares
 * the init/decode/close/flush callbacks with the software decoder. */
AVCodec ff_h264_vdpau_decoder = {
    /* identification */
    .name           = "h264_vdpau",
    .long_name      = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_H264,
    .profiles       = NULL_IF_CONFIG_SMALL(profiles),
    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_VDPAU_H264,
                                                   AV_PIX_FMT_NONE},

    /* private context and callbacks */
    .priv_data_size = sizeof(H264Context),
    .init           = ff_h264_decode_init,
    .decode         = decode_frame,
    .flush          = flush_dpb,
    .close          = h264_decode_end,
};
#endif