]> git.sesse.net Git - ffmpeg/blob - libavcodec/h264.c
h264: add avpriv_h264_has_num_reorder_frames()
[ffmpeg] / libavcodec / h264.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... decoder
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG4 part10 codec.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27
28 #define UNCHECKED_BITSTREAM_READER 1
29
30 #include "libavutil/imgutils.h"
31 #include "libavutil/opt.h"
32 #include "internal.h"
33 #include "cabac.h"
34 #include "cabac_functions.h"
35 #include "dsputil.h"
36 #include "avcodec.h"
37 #include "mpegvideo.h"
38 #include "h264.h"
39 #include "h264data.h"
40 #include "h264_mvpred.h"
41 #include "golomb.h"
42 #include "mathops.h"
43 #include "rectangle.h"
44 #include "thread.h"
45 #include "vdpau_internal.h"
46 #include "libavutil/avassert.h"
47
48 // #undef NDEBUG
49 #include <assert.h>
50
/* Four macroblock sizes.
 * NOTE(review): presumably indexed by chroma format — confirm against users. */
const uint16_t ff_h264_mb_sizes[4] = {
    256,
    384,
    512,
    768,
};
52
53 static const uint8_t rem6[QP_MAX_NUM + 1] = {
54     0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
55     3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
56     0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
57 };
58
59 static const uint8_t div6[QP_MAX_NUM + 1] = {
60     0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3,  3,  3,
61     3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6,  6,  6,
62     7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
63 };
64
65 static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
66     PIX_FMT_DXVA2_VLD,
67     PIX_FMT_VAAPI_VLD,
68     PIX_FMT_VDA_VLD,
69     PIX_FMT_YUVJ420P,
70     PIX_FMT_NONE
71 };
72
73 int avpriv_h264_has_num_reorder_frames(AVCodecContext *avctx)
74 {
75     H264Context *h = avctx->priv_data;
76     return h ? h->sps.num_reorder_frames : 0;
77 }
78
79 /**
80  * Check if the top & left blocks are available if needed and
81  * change the dc mode so it only uses the available blocks.
82  */
83 int ff_h264_check_intra4x4_pred_mode(H264Context *h)
84 {
85     MpegEncContext *const s     = &h->s;
86     static const int8_t top[12] = {
87         -1, 0, LEFT_DC_PRED, -1, -1, -1, -1, -1, 0
88     };
89     static const int8_t left[12] = {
90         0, -1, TOP_DC_PRED, 0, -1, -1, -1, 0, -1, DC_128_PRED
91     };
92     int i;
93
94     if (!(h->top_samples_available & 0x8000)) {
95         for (i = 0; i < 4; i++) {
96             int status = top[h->intra4x4_pred_mode_cache[scan8[0] + i]];
97             if (status < 0) {
98                 av_log(h->s.avctx, AV_LOG_ERROR,
99                        "top block unavailable for requested intra4x4 mode %d at %d %d\n",
100                        status, s->mb_x, s->mb_y);
101                 return -1;
102             } else if (status) {
103                 h->intra4x4_pred_mode_cache[scan8[0] + i] = status;
104             }
105         }
106     }
107
108     if ((h->left_samples_available & 0x8888) != 0x8888) {
109         static const int mask[4] = { 0x8000, 0x2000, 0x80, 0x20 };
110         for (i = 0; i < 4; i++)
111             if (!(h->left_samples_available & mask[i])) {
112                 int status = left[h->intra4x4_pred_mode_cache[scan8[0] + 8 * i]];
113                 if (status < 0) {
114                     av_log(h->s.avctx, AV_LOG_ERROR,
115                            "left block unavailable for requested intra4x4 mode %d at %d %d\n",
116                            status, s->mb_x, s->mb_y);
117                     return -1;
118                 } else if (status) {
119                     h->intra4x4_pred_mode_cache[scan8[0] + 8 * i] = status;
120                 }
121             }
122     }
123
124     return 0;
125 } // FIXME cleanup like ff_h264_check_intra_pred_mode
126
127 /**
128  * Check if the top & left blocks are available if needed and
129  * change the dc mode so it only uses the available blocks.
130  */
131 int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma)
132 {
133     MpegEncContext *const s     = &h->s;
134     static const int8_t top[7]  = { LEFT_DC_PRED8x8, 1, -1, -1 };
135     static const int8_t left[7] = { TOP_DC_PRED8x8, -1, 2, -1, DC_128_PRED8x8 };
136
137     if (mode > 6U) {
138         av_log(h->s.avctx, AV_LOG_ERROR,
139                "out of range intra chroma pred mode at %d %d\n",
140                s->mb_x, s->mb_y);
141         return -1;
142     }
143
144     if (!(h->top_samples_available & 0x8000)) {
145         mode = top[mode];
146         if (mode < 0) {
147             av_log(h->s.avctx, AV_LOG_ERROR,
148                    "top block unavailable for requested intra mode at %d %d\n",
149                    s->mb_x, s->mb_y);
150             return -1;
151         }
152     }
153
154     if ((h->left_samples_available & 0x8080) != 0x8080) {
155         mode = left[mode];
156         if (is_chroma && (h->left_samples_available & 0x8080)) {
157             // mad cow disease mode, aka MBAFF + constrained_intra_pred
158             mode = ALZHEIMER_DC_L0T_PRED8x8 +
159                    (!(h->left_samples_available & 0x8000)) +
160                    2 * (mode == DC_128_PRED8x8);
161         }
162         if (mode < 0) {
163             av_log(h->s.avctx, AV_LOG_ERROR,
164                    "left block unavailable for requested intra mode at %d %d\n",
165                    s->mb_x, s->mb_y);
166             return -1;
167         }
168     }
169
170     return mode;
171 }
172
173 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src,
174                                   int *dst_length, int *consumed, int length)
175 {
176     int i, si, di;
177     uint8_t *dst;
178     int bufidx;
179
180     // src[0]&0x80; // forbidden bit
181     h->nal_ref_idc   = src[0] >> 5;
182     h->nal_unit_type = src[0] & 0x1F;
183
184     src++;
185     length--;
186
187 #if HAVE_FAST_UNALIGNED
188 #if HAVE_FAST_64BIT
189 #define RS 7
190     for (i = 0; i + 1 < length; i += 9) {
191         if (!((~AV_RN64A(src + i) &
192                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
193               0x8000800080008080ULL))
194 #else
195 #define RS 3
196     for (i = 0; i + 1 < length; i += 5) {
197         if (!((~AV_RN32A(src + i) &
198                (AV_RN32A(src + i) - 0x01000101U)) &
199               0x80008080U))
200 #endif
201             continue;
202         if (i > 0 && !src[i])
203             i--;
204         while (src[i])
205             i++;
206 #else
207 #define RS 0
208     for (i = 0; i + 1 < length; i += 2) {
209         if (src[i])
210             continue;
211         if (i > 0 && src[i - 1] == 0)
212             i--;
213 #endif
214         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {
215             if (src[i + 2] != 3) {
216                 /* startcode, so we must be past the end */
217                 length = i;
218             }
219             break;
220         }
221         i -= RS;
222     }
223
224     // use second escape buffer for inter data
225     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0;
226
227     si = h->rbsp_buffer_size[bufidx];
228     av_fast_padded_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+MAX_MBPAIR_SIZE);
229     dst = h->rbsp_buffer[bufidx];
230
231     if (dst == NULL)
232         return NULL;
233
234     if(i>=length-1){ //no escaped 0
235         *dst_length= length;
236         *consumed= length+1; //+1 for the header
237         if(h->s.avctx->flags2 & CODEC_FLAG2_FAST){
238             return src;
239         }else{
240             memcpy(dst, src, length);
241             return dst;
242         }
243     }
244
245     // printf("decoding esc\n");
246     memcpy(dst, src, i);
247     si = di = i;
248     while (si + 2 < length) {
249         // remove escapes (very rare 1:2^22)
250         if (src[si + 2] > 3) {
251             dst[di++] = src[si++];
252             dst[di++] = src[si++];
253         } else if (src[si] == 0 && src[si + 1] == 0) {
254             if (src[si + 2] == 3) { // escape
255                 dst[di++]  = 0;
256                 dst[di++]  = 0;
257                 si        += 3;
258                 continue;
259             } else // next start code
260                 goto nsc;
261         }
262
263         dst[di++] = src[si++];
264     }
265     while (si < length)
266         dst[di++] = src[si++];
267 nsc:
268
269     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
270
271     *dst_length = di;
272     *consumed   = si + 1; // +1 for the header
273     /* FIXME store exact number of bits in the getbitcontext
274      * (it is needed for decoding) */
275     return dst;
276 }
277
278 /**
279  * Identify the exact end of the bitstream
280  * @return the length of the trailing, or 0 if damaged
281  */
282 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src)
283 {
284     int v = *src;
285     int r;
286
287     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
288
289     for (r = 1; r < 9; r++) {
290         if (v & 1)
291             return r;
292         v >>= 1;
293     }
294     return 0;
295 }
296
297 static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n,
298                                          int height, int y_offset, int list)
299 {
300     int raw_my        = h->mv_cache[list][scan8[n]][1];
301     int filter_height = (raw_my & 3) ? 2 : 0;
302     int full_my       = (raw_my >> 2) + y_offset;
303     int top           = full_my - filter_height;
304     int bottom        = full_my + filter_height + height;
305
306     return FFMAX(abs(top), bottom);
307 }
308
309 static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n,
310                                      int height, int y_offset, int list0,
311                                      int list1, int *nrefs)
312 {
313     MpegEncContext *const s = &h->s;
314     int my;
315
316     y_offset += 16 * (s->mb_y >> MB_FIELD);
317
318     if (list0) {
319         int ref_n    = h->ref_cache[0][scan8[n]];
320         Picture *ref = &h->ref_list[0][ref_n];
321
322         // Error resilience puts the current picture in the ref list.
323         // Don't try to wait on these as it will cause a deadlock.
324         // Fields can wait on each other, though.
325         if (ref->f.thread_opaque   != s->current_picture.f.thread_opaque ||
326             (ref->f.reference & 3) != s->picture_structure) {
327             my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
328             if (refs[0][ref_n] < 0)
329                 nrefs[0] += 1;
330             refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
331         }
332     }
333
334     if (list1) {
335         int ref_n    = h->ref_cache[1][scan8[n]];
336         Picture *ref = &h->ref_list[1][ref_n];
337
338         if (ref->f.thread_opaque   != s->current_picture.f.thread_opaque ||
339             (ref->f.reference & 3) != s->picture_structure) {
340             my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
341             if (refs[1][ref_n] < 0)
342                 nrefs[1] += 1;
343             refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
344         }
345     }
346 }
347
348 /**
349  * Wait until all reference frames are available for MC operations.
350  *
351  * @param h the H264 context
352  */
353 static void await_references(H264Context *h)
354 {
355     MpegEncContext *const s = &h->s;
356     const int mb_xy   = h->mb_xy;
357     const int mb_type = s->current_picture.f.mb_type[mb_xy];
358     int refs[2][48];
359     int nrefs[2] = { 0 };
360     int ref, list;
361
362     memset(refs, -1, sizeof(refs));
363
364     if (IS_16X16(mb_type)) {
365         get_lowest_part_y(h, refs, 0, 16, 0,
366                           IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
367     } else if (IS_16X8(mb_type)) {
368         get_lowest_part_y(h, refs, 0, 8, 0,
369                           IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
370         get_lowest_part_y(h, refs, 8, 8, 8,
371                           IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
372     } else if (IS_8X16(mb_type)) {
373         get_lowest_part_y(h, refs, 0, 16, 0,
374                           IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
375         get_lowest_part_y(h, refs, 4, 16, 0,
376                           IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
377     } else {
378         int i;
379
380         assert(IS_8X8(mb_type));
381
382         for (i = 0; i < 4; i++) {
383             const int sub_mb_type = h->sub_mb_type[i];
384             const int n           = 4 * i;
385             int y_offset          = (i & 2) << 2;
386
387             if (IS_SUB_8X8(sub_mb_type)) {
388                 get_lowest_part_y(h, refs, n, 8, y_offset,
389                                   IS_DIR(sub_mb_type, 0, 0),
390                                   IS_DIR(sub_mb_type, 0, 1),
391                                   nrefs);
392             } else if (IS_SUB_8X4(sub_mb_type)) {
393                 get_lowest_part_y(h, refs, n, 4, y_offset,
394                                   IS_DIR(sub_mb_type, 0, 0),
395                                   IS_DIR(sub_mb_type, 0, 1),
396                                   nrefs);
397                 get_lowest_part_y(h, refs, n + 2, 4, y_offset + 4,
398                                   IS_DIR(sub_mb_type, 0, 0),
399                                   IS_DIR(sub_mb_type, 0, 1),
400                                   nrefs);
401             } else if (IS_SUB_4X8(sub_mb_type)) {
402                 get_lowest_part_y(h, refs, n, 8, y_offset,
403                                   IS_DIR(sub_mb_type, 0, 0),
404                                   IS_DIR(sub_mb_type, 0, 1),
405                                   nrefs);
406                 get_lowest_part_y(h, refs, n + 1, 8, y_offset,
407                                   IS_DIR(sub_mb_type, 0, 0),
408                                   IS_DIR(sub_mb_type, 0, 1),
409                                   nrefs);
410             } else {
411                 int j;
412                 assert(IS_SUB_4X4(sub_mb_type));
413                 for (j = 0; j < 4; j++) {
414                     int sub_y_offset = y_offset + 2 * (j & 2);
415                     get_lowest_part_y(h, refs, n + j, 4, sub_y_offset,
416                                       IS_DIR(sub_mb_type, 0, 0),
417                                       IS_DIR(sub_mb_type, 0, 1),
418                                       nrefs);
419                 }
420             }
421         }
422     }
423
424     for (list = h->list_count - 1; list >= 0; list--)
425         for (ref = 0; ref < 48 && nrefs[list]; ref++) {
426             int row = refs[list][ref];
427             if (row >= 0) {
428                 Picture *ref_pic      = &h->ref_list[list][ref];
429                 int ref_field         = ref_pic->f.reference - 1;
430                 int ref_field_picture = ref_pic->field_picture;
431                 int pic_height        = 16 * s->mb_height >> ref_field_picture;
432
433                 row <<= MB_MBAFF;
434                 nrefs[list]--;
435
436                 if (!FIELD_PICTURE && ref_field_picture) { // frame referencing two fields
437                     ff_thread_await_progress(&ref_pic->f,
438                                              FFMIN((row >> 1) - !(row & 1),
439                                                    pic_height - 1),
440                                              1);
441                     ff_thread_await_progress(&ref_pic->f,
442                                              FFMIN((row >> 1), pic_height - 1),
443                                              0);
444                 } else if (FIELD_PICTURE && !ref_field_picture) { // field referencing one field of a frame
445                     ff_thread_await_progress(&ref_pic->f,
446                                              FFMIN(row * 2 + ref_field,
447                                                    pic_height - 1),
448                                              0);
449                 } else if (FIELD_PICTURE) {
450                     ff_thread_await_progress(&ref_pic->f,
451                                              FFMIN(row, pic_height - 1),
452                                              ref_field);
453                 } else {
454                     ff_thread_await_progress(&ref_pic->f,
455                                              FFMIN(row, pic_height - 1),
456                                              0);
457                 }
458             }
459         }
460 }
461
462 static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
463                                          int n, int square, int height,
464                                          int delta, int list,
465                                          uint8_t *dest_y, uint8_t *dest_cb,
466                                          uint8_t *dest_cr,
467                                          int src_x_offset, int src_y_offset,
468                                          qpel_mc_func *qpix_op,
469                                          h264_chroma_mc_func chroma_op,
470                                          int pixel_shift, int chroma_idc)
471 {
472     MpegEncContext *const s = &h->s;
473     const int mx      = h->mv_cache[list][scan8[n]][0] + src_x_offset * 8;
474     int my            = h->mv_cache[list][scan8[n]][1] + src_y_offset * 8;
475     const int luma_xy = (mx & 3) + ((my & 3) << 2);
476     int offset        = ((mx >> 2) << pixel_shift) + (my >> 2) * h->mb_linesize;
477     uint8_t *src_y    = pic->f.data[0] + offset;
478     uint8_t *src_cb, *src_cr;
479     int extra_width  = h->emu_edge_width;
480     int extra_height = h->emu_edge_height;
481     int emu = 0;
482     const int full_mx    = mx >> 2;
483     const int full_my    = my >> 2;
484     const int pic_width  = 16 * s->mb_width;
485     const int pic_height = 16 * s->mb_height >> MB_FIELD;
486     int ysh;
487
488     if (mx & 7)
489         extra_width -= 3;
490     if (my & 7)
491         extra_height -= 3;
492
493     if (full_mx                <          0 - extra_width  ||
494         full_my                <          0 - extra_height ||
495         full_mx + 16 /*FIXME*/ > pic_width  + extra_width  ||
496         full_my + 16 /*FIXME*/ > pic_height + extra_height) {
497         s->dsp.emulated_edge_mc(s->edge_emu_buffer,
498                                 src_y - (2 << pixel_shift) - 2 * h->mb_linesize,
499                                 h->mb_linesize,
500                                 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
501                                 full_my - 2, pic_width, pic_height);
502         src_y = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
503         emu   = 1;
504     }
505
506     qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); // FIXME try variable height perhaps?
507     if (!square)
508         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
509
510     if (CONFIG_GRAY && s->flags & CODEC_FLAG_GRAY)
511         return;
512
513     if (chroma_idc == 3 /* yuv444 */) {
514         src_cb = pic->f.data[1] + offset;
515         if (emu) {
516             s->dsp.emulated_edge_mc(s->edge_emu_buffer,
517                                     src_cb - (2 << pixel_shift) - 2 * h->mb_linesize,
518                                     h->mb_linesize,
519                                     16 + 5, 16 + 5 /*FIXME*/,
520                                     full_mx - 2, full_my - 2,
521                                     pic_width, pic_height);
522             src_cb = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
523         }
524         qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); // FIXME try variable height perhaps?
525         if (!square)
526             qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
527
528         src_cr = pic->f.data[2] + offset;
529         if (emu) {
530             s->dsp.emulated_edge_mc(s->edge_emu_buffer,
531                                     src_cr - (2 << pixel_shift) - 2 * h->mb_linesize,
532                                     h->mb_linesize,
533                                     16 + 5, 16 + 5 /*FIXME*/,
534                                     full_mx - 2, full_my - 2,
535                                     pic_width, pic_height);
536             src_cr = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
537         }
538         qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); // FIXME try variable height perhaps?
539         if (!square)
540             qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
541         return;
542     }
543
544     ysh = 3 - (chroma_idc == 2 /* yuv422 */);
545     if (chroma_idc == 1 /* yuv420 */ && MB_FIELD) {
546         // chroma offset when predicting from a field of opposite parity
547         my  += 2 * ((s->mb_y & 1) - (pic->f.reference - 1));
548         emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
549     }
550
551     src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) +
552              (my >> ysh) * h->mb_uvlinesize;
553     src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) +
554              (my >> ysh) * h->mb_uvlinesize;
555
556     if (emu) {
557         s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize,
558                                 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
559                                 pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
560         src_cb = s->edge_emu_buffer;
561     }
562     chroma_op(dest_cb, src_cb, h->mb_uvlinesize,
563               height >> (chroma_idc == 1 /* yuv420 */),
564               mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
565
566     if (emu) {
567         s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize,
568                                 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
569                                 pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
570         src_cr = s->edge_emu_buffer;
571     }
572     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
573               mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
574 }
575
576 static av_always_inline void mc_part_std(H264Context *h, int n, int square,
577                                          int height, int delta,
578                                          uint8_t *dest_y, uint8_t *dest_cb,
579                                          uint8_t *dest_cr,
580                                          int x_offset, int y_offset,
581                                          qpel_mc_func *qpix_put,
582                                          h264_chroma_mc_func chroma_put,
583                                          qpel_mc_func *qpix_avg,
584                                          h264_chroma_mc_func chroma_avg,
585                                          int list0, int list1,
586                                          int pixel_shift, int chroma_idc)
587 {
588     MpegEncContext *const s       = &h->s;
589     qpel_mc_func *qpix_op         = qpix_put;
590     h264_chroma_mc_func chroma_op = chroma_put;
591
592     dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
593     if (chroma_idc == 3 /* yuv444 */) {
594         dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
595         dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
596     } else if (chroma_idc == 2 /* yuv422 */) {
597         dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
598         dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
599     } else { /* yuv420 */
600         dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
601         dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
602     }
603     x_offset += 8 * s->mb_x;
604     y_offset += 8 * (s->mb_y >> MB_FIELD);
605
606     if (list0) {
607         Picture *ref = &h->ref_list[0][h->ref_cache[0][scan8[n]]];
608         mc_dir_part(h, ref, n, square, height, delta, 0,
609                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
610                     qpix_op, chroma_op, pixel_shift, chroma_idc);
611
612         qpix_op   = qpix_avg;
613         chroma_op = chroma_avg;
614     }
615
616     if (list1) {
617         Picture *ref = &h->ref_list[1][h->ref_cache[1][scan8[n]]];
618         mc_dir_part(h, ref, n, square, height, delta, 1,
619                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
620                     qpix_op, chroma_op, pixel_shift, chroma_idc);
621     }
622 }
623
624 static av_always_inline void mc_part_weighted(H264Context *h, int n, int square,
625                                               int height, int delta,
626                                               uint8_t *dest_y, uint8_t *dest_cb,
627                                               uint8_t *dest_cr,
628                                               int x_offset, int y_offset,
629                                               qpel_mc_func *qpix_put,
630                                               h264_chroma_mc_func chroma_put,
631                                               h264_weight_func luma_weight_op,
632                                               h264_weight_func chroma_weight_op,
633                                               h264_biweight_func luma_weight_avg,
634                                               h264_biweight_func chroma_weight_avg,
635                                               int list0, int list1,
636                                               int pixel_shift, int chroma_idc)
637 {
638     MpegEncContext *const s = &h->s;
639     int chroma_height;
640
641     dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
642     if (chroma_idc == 3 /* yuv444 */) {
643         chroma_height     = height;
644         chroma_weight_avg = luma_weight_avg;
645         chroma_weight_op  = luma_weight_op;
646         dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
647         dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
648     } else if (chroma_idc == 2 /* yuv422 */) {
649         chroma_height = height;
650         dest_cb      += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
651         dest_cr      += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
652     } else { /* yuv420 */
653         chroma_height = height >> 1;
654         dest_cb      += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
655         dest_cr      += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
656     }
657     x_offset += 8 * s->mb_x;
658     y_offset += 8 * (s->mb_y >> MB_FIELD);
659
660     if (list0 && list1) {
661         /* don't optimize for luma-only case, since B-frames usually
662          * use implicit weights => chroma too. */
663         uint8_t *tmp_cb = s->obmc_scratchpad;
664         uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
665         uint8_t *tmp_y  = s->obmc_scratchpad + 16 * h->mb_uvlinesize;
666         int refn0       = h->ref_cache[0][scan8[n]];
667         int refn1       = h->ref_cache[1][scan8[n]];
668
669         mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
670                     dest_y, dest_cb, dest_cr,
671                     x_offset, y_offset, qpix_put, chroma_put,
672                     pixel_shift, chroma_idc);
673         mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
674                     tmp_y, tmp_cb, tmp_cr,
675                     x_offset, y_offset, qpix_put, chroma_put,
676                     pixel_shift, chroma_idc);
677
678         if (h->use_weight == 2) {
679             int weight0 = h->implicit_weight[refn0][refn1][s->mb_y & 1];
680             int weight1 = 64 - weight0;
681             luma_weight_avg(dest_y, tmp_y, h->mb_linesize,
682                             height, 5, weight0, weight1, 0);
683             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize,
684                               chroma_height, 5, weight0, weight1, 0);
685             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize,
686                               chroma_height, 5, weight0, weight1, 0);
687         } else {
688             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height,
689                             h->luma_log2_weight_denom,
690                             h->luma_weight[refn0][0][0],
691                             h->luma_weight[refn1][1][0],
692                             h->luma_weight[refn0][0][1] +
693                             h->luma_weight[refn1][1][1]);
694             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height,
695                               h->chroma_log2_weight_denom,
696                               h->chroma_weight[refn0][0][0][0],
697                               h->chroma_weight[refn1][1][0][0],
698                               h->chroma_weight[refn0][0][0][1] +
699                               h->chroma_weight[refn1][1][0][1]);
700             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height,
701                               h->chroma_log2_weight_denom,
702                               h->chroma_weight[refn0][0][1][0],
703                               h->chroma_weight[refn1][1][1][0],
704                               h->chroma_weight[refn0][0][1][1] +
705                               h->chroma_weight[refn1][1][1][1]);
706         }
707     } else {
708         int list     = list1 ? 1 : 0;
709         int refn     = h->ref_cache[list][scan8[n]];
710         Picture *ref = &h->ref_list[list][refn];
711         mc_dir_part(h, ref, n, square, height, delta, list,
712                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
713                     qpix_put, chroma_put, pixel_shift, chroma_idc);
714
715         luma_weight_op(dest_y, h->mb_linesize, height,
716                        h->luma_log2_weight_denom,
717                        h->luma_weight[refn][list][0],
718                        h->luma_weight[refn][list][1]);
719         if (h->use_weight_chroma) {
720             chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height,
721                              h->chroma_log2_weight_denom,
722                              h->chroma_weight[refn][list][0][0],
723                              h->chroma_weight[refn][list][0][1]);
724             chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height,
725                              h->chroma_log2_weight_denom,
726                              h->chroma_weight[refn][list][1][0],
727                              h->chroma_weight[refn][list][1][1]);
728         }
729     }
730 }
731
732 static av_always_inline void mc_part(H264Context *h, int n, int square,
733                                      int height, int delta,
734                                      uint8_t *dest_y, uint8_t *dest_cb,
735                                      uint8_t *dest_cr,
736                                      int x_offset, int y_offset,
737                                      qpel_mc_func *qpix_put,
738                                      h264_chroma_mc_func chroma_put,
739                                      qpel_mc_func *qpix_avg,
740                                      h264_chroma_mc_func chroma_avg,
741                                      h264_weight_func *weight_op,
742                                      h264_biweight_func *weight_avg,
743                                      int list0, int list1,
744                                      int pixel_shift, int chroma_idc)
745 {
746     if ((h->use_weight == 2 && list0 && list1 &&
747          (h->implicit_weight[h->ref_cache[0][scan8[n]]][h->ref_cache[1][scan8[n]]][h->s.mb_y & 1] != 32)) ||
748         h->use_weight == 1)
749         mc_part_weighted(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
750                          x_offset, y_offset, qpix_put, chroma_put,
751                          weight_op[0], weight_op[1], weight_avg[0],
752                          weight_avg[1], list0, list1, pixel_shift, chroma_idc);
753     else
754         mc_part_std(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
755                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
756                     chroma_avg, list0, list1, pixel_shift, chroma_idc);
757 }
758
759 static av_always_inline void prefetch_motion(H264Context *h, int list,
760                                              int pixel_shift, int chroma_idc)
761 {
762     /* fetch pixels for estimated mv 4 macroblocks ahead
763      * optimized for 64byte cache lines */
764     MpegEncContext *const s = &h->s;
765     const int refn = h->ref_cache[list][scan8[0]];
766     if (refn >= 0) {
767         const int mx  = (h->mv_cache[list][scan8[0]][0] >> 2) + 16 * s->mb_x + 8;
768         const int my  = (h->mv_cache[list][scan8[0]][1] >> 2) + 16 * s->mb_y;
769         uint8_t **src = h->ref_list[list][refn].f.data;
770         int off       = (mx << pixel_shift) +
771                         (my + (s->mb_x & 3) * 4) * h->mb_linesize +
772                         (64 << pixel_shift);
773         s->dsp.prefetch(src[0] + off, s->linesize, 4);
774         if (chroma_idc == 3 /* yuv444 */) {
775             s->dsp.prefetch(src[1] + off, s->linesize, 4);
776             s->dsp.prefetch(src[2] + off, s->linesize, 4);
777         } else {
778             off= (((mx>>1)+64)<<pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize;
779             s->dsp.prefetch(src[1] + off, src[2] - src[1], 2);
780         }
781     }
782 }
783
784 static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y,
785                                        uint8_t *dest_cb, uint8_t *dest_cr,
786                                        qpel_mc_func(*qpix_put)[16],
787                                        h264_chroma_mc_func(*chroma_put),
788                                        qpel_mc_func(*qpix_avg)[16],
789                                        h264_chroma_mc_func(*chroma_avg),
790                                        h264_weight_func *weight_op,
791                                        h264_biweight_func *weight_avg,
792                                        int pixel_shift, int chroma_idc)
793 {
794     MpegEncContext *const s = &h->s;
795     const int mb_xy   = h->mb_xy;
796     const int mb_type = s->current_picture.f.mb_type[mb_xy];
797
798     assert(IS_INTER(mb_type));
799
800     if (HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
801         await_references(h);
802     prefetch_motion(h, 0, pixel_shift, chroma_idc);
803
804     if (IS_16X16(mb_type)) {
805         mc_part(h, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
806                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
807                 weight_op, weight_avg,
808                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
809                 pixel_shift, chroma_idc);
810     } else if (IS_16X8(mb_type)) {
811         mc_part(h, 0, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
812                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
813                 weight_op, weight_avg,
814                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
815                 pixel_shift, chroma_idc);
816         mc_part(h, 8, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
817                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
818                 weight_op, weight_avg,
819                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
820                 pixel_shift, chroma_idc);
821     } else if (IS_8X16(mb_type)) {
822         mc_part(h, 0, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
823                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
824                 &weight_op[1], &weight_avg[1],
825                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
826                 pixel_shift, chroma_idc);
827         mc_part(h, 4, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
828                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
829                 &weight_op[1], &weight_avg[1],
830                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
831                 pixel_shift, chroma_idc);
832     } else {
833         int i;
834
835         assert(IS_8X8(mb_type));
836
837         for (i = 0; i < 4; i++) {
838             const int sub_mb_type = h->sub_mb_type[i];
839             const int n  = 4 * i;
840             int x_offset = (i & 1) << 2;
841             int y_offset = (i & 2) << 1;
842
843             if (IS_SUB_8X8(sub_mb_type)) {
844                 mc_part(h, n, 1, 8, 0, dest_y, dest_cb, dest_cr,
845                         x_offset, y_offset,
846                         qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
847                         &weight_op[1], &weight_avg[1],
848                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
849                         pixel_shift, chroma_idc);
850             } else if (IS_SUB_8X4(sub_mb_type)) {
851                 mc_part(h, n, 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr,
852                         x_offset, y_offset,
853                         qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
854                         &weight_op[1], &weight_avg[1],
855                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
856                         pixel_shift, chroma_idc);
857                 mc_part(h, n + 2, 0, 4, 4 << pixel_shift,
858                         dest_y, dest_cb, dest_cr, x_offset, y_offset + 2,
859                         qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
860                         &weight_op[1], &weight_avg[1],
861                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
862                         pixel_shift, chroma_idc);
863             } else if (IS_SUB_4X8(sub_mb_type)) {
864                 mc_part(h, n, 0, 8, 4 * h->mb_linesize,
865                         dest_y, dest_cb, dest_cr, x_offset, y_offset,
866                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
867                         &weight_op[2], &weight_avg[2],
868                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
869                         pixel_shift, chroma_idc);
870                 mc_part(h, n + 1, 0, 8, 4 * h->mb_linesize,
871                         dest_y, dest_cb, dest_cr, x_offset + 2, y_offset,
872                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
873                         &weight_op[2], &weight_avg[2],
874                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
875                         pixel_shift, chroma_idc);
876             } else {
877                 int j;
878                 assert(IS_SUB_4X4(sub_mb_type));
879                 for (j = 0; j < 4; j++) {
880                     int sub_x_offset = x_offset + 2 * (j & 1);
881                     int sub_y_offset = y_offset + (j & 2);
882                     mc_part(h, n + j, 1, 4, 0,
883                             dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
884                             qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
885                             &weight_op[2], &weight_avg[2],
886                             IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
887                             pixel_shift, chroma_idc);
888                 }
889             }
890         }
891     }
892
893     prefetch_motion(h, 1, pixel_shift, chroma_idc);
894 }
895
896 static av_always_inline void hl_motion_420(H264Context *h, uint8_t *dest_y,
897                                            uint8_t *dest_cb, uint8_t *dest_cr,
898                                            qpel_mc_func(*qpix_put)[16],
899                                            h264_chroma_mc_func(*chroma_put),
900                                            qpel_mc_func(*qpix_avg)[16],
901                                            h264_chroma_mc_func(*chroma_avg),
902                                            h264_weight_func *weight_op,
903                                            h264_biweight_func *weight_avg,
904                                            int pixel_shift)
905 {
906     hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
907               qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 1);
908 }
909
910 static av_always_inline void hl_motion_422(H264Context *h, uint8_t *dest_y,
911                                            uint8_t *dest_cb, uint8_t *dest_cr,
912                                            qpel_mc_func(*qpix_put)[16],
913                                            h264_chroma_mc_func(*chroma_put),
914                                            qpel_mc_func(*qpix_avg)[16],
915                                            h264_chroma_mc_func(*chroma_avg),
916                                            h264_weight_func *weight_op,
917                                            h264_biweight_func *weight_avg,
918                                            int pixel_shift)
919 {
920     hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
921               qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 2);
922 }
923
924 static void free_tables(H264Context *h, int free_rbsp)
925 {
926     int i;
927     H264Context *hx;
928
929     av_freep(&h->intra4x4_pred_mode);
930     av_freep(&h->chroma_pred_mode_table);
931     av_freep(&h->cbp_table);
932     av_freep(&h->mvd_table[0]);
933     av_freep(&h->mvd_table[1]);
934     av_freep(&h->direct_table);
935     av_freep(&h->non_zero_count);
936     av_freep(&h->slice_table_base);
937     h->slice_table = NULL;
938     av_freep(&h->list_counts);
939
940     av_freep(&h->mb2b_xy);
941     av_freep(&h->mb2br_xy);
942
943     for (i = 0; i < MAX_THREADS; i++) {
944         hx = h->thread_context[i];
945         if (!hx)
946             continue;
947         av_freep(&hx->top_borders[1]);
948         av_freep(&hx->top_borders[0]);
949         av_freep(&hx->s.obmc_scratchpad);
950         if (free_rbsp) {
951             av_freep(&hx->rbsp_buffer[1]);
952             av_freep(&hx->rbsp_buffer[0]);
953             hx->rbsp_buffer_size[0] = 0;
954             hx->rbsp_buffer_size[1] = 0;
955         }
956         if (i)
957             av_freep(&h->thread_context[i]);
958     }
959 }
960
961 static void init_dequant8_coeff_table(H264Context *h)
962 {
963     int i, j, q, x;
964     const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8);
965
966     for (i = 0; i < 6; i++) {
967         h->dequant8_coeff[i] = h->dequant8_buffer[i];
968         for (j = 0; j < i; j++)
969             if (!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i],
970                         64 * sizeof(uint8_t))) {
971                 h->dequant8_coeff[i] = h->dequant8_buffer[j];
972                 break;
973             }
974         if (j < i)
975             continue;
976
977         for (q = 0; q < max_qp + 1; q++) {
978             int shift = div6[q];
979             int idx   = rem6[q];
980             for (x = 0; x < 64; x++)
981                 h->dequant8_coeff[i][q][(x >> 3) | ((x & 7) << 3)] =
982                     ((uint32_t)dequant8_coeff_init[idx][dequant8_coeff_init_scan[((x >> 1) & 12) | (x & 3)]] *
983                      h->pps.scaling_matrix8[i][x]) << shift;
984         }
985     }
986 }
987
988 static void init_dequant4_coeff_table(H264Context *h)
989 {
990     int i, j, q, x;
991     const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8);
992     for (i = 0; i < 6; i++) {
993         h->dequant4_coeff[i] = h->dequant4_buffer[i];
994         for (j = 0; j < i; j++)
995             if (!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i],
996                         16 * sizeof(uint8_t))) {
997                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
998                 break;
999             }
1000         if (j < i)
1001             continue;
1002
1003         for (q = 0; q < max_qp + 1; q++) {
1004             int shift = div6[q] + 2;
1005             int idx   = rem6[q];
1006             for (x = 0; x < 16; x++)
1007                 h->dequant4_coeff[i][q][(x >> 2) | ((x << 2) & 0xF)] =
1008                     ((uint32_t)dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] *
1009                      h->pps.scaling_matrix4[i][x]) << shift;
1010         }
1011     }
1012 }
1013
1014 static void init_dequant_tables(H264Context *h)
1015 {
1016     int i, x;
1017     init_dequant4_coeff_table(h);
1018     if (h->pps.transform_8x8_mode)
1019         init_dequant8_coeff_table(h);
1020     if (h->sps.transform_bypass) {
1021         for (i = 0; i < 6; i++)
1022             for (x = 0; x < 16; x++)
1023                 h->dequant4_coeff[i][0][x] = 1 << 6;
1024         if (h->pps.transform_8x8_mode)
1025             for (i = 0; i < 6; i++)
1026                 for (x = 0; x < 64; x++)
1027                     h->dequant8_coeff[i][0][x] = 1 << 6;
1028     }
1029 }
1030
1031 int ff_h264_alloc_tables(H264Context *h)
1032 {
1033     MpegEncContext *const s = &h->s;
1034     const int big_mb_num    = s->mb_stride * (s->mb_height + 1);
1035     const int row_mb_num    = 2*s->mb_stride*FFMAX(s->avctx->thread_count, 1);
1036     int x, y;
1037
1038     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode,
1039                       row_mb_num * 8 * sizeof(uint8_t), fail)
1040     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count,
1041                       big_mb_num * 48 * sizeof(uint8_t), fail)
1042     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base,
1043                       (big_mb_num + s->mb_stride) * sizeof(*h->slice_table_base), fail)
1044     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table,
1045                       big_mb_num * sizeof(uint16_t), fail)
1046     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table,
1047                       big_mb_num * sizeof(uint8_t), fail)
1048     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0],
1049                       16 * row_mb_num * sizeof(uint8_t), fail);
1050     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1],
1051                       16 * row_mb_num * sizeof(uint8_t), fail);
1052     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table,
1053                       4 * big_mb_num * sizeof(uint8_t), fail);
1054     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts,
1055                       big_mb_num * sizeof(uint8_t), fail)
1056
1057     memset(h->slice_table_base, -1,
1058            (big_mb_num + s->mb_stride) * sizeof(*h->slice_table_base));
1059     h->slice_table = h->slice_table_base + s->mb_stride * 2 + 1;
1060
1061     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy,
1062                       big_mb_num * sizeof(uint32_t), fail);
1063     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy,
1064                       big_mb_num * sizeof(uint32_t), fail);
1065     for (y = 0; y < s->mb_height; y++)
1066         for (x = 0; x < s->mb_width; x++) {
1067             const int mb_xy = x + y * s->mb_stride;
1068             const int b_xy  = 4 * x + 4 * y * h->b_stride;
1069
1070             h->mb2b_xy[mb_xy]  = b_xy;
1071             h->mb2br_xy[mb_xy] = 8 * (FMO ? mb_xy : (mb_xy % (2 * s->mb_stride)));
1072         }
1073
1074     s->obmc_scratchpad = NULL;
1075
1076     if (!h->dequant4_coeff[0])
1077         init_dequant_tables(h);
1078
1079     return 0;
1080
1081 fail:
1082     free_tables(h, 1);
1083     return -1;
1084 }
1085
1086 /**
1087  * Mimic alloc_tables(), but for every context thread.
1088  */
1089 static void clone_tables(H264Context *dst, H264Context *src, int i)
1090 {
1091     MpegEncContext *const s     = &src->s;
1092     dst->intra4x4_pred_mode     = src->intra4x4_pred_mode + i * 8 * 2 * s->mb_stride;
1093     dst->non_zero_count         = src->non_zero_count;
1094     dst->slice_table            = src->slice_table;
1095     dst->cbp_table              = src->cbp_table;
1096     dst->mb2b_xy                = src->mb2b_xy;
1097     dst->mb2br_xy               = src->mb2br_xy;
1098     dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
1099     dst->mvd_table[0]           = src->mvd_table[0] + i * 8 * 2 * s->mb_stride;
1100     dst->mvd_table[1]           = src->mvd_table[1] + i * 8 * 2 * s->mb_stride;
1101     dst->direct_table           = src->direct_table;
1102     dst->list_counts            = src->list_counts;
1103     dst->s.obmc_scratchpad      = NULL;
1104     ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma,
1105                       src->sps.chroma_format_idc);
1106 }
1107
1108 /**
1109  * Init context
1110  * Allocate buffers which are not shared amongst multiple threads.
1111  */
1112 static int context_init(H264Context *h)
1113 {
1114     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0],
1115                       h->s.mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail)
1116     FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1],
1117                       h->s.mb_width * 16 * 3 * sizeof(uint8_t) * 2, fail)
1118
1119     h->ref_cache[0][scan8[5]  + 1] =
1120     h->ref_cache[0][scan8[7]  + 1] =
1121     h->ref_cache[0][scan8[13] + 1] =
1122     h->ref_cache[1][scan8[5]  + 1] =
1123     h->ref_cache[1][scan8[7]  + 1] =
1124     h->ref_cache[1][scan8[13] + 1] = PART_NOT_AVAILABLE;
1125
1126     return 0;
1127
1128 fail:
1129     return -1; // free_tables will clean up for us
1130 }
1131
/* Forward declaration: ff_h264_decode_extradata() below calls this before
 * its definition appears in this file. */
static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
1133
1134 static av_cold void common_init(H264Context *h)
1135 {
1136     MpegEncContext *const s = &h->s;
1137
1138     s->width    = s->avctx->width;
1139     s->height   = s->avctx->height;
1140     s->codec_id = s->avctx->codec->id;
1141
1142     s->avctx->bits_per_raw_sample = 8;
1143     h->cur_chroma_format_idc = 1;
1144
1145     ff_h264dsp_init(&h->h264dsp,
1146                     s->avctx->bits_per_raw_sample, h->cur_chroma_format_idc);
1147     ff_h264_pred_init(&h->hpc, s->codec_id,
1148                       s->avctx->bits_per_raw_sample, h->cur_chroma_format_idc);
1149
1150     h->dequant_coeff_pps = -1;
1151     s->unrestricted_mv   = 1;
1152
1153     s->dsp.dct_bits = 16;
1154     /* needed so that IDCT permutation is known early */
1155     ff_dsputil_init(&s->dsp, s->avctx);
1156
1157     memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t));
1158     memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t));
1159 }
1160
1161 int ff_h264_decode_extradata(H264Context *h, const uint8_t *buf, int size)
1162 {
1163     AVCodecContext *avctx = h->s.avctx;
1164
1165     if (!buf || size <= 0)
1166         return -1;
1167
1168     if (buf[0] == 1) {
1169         int i, cnt, nalsize;
1170         const unsigned char *p = buf;
1171
1172         h->is_avc = 1;
1173
1174         if (size < 7) {
1175             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
1176             return -1;
1177         }
1178         /* sps and pps in the avcC always have length coded with 2 bytes,
1179          * so put a fake nal_length_size = 2 while parsing them */
1180         h->nal_length_size = 2;
1181         // Decode sps from avcC
1182         cnt = *(p + 5) & 0x1f; // Number of sps
1183         p  += 6;
1184         for (i = 0; i < cnt; i++) {
1185             nalsize = AV_RB16(p) + 2;
1186             if(nalsize > size - (p-buf))
1187                 return -1;
1188             if (decode_nal_units(h, p, nalsize) < 0) {
1189                 av_log(avctx, AV_LOG_ERROR,
1190                        "Decoding sps %d from avcC failed\n", i);
1191                 return -1;
1192             }
1193             p += nalsize;
1194         }
1195         // Decode pps from avcC
1196         cnt = *(p++); // Number of pps
1197         for (i = 0; i < cnt; i++) {
1198             nalsize = AV_RB16(p) + 2;
1199             if(nalsize > size - (p-buf))
1200                 return -1;
1201             if (decode_nal_units(h, p, nalsize) < 0) {
1202                 av_log(avctx, AV_LOG_ERROR,
1203                        "Decoding pps %d from avcC failed\n", i);
1204                 return -1;
1205             }
1206             p += nalsize;
1207         }
1208         // Now store right nal length size, that will be used to parse all other nals
1209         h->nal_length_size = (buf[4] & 0x03) + 1;
1210     } else {
1211         h->is_avc = 0;
1212         if (decode_nal_units(h, buf, size) < 0)
1213             return -1;
1214     }
1215     return size;
1216 }
1217
1218 av_cold int ff_h264_decode_init(AVCodecContext *avctx)
1219 {
1220     H264Context *h = avctx->priv_data;
1221     MpegEncContext *const s = &h->s;
1222     int i;
1223
1224     ff_MPV_decode_defaults(s);
1225
1226     s->avctx = avctx;
1227     common_init(h);
1228
1229     s->out_format      = FMT_H264;
1230     s->workaround_bugs = avctx->workaround_bugs;
1231
1232     /* set defaults */
1233     // s->decode_mb = ff_h263_decode_mb;
1234     s->quarter_sample = 1;
1235     if (!avctx->has_b_frames)
1236         s->low_delay = 1;
1237
1238     avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
1239
1240     ff_h264_decode_init_vlc();
1241
1242     h->pixel_shift = 0;
1243     h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
1244
1245     h->thread_context[0] = h;
1246     h->outputed_poc      = h->next_outputed_poc = INT_MIN;
1247     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
1248         h->last_pocs[i] = INT_MIN;
1249     h->prev_poc_msb = 1 << 16;
1250     h->prev_frame_num = -1;
1251     h->x264_build   = -1;
1252     ff_h264_reset_sei(h);
1253     if (avctx->codec_id == CODEC_ID_H264) {
1254         if (avctx->ticks_per_frame == 1)
1255             s->avctx->time_base.den *= 2;
1256         avctx->ticks_per_frame = 2;
1257     }
1258
1259     if (avctx->extradata_size > 0 && avctx->extradata &&
1260         ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size) < 0) {
1261         ff_h264_free_context(h);
1262         return -1;
1263     }
1264
1265     if (h->sps.bitstream_restriction_flag &&
1266         s->avctx->has_b_frames < h->sps.num_reorder_frames) {
1267         s->avctx->has_b_frames = h->sps.num_reorder_frames;
1268         s->low_delay           = 0;
1269     }
1270
1271     return 0;
1272 }
1273
1274 #define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b) + (size))))
1275
1276 static void copy_picture_range(Picture **to, Picture **from, int count,
1277                                MpegEncContext *new_base,
1278                                MpegEncContext *old_base)
1279 {
1280     int i;
1281
1282     for (i = 0; i < count; i++) {
1283         assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
1284                 IN_RANGE(from[i], old_base->picture,
1285                          sizeof(Picture) * old_base->picture_count) ||
1286                 !from[i]));
1287         to[i] = REBASE_PICTURE(from[i], new_base, old_base);
1288     }
1289 }
1290
/**
 * Synchronise an array of parameter set buffers with another one.
 *
 * For every index: if the source has no entry the destination entry is
 * freed; otherwise the destination entry is allocated if missing and the
 * source contents are copied into it.
 *
 * If an allocation fails, that destination entry is left NULL and the
 * copy for it is skipped instead of writing through a NULL pointer.
 *
 * @param to    destination array of count buffers
 * @param from  source array of count buffers
 * @param count number of entries in both arrays
 * @param size  size in bytes of one parameter set
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i = 0; i < count; i++) {
        if (!from[i]) {
            /* no source entry: drop a stale destination entry, if any */
            if (to[i])
                av_freep(&to[i]);
            continue;
        }

        if (!to[i]) {
            to[i] = av_malloc(size);
            if (!to[i])
                continue; /* out of memory: leave this entry unset */
        }

        memcpy(to[i], from[i], size);
    }
}
1305
1306 static int decode_init_thread_copy(AVCodecContext *avctx)
1307 {
1308     H264Context *h = avctx->priv_data;
1309
1310     if (!avctx->internal->is_copy)
1311         return 0;
1312     memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
1313     memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
1314
1315     return 0;
1316 }
1317
/* Copy the bytes of every struct member from start_field up to, but not
 * including, end_field from *from to *to with a single memcpy(). The length
 * is the address distance between the two members in *to, so end_field must
 * be located after start_field in the struct. */
#define copy_fields(to, from, start_field, end_field)                   \
    memcpy(&to->start_field, &from->start_field,                        \
           (char *)&to->end_field - (char *)&to->start_field)
1321
1322 static int decode_update_thread_context(AVCodecContext *dst,
1323                                         const AVCodecContext *src)
1324 {
1325     H264Context *h = dst->priv_data, *h1 = src->priv_data;
1326     MpegEncContext *const s = &h->s, *const s1 = &h1->s;
1327     int inited = s->context_initialized, err;
1328     int i;
1329
1330     if (dst == src)
1331         return 0;
1332
1333     err = ff_mpeg_update_thread_context(dst, src);
1334     if (err)
1335         return err;
1336
1337     // FIXME handle width/height changing
1338     if (!inited) {
1339         for (i = 0; i < MAX_SPS_COUNT; i++)
1340             av_freep(h->sps_buffers + i);
1341
1342         for (i = 0; i < MAX_PPS_COUNT; i++)
1343             av_freep(h->pps_buffers + i);
1344
1345         // copy all fields after MpegEnc
1346         memcpy(&h->s + 1, &h1->s + 1,
1347                sizeof(H264Context) - sizeof(MpegEncContext));
1348         memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
1349         memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
1350
1351         if (s1->context_initialized) {
1352         if (ff_h264_alloc_tables(h) < 0) {
1353             av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n");
1354             return AVERROR(ENOMEM);
1355         }
1356         context_init(h);
1357
1358         /* frame_start may not be called for the next thread (if it's decoding
1359          * a bottom field) so this has to be allocated here */
1360         h->s.obmc_scratchpad = av_malloc(16 * 6 * s->linesize);
1361         }
1362
1363         for (i = 0; i < 2; i++) {
1364             h->rbsp_buffer[i]      = NULL;
1365             h->rbsp_buffer_size[i] = 0;
1366         }
1367
1368         h->thread_context[0] = h;
1369
1370         s->dsp.clear_blocks(h->mb);
1371         s->dsp.clear_blocks(h->mb + (24 * 16 << h->pixel_shift));
1372     }
1373
1374     // extradata/NAL handling
1375     h->is_avc = h1->is_avc;
1376
1377     // SPS/PPS
1378     copy_parameter_set((void **)h->sps_buffers, (void **)h1->sps_buffers,
1379                        MAX_SPS_COUNT, sizeof(SPS));
1380     h->sps = h1->sps;
1381     copy_parameter_set((void **)h->pps_buffers, (void **)h1->pps_buffers,
1382                        MAX_PPS_COUNT, sizeof(PPS));
1383     h->pps = h1->pps;
1384
1385     // Dequantization matrices
1386     // FIXME these are big - can they be only copied when PPS changes?
1387     copy_fields(h, h1, dequant4_buffer, dequant4_coeff);
1388
1389     for (i = 0; i < 6; i++)
1390         h->dequant4_coeff[i] = h->dequant4_buffer[0] +
1391                                (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);
1392
1393     for (i = 0; i < 6; i++)
1394         h->dequant8_coeff[i] = h->dequant8_buffer[0] +
1395                                (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);
1396
1397     h->dequant_coeff_pps = h1->dequant_coeff_pps;
1398
1399     // POC timing
1400     copy_fields(h, h1, poc_lsb, redundant_pic_count);
1401
1402     // reference lists
1403     copy_fields(h, h1, ref_count, list_count);
1404     copy_fields(h, h1, ref_list, intra_gb);
1405     copy_fields(h, h1, short_ref, cabac_init_idc);
1406
1407     copy_picture_range(h->short_ref, h1->short_ref, 32, s, s1);
1408     copy_picture_range(h->long_ref, h1->long_ref, 32, s, s1);
1409     copy_picture_range(h->delayed_pic, h1->delayed_pic,
1410                        MAX_DELAYED_PIC_COUNT + 2, s, s1);
1411
1412     h->last_slice_type = h1->last_slice_type;
1413     h->sync            = h1->sync;
1414
1415     if (!s->current_picture_ptr)
1416         return 0;
1417
1418     if (!s->dropable) {
1419         err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
1420         h->prev_poc_msb = h->poc_msb;
1421         h->prev_poc_lsb = h->poc_lsb;
1422     }
1423     h->prev_frame_num_offset = h->frame_num_offset;
1424     h->prev_frame_num        = h->frame_num;
1425     h->outputed_poc          = h->next_outputed_poc;
1426
1427     return err;
1428 }
1429
1430 int ff_h264_frame_start(H264Context *h)
1431 {
1432     MpegEncContext *const s = &h->s;
1433     int i;
1434     const int pixel_shift = h->pixel_shift;
1435
1436     if (ff_MPV_frame_start(s, s->avctx) < 0)
1437         return -1;
1438     ff_er_frame_start(s);
1439     /*
1440      * ff_MPV_frame_start uses pict_type to derive key_frame.
1441      * This is incorrect for H.264; IDR markings must be used.
1442      * Zero here; IDR markings per slice in frame or fields are ORed in later.
1443      * See decode_nal_units().
1444      */
1445     s->current_picture_ptr->f.key_frame = 0;
1446     s->current_picture_ptr->sync        = 0;
1447     s->current_picture_ptr->mmco_reset  = 0;
1448
1449     assert(s->linesize && s->uvlinesize);
1450
1451     for (i = 0; i < 16; i++) {
1452         h->block_offset[i]           = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * s->linesize * ((scan8[i] - scan8[0]) >> 3);
1453         h->block_offset[48 + i]      = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * s->linesize * ((scan8[i] - scan8[0]) >> 3);
1454     }
1455     for (i = 0; i < 16; i++) {
1456         h->block_offset[16 + i]      =
1457         h->block_offset[32 + i]      = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 4 * s->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1458         h->block_offset[48 + 16 + i] =
1459         h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * s->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1460     }
1461
1462     /* can't be in alloc_tables because linesize isn't known there.
1463      * FIXME: redo bipred weight to not require extra buffer? */
1464     for (i = 0; i < s->slice_context_count; i++)
1465         if (h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
1466             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16 * 6 * s->linesize);
1467
1468     /* Some macroblocks can be accessed before they're available in case
1469      * of lost slices, MBAFF or threading. */
1470     memset(h->slice_table, -1,
1471            (s->mb_height * s->mb_stride - 1) * sizeof(*h->slice_table));
1472
1473     // s->decode = (s->flags & CODEC_FLAG_PSNR) || !s->encoding ||
1474     //             s->current_picture.f.reference /* || h->contains_intra */ || 1;
1475
1476     /* We mark the current picture as non-reference after allocating it, so
1477      * that if we break out due to an error it can be released automatically
1478      * in the next ff_MPV_frame_start().
1479      * SVQ3 as well as most other codecs have only last/next/current and thus
1480      * get released even with set reference, besides SVQ3 and others do not
1481      * mark frames as reference later "naturally". */
1482     if (s->codec_id != CODEC_ID_SVQ3)
1483         s->current_picture_ptr->f.reference = 0;
1484
1485     s->current_picture_ptr->field_poc[0]     =
1486         s->current_picture_ptr->field_poc[1] = INT_MAX;
1487
1488     h->next_output_pic = NULL;
1489
1490     assert(s->current_picture_ptr->long_ref == 0);
1491
1492     return 0;
1493 }
1494
1495 /**
1496  * Run setup operations that must be run after slice header decoding.
1497  * This includes finding the next displayed frame.
      *
      * In order, the function tags the current picture with its qscale type
      * and picture type, derives interlaced_frame / repeat_pict /
      * top_field_first, grows avctx->has_b_frames when a deeper reorder
      * buffer appears to be needed, appends the current picture to
      * h->delayed_pic and finally selects h->next_output_pic (which stays
      * unset when no picture can be output yet).
      *
      * The function returns early, without calling ff_thread_finish_setup(),
      * when h->next_output_pic is already set or when one of the field POCs
      * of the current picture is still INT_MAX.
1498  *
1499  * @param h h264 master context
1500  * @param setup_finished enough NALs have been read that we can call
1501  * ff_thread_finish_setup()
1502  */
1503 static void decode_postinit(H264Context *h, int setup_finished)
1504 {
1505     MpegEncContext *const s = &h->s;
1506     Picture *out = s->current_picture_ptr;
1507     Picture *cur = s->current_picture_ptr;
1508     int i, pics, out_of_order, out_idx;
1509
1510     s->current_picture_ptr->f.qscale_type = FF_QSCALE_TYPE_H264;
1511     s->current_picture_ptr->f.pict_type   = s->pict_type;
1512
         /* An output picture has already been chosen: skip everything below. */
1513     if (h->next_output_pic)
1514         return;
1515
         /* A field POC that is still INT_MAX has not been filled in yet
          * (presumably only one field of the picture has been decoded so far). */
1516     if (cur->field_poc[0] == INT_MAX || cur->field_poc[1] == INT_MAX) {
1517         /* FIXME: if we have two PAFF fields in one packet, we can't start
1518          * the next thread here. If we have one field per packet, we can.
1519          * The check in decode_nal_units() is not good enough to find this
1520          * yet, so we assume the worst for now. */
1521         // if (setup_finished)
1522         //    ff_thread_finish_setup(s->avctx);
1523         return;
1524     }
1525
1526     cur->f.interlaced_frame = 0;
1527     cur->f.repeat_pict      = 0;
1528
1529     /* Signal interlacing information externally. */
1530     /* Prioritize picture timing SEI information over used
1531      * decoding process if it exists. */
1532
1533     if (h->sps.pic_struct_present_flag) {
1534         switch (h->sei_pic_struct) {
1535         case SEI_PIC_STRUCT_FRAME:
1536             break;
1537         case SEI_PIC_STRUCT_TOP_FIELD:
1538         case SEI_PIC_STRUCT_BOTTOM_FIELD:
1539             cur->f.interlaced_frame = 1;
1540             break;
1541         case SEI_PIC_STRUCT_TOP_BOTTOM:
1542         case SEI_PIC_STRUCT_BOTTOM_TOP:
1543             if (FIELD_OR_MBAFF_PICTURE)
1544                 cur->f.interlaced_frame = 1;
1545             else
1546                 // try to flag soft telecine progressive
1547                 cur->f.interlaced_frame = h->prev_interlaced_frame;
1548             break;
1549         case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
1550         case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
1551             /* Signal the possibility of telecined film externally
1552              * (pic_struct 5,6). From these hints, let the applications
1553              * decide if they apply deinterlacing. */
1554             cur->f.repeat_pict = 1;
1555             break;
1556         case SEI_PIC_STRUCT_FRAME_DOUBLING:
1557             // Force progressive here, doubling interlaced frame is a bad idea.
1558             cur->f.repeat_pict = 2;
1559             break;
1560         case SEI_PIC_STRUCT_FRAME_TRIPLING:
1561             cur->f.repeat_pict = 4;
1562             break;
1563         }
1564
             /* For pic_struct values up to BOTTOM_TOP, a ct_type with either of
              * its two low bits set overrides the flag chosen above: bit 1 of
              * ct_type decides whether the frame is flagged as interlaced. */
1565         if ((h->sei_ct_type & 3) &&
1566             h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
1567             cur->f.interlaced_frame = (h->sei_ct_type & (1 << 1)) != 0;
1568     } else {
1569         /* Derive interlacing flag from used decoding process. */
1570         cur->f.interlaced_frame = FIELD_OR_MBAFF_PICTURE;
1571     }
         /* Remembered for the TOP_BOTTOM / BOTTOM_TOP case of the next picture. */
1572     h->prev_interlaced_frame = cur->f.interlaced_frame;
1573
1574     if (cur->field_poc[0] != cur->field_poc[1]) {
1575         /* Derive top_field_first from field pocs. */
1576         cur->f.top_field_first = cur->field_poc[0] < cur->field_poc[1];
1577     } else {
1578         if (cur->f.interlaced_frame || h->sps.pic_struct_present_flag) {
1579             /* Use picture timing SEI information. Even if it is
1580              * information from a past frame, it is better than nothing. */
1581             if (h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM ||
1582                 h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
1583                 cur->f.top_field_first = 1;
1584             else
1585                 cur->f.top_field_first = 0;
1586         } else {
1587             /* Most likely progressive */
1588             cur->f.top_field_first = 0;
1589         }
1590     }
1591
         /* Hand the pending MMCO reset flag over to the picture and clear it
          * in the context. */
1592     cur->mmco_reset = h->mmco_reset;
1593     h->mmco_reset = 0;
1594     // FIXME do something with unavailable reference frames
1595
1596     /* Sort B-frames into display order */
1597
         /* The SPS announces a reorder depth larger than the current delay:
          * adopt it. */
1598     if (h->sps.bitstream_restriction_flag &&
1599         s->avctx->has_b_frames < h->sps.num_reorder_frames) {
1600         s->avctx->has_b_frames = h->sps.num_reorder_frames;
1601         s->low_delay           = 0;
1602     }
1603
         /* Strict compliance without bitstream restriction info: use the
          * largest delay the delayed-picture bookkeeping allows for. */
1604     if (s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT &&
1605         !h->sps.bitstream_restriction_flag) {
1606         s->avctx->has_b_frames = MAX_DELAYED_PIC_COUNT - 1;
1607         s->low_delay           = 0;
1608     }
1609
         /* Insert cur->poc into h->last_pocs. Entries that are not greater
          * than cur->poc are moved down one slot (last_pocs[0] is dropped) and
          * cur->poc is stored just below the first greater entry. The loop
          * leaves i at the index of that first greater entry, or at
          * MAX_DELAYED_PIC_COUNT if there is none. If cur->poc is smaller
          * than last_pocs[0] the array is left untouched.
          * NOTE(review): this keeps the array ascending only if it was
          * ascending on entry -- confirm against where last_pocs is reset. */
1610     for (i = 0; 1; i++) {
1611         if(i == MAX_DELAYED_PIC_COUNT || cur->poc < h->last_pocs[i]){
1612             if(i)
1613                 h->last_pocs[i-1] = cur->poc;
1614             break;
1615         } else if(i) {
1616             h->last_pocs[i-1]= h->last_pocs[i];
1617         }
1618     }
         /* Number of slots from the insertion point to the end of last_pocs. */
1619     out_of_order = MAX_DELAYED_PIC_COUNT - i;
         /* A B picture, or a jump of more than 2 between the two highest
          * tracked POCs, asks for at least one picture of reordering. */
1620     if(   cur->f.pict_type == AV_PICTURE_TYPE_B
1621        || (h->last_pocs[MAX_DELAYED_PIC_COUNT-2] > INT_MIN && h->last_pocs[MAX_DELAYED_PIC_COUNT-1] - h->last_pocs[MAX_DELAYED_PIC_COUNT-2] > 2))
1622         out_of_order = FFMAX(out_of_order, 1);
         /* Only grow the delay heuristically when the SPS gives no explicit
          * bitstream restriction information. */
1623     if(s->avctx->has_b_frames < out_of_order && !h->sps.bitstream_restriction_flag){
1624         av_log(s->avctx, AV_LOG_VERBOSE, "Increasing reorder buffer to %d\n", out_of_order);
1625         s->avctx->has_b_frames = out_of_order;
1626         s->low_delay = 0;
1627     }
1628
         /* h->delayed_pic is walked as a NULL-terminated list: count its
          * entries, then append the current picture. */
1629     pics = 0;
1630     while (h->delayed_pic[pics])
1631         pics++;
1632
1633     av_assert0(pics <= MAX_DELAYED_PIC_COUNT);
1634
1635     h->delayed_pic[pics++] = cur;
         /* Flag a non-reference picture as held by the delay list. */
1636     if (cur->f.reference == 0)
1637         cur->f.reference = DELAYED_PIC_REF;
1638
         /* Pick the delayed picture with the lowest POC. The scan starts at
          * entry 0 and stops at the first later entry that is a key frame or
          * carries an MMCO reset. */
1639     out = h->delayed_pic[0];
1640     out_idx = 0;
1641     for (i = 1; h->delayed_pic[i] &&
1642                 !h->delayed_pic[i]->f.key_frame &&
1643                 !h->delayed_pic[i]->mmco_reset;
1644          i++)
1645         if (h->delayed_pic[i]->poc < out->poc) {
1646             out     = h->delayed_pic[i];
1647             out_idx = i;
1648         }
         /* With no reorder delay, a key frame / MMCO reset at the head of the
          * list restarts the output POC tracking. */
1649     if (s->avctx->has_b_frames == 0 &&
1650         (h->delayed_pic[0]->f.key_frame || h->delayed_pic[0]->mmco_reset))
1651         h->next_outputed_poc = INT_MIN;
         /* From here on out_of_order is a boolean: the candidate's POC lies
          * before the POC recorded for the last output. */
1652     out_of_order = out->poc < h->next_outputed_poc;
1653
         /* Remove the candidate from the list, either because it arrived too
          * late to be shown or because the list holds more pictures than the
          * delay. The shift also moves the NULL terminator down. */
1654     if (out_of_order || pics > s->avctx->has_b_frames) {
1655         out->f.reference &= ~DELAYED_PIC_REF;
1656         // for frame threading, the owner must be the second field's thread or
1657         // else the first thread can release the picture and reuse it unsafely
1658         out->owner2       = s;
1659         for (i = out_idx; h->delayed_pic[i]; i++)
1660             h->delayed_pic[i] = h->delayed_pic[i + 1];
1661     }
         /* Output the candidate only if it is in order and the list was
          * over-full; an out-of-order candidate has been removed above
          * without being output. */
1662     if (!out_of_order && pics > s->avctx->has_b_frames) {
1663         h->next_output_pic = out;
             /* If the head was output and the new head is a key frame or has
              * an MMCO reset, restart POC tracking; otherwise remember the
              * POC that was just output. */
1664         if (out_idx == 0 && h->delayed_pic[0] && (h->delayed_pic[0]->f.key_frame || h->delayed_pic[0]->mmco_reset)) {
1665             h->next_outputed_poc = INT_MIN;
1666         } else
1667             h->next_outputed_poc = out->poc;
1668     } else {
1669         av_log(s->avctx, AV_LOG_DEBUG, "no picture %s\n", out_of_order ? "ooo" : "");
1670     }
1671
         /* Propagate the sync state of the output picture into bit 1 of
          * h->sync. */
1672     if (h->next_output_pic && h->next_output_pic->sync) {
1673         h->sync |= 2;
1674     }
1675
1676     if (setup_finished)
1677         ff_thread_finish_setup(s->avctx);
1678 }
1679
1680 static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y,
1681                                               uint8_t *src_cb, uint8_t *src_cr,
1682                                               int linesize, int uvlinesize,
1683                                               int simple)
1684 {
         /* Save pixel rows of the current macroblock into h->top_borders so
          * that they stay available after the picture itself is modified.
          *
          * The luma row is stored at offset 0 of the border entry. The chroma
          * rows follow at offsets that depend on pixel_shift and on the
          * chroma format, see the individual branches below.
          *
          * When `simple` is non-zero the FRAME_MBAFF handling is skipped and
          * chroma is always saved; otherwise chroma is skipped when
          * CONFIG_GRAY is enabled and CODEC_FLAG_GRAY is set.
          *
          * NOTE(review): src_y / src_cb / src_cr presumably point at the
          * top-left sample of the macroblock in each plane -- confirm against
          * the callers. */
1685     MpegEncContext *const s = &h->s;
1686     uint8_t *top_border;
1687     int top_idx = 1;
1688     const int pixel_shift = h->pixel_shift;
1689     int chroma444 = CHROMA444;
1690     int chroma422 = CHROMA422;
1691
         /* Step one row up; all row offsets below (15/16 for 16-row planes,
          * 7/8 for 8-row chroma) are relative to that row. */
1692     src_y  -= linesize;
1693     src_cb -= uvlinesize;
1694     src_cr -= uvlinesize;
1695
1696     if (!simple && FRAME_MBAFF) {
1697         if (s->mb_y & 1) {
                 /* Odd mb_y and not MB_MBAFF: additionally store the row
                  * one above the one saved at the end of this function, into
                  * border set 0. The regular save into set 1 still happens
                  * below. */
1698             if (!MB_MBAFF) {
1699                 top_border = h->top_borders[0][s->mb_x];
1700                 AV_COPY128(top_border, src_y + 15 * linesize);
1701                 if (pixel_shift)
1702                     AV_COPY128(top_border + 16, src_y + 15 * linesize + 16);
1703                 if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
1704                     if (chroma444) {
1705                         if (pixel_shift) {
1706                             AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize);
1707                             AV_COPY128(top_border + 48, src_cb + 15 * uvlinesize + 16);
1708                             AV_COPY128(top_border + 64, src_cr + 15 * uvlinesize);
1709                             AV_COPY128(top_border + 80, src_cr + 15 * uvlinesize + 16);
1710                         } else {
1711                             AV_COPY128(top_border + 16, src_cb + 15 * uvlinesize);
1712                             AV_COPY128(top_border + 32, src_cr + 15 * uvlinesize);
1713                         }
1714                     } else if (chroma422) {
1715                         if (pixel_shift) {
1716                             AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize);
1717                             AV_COPY128(top_border + 48, src_cr + 15 * uvlinesize);
1718                         } else {
1719                             AV_COPY64(top_border + 16, src_cb + 15 * uvlinesize);
1720                             AV_COPY64(top_border + 24, src_cr + 15 * uvlinesize);
1721                         }
1722                     } else {
1723                         if (pixel_shift) {
1724                             AV_COPY128(top_border + 32, src_cb + 7 * uvlinesize);
1725                             AV_COPY128(top_border + 48, src_cr + 7 * uvlinesize);
1726                         } else {
1727                             AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1728                             AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1729                         }
1730                     }
1731                 }
1732             }
1733         } else if (MB_MBAFF) {
                 /* Even mb_y with MB_MBAFF: the regular save below goes to
                  * border set 0 instead of set 1. */
1734             top_idx = 0;
1735         } else
                 /* Even mb_y without MB_MBAFF: nothing is saved. */
1736             return;
1737     }
1738
1739     top_border = h->top_borders[top_idx][s->mb_x];
1740     /* There are two lines saved, the line above the top macroblock
1741      * of a pair, and the line above the bottom macroblock. */
1742     AV_COPY128(top_border, src_y + 16 * linesize);
1743     if (pixel_shift)
1744         AV_COPY128(top_border + 16, src_y + 16 * linesize + 16);
1745
1746     if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
1747         if (chroma444) {
                 /* NOTE(review): this branch offsets the chroma pointers by
                  * linesize, whereas every other chroma branch in this
                  * function uses uvlinesize. The two are equivalent only if
                  * both strides are equal for 4:4:4 content -- confirm. */
1748             if (pixel_shift) {
1749                 AV_COPY128(top_border + 32, src_cb + 16 * linesize);
1750                 AV_COPY128(top_border + 48, src_cb + 16 * linesize + 16);
1751                 AV_COPY128(top_border + 64, src_cr + 16 * linesize);
1752                 AV_COPY128(top_border + 80, src_cr + 16 * linesize + 16);
1753             } else {
1754                 AV_COPY128(top_border + 16, src_cb + 16 * linesize);
1755                 AV_COPY128(top_border + 32, src_cr + 16 * linesize);
1756             }
1757         } else if (chroma422) {
1758             if (pixel_shift) {
1759                 AV_COPY128(top_border + 32, src_cb + 16 * uvlinesize);
1760                 AV_COPY128(top_border + 48, src_cr + 16 * uvlinesize);
1761             } else {
1762                 AV_COPY64(top_border + 16, src_cb + 16 * uvlinesize);
1763                 AV_COPY64(top_border + 24, src_cr + 16 * uvlinesize);
1764             }
1765         } else {
1766             if (pixel_shift) {
1767                 AV_COPY128(top_border + 32, src_cb + 8 * uvlinesize);
1768                 AV_COPY128(top_border + 48, src_cr + 8 * uvlinesize);
1769             } else {
1770                 AV_COPY64(top_border + 16, src_cb + 8 * uvlinesize);
1771                 AV_COPY64(top_border + 24, src_cr + 8 * uvlinesize);
1772             }
1773         }
1774     }
1775 }
1776
1777 static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
1778                                             uint8_t *src_cb, uint8_t *src_cr,
1779                                             int linesize, int uvlinesize,
1780                                             int xchg, int chroma444,
1781                                             int simple, int pixel_shift)
1782 {
         /* Exchange (or copy) pixels between the rows saved in h->top_borders
          * and the picture row directly above the current macroblock, for
          * luma and, unless decoding gray-only, chroma.
          *
          * `xchg` selects swap versus copy for the first half of the row
          * above the macroblock; every other segment is always swapped.
          *
          * NOTE(review): src_y / src_cb / src_cr presumably point at the
          * top-left sample of the macroblock in each plane -- confirm against
          * the callers. */
1783     MpegEncContext *const s = &h->s;
1784     int deblock_topleft;
1785     int deblock_top;
1786     int top_idx = 1;
1787     uint8_t *top_border_m1;
1788     uint8_t *top_border;
1789
         /* In MBAFF frames: nothing to do for an odd mb_y that is not
          * MB_MBAFF; for an even mb_y use border set 0 when MB_MBAFF. */
1790     if (!simple && FRAME_MBAFF) {
1791         if (s->mb_y & 1) {
1792             if (!MB_MBAFF)
1793                 return;
1794         } else {
1795             top_idx = MB_MBAFF ? 0 : 1;
1796         }
1797     }
1798
         /* With deblocking_filter == 2 the top-left neighbour counts only if
          * it belongs to the current slice and the top neighbour is taken
          * from h->top_type; otherwise the decision is purely positional. */
1799     if (h->deblocking_filter == 2) {
1800         deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
1801         deblock_top     = h->top_type;
1802     } else {
1803         deblock_topleft = (s->mb_x > 0);
1804         deblock_top     = (s->mb_y > !!MB_FIELD);
1805     }
1806
         /* Move one row up and (1 + pixel_shift) bytes to the left; the
          * offsets passed to XCHG below are relative to that position. */
1807     src_y  -= linesize   + 1 + pixel_shift;
1808     src_cb -= uvlinesize + 1 + pixel_shift;
1809     src_cr -= uvlinesize + 1 + pixel_shift;
1810
         /* Border entries of the left neighbour column and of this column.
          * top_border_m1 is only dereferenced when deblock_topleft is set. */
1811     top_border_m1 = h->top_borders[top_idx][s->mb_x - 1];
1812     top_border    = h->top_borders[top_idx][s->mb_x];
1813
     /* XCHG(a, b, xchg): handle 8 bytes (16 bytes when pixel_shift is set).
      * With a non-zero xchg the two buffers are swapped, otherwise
      * AV_COPY*(b, a) is used (presumably copying a into b -- confirm the
      * argument order in intreadwrite.h). */
1814 #define XCHG(a, b, xchg)                        \
1815     if (pixel_shift) {                          \
1816         if (xchg) {                             \
1817             AV_SWAP64(b + 0, a + 0);            \
1818             AV_SWAP64(b + 8, a + 8);            \
1819         } else {                                \
1820             AV_COPY128(b, a);                   \
1821         }                                       \
1822     } else if (xchg)                            \
1823         AV_SWAP64(b, a);                        \
1824     else                                        \
1825         AV_COPY64(b, a);
1826
1827     if (deblock_top) {
1828         if (deblock_topleft) {
                 /* Second half of the left neighbour's saved luma row. */
1829             XCHG(top_border_m1 + (8 << pixel_shift),
1830                  src_y - (7 << pixel_shift), 1);
1831         }
             /* The two halves of the luma row above this macroblock; only
              * the first half honours the caller's xchg flag. */
1832         XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
1833         XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
             /* First half of the right neighbour's row, if there is one. */
1834         if (s->mb_x + 1 < s->mb_width) {
1835             XCHG(h->top_borders[top_idx][s->mb_x + 1],
1836                  src_y + (17 << pixel_shift), 1);
1837         }
1838     }
1839     if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
1840         if (chroma444) {
                 /* NOTE(review): unlike the luma part and the non-4:4:4
                  * chroma part, this branch is not guarded by deblock_top --
                  * confirm that this is intended. */
1841             if (deblock_topleft) {
1842                 XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
1843                 XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
1844             }
1845             XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
1846             XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
1847             XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
1848             XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
1849             if (s->mb_x + 1 < s->mb_width) {
1850                 XCHG(h->top_borders[top_idx][s->mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
1851                 XCHG(h->top_borders[top_idx][s->mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
1852             }
1853         } else {
1854             if (deblock_top) {
1855                 if (deblock_topleft) {
1856                     XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
1857                     XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
1858                 }
1859                 XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1);
1860                 XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1);
1861             }
1862         }
1863     }
1864 }
1865
1866 static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth,
1867                                         int index)
1868 {
         /* Read coefficient number `index` from mb: an aligned 16-bit read
          * normally, an aligned 32-bit read (mb viewed as int32_t array)
          * when high_bit_depth is set. */
1869     if (!high_bit_depth)
1870         return AV_RN16A(mb + index);
1871
1872     return AV_RN32A(((int32_t *)mb) + index);
1873 }
1874
1875 static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth,
1876                                          int index, int value)
1877 {
         /* Store `value` as coefficient number `index` of mb: an aligned
          * 16-bit write normally, an aligned 32-bit write (mb viewed as
          * int32_t array) when high_bit_depth is set. */
1878     if (!high_bit_depth)
1879         AV_WN16A(mb + index, value);
1880     else
1881         AV_WN32A(((int32_t *)mb) + index, value);
1882 }
1883
1884 static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
1885                                                        int mb_type, int is_h264,
1886                                                        int simple,
1887                                                        int transform_bypass,
1888                                                        int pixel_shift,
1889                                                        int *block_offset,
1890                                                        int linesize,
1891                                                        uint8_t *dest_y, int p)
1892 {
         /* Run intra prediction for plane p of the current macroblock.
          *
          * For Intra4x4 macroblocks each block is predicted and its residual
          * is added right away. For all other macroblocks handled here only
          * the 16x16 prediction and the luma DC preparation are done.
          *
          * p selects the plane: it picks the quantiser (s->qscale for 0,
          * h->chroma_qp[p - 1] otherwise), the block_offset sub-table and
          * the coefficient area starting at element p * 256 of h->mb. */
1893     MpegEncContext *const s = &h->s;
1894     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
1895     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
1896     int i;
1897     int qscale = p == 0 ? s->qscale : h->chroma_qp[p - 1];
1898     block_offset += 16 * p;
1899     if (IS_INTRA4x4(mb_type)) {
1900         if (simple || !s->encoding) {
1901             if (IS_8x8DCT(mb_type)) {
                     /* With transform bypass the residual is added as plain
                      * pixels, for the DC-only case as well. */
1902                 if (transform_bypass) {
1903                     idct_dc_add  =
1904                     idct_add     = s->dsp.add_pixels8;
1905                 } else {
1906                     idct_dc_add = h->h264dsp.h264_idct8_dc_add;
1907                     idct_add    = h->h264dsp.h264_idct8_add;
1908                 }
                     /* Four 8x8 blocks, addressed by their first 4x4 index. */
1909                 for (i = 0; i < 16; i += 4) {
1910                     uint8_t *const ptr = dest_y + block_offset[i];
1911                     const int dir      = h->intra4x4_pred_mode_cache[scan8[i]];
                         /* Bypass with profile_idc 244 and prediction mode 0 or 1
                          * uses the pred8x8l_add functions, which receive the
                          * coefficients directly. */
1912                     if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
1913                         h->hpc.pred8x8l_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
1914                     } else {
1915                         const int nnz = h->non_zero_count_cache[scan8[i + p * 16]];
1916                         h->hpc.pred8x8l[dir](ptr, (h->topleft_samples_available << i) & 0x8000,
1917                                              (h->topright_samples_available << i) & 0x4000, linesize);
1918                         if (nnz) {
                                 /* Exactly one coefficient and it is the first
                                  * one of the block: use the DC-only routine. */
1919                             if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
1920                                 idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
1921                             else
1922                                 idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
1923                         }
1924                     }
1925                 }
1926             } else {
1927                 if (transform_bypass) {
1928                     idct_dc_add  =
1929                         idct_add = s->dsp.add_pixels4;
1930                 } else {
1931                     idct_dc_add = h->h264dsp.h264_idct_dc_add;
1932                     idct_add    = h->h264dsp.h264_idct_add;
1933                 }
1934                 for (i = 0; i < 16; i++) {
1935                     uint8_t *const ptr = dest_y + block_offset[i];
1936                     const int dir      = h->intra4x4_pred_mode_cache[scan8[i]];
1937
1938                     if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
1939                         h->hpc.pred4x4_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
1940                     } else {
1941                         uint8_t *topright;
1942                         int nnz, tr;
1943                         uint64_t tr_high;
                             /* Only these two modes are given a top-right
                              * pointer. If those samples are unavailable, the
                              * sample at index 3 of the row above the block is
                              * replicated four times into a local variable
                              * that is used in their place. */
1944                         if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
1945                             const int topright_avail = (h->topright_samples_available << i) & 0x8000;
1946                             assert(s->mb_y || linesize <= block_offset[i]);
1947                             if (!topright_avail) {
1948                                 if (pixel_shift) {
1949                                     tr_high  = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
1950                                     topright = (uint8_t *)&tr_high;
1951                                 } else {
1952                                     tr       = ptr[3 - linesize] * 0x01010101u;
1953                                     topright = (uint8_t *)&tr;
1954                                 }
1955                             } else
1956                                 topright = ptr + (4 << pixel_shift) - linesize;
1957                         } else
1958                             topright = NULL;
1959
1960                         h->hpc.pred4x4[dir](ptr, topright, linesize);
1961                         nnz = h->non_zero_count_cache[scan8[i + p * 16]];
1962                         if (nnz) {
1963                             if (is_h264) {
1964                                 if (nnz == 1 && dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
1965                                     idct_dc_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
1966                                 else
1967                                     idct_add(ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
1968                             } else if (CONFIG_SVQ3_DECODER)
1969                                 ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize, qscale, 0);
1970                         }
1971                     }
1972                 }
1973             }
1974         }
1975     } else {
             /* Not Intra4x4: predict the whole 16x16 block and prepare the
              * luma DC coefficients; the residual itself is not added here. */
1976         h->hpc.pred16x16[h->intra16x16_pred_mode](dest_y, linesize);
1977         if (is_h264) {
1978             if (h->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) {
1979                 if (!transform_bypass)
1980                     h->h264dsp.h264_luma_dc_dequant_idct(h->mb + (p * 256 << pixel_shift),
1981                                                          h->mb_luma_dc[p],
1982                                                          h->dequant4_coeff[p][qscale][0]);
1983                 else {
                         /* Transform bypass: copy each of the 16 DC values from
                          * mb_luma_dc[p] to the coefficient position listed in
                          * dc_mapping, without dequantisation or transform. */
1984                     static const uint8_t dc_mapping[16] = {
1985                          0 * 16,  1 * 16,  4 * 16,  5 * 16,
1986                          2 * 16,  3 * 16,  6 * 16,  7 * 16,
1987                          8 * 16,  9 * 16, 12 * 16, 13 * 16,
1988                         10 * 16, 11 * 16, 14 * 16, 15 * 16 };
1989                     for (i = 0; i < 16; i++)
1990                         dctcoef_set(h->mb + (p * 256 << pixel_shift),
1991                                     pixel_shift, dc_mapping[i],
1992                                     dctcoef_get(h->mb_luma_dc[p],
1993                                                 pixel_shift, i));
1994                 }
1995             }
1996         } else if (CONFIG_SVQ3_DECODER)
1997             ff_svq3_luma_dc_dequant_idct_c(h->mb + p * 256,
1998                                            h->mb_luma_dc[p], qscale);
1999     }
2000 }
2001
2002 static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
2003                                                     int is_h264, int simple,
2004                                                     int transform_bypass,
2005                                                     int pixel_shift,
2006                                                     int *block_offset,
2007                                                     int linesize,
2008                                                     uint8_t *dest_y, int p)
2009 {
         /* Add the residual of plane p to dest_y for every macroblock type
          * except Intra4x4; this function does nothing for Intra4x4.
          *
          * p selects the block_offset sub-table, the coefficient area starting
          * at element p * 256 of h->mb and the non_zero_count_cache slice at
          * offset p * 5 * 8. */
2010     MpegEncContext *const s = &h->s;
2011     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2012     int i;
2013     block_offset += 16 * p;
2014     if (!IS_INTRA4x4(mb_type)) {
2015         if (is_h264) {
2016             if (IS_INTRA16x16(mb_type)) {
2017                 if (transform_bypass) {
                         /* Bypass with profile_idc 244 and vertical or horizontal
                          * 16x16 prediction goes through the pred16x16_add
                          * functions; otherwise the coefficients of every block
                          * with a non-zero count or a non-zero first coefficient
                          * are added as plain pixels. */
2018                     if (h->sps.profile_idc == 244 &&
2019                         (h->intra16x16_pred_mode == VERT_PRED8x8 ||
2020                          h->intra16x16_pred_mode == HOR_PRED8x8)) {
2021                         h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset,
2022                                                                       h->mb + (p * 256 << pixel_shift),
2023                                                                       linesize);
2024                     } else {
2025                         for (i = 0; i < 16; i++)
2026                             if (h->non_zero_count_cache[scan8[i + p * 16]] ||
2027                                 dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
2028                                 s->dsp.add_pixels4(dest_y + block_offset[i],
2029                                                    h->mb + (i * 16 + p * 256 << pixel_shift),
2030                                                    linesize);
2031                     }
2032                 } else {
2033                     h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
2034                                                     h->mb + (p * 256 << pixel_shift),
2035                                                     linesize,
2036                                                     h->non_zero_count_cache + p * 5 * 8);
2037                 }
2038             } else if (h->cbp & 15) {
                     /* Not Intra16x16: only act when one of the low four cbp
                      * bits is set. */
2039                 if (transform_bypass) {
                         /* Step over the blocks in units of one 8x8 block
                          * (4 indices) or one 4x4 block, matching the
                          * add_pixels variant selected. */
2040                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2041                     idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8
2042                                                   : s->dsp.add_pixels4;
2043                     for (i = 0; i < 16; i += di)
2044                         if (h->non_zero_count_cache[scan8[i + p * 16]])
2045                             idct_add(dest_y + block_offset[i],
2046                                      h->mb + (i * 16 + p * 256 << pixel_shift),
2047                                      linesize);
2048                 } else {
2049                     if (IS_8x8DCT(mb_type))
2050                         h->h264dsp.h264_idct8_add4(dest_y, block_offset,
2051                                                    h->mb + (p * 256 << pixel_shift),
2052                                                    linesize,
2053                                                    h->non_zero_count_cache + p * 5 * 8);
2054                     else
2055                         h->h264dsp.h264_idct_add16(dest_y, block_offset,
2056                                                    h->mb + (p * 256 << pixel_shift),
2057                                                    linesize,
2058                                                    h->non_zero_count_cache + p * 5 * 8);
2059                 }
2060             }
2061         } else if (CONFIG_SVQ3_DECODER) {
                 /* SVQ3 path: the last argument passed to ff_svq3_add_idct_c is
                  * 1 for intra macroblocks and 0 otherwise. */
2062             for (i = 0; i < 16; i++)
2063                 if (h->non_zero_count_cache[scan8[i + p * 16]] || h->mb[i * 16 + p * 256]) {
2064                     // FIXME benchmark weird rule, & below
2065                     uint8_t *const ptr = dest_y + block_offset[i];
2066                     ff_svq3_add_idct_c(ptr, h->mb + i * 16 + p * 256, linesize,
2067                                        s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2068                 }
2069         }
2070     }
2071 }
2072
2073 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple,
2074                                                    int pixel_shift)
2075 {
2076     MpegEncContext *const s = &h->s;
2077     const int mb_x    = s->mb_x;
2078     const int mb_y    = s->mb_y;
2079     const int mb_xy   = h->mb_xy;
2080     const int mb_type = s->current_picture.f.mb_type[mb_xy];
2081     uint8_t *dest_y, *dest_cb, *dest_cr;
2082     int linesize, uvlinesize /*dct_offset*/;
2083     int i, j;
2084     int *block_offset = &h->block_offset[0];
2085     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2086     /* is_h264 should always be true if SVQ3 is disabled. */
2087     const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2088     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2089     const int block_h   = 16 >> s->chroma_y_shift;
2090     const int chroma422 = CHROMA422;
2091
2092     dest_y  = s->current_picture.f.data[0] + ((mb_x << pixel_shift)     + mb_y * s->linesize)  * 16;
2093     dest_cb = s->current_picture.f.data[1] +  (mb_x << pixel_shift) * 8 + mb_y * s->uvlinesize * block_h;
2094     dest_cr = s->current_picture.f.data[2] +  (mb_x << pixel_shift) * 8 + mb_y * s->uvlinesize * block_h;
2095
2096     s->dsp.prefetch(dest_y  + (s->mb_x & 3) * 4 * s->linesize   + (64 << pixel_shift), s->linesize,       4);
2097     s->dsp.prefetch(dest_cb + (s->mb_x & 7)     * s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);
2098
2099     h->list_counts[mb_xy] = h->list_count;
2100
2101     if (!simple && MB_FIELD) {
2102         linesize     = h->mb_linesize = s->linesize * 2;
2103         uvlinesize   = h->mb_uvlinesize = s->uvlinesize * 2;
2104         block_offset = &h->block_offset[48];
2105         if (mb_y & 1) { // FIXME move out of this function?
2106             dest_y  -= s->linesize * 15;
2107             dest_cb -= s->uvlinesize * (block_h - 1);
2108             dest_cr -= s->uvlinesize * (block_h - 1);
2109         }
2110         if (FRAME_MBAFF) {
2111             int list;
2112             for (list = 0; list < h->list_count; list++) {
2113                 if (!USES_LIST(mb_type, list))
2114                     continue;
2115                 if (IS_16X16(mb_type)) {
2116                     int8_t *ref = &h->ref_cache[list][scan8[0]];
2117                     fill_rectangle(ref, 4, 4, 8, (16 + *ref) ^ (s->mb_y & 1), 1);
2118                 } else {
2119                     for (i = 0; i < 16; i += 4) {
2120                         int ref = h->ref_cache[list][scan8[i]];
2121                         if (ref >= 0)
2122                             fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2,
2123                                            8, (16 + ref) ^ (s->mb_y & 1), 1);
2124                     }
2125                 }
2126             }
2127         }
2128     } else {
2129         linesize   = h->mb_linesize   = s->linesize;
2130         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2131         // dct_offset = s->linesize * 16;
2132     }
2133
2134     if (!simple && IS_INTRA_PCM(mb_type)) {
2135         const int bit_depth = h->sps.bit_depth_luma;
2136         if (pixel_shift) {
2137             int j;
2138             GetBitContext gb;
2139             init_get_bits(&gb, (uint8_t *)h->mb,
2140                           ff_h264_mb_sizes[h->sps.chroma_format_idc] * bit_depth);
2141
2142             for (i = 0; i < 16; i++) {
2143                 uint16_t *tmp_y = (uint16_t *)(dest_y + i * linesize);
2144                 for (j = 0; j < 16; j++)
2145                     tmp_y[j] = get_bits(&gb, bit_depth);
2146             }
2147             if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
2148                 if (!h->sps.chroma_format_idc) {
2149                     for (i = 0; i < block_h; i++) {
2150                         uint16_t *tmp_cb = (uint16_t *)(dest_cb + i * uvlinesize);
2151                         uint16_t *tmp_cr = (uint16_t *)(dest_cr + i * uvlinesize);
2152                         for (j = 0; j < 8; j++) {
2153                             tmp_cb[j] = tmp_cr[j] = 1 << (bit_depth - 1);
2154                         }
2155                     }
2156                 } else {
2157                     for (i = 0; i < block_h; i++) {
2158                         uint16_t *tmp_cb = (uint16_t *)(dest_cb + i * uvlinesize);
2159                         for (j = 0; j < 8; j++)
2160                             tmp_cb[j] = get_bits(&gb, bit_depth);
2161                     }
2162                     for (i = 0; i < block_h; i++) {
2163                         uint16_t *tmp_cr = (uint16_t *)(dest_cr + i * uvlinesize);
2164                         for (j = 0; j < 8; j++)
2165                             tmp_cr[j] = get_bits(&gb, bit_depth);
2166                     }
2167                 }
2168             }
2169         } else {
2170             for (i = 0; i < 16; i++)
2171                 memcpy(dest_y + i * linesize, (uint8_t *)h->mb + i * 16, 16);
2172             if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
2173                 if (!h->sps.chroma_format_idc) {
2174                     for (i = 0; i < 8; i++) {
2175                         memset(dest_cb + i*uvlinesize, 1 << (bit_depth - 1), 8);
2176                         memset(dest_cr + i*uvlinesize, 1 << (bit_depth - 1), 8);
2177                     }
2178                 } else {
2179                     uint8_t *src_cb = (uint8_t *)h->mb + 256;
2180                     uint8_t *src_cr = (uint8_t *)h->mb + 256 + block_h * 8;
2181                     for (i = 0; i < block_h; i++) {
2182                         memcpy(dest_cb + i * uvlinesize, src_cb + i * 8, 8);
2183                         memcpy(dest_cr + i * uvlinesize, src_cr + i * 8, 8);
2184                     }
2185                 }
2186             }
2187         }
2188     } else {
2189         if (IS_INTRA(mb_type)) {
2190             if (h->deblocking_filter)
2191                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize,
2192                                uvlinesize, 1, 0, simple, pixel_shift);
2193
2194             if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
2195                 if (CHROMA) {
2196                     h->hpc.pred8x8[h->chroma_pred_mode](dest_cb, uvlinesize);
2197                     h->hpc.pred8x8[h->chroma_pred_mode](dest_cr, uvlinesize);
2198                 }
2199             }
2200
2201             hl_decode_mb_predict_luma(h, mb_type, is_h264, simple,
2202                                       transform_bypass, pixel_shift,
2203                                       block_offset, linesize, dest_y, 0);
2204
2205             if (h->deblocking_filter)
2206                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize,
2207                                uvlinesize, 0, 0, simple, pixel_shift);
2208         } else if (is_h264) {
2209             if (chroma422) {
2210                 hl_motion_422(h, dest_y, dest_cb, dest_cr,
2211                               s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2212                               s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2213                               h->h264dsp.weight_h264_pixels_tab,
2214                               h->h264dsp.biweight_h264_pixels_tab,
2215                               pixel_shift);
2216             } else {
2217                 hl_motion_420(h, dest_y, dest_cb, dest_cr,
2218                               s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2219                               s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2220                               h->h264dsp.weight_h264_pixels_tab,
2221                               h->h264dsp.biweight_h264_pixels_tab,
2222                               pixel_shift);
2223             }
2224         }
2225
2226         hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass,
2227                                pixel_shift, block_offset, linesize, dest_y, 0);
2228
2229         if ((simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) &&
2230             (h->cbp & 0x30)) {
2231             uint8_t *dest[2] = { dest_cb, dest_cr };
2232             if (transform_bypass) {
2233                 if (IS_INTRA(mb_type) && h->sps.profile_idc == 244 &&
2234                     (h->chroma_pred_mode == VERT_PRED8x8 ||
2235                      h->chroma_pred_mode == HOR_PRED8x8)) {
2236                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0],
2237                                                             block_offset + 16,
2238                                                             h->mb + (16 * 16 * 1 << pixel_shift),
2239                                                             uvlinesize);
2240                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1],
2241                                                             block_offset + 32,
2242                                                             h->mb + (16 * 16 * 2 << pixel_shift),
2243                                                             uvlinesize);
2244                 } else {
2245                     idct_add = s->dsp.add_pixels4;
2246                     for (j = 1; j < 3; j++) {
2247                         for (i = j * 16; i < j * 16 + 4; i++)
2248                             if (h->non_zero_count_cache[scan8[i]] ||
2249                                 dctcoef_get(h->mb, pixel_shift, i * 16))
2250                                 idct_add(dest[j - 1] + block_offset[i],
2251                                          h->mb + (i * 16 << pixel_shift),
2252                                          uvlinesize);
2253                         if (chroma422) {
2254                             for (i = j * 16 + 4; i < j * 16 + 8; i++)
2255                                 if (h->non_zero_count_cache[scan8[i + 4]] ||
2256                                     dctcoef_get(h->mb, pixel_shift, i * 16))
2257                                     idct_add(dest[j - 1] + block_offset[i + 4],
2258                                              h->mb + (i * 16 << pixel_shift),
2259                                              uvlinesize);
2260                         }
2261                     }
2262                 }
2263             } else {
2264                 if (is_h264) {
2265                     int qp[2];
2266                     if (chroma422) {
2267                         qp[0] = h->chroma_qp[0] + 3;
2268                         qp[1] = h->chroma_qp[1] + 3;
2269                     } else {
2270                         qp[0] = h->chroma_qp[0];
2271                         qp[1] = h->chroma_qp[1];
2272                     }
2273                     if (h->non_zero_count_cache[scan8[CHROMA_DC_BLOCK_INDEX + 0]])
2274                         h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16 * 16 * 1 << pixel_shift),
2275                                                                h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][qp[0]][0]);
2276                     if (h->non_zero_count_cache[scan8[CHROMA_DC_BLOCK_INDEX + 1]])
2277                         h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16 * 16 * 2 << pixel_shift),
2278                                                                h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][qp[1]][0]);
2279                     h->h264dsp.h264_idct_add8(dest, block_offset,
2280                                               h->mb, uvlinesize,
2281                                               h->non_zero_count_cache);
2282                 } else if (CONFIG_SVQ3_DECODER) {
2283                     h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16 * 16 * 1,
2284                                                            h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][h->chroma_qp[0]][0]);
2285                     h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16 * 16 * 2,
2286                                                            h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][h->chroma_qp[1]][0]);
2287                     for (j = 1; j < 3; j++) {
2288                         for (i = j * 16; i < j * 16 + 4; i++)
2289                             if (h->non_zero_count_cache[scan8[i]] || h->mb[i * 16]) {
2290                                 uint8_t *const ptr = dest[j - 1] + block_offset[i];
2291                                 ff_svq3_add_idct_c(ptr, h->mb + i * 16,
2292                                                    uvlinesize,
2293                                                    ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
2294                             }
2295                     }
2296                 }
2297             }
2298         }
2299     }
2300     if (h->cbp || IS_INTRA(mb_type)) {
2301         s->dsp.clear_blocks(h->mb);
2302         s->dsp.clear_blocks(h->mb + (24 * 16 << pixel_shift));
2303     }
2304 }
2305
2306 static av_always_inline void hl_decode_mb_444_internal(H264Context *h,
2307                                                        int simple,
2308                                                        int pixel_shift)
2309 {
2310     MpegEncContext *const s = &h->s;
2311     const int mb_x    = s->mb_x;
2312     const int mb_y    = s->mb_y;
2313     const int mb_xy   = h->mb_xy;
2314     const int mb_type = s->current_picture.f.mb_type[mb_xy];
2315     uint8_t *dest[3];
2316     int linesize;
2317     int i, j, p;
2318     int *block_offset = &h->block_offset[0];
2319     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2320     const int plane_count      = (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) ? 3 : 1;
2321
2322     for (p = 0; p < plane_count; p++) {
2323         dest[p] = s->current_picture.f.data[p] +
2324                   ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
2325         s->dsp.prefetch(dest[p] + (s->mb_x & 3) * 4 * s->linesize + (64 << pixel_shift),
2326                         s->linesize, 4);
2327     }
2328
2329     h->list_counts[mb_xy] = h->list_count;
2330
2331     if (!simple && MB_FIELD) {
2332         linesize     = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
2333         block_offset = &h->block_offset[48];
2334         if (mb_y & 1) // FIXME move out of this function?
2335             for (p = 0; p < 3; p++)
2336                 dest[p] -= s->linesize * 15;
2337         if (FRAME_MBAFF) {
2338             int list;
2339             for (list = 0; list < h->list_count; list++) {
2340                 if (!USES_LIST(mb_type, list))
2341                     continue;
2342                 if (IS_16X16(mb_type)) {
2343                     int8_t *ref = &h->ref_cache[list][scan8[0]];
2344                     fill_rectangle(ref, 4, 4, 8, (16 + *ref) ^ (s->mb_y & 1), 1);
2345                 } else {
2346                     for (i = 0; i < 16; i += 4) {
2347                         int ref = h->ref_cache[list][scan8[i]];
2348                         if (ref >= 0)
2349                             fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2,
2350                                            8, (16 + ref) ^ (s->mb_y & 1), 1);
2351                     }
2352                 }
2353             }
2354         }
2355     } else {
2356         linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize;
2357     }
2358
2359     if (!simple && IS_INTRA_PCM(mb_type)) {
2360         if (pixel_shift) {
2361             const int bit_depth = h->sps.bit_depth_luma;
2362             GetBitContext gb;
2363             init_get_bits(&gb, (uint8_t *)h->mb, 768 * bit_depth);
2364
2365             for (p = 0; p < plane_count; p++)
2366                 for (i = 0; i < 16; i++) {
2367                     uint16_t *tmp = (uint16_t *)(dest[p] + i * linesize);
2368                     for (j = 0; j < 16; j++)
2369                         tmp[j] = get_bits(&gb, bit_depth);
2370                 }
2371         } else {
2372             for (p = 0; p < plane_count; p++)
2373                 for (i = 0; i < 16; i++)
2374                     memcpy(dest[p] + i * linesize,
2375                            (uint8_t *)h->mb + p * 256 + i * 16, 16);
2376         }
2377     } else {
2378         if (IS_INTRA(mb_type)) {
2379             if (h->deblocking_filter)
2380                 xchg_mb_border(h, dest[0], dest[1], dest[2], linesize,
2381                                linesize, 1, 1, simple, pixel_shift);
2382
2383             for (p = 0; p < plane_count; p++)
2384                 hl_decode_mb_predict_luma(h, mb_type, 1, simple,
2385                                           transform_bypass, pixel_shift,
2386                                           block_offset, linesize, dest[p], p);
2387
2388             if (h->deblocking_filter)
2389                 xchg_mb_border(h, dest[0], dest[1], dest[2], linesize,
2390                                linesize, 0, 1, simple, pixel_shift);
2391         } else {
2392             hl_motion(h, dest[0], dest[1], dest[2],
2393                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2394                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2395                       h->h264dsp.weight_h264_pixels_tab,
2396                       h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 3);
2397         }
2398
2399         for (p = 0; p < plane_count; p++)
2400             hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass,
2401                                    pixel_shift, block_offset, linesize,
2402                                    dest[p], p);
2403     }
2404     if (h->cbp || IS_INTRA(mb_type)) {
2405         s->dsp.clear_blocks(h->mb);
2406         s->dsp.clear_blocks(h->mb + (24 * 16 << pixel_shift));
2407     }
2408 }
2409
2410 /**
2411  * Process a macroblock; this case avoids checks for expensive uncommon cases.
2412  */
2413 #define hl_decode_mb_simple(sh, bits)                          \
2414 static void hl_decode_mb_simple_ ## bits(H264Context *h)       \
2415 {                                                              \
2416     hl_decode_mb_internal(h, 1, sh);                           \
2417 }
2418
2419 hl_decode_mb_simple(0, 8)
2420 hl_decode_mb_simple(1, 16)
2421
2422 /**
2423  * Process a macroblock; this handles edge cases, such as interlacing.
2424  */
2425 static av_noinline void hl_decode_mb_complex(H264Context *h)
2426 {
2427     hl_decode_mb_internal(h, 0, h->pixel_shift);
2428 }
2429
2430 static av_noinline void hl_decode_mb_444_complex(H264Context *h)
2431 {
2432     hl_decode_mb_444_internal(h, 0, h->pixel_shift);
2433 }
2434
2435 static av_noinline void hl_decode_mb_444_simple(H264Context *h)
2436 {
2437     hl_decode_mb_444_internal(h, 1, 0);
2438 }
2439
2440 void ff_h264_hl_decode_mb(H264Context *h)
2441 {
2442     MpegEncContext *const s = &h->s;
2443     const int mb_xy   = h->mb_xy;
2444     const int mb_type = s->current_picture.f.mb_type[mb_xy];
2445     int is_complex    = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2446
2447     if (CHROMA444) {
2448         if (is_complex || h->pixel_shift)
2449             hl_decode_mb_444_complex(h);
2450         else
2451             hl_decode_mb_444_simple(h);
2452     } else if (is_complex) {
2453         hl_decode_mb_complex(h);
2454     } else if (h->pixel_shift) {
2455         hl_decode_mb_simple_16(h);
2456     } else
2457         hl_decode_mb_simple_8(h);
2458 }
2459
2460 static int pred_weight_table(H264Context *h)
2461 {
2462     MpegEncContext *const s = &h->s;
2463     int list, i;
2464     int luma_def, chroma_def;
2465
2466     h->use_weight             = 0;
2467     h->use_weight_chroma      = 0;
2468     h->luma_log2_weight_denom = get_ue_golomb(&s->gb);
2469     if (h->sps.chroma_format_idc)
2470         h->chroma_log2_weight_denom = get_ue_golomb(&s->gb);
2471     luma_def   = 1 << h->luma_log2_weight_denom;
2472     chroma_def = 1 << h->chroma_log2_weight_denom;
2473
2474     for (list = 0; list < 2; list++) {
2475         h->luma_weight_flag[list]   = 0;
2476         h->chroma_weight_flag[list] = 0;
2477         for (i = 0; i < h->ref_count[list]; i++) {
2478             int luma_weight_flag, chroma_weight_flag;
2479
2480             luma_weight_flag = get_bits1(&s->gb);
2481             if (luma_weight_flag) {
2482                 h->luma_weight[i][list][0] = get_se_golomb(&s->gb);
2483                 h->luma_weight[i][list][1] = get_se_golomb(&s->gb);
2484                 if (h->luma_weight[i][list][0] != luma_def ||
2485                     h->luma_weight[i][list][1] != 0) {
2486                     h->use_weight             = 1;
2487                     h->luma_weight_flag[list] = 1;
2488                 }
2489             } else {
2490                 h->luma_weight[i][list][0] = luma_def;
2491                 h->luma_weight[i][list][1] = 0;
2492             }
2493
2494             if (h->sps.chroma_format_idc) {
2495                 chroma_weight_flag = get_bits1(&s->gb);
2496                 if (chroma_weight_flag) {
2497                     int j;
2498                     for (j = 0; j < 2; j++) {
2499                         h->chroma_weight[i][list][j][0] = get_se_golomb(&s->gb);
2500                         h->chroma_weight[i][list][j][1] = get_se_golomb(&s->gb);
2501                         if (h->chroma_weight[i][list][j][0] != chroma_def ||
2502                             h->chroma_weight[i][list][j][1] != 0) {
2503                             h->use_weight_chroma = 1;
2504                             h->chroma_weight_flag[list] = 1;
2505                         }
2506                     }
2507                 } else {
2508                     int j;
2509                     for (j = 0; j < 2; j++) {
2510                         h->chroma_weight[i][list][j][0] = chroma_def;
2511                         h->chroma_weight[i][list][j][1] = 0;
2512                     }
2513                 }
2514             }
2515         }
2516         if (h->slice_type_nos != AV_PICTURE_TYPE_B)
2517             break;
2518     }
2519     h->use_weight = h->use_weight || h->use_weight_chroma;
2520     return 0;
2521 }
2522
2523 /**
2524  * Initialize implicit_weight table.
2525  * @param field  0/1 initialize the weight for interlaced MBAFF
2526  *                -1 initializes the rest
2527  */
2528 static void implicit_weight_table(H264Context *h, int field)
2529 {
2530     MpegEncContext *const s = &h->s;
2531     int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;
2532
2533     for (i = 0; i < 2; i++) {
2534         h->luma_weight_flag[i]   = 0;
2535         h->chroma_weight_flag[i] = 0;
2536     }
2537
2538     if (field < 0) {
2539         if (s->picture_structure == PICT_FRAME) {
2540             cur_poc = s->current_picture_ptr->poc;
2541         } else {
2542             cur_poc = s->current_picture_ptr->field_poc[s->picture_structure - 1];
2543         }
2544         if (h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF &&
2545             h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2 * cur_poc) {
2546             h->use_weight = 0;
2547             h->use_weight_chroma = 0;
2548             return;
2549         }
2550         ref_start  = 0;
2551         ref_count0 = h->ref_count[0];
2552         ref_count1 = h->ref_count[1];
2553     } else {
2554         cur_poc    = s->current_picture_ptr->field_poc[field];
2555         ref_start  = 16;
2556         ref_count0 = 16 + 2 * h->ref_count[0];
2557         ref_count1 = 16 + 2 * h->ref_count[1];
2558     }
2559
2560     h->use_weight               = 2;
2561     h->use_weight_chroma        = 2;
2562     h->luma_log2_weight_denom   = 5;
2563     h->chroma_log2_weight_denom = 5;
2564
2565     for (ref0 = ref_start; ref0 < ref_count0; ref0++) {
2566         int poc0 = h->ref_list[0][ref0].poc;
2567         for (ref1 = ref_start; ref1 < ref_count1; ref1++) {
2568             int w = 32;
2569             if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
2570                 int poc1 = h->ref_list[1][ref1].poc;
2571                 int td   = av_clip(poc1 - poc0, -128, 127);
2572                 if (td) {
2573                     int tb = av_clip(cur_poc - poc0, -128, 127);
2574                     int tx = (16384 + (FFABS(td) >> 1)) / td;
2575                     int dist_scale_factor = (tb * tx + 32) >> 8;
2576                     if (dist_scale_factor >= -64 && dist_scale_factor <= 128)
2577                         w = 64 - dist_scale_factor;
2578                 }
2579             }
2580             if (field < 0) {
2581                 h->implicit_weight[ref0][ref1][0] =
2582                 h->implicit_weight[ref0][ref1][1] = w;
2583             } else {
2584                 h->implicit_weight[ref0][ref1][field] = w;
2585             }
2586         }
2587     }
2588 }
2589
2590 /**
2591  * instantaneous decoder refresh.
2592  */
2593 static void idr(H264Context *h)
2594 {
2595     int i;
2596     ff_h264_remove_all_refs(h);
2597     h->prev_frame_num        = 0;
2598     h->prev_frame_num_offset = 0;
2599     h->prev_poc_msb          = 1<<16;
2600     h->prev_poc_lsb          = 0;
2601     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
2602         h->last_pocs[i] = INT_MIN;
2603 }
2604
2605 /* forget old pics after a seek */
2606 static void flush_dpb(AVCodecContext *avctx)
2607 {
2608     H264Context *h = avctx->priv_data;
2609     int i;
2610     for (i=0; i<=MAX_DELAYED_PIC_COUNT; i++) {
2611         if (h->delayed_pic[i])
2612             h->delayed_pic[i]->f.reference = 0;
2613         h->delayed_pic[i] = NULL;
2614     }
2615     h->outputed_poc = h->next_outputed_poc = INT_MIN;
2616     h->prev_interlaced_frame = 1;
2617     idr(h);
2618     h->prev_frame_num = -1;
2619     if (h->s.current_picture_ptr)
2620         h->s.current_picture_ptr->f.reference = 0;
2621     h->s.first_field = 0;
2622     ff_h264_reset_sei(h);
2623     ff_mpeg_flush(avctx);
2624     h->recovery_frame= -1;
2625     h->sync= 0;
2626 }
2627
2628 static int init_poc(H264Context *h)
2629 {
2630     MpegEncContext *const s = &h->s;
2631     const int max_frame_num = 1 << h->sps.log2_max_frame_num;
2632     int field_poc[2];
2633     Picture *cur = s->current_picture_ptr;
2634
2635     h->frame_num_offset = h->prev_frame_num_offset;
2636     if (h->frame_num < h->prev_frame_num)
2637         h->frame_num_offset += max_frame_num;
2638
2639     if (h->sps.poc_type == 0) {
2640         const int max_poc_lsb = 1 << h->sps.log2_max_poc_lsb;
2641
2642         if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb / 2)
2643             h->poc_msb = h->prev_poc_msb + max_poc_lsb;
2644         else if (h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb / 2)
2645             h->poc_msb = h->prev_poc_msb - max_poc_lsb;
2646         else
2647             h->poc_msb = h->prev_poc_msb;
2648         // printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
2649         field_poc[0] =
2650         field_poc[1] = h->poc_msb + h->poc_lsb;
2651         if (s->picture_structure == PICT_FRAME)
2652             field_poc[1] += h->delta_poc_bottom;
2653     } else if (h->sps.poc_type == 1) {
2654         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
2655         int i;
2656
2657         if (h->sps.poc_cycle_length != 0)
2658             abs_frame_num = h->frame_num_offset + h->frame_num;
2659         else
2660             abs_frame_num = 0;
2661
2662         if (h->nal_ref_idc == 0 && abs_frame_num > 0)
2663             abs_frame_num--;
2664
2665         expected_delta_per_poc_cycle = 0;
2666         for (i = 0; i < h->sps.poc_cycle_length; i++)
2667             // FIXME integrate during sps parse
2668             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[i];
2669
2670         if (abs_frame_num > 0) {
2671             int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
2672             int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
2673
2674             expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
2675             for (i = 0; i <= frame_num_in_poc_cycle; i++)
2676                 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[i];
2677         } else
2678             expectedpoc = 0;
2679
2680         if (h->nal_ref_idc == 0)
2681             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
2682
2683         field_poc[0] = expectedpoc + h->delta_poc[0];
2684         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
2685
2686         if (s->picture_structure == PICT_FRAME)
2687             field_poc[1] += h->delta_poc[1];
2688     } else {
2689         int poc = 2 * (h->frame_num_offset + h->frame_num);
2690
2691         if (!h->nal_ref_idc)
2692             poc--;
2693
2694         field_poc[0] = poc;
2695         field_poc[1] = poc;
2696     }
2697
2698     if (s->picture_structure != PICT_BOTTOM_FIELD)
2699         s->current_picture_ptr->field_poc[0] = field_poc[0];
2700     if (s->picture_structure != PICT_TOP_FIELD)
2701         s->current_picture_ptr->field_poc[1] = field_poc[1];
2702     cur->poc = FFMIN(cur->field_poc[0], cur->field_poc[1]);
2703
2704     return 0;
2705 }
2706
2707 /**
2708  * initialize scan tables
2709  */
2710 static void init_scan_tables(H264Context *h)
2711 {
2712     int i;
2713     for (i = 0; i < 16; i++) {
2714 #define T(x) (x >> 2) | ((x << 2) & 0xF)
2715         h->zigzag_scan[i] = T(zigzag_scan[i]);
2716         h->field_scan[i]  = T(field_scan[i]);
2717 #undef T
2718     }
2719     for (i = 0; i < 64; i++) {
2720 #define T(x) (x >> 3) | ((x & 7) << 3)
2721         h->zigzag_scan8x8[i]       = T(ff_zigzag_direct[i]);
2722         h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
2723         h->field_scan8x8[i]        = T(field_scan8x8[i]);
2724         h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
2725 #undef T
2726     }
2727     if (h->sps.transform_bypass) { // FIXME same ugly
2728         memcpy(h->zigzag_scan_q0          , zigzag_scan             , sizeof(h->zigzag_scan_q0         ));
2729         memcpy(h->zigzag_scan8x8_q0       , ff_zigzag_direct        , sizeof(h->zigzag_scan8x8_q0      ));
2730         memcpy(h->zigzag_scan8x8_cavlc_q0 , zigzag_scan8x8_cavlc    , sizeof(h->zigzag_scan8x8_cavlc_q0));
2731         memcpy(h->field_scan_q0           , field_scan              , sizeof(h->field_scan_q0          ));
2732         memcpy(h->field_scan8x8_q0        , field_scan8x8           , sizeof(h->field_scan8x8_q0       ));
2733         memcpy(h->field_scan8x8_cavlc_q0  , field_scan8x8_cavlc     , sizeof(h->field_scan8x8_cavlc_q0 ));
2734     } else {
2735         memcpy(h->zigzag_scan_q0          , h->zigzag_scan          , sizeof(h->zigzag_scan_q0         ));
2736         memcpy(h->zigzag_scan8x8_q0       , h->zigzag_scan8x8       , sizeof(h->zigzag_scan8x8_q0      ));
2737         memcpy(h->zigzag_scan8x8_cavlc_q0 , h->zigzag_scan8x8_cavlc , sizeof(h->zigzag_scan8x8_cavlc_q0));
2738         memcpy(h->field_scan_q0           , h->field_scan           , sizeof(h->field_scan_q0          ));
2739         memcpy(h->field_scan8x8_q0        , h->field_scan8x8        , sizeof(h->field_scan8x8_q0       ));
2740         memcpy(h->field_scan8x8_cavlc_q0  , h->field_scan8x8_cavlc  , sizeof(h->field_scan8x8_cavlc_q0 ));
2741     }
2742 }
2743
2744 static int field_end(H264Context *h, int in_setup)
2745 {
2746     MpegEncContext *const s     = &h->s;
2747     AVCodecContext *const avctx = s->avctx;
2748     int err = 0;
2749     s->mb_y = 0;
2750
2751     if (!in_setup && !s->dropable)
2752         ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
2753                                   s->picture_structure == PICT_BOTTOM_FIELD);
2754
2755     if (CONFIG_H264_VDPAU_DECODER &&
2756         s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
2757         ff_vdpau_h264_set_reference_frames(s);
2758
2759     if (in_setup || !(avctx->active_thread_type & FF_THREAD_FRAME)) {
2760         if (!s->dropable) {
2761             err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
2762             h->prev_poc_msb = h->poc_msb;
2763             h->prev_poc_lsb = h->poc_lsb;
2764         }
2765         h->prev_frame_num_offset = h->frame_num_offset;
2766         h->prev_frame_num        = h->frame_num;
2767         h->outputed_poc          = h->next_outputed_poc;
2768     }
2769
2770     if (avctx->hwaccel) {
2771         if (avctx->hwaccel->end_frame(avctx) < 0)
2772             av_log(avctx, AV_LOG_ERROR,
2773                    "hardware accelerator failed to decode picture\n");
2774     }
2775
2776     if (CONFIG_H264_VDPAU_DECODER &&
2777         s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
2778         ff_vdpau_h264_picture_complete(s);
2779
2780     /*
2781      * FIXME: Error handling code does not seem to support interlaced
2782      * when slices span multiple rows
2783      * The ff_er_add_slice calls don't work right for bottom
2784      * fields; they cause massive erroneous error concealing
2785      * Error marking covers both fields (top and bottom).
2786      * This causes a mismatched s->error_count
2787      * and a bad error table. Further, the error count goes to
2788      * INT_MAX when called for bottom field, because mb_y is
2789      * past end by one (callers fault) and resync_mb_y != 0
2790      * causes problems for the first MB line, too.
2791      */
2792     if (!FIELD_PICTURE)
2793         ff_er_frame_end(s);
2794
2795     ff_MPV_frame_end(s);
2796
2797     h->current_slice = 0;
2798
2799     return err;
2800 }
2801
2802 /**
2803  * Replicate H264 "master" context to thread contexts.
2804  */
2805 static void clone_slice(H264Context *dst, H264Context *src)
2806 {
2807     memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
2808     dst->s.current_picture_ptr = src->s.current_picture_ptr;
2809     dst->s.current_picture     = src->s.current_picture;
2810     dst->s.linesize            = src->s.linesize;
2811     dst->s.uvlinesize          = src->s.uvlinesize;
2812     dst->s.first_field         = src->s.first_field;
2813
2814     dst->prev_poc_msb          = src->prev_poc_msb;
2815     dst->prev_poc_lsb          = src->prev_poc_lsb;
2816     dst->prev_frame_num_offset = src->prev_frame_num_offset;
2817     dst->prev_frame_num        = src->prev_frame_num;
2818     dst->short_ref_count       = src->short_ref_count;
2819
2820     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
2821     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
2822     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
2823     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
2824
2825     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
2826     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
2827 }
2828
2829 /**
2830  * Compute profile from profile_idc and constraint_set?_flags.
2831  *
2832  * @param sps SPS
2833  *
2834  * @return profile as defined by FF_PROFILE_H264_*
2835  */
2836 int ff_h264_get_profile(SPS *sps)
2837 {
2838     int profile = sps->profile_idc;
2839
2840     switch (sps->profile_idc) {
2841     case FF_PROFILE_H264_BASELINE:
2842         // constraint_set1_flag set to 1
2843         profile |= (sps->constraint_set_flags & 1 << 1) ? FF_PROFILE_H264_CONSTRAINED : 0;
2844         break;
2845     case FF_PROFILE_H264_HIGH_10:
2846     case FF_PROFILE_H264_HIGH_422:
2847     case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
2848         // constraint_set3_flag set to 1
2849         profile |= (sps->constraint_set_flags & 1 << 3) ? FF_PROFILE_H264_INTRA : 0;
2850         break;
2851     }
2852
2853     return profile;
2854 }
2855
2856 /**
2857  * Decode a slice header.
2858  * This will also call ff_MPV_common_init() and frame_start() as needed.
2859  *
2860  * @param h h264context
2861  * @param h0 h264 master context (differs from 'h' when doing slice-based
2862  *           parallel decoding)
2863  *
2864  * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
2865  */
2866 static int decode_slice_header(H264Context *h, H264Context *h0)
2867 {
2868     MpegEncContext *const s  = &h->s;
2869     MpegEncContext *const s0 = &h0->s;
2870     unsigned int first_mb_in_slice;
2871     unsigned int pps_id;
2872     int num_ref_idx_active_override_flag;
2873     unsigned int slice_type, tmp, i, j;
2874     int default_ref_list_done = 0;
2875     int last_pic_structure, last_pic_dropable;
2876     int must_reinit;
2877
2878     /* FIXME: 2tap qpel isn't implemented for high bit depth. */
2879     if ((s->avctx->flags2 & CODEC_FLAG2_FAST) &&
2880         !h->nal_ref_idc && !h->pixel_shift) {
2881         s->me.qpel_put = s->dsp.put_2tap_qpel_pixels_tab;
2882         s->me.qpel_avg = s->dsp.avg_2tap_qpel_pixels_tab;
2883     } else {
2884         s->me.qpel_put = s->dsp.put_h264_qpel_pixels_tab;
2885         s->me.qpel_avg = s->dsp.avg_h264_qpel_pixels_tab;
2886     }
2887
2888     first_mb_in_slice = get_ue_golomb_long(&s->gb);
2889
2890     if (first_mb_in_slice == 0) { // FIXME better field boundary detection
2891         if (h0->current_slice && FIELD_PICTURE) {
2892             field_end(h, 1);
2893         }
2894
2895         h0->current_slice = 0;
2896         if (!s0->first_field) {
2897             if (s->current_picture_ptr && !s->dropable &&
2898                 s->current_picture_ptr->owner2 == s) {
2899                 ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
2900                                           s->picture_structure == PICT_BOTTOM_FIELD);
2901             }
2902             s->current_picture_ptr = NULL;
2903         }
2904     }
2905
2906     slice_type = get_ue_golomb_31(&s->gb);
2907     if (slice_type > 9) {
2908         av_log(h->s.avctx, AV_LOG_ERROR,
2909                "slice type too large (%d) at %d %d\n",
2910                h->slice_type, s->mb_x, s->mb_y);
2911         return -1;
2912     }
2913     if (slice_type > 4) {
2914         slice_type -= 5;
2915         h->slice_type_fixed = 1;
2916     } else
2917         h->slice_type_fixed = 0;
2918
2919     slice_type = golomb_to_pict_type[slice_type];
2920     if (slice_type == AV_PICTURE_TYPE_I ||
2921         (h0->current_slice != 0 && slice_type == h0->last_slice_type)) {
2922         default_ref_list_done = 1;
2923     }
2924     h->slice_type     = slice_type;
2925     h->slice_type_nos = slice_type & 3;
2926
2927     // to make a few old functions happy, it's wrong though
2928     s->pict_type = h->slice_type;
2929
2930     pps_id = get_ue_golomb(&s->gb);
2931     if (pps_id >= MAX_PPS_COUNT) {
2932         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id %d out of range\n", pps_id);
2933         return -1;
2934     }
2935     if (!h0->pps_buffers[pps_id]) {
2936         av_log(h->s.avctx, AV_LOG_ERROR,
2937                "non-existing PPS %u referenced\n",
2938                pps_id);
2939         return -1;
2940     }
2941     h->pps = *h0->pps_buffers[pps_id];
2942
2943     if (!h0->sps_buffers[h->pps.sps_id]) {
2944         av_log(h->s.avctx, AV_LOG_ERROR,
2945                "non-existing SPS %u referenced\n",
2946                h->pps.sps_id);
2947         return -1;
2948     }
2949     h->sps = *h0->sps_buffers[h->pps.sps_id];
2950
2951     s->avctx->profile = ff_h264_get_profile(&h->sps);
2952     s->avctx->level   = h->sps.level_idc;
2953     s->avctx->refs    = h->sps.ref_frame_count;
2954
2955     must_reinit = (s->context_initialized &&
2956                     (   16*h->sps.mb_width != s->avctx->coded_width
2957                      || 16*h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag) != s->avctx->coded_height
2958                      || s->avctx->bits_per_raw_sample != h->sps.bit_depth_luma
2959                      || h->cur_chroma_format_idc != h->sps.chroma_format_idc
2960                      || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio)));
2961
2962     if(must_reinit && (h != h0 || (s->avctx->active_thread_type & FF_THREAD_FRAME))) {
2963         av_log_missing_feature(s->avctx,
2964                                 "Width/height/bit depth/chroma idc changing with threads is", 0);
2965         return AVERROR_PATCHWELCOME;   // width / height changed during parallelized decoding
2966     }
2967
2968     s->mb_width  = h->sps.mb_width;
2969     s->mb_height = h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
2970
2971     h->b_stride = s->mb_width * 4;
2972
2973     s->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p
2974
2975     s->width  = 16 * s->mb_width;
2976     s->height = 16 * s->mb_height;
2977
2978     if(must_reinit) {
2979         free_tables(h, 0);
2980         flush_dpb(s->avctx);
2981         ff_MPV_common_end(s);
2982         h->list_count = 0;
2983         h->current_slice = 0;
2984     }
2985     if (!s->context_initialized) {
2986         if (h != h0) {
2987             av_log(h->s.avctx, AV_LOG_ERROR,
2988                    "Cannot (re-)initialize context during parallel decoding.\n");
2989             return -1;
2990         }
2991         if(   FFALIGN(s->avctx->width , 16                                 ) == s->width
2992            && FFALIGN(s->avctx->height, 16*(2 - h->sps.frame_mbs_only_flag)) == s->height
2993            && !h->sps.crop_right && !h->sps.crop_bottom
2994            && (s->avctx->width != s->width || s->avctx->height && s->height)
2995         ) {
2996             av_log(h->s.avctx, AV_LOG_DEBUG, "Using externally provided dimensions\n");
2997             s->avctx->coded_width  = s->width;
2998             s->avctx->coded_height = s->height;
2999         } else{
3000             avcodec_set_dimensions(s->avctx, s->width, s->height);
3001             s->avctx->width  -= (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
3002             s->avctx->height -= (1<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1) * (2 - h->sps.frame_mbs_only_flag);
3003         }
3004         s->avctx->sample_aspect_ratio = h->sps.sar;
3005         av_assert0(s->avctx->sample_aspect_ratio.den);
3006
3007         if (s->avctx->bits_per_raw_sample != h->sps.bit_depth_luma ||
3008             h->cur_chroma_format_idc != h->sps.chroma_format_idc) {
3009             if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10 &&
3010                 (h->sps.bit_depth_luma != 9 || !CHROMA422)) {
3011                 s->avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
3012                 h->cur_chroma_format_idc = h->sps.chroma_format_idc;
3013                 h->pixel_shift = h->sps.bit_depth_luma > 8;
3014
3015                 ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma, h->sps.chroma_format_idc);
3016                 ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma, h->sps.chroma_format_idc);
3017                 s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
3018                 ff_dsputil_init(&s->dsp, s->avctx);
3019             } else {
3020                 av_log(s->avctx, AV_LOG_ERROR, "Unsupported bit depth: %d chroma_idc: %d\n",
3021                        h->sps.bit_depth_luma, h->sps.chroma_format_idc);
3022                 return -1;
3023             }
3024         }
3025
3026         if (h->sps.video_signal_type_present_flag) {
3027             s->avctx->color_range = h->sps.full_range>0 ? AVCOL_RANGE_JPEG
3028                                                       : AVCOL_RANGE_MPEG;
3029             if (h->sps.colour_description_present_flag) {
3030                 s->avctx->color_primaries = h->sps.color_primaries;
3031                 s->avctx->color_trc       = h->sps.color_trc;
3032                 s->avctx->colorspace      = h->sps.colorspace;
3033             }
3034         }
3035
3036         if (h->sps.timing_info_present_flag) {
3037             int64_t den = h->sps.time_scale;
3038             if (h->x264_build < 44U)
3039                 den *= 2;
3040             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3041                       h->sps.num_units_in_tick, den, 1 << 30);
3042         }
3043
3044         switch (h->sps.bit_depth_luma) {
3045         case 9:
3046             if (CHROMA444) {
3047                 if (s->avctx->colorspace == AVCOL_SPC_RGB) {
3048                     s->avctx->pix_fmt = PIX_FMT_GBRP9;
3049                 } else
3050                     s->avctx->pix_fmt = PIX_FMT_YUV444P9;
3051             } else if (CHROMA422)
3052                 s->avctx->pix_fmt = PIX_FMT_YUV422P9;
3053             else
3054                 s->avctx->pix_fmt = PIX_FMT_YUV420P9;
3055             break;
3056         case 10:
3057             if (CHROMA444) {
3058                 if (s->avctx->colorspace == AVCOL_SPC_RGB) {
3059                     s->avctx->pix_fmt = PIX_FMT_GBRP10;
3060                 } else
3061                     s->avctx->pix_fmt = PIX_FMT_YUV444P10;
3062             } else if (CHROMA422)
3063                 s->avctx->pix_fmt = PIX_FMT_YUV422P10;
3064             else
3065                 s->avctx->pix_fmt = PIX_FMT_YUV420P10;
3066             break;
3067         case 8:
3068             if (CHROMA444) {
3069                     s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P
3070                                                                                   : PIX_FMT_YUV444P;
3071                     if (s->avctx->colorspace == AVCOL_SPC_RGB) {
3072                         s->avctx->pix_fmt = PIX_FMT_GBR24P;
3073                         av_log(h->s.avctx, AV_LOG_DEBUG, "Detected GBR colorspace.\n");
3074                     } else if (s->avctx->colorspace == AVCOL_SPC_YCGCO) {
3075                         av_log(h->s.avctx, AV_LOG_WARNING, "Detected unsupported YCgCo colorspace.\n");
3076                     }
3077             } else if (CHROMA422) {
3078                 s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ422P
3079                                                                               : PIX_FMT_YUV422P;
3080             } else {
3081                 s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
3082                                                          s->avctx->codec->pix_fmts ?
3083                                                          s->avctx->codec->pix_fmts :
3084                                                          s->avctx->color_range == AVCOL_RANGE_JPEG ?
3085                                                          hwaccel_pixfmt_list_h264_jpeg_420 :
3086                                                          ff_hwaccel_pixfmt_list_420);
3087             }
3088             break;
3089         default:
3090             av_log(s->avctx, AV_LOG_ERROR,
3091                    "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
3092             return AVERROR_INVALIDDATA;
3093         }
3094
3095         s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id,
3096                                             s->avctx->pix_fmt);
3097
3098         if (ff_MPV_common_init(s) < 0) {
3099             av_log(h->s.avctx, AV_LOG_ERROR, "ff_MPV_common_init() failed.\n");
3100             return -1;
3101         }
3102         s->first_field = 0;
3103         h->prev_interlaced_frame = 1;
3104
3105         init_scan_tables(h);
3106         if (ff_h264_alloc_tables(h) < 0) {
3107             av_log(h->s.avctx, AV_LOG_ERROR,
3108                    "Could not allocate memory for h264\n");
3109             return AVERROR(ENOMEM);
3110         }
3111
3112         if (!HAVE_THREADS || !(s->avctx->active_thread_type & FF_THREAD_SLICE)) {
3113             if (context_init(h) < 0) {
3114                 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
3115                 return -1;
3116             }
3117         } else {
3118             for (i = 1; i < s->slice_context_count; i++) {
3119                 H264Context *c;
3120                 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3121                 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3122                 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3123                 c->h264dsp     = h->h264dsp;
3124                 c->sps         = h->sps;
3125                 c->pps         = h->pps;
3126                 c->pixel_shift = h->pixel_shift;
3127                 c->cur_chroma_format_idc = h->cur_chroma_format_idc;
3128                 init_scan_tables(c);
3129                 clone_tables(c, h, i);
3130             }
3131
3132             for (i = 0; i < s->slice_context_count; i++)
3133                 if (context_init(h->thread_context[i]) < 0) {
3134                     av_log(h->s.avctx, AV_LOG_ERROR,
3135                            "context_init() failed.\n");
3136                     return -1;
3137                 }
3138         }
3139     }
3140
3141     if (h == h0 && h->dequant_coeff_pps != pps_id) {
3142         h->dequant_coeff_pps = pps_id;
3143         init_dequant_tables(h);
3144     }
3145
3146     h->frame_num = get_bits(&s->gb, h->sps.log2_max_frame_num);
3147
3148     h->mb_mbaff        = 0;
3149     h->mb_aff_frame    = 0;
3150     last_pic_structure = s0->picture_structure;
3151     last_pic_dropable  = s->dropable;
3152     s->dropable        = h->nal_ref_idc == 0;
3153     if (h->sps.frame_mbs_only_flag) {
3154         s->picture_structure = PICT_FRAME;
3155     } else {
3156         if (!h->sps.direct_8x8_inference_flag && slice_type == AV_PICTURE_TYPE_B) {
3157             av_log(h->s.avctx, AV_LOG_ERROR, "This stream was generated by a broken encoder, invalid 8x8 inference\n");
3158             return -1;
3159         }
3160         if (get_bits1(&s->gb)) { // field_pic_flag
3161             s->picture_structure = PICT_TOP_FIELD + get_bits1(&s->gb); // bottom_field_flag
3162         } else {
3163             s->picture_structure = PICT_FRAME;
3164             h->mb_aff_frame      = h->sps.mb_aff;
3165         }
3166     }
3167     h->mb_field_decoding_flag = s->picture_structure != PICT_FRAME;
3168
3169     if (h0->current_slice != 0) {
3170         if (last_pic_structure != s->picture_structure ||
3171             last_pic_dropable  != s->dropable) {
3172             av_log(h->s.avctx, AV_LOG_ERROR,
3173                    "Changing field mode (%d -> %d) between slices is not allowed\n",
3174                    last_pic_structure, s->picture_structure);
3175             s->picture_structure = last_pic_structure;
3176             s->dropable          = last_pic_dropable;
3177             return AVERROR_INVALIDDATA;
3178         }
3179     } else {
3180         /* Shorten frame num gaps so we don't have to allocate reference
3181          * frames just to throw them away */
3182         if (h->frame_num != h->prev_frame_num && h->prev_frame_num >= 0) {
3183             int unwrap_prev_frame_num = h->prev_frame_num;
3184             int max_frame_num         = 1 << h->sps.log2_max_frame_num;
3185
3186             if (unwrap_prev_frame_num > h->frame_num)
3187                 unwrap_prev_frame_num -= max_frame_num;
3188
3189             if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {
3190                 unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;
3191                 if (unwrap_prev_frame_num < 0)
3192                     unwrap_prev_frame_num += max_frame_num;
3193
3194                 h->prev_frame_num = unwrap_prev_frame_num;
3195             }
3196         }
3197
3198         /* See if we have a decoded first field looking for a pair...
3199          * Here, we're using that to see if we should mark previously
3200          * decode frames as "finished".
3201          * We have to do that before the "dummy" in-between frame allocation,
3202          * since that can modify s->current_picture_ptr. */
3203         if (s0->first_field) {
3204             assert(s0->current_picture_ptr);
3205             assert(s0->current_picture_ptr->f.data[0]);
3206             assert(s0->current_picture_ptr->f.reference != DELAYED_PIC_REF);
3207
3208             /* Mark old field/frame as completed */
3209             if (!last_pic_dropable && s0->current_picture_ptr->owner2 == s0) {
3210                 ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
3211                                           last_pic_structure == PICT_BOTTOM_FIELD);
3212             }
3213
3214             /* figure out if we have a complementary field pair */
3215             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3216                 /* Previous field is unmatched. Don't display it, but let it
3217                  * remain for reference if marked as such. */
3218                 if (!last_pic_dropable && last_pic_structure != PICT_FRAME) {
3219                     ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
3220                                               last_pic_structure == PICT_TOP_FIELD);
3221                 }
3222             } else {
3223                 if (s0->current_picture_ptr->frame_num != h->frame_num) {
3224                     /* This and previous field were reference, but had
3225                      * different frame_nums. Consider this field first in
3226                      * pair. Throw away previous field except for reference
3227                      * purposes. */
3228                     if (!last_pic_dropable && last_pic_structure != PICT_FRAME) {
3229                         ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
3230                                                   last_pic_structure == PICT_TOP_FIELD);
3231                     }
3232                 } else {
3233                     /* Second field in complementary pair */
3234                     if (!((last_pic_structure   == PICT_TOP_FIELD &&
3235                            s->picture_structure == PICT_BOTTOM_FIELD) ||
3236                           (last_pic_structure   == PICT_BOTTOM_FIELD &&
3237                            s->picture_structure == PICT_TOP_FIELD))) {
3238                         av_log(s->avctx, AV_LOG_ERROR,
3239                                "Invalid field mode combination %d/%d\n",
3240                                last_pic_structure, s->picture_structure);
3241                         s->picture_structure = last_pic_structure;
3242                         s->dropable          = last_pic_dropable;
3243                         return AVERROR_INVALIDDATA;
3244                     } else if (last_pic_dropable != s->dropable) {
3245                         av_log(s->avctx, AV_LOG_ERROR,
3246                                "Cannot combine reference and non-reference fields in the same frame\n");
3247                         av_log_ask_for_sample(s->avctx, NULL);
3248                         s->picture_structure = last_pic_structure;
3249                         s->dropable          = last_pic_dropable;
3250                         return AVERROR_INVALIDDATA;
3251                     }
3252
3253                     /* Take ownership of this buffer. Note that if another thread owned
3254                      * the first field of this buffer, we're not operating on that pointer,
3255                      * so the original thread is still responsible for reporting progress
3256                      * on that first field (or if that was us, we just did that above).
3257                      * By taking ownership, we assign responsibility to ourselves to
3258                      * report progress on the second field. */
3259                     s0->current_picture_ptr->owner2 = s0;
3260                 }
3261             }
3262         }
3263
3264         while (h->frame_num != h->prev_frame_num && h->prev_frame_num >= 0 &&
3265                h->frame_num != (h->prev_frame_num + 1) % (1 << h->sps.log2_max_frame_num)) {
3266             Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
3267             av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n",
3268                    h->frame_num, h->prev_frame_num);
3269             if (ff_h264_frame_start(h) < 0)
3270                 return -1;
3271             h->prev_frame_num++;
3272             h->prev_frame_num %= 1 << h->sps.log2_max_frame_num;
3273             s->current_picture_ptr->frame_num = h->prev_frame_num;
3274             ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, 0);
3275             ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, 1);
3276             ff_generate_sliding_window_mmcos(h);
3277             if (ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index) < 0 &&
3278                 (s->avctx->err_recognition & AV_EF_EXPLODE))
3279                 return AVERROR_INVALIDDATA;
3280             /* Error concealment: if a ref is missing, copy the previous ref in its place.
3281              * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
3282              * about there being no actual duplicates.
3283              * FIXME: this doesn't copy padding for out-of-frame motion vectors.  Given we're
3284              * concealing a lost frame, this probably isn't noticeable by comparison, but it should
3285              * be fixed. */
3286             if (h->short_ref_count) {
3287                 if (prev) {
3288                     av_image_copy(h->short_ref[0]->f.data, h->short_ref[0]->f.linesize,
3289                                   (const uint8_t **)prev->f.data, prev->f.linesize,
3290                                   s->avctx->pix_fmt, s->mb_width * 16, s->mb_height * 16);
3291                     h->short_ref[0]->poc = prev->poc + 2;
3292                 }
3293                 h->short_ref[0]->frame_num = h->prev_frame_num;
3294             }
3295         }
3296
3297         /* See if we have a decoded first field looking for a pair...
3298          * We're using that to see whether to continue decoding in that
3299          * frame, or to allocate a new one. */
3300         if (s0->first_field) {
3301             assert(s0->current_picture_ptr);
3302             assert(s0->current_picture_ptr->f.data[0]);
3303             assert(s0->current_picture_ptr->f.reference != DELAYED_PIC_REF);
3304
3305             /* figure out if we have a complementary field pair */
3306             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3307                 /* Previous field is unmatched. Don't display it, but let it
3308                  * remain for reference if marked as such. */
3309                 s0->current_picture_ptr = NULL;
3310                 s0->first_field         = FIELD_PICTURE;
3311             } else {
3312                 if (s0->current_picture_ptr->frame_num != h->frame_num) {
3313                     ff_thread_report_progress((AVFrame*)s0->current_picture_ptr, INT_MAX,
3314                                               s0->picture_structure==PICT_BOTTOM_FIELD);
3315                     /* This and the previous field had different frame_nums.
3316                      * Consider this field first in pair. Throw away previous
3317                      * one except for reference purposes. */
3318                     s0->first_field         = 1;
3319                     s0->current_picture_ptr = NULL;
3320                 } else {
3321                     /* Second field in complementary pair */
3322                     s0->first_field = 0;
3323                 }
3324             }
3325         } else {
3326             /* Frame or first field in a potentially complementary pair */
3327             assert(!s0->current_picture_ptr);
3328             s0->first_field = FIELD_PICTURE;
3329         }
3330
3331         if (!FIELD_PICTURE || s0->first_field) {
3332             if (ff_h264_frame_start(h) < 0) {
3333                 s0->first_field = 0;
3334                 return -1;
3335             }
3336         } else {
3337             ff_release_unused_pictures(s, 0);
3338         }
3339     }
3340     if (h != h0)
3341         clone_slice(h, h0);
3342
3343     s->current_picture_ptr->frame_num = h->frame_num; // FIXME frame_num cleanup
3344
3345     assert(s->mb_num == s->mb_width * s->mb_height);
3346     if (first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3347         first_mb_in_slice >= s->mb_num) {
3348         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3349         return -1;
3350     }
3351     s->resync_mb_x = s->mb_x =  first_mb_in_slice % s->mb_width;
3352     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3353     if (s->picture_structure == PICT_BOTTOM_FIELD)
3354         s->resync_mb_y = s->mb_y = s->mb_y + 1;
3355     assert(s->mb_y < s->mb_height);
3356
3357     if (s->picture_structure == PICT_FRAME) {
3358         h->curr_pic_num = h->frame_num;
3359         h->max_pic_num  = 1 << h->sps.log2_max_frame_num;
3360     } else {
3361         h->curr_pic_num = 2 * h->frame_num + 1;
3362         h->max_pic_num  = 1 << (h->sps.log2_max_frame_num + 1);
3363     }
3364
3365     if (h->nal_unit_type == NAL_IDR_SLICE)
3366         get_ue_golomb(&s->gb); /* idr_pic_id */
3367
3368     if (h->sps.poc_type == 0) {
3369         h->poc_lsb = get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3370
3371         if (h->pps.pic_order_present == 1 && s->picture_structure == PICT_FRAME)
3372             h->delta_poc_bottom = get_se_golomb(&s->gb);
3373     }
3374
3375     if (h->sps.poc_type == 1 && !h->sps.delta_pic_order_always_zero_flag) {
3376         h->delta_poc[0] = get_se_golomb(&s->gb);
3377
3378         if (h->pps.pic_order_present == 1 && s->picture_structure == PICT_FRAME)
3379             h->delta_poc[1] = get_se_golomb(&s->gb);
3380     }
3381
3382     init_poc(h);
3383
3384     if (h->pps.redundant_pic_cnt_present)
3385         h->redundant_pic_count = get_ue_golomb(&s->gb);
3386
3387     // set defaults, might be overridden a few lines later
3388     h->ref_count[0] = h->pps.ref_count[0];
3389     h->ref_count[1] = h->pps.ref_count[1];
3390
3391     if (h->slice_type_nos != AV_PICTURE_TYPE_I) {
3392         unsigned max[2];
3393         max[0] = max[1] = s->picture_structure == PICT_FRAME ? 15 : 31;
3394
3395         if (h->slice_type_nos == AV_PICTURE_TYPE_B)
3396             h->direct_spatial_mv_pred = get_bits1(&s->gb);
3397         num_ref_idx_active_override_flag = get_bits1(&s->gb);
3398
3399         if (num_ref_idx_active_override_flag) {
3400             h->ref_count[0] = get_ue_golomb(&s->gb) + 1;
3401             if (h->slice_type_nos == AV_PICTURE_TYPE_B)
3402                 h->ref_count[1] = get_ue_golomb(&s->gb) + 1;
3403             else
3404                 // full range is spec-ok in this case, even for frames
3405                 max[1] = 31;
3406         }
3407
3408         if (h->ref_count[0]-1 > max[0] || h->ref_count[1]-1 > max[1]){
3409             av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow %u > %u or %u > %u\n", h->ref_count[0]-1, max[0], h->ref_count[1]-1, max[1]);
3410             h->ref_count[0] = h->ref_count[1] = 1;
3411             return AVERROR_INVALIDDATA;
3412         }
3413
3414         if (h->slice_type_nos == AV_PICTURE_TYPE_B)
3415             h->list_count = 2;
3416         else
3417             h->list_count = 1;
3418     } else
3419         h->ref_count[1]= h->ref_count[0]= h->list_count= 0;
3420
3421     if (!default_ref_list_done)
3422         ff_h264_fill_default_ref_list(h);
3423
3424     if (h->slice_type_nos != AV_PICTURE_TYPE_I &&
3425         ff_h264_decode_ref_pic_list_reordering(h) < 0) {
3426         h->ref_count[1] = h->ref_count[0] = 0;
3427         return -1;
3428     }
3429
3430     if (h->slice_type_nos != AV_PICTURE_TYPE_I) {
3431         s->last_picture_ptr = &h->ref_list[0][0];
3432         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3433     }
3434     if (h->slice_type_nos == AV_PICTURE_TYPE_B) {
3435         s->next_picture_ptr = &h->ref_list[1][0];
3436         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3437     }
3438
3439     if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) ||
3440         (h->pps.weighted_bipred_idc == 1 &&
3441          h->slice_type_nos == AV_PICTURE_TYPE_B))
3442         pred_weight_table(h);
3443     else if (h->pps.weighted_bipred_idc == 2 &&
3444              h->slice_type_nos == AV_PICTURE_TYPE_B) {
3445         implicit_weight_table(h, -1);
3446     } else {
3447         h->use_weight = 0;
3448         for (i = 0; i < 2; i++) {
3449             h->luma_weight_flag[i]   = 0;
3450             h->chroma_weight_flag[i] = 0;
3451         }
3452     }
3453
3454     if (h->nal_ref_idc && ff_h264_decode_ref_pic_marking(h0, &s->gb) < 0 &&
3455         (s->avctx->err_recognition & AV_EF_EXPLODE))
3456         return AVERROR_INVALIDDATA;
3457
3458     if (FRAME_MBAFF) {
3459         ff_h264_fill_mbaff_ref_list(h);
3460
3461         if (h->pps.weighted_bipred_idc == 2 && h->slice_type_nos == AV_PICTURE_TYPE_B) {
3462             implicit_weight_table(h, 0);
3463             implicit_weight_table(h, 1);
3464         }
3465     }
3466
3467     if (h->slice_type_nos == AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred)
3468         ff_h264_direct_dist_scale_factor(h);
3469     ff_h264_direct_ref_list_init(h);
3470
3471     if (h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac) {
3472         tmp = get_ue_golomb_31(&s->gb);
3473         if (tmp > 2) {
3474             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3475             return -1;
3476         }
3477         h->cabac_init_idc = tmp;
3478     }
3479
3480     h->last_qscale_diff = 0;
3481     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3482     if (tmp > 51 + 6 * (h->sps.bit_depth_luma - 8)) {
3483         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3484         return -1;
3485     }
3486     s->qscale       = tmp;
3487     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3488     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3489     // FIXME qscale / qp ... stuff
3490     if (h->slice_type == AV_PICTURE_TYPE_SP)
3491         get_bits1(&s->gb); /* sp_for_switch_flag */
3492     if (h->slice_type == AV_PICTURE_TYPE_SP ||
3493         h->slice_type == AV_PICTURE_TYPE_SI)
3494         get_se_golomb(&s->gb); /* slice_qs_delta */
3495
3496     h->deblocking_filter     = 1;
3497     h->slice_alpha_c0_offset = 52;
3498     h->slice_beta_offset     = 52;
3499     if (h->pps.deblocking_filter_parameters_present) {
3500         tmp = get_ue_golomb_31(&s->gb);
3501         if (tmp > 2) {
3502             av_log(s->avctx, AV_LOG_ERROR,
3503                    "deblocking_filter_idc %u out of range\n", tmp);
3504             return -1;
3505         }
3506         h->deblocking_filter = tmp;
3507         if (h->deblocking_filter < 2)
3508             h->deblocking_filter ^= 1;  // 1<->0
3509
3510         if (h->deblocking_filter) {
3511             h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
3512             h->slice_beta_offset     += get_se_golomb(&s->gb) << 1;
3513             if (h->slice_alpha_c0_offset > 104U ||
3514                 h->slice_beta_offset     > 104U) {
3515                 av_log(s->avctx, AV_LOG_ERROR,
3516                        "deblocking filter parameters %d %d out of range\n",
3517                        h->slice_alpha_c0_offset, h->slice_beta_offset);
3518                 return -1;
3519             }
3520         }
3521     }
3522
3523     if (s->avctx->skip_loop_filter >= AVDISCARD_ALL ||
3524         (s->avctx->skip_loop_filter >= AVDISCARD_NONKEY &&
3525          h->slice_type_nos != AV_PICTURE_TYPE_I) ||
3526         (s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  &&
3527          h->slice_type_nos == AV_PICTURE_TYPE_B) ||
3528         (s->avctx->skip_loop_filter >= AVDISCARD_NONREF &&
3529          h->nal_ref_idc == 0))
3530         h->deblocking_filter = 0;
3531
3532     if (h->deblocking_filter == 1 && h0->max_contexts > 1) {
3533         if (s->avctx->flags2 & CODEC_FLAG2_FAST) {
3534             /* Cheat slightly for speed:
3535              * Do not bother to deblock across slices. */
3536             h->deblocking_filter = 2;
3537         } else {
3538             h0->max_contexts = 1;
3539             if (!h0->single_decode_warning) {
3540                 av_log(s->avctx, AV_LOG_INFO,
3541                        "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3542                 h0->single_decode_warning = 1;
3543             }
3544             if (h != h0) {
3545                 av_log(h->s.avctx, AV_LOG_ERROR,
3546                        "Deblocking switched inside frame.\n");
3547                 return 1;
3548             }
3549         }
3550     }
3551     h->qp_thresh = 15 + 52 -
3552                    FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) -
3553                    FFMAX3(0,
3554                           h->pps.chroma_qp_index_offset[0],
3555                           h->pps.chroma_qp_index_offset[1]) +
3556                    6 * (h->sps.bit_depth_luma - 8);
3557
3558     h0->last_slice_type = slice_type;
3559     h->slice_num = ++h0->current_slice;
3560
3561     if (h->slice_num)
3562         h0->slice_row[(h->slice_num-1)&(MAX_SLICES-1)]= s->resync_mb_y;
3563     if (   h0->slice_row[h->slice_num&(MAX_SLICES-1)] + 3 >= s->resync_mb_y
3564         && h0->slice_row[h->slice_num&(MAX_SLICES-1)] <= s->resync_mb_y
3565         && h->slice_num >= MAX_SLICES) {
3566         //in case of ASO this check needs to be updated depending on how we decide to assign slice numbers in this case
3567         av_log(s->avctx, AV_LOG_WARNING, "Possibly too many slices (%d >= %d), increase MAX_SLICES and recompile if there are artifacts\n", h->slice_num, MAX_SLICES);
3568     }
3569
3570     for (j = 0; j < 2; j++) {
3571         int id_list[16];
3572         int *ref2frm = h->ref2frm[h->slice_num & (MAX_SLICES - 1)][j];
3573         for (i = 0; i < 16; i++) {
3574             id_list[i] = 60;
3575             if (h->ref_list[j][i].f.data[0]) {
3576                 int k;
3577                 uint8_t *base = h->ref_list[j][i].f.base[0];
3578                 for (k = 0; k < h->short_ref_count; k++)
3579                     if (h->short_ref[k]->f.base[0] == base) {
3580                         id_list[i] = k;
3581                         break;
3582                     }
3583                 for (k = 0; k < h->long_ref_count; k++)
3584                     if (h->long_ref[k] && h->long_ref[k]->f.base[0] == base) {
3585                         id_list[i] = h->short_ref_count + k;
3586                         break;
3587                     }
3588             }
3589         }
3590
3591         ref2frm[0]     =
3592             ref2frm[1] = -1;
3593         for (i = 0; i < 16; i++)
3594             ref2frm[i + 2] = 4 * id_list[i] +
3595                              (h->ref_list[j][i].f.reference & 3);
3596         ref2frm[18 + 0]     =
3597             ref2frm[18 + 1] = -1;
3598         for (i = 16; i < 48; i++)
3599             ref2frm[i + 4] = 4 * id_list[(i - 16) >> 1] +
3600                              (h->ref_list[j][i].f.reference & 3);
3601     }
3602
3603     // FIXME: fix draw_edges + PAFF + frame threads
3604     h->emu_edge_width  = (s->flags & CODEC_FLAG_EMU_EDGE ||
3605                           (!h->sps.frame_mbs_only_flag &&
3606                            s->avctx->active_thread_type))
3607                          ? 0 : 16;
3608     h->emu_edge_height = (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
3609
3610     if (s->avctx->debug & FF_DEBUG_PICT_INFO) {
3611         av_log(h->s.avctx, AV_LOG_DEBUG,
3612                "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
3613                h->slice_num,
3614                (s->picture_structure == PICT_FRAME ? "F" : s->picture_structure == PICT_TOP_FIELD ? "T" : "B"),
3615                first_mb_in_slice,
3616                av_get_picture_type_char(h->slice_type),
3617                h->slice_type_fixed ? " fix" : "",
3618                h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
3619                pps_id, h->frame_num,
3620                s->current_picture_ptr->field_poc[0],
3621                s->current_picture_ptr->field_poc[1],
3622                h->ref_count[0], h->ref_count[1],
3623                s->qscale,
3624                h->deblocking_filter,
3625                h->slice_alpha_c0_offset / 2 - 26, h->slice_beta_offset / 2 - 26,
3626                h->use_weight,
3627                h->use_weight == 1 && h->use_weight_chroma ? "c" : "",
3628                h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : "");
3629     }
3630
3631     return 0;
3632 }
3633
3634 int ff_h264_get_slice_type(const H264Context *h)
3635 {
3636     switch (h->slice_type) {
3637     case AV_PICTURE_TYPE_P:
3638         return 0;
3639     case AV_PICTURE_TYPE_B:
3640         return 1;
3641     case AV_PICTURE_TYPE_I:
3642         return 2;
3643     case AV_PICTURE_TYPE_SP:
3644         return 3;
3645     case AV_PICTURE_TYPE_SI:
3646         return 4;
3647     default:
3648         return -1;
3649     }
3650 }
3651
3652 static av_always_inline void fill_filter_caches_inter(H264Context *h,
3653                                                       MpegEncContext *const s,
3654                                                       int mb_type, int top_xy,
3655                                                       int left_xy[LEFT_MBS],
3656                                                       int top_type,
3657                                                       int left_type[LEFT_MBS],
3658                                                       int mb_xy, int list)
3659 {
3660     int b_stride = h->b_stride;
3661     int16_t(*mv_dst)[2] = &h->mv_cache[list][scan8[0]];
3662     int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
3663     if (IS_INTER(mb_type) || IS_DIRECT(mb_type)) {
3664         if (USES_LIST(top_type, list)) {
3665             const int b_xy  = h->mb2b_xy[top_xy] + 3 * b_stride;
3666             const int b8_xy = 4 * top_xy + 2;
3667             int (*ref2frm)[64] = (void*)(h->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF ? 20 : 2));
3668             AV_COPY128(mv_dst - 1 * 8, s->current_picture.f.motion_val[list][b_xy + 0]);
3669             ref_cache[0 - 1 * 8] =
3670             ref_cache[1 - 1 * 8] = ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 0]];
3671             ref_cache[2 - 1 * 8] =
3672             ref_cache[3 - 1 * 8] = ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 1]];
3673         } else {
3674             AV_ZERO128(mv_dst - 1 * 8);
3675             AV_WN32A(&ref_cache[0 - 1 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
3676         }
3677
3678         if (!IS_INTERLACED(mb_type ^ left_type[LTOP])) {
3679             if (USES_LIST(left_type[LTOP], list)) {
3680                 const int b_xy  = h->mb2b_xy[left_xy[LTOP]] + 3;
3681                 const int b8_xy = 4 * left_xy[LTOP] + 1;
3682                 int (*ref2frm)[64] =(void*)( h->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF ? 20 : 2));
3683                 AV_COPY32(mv_dst - 1 +  0, s->current_picture.f.motion_val[list][b_xy + b_stride * 0]);
3684                 AV_COPY32(mv_dst - 1 +  8, s->current_picture.f.motion_val[list][b_xy + b_stride * 1]);
3685                 AV_COPY32(mv_dst - 1 + 16, s->current_picture.f.motion_val[list][b_xy + b_stride * 2]);
3686                 AV_COPY32(mv_dst - 1 + 24, s->current_picture.f.motion_val[list][b_xy + b_stride * 3]);
3687                 ref_cache[-1 +  0] =
3688                 ref_cache[-1 +  8] = ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 2 * 0]];
3689                 ref_cache[-1 + 16] =
3690                 ref_cache[-1 + 24] = ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 2 * 1]];
3691             } else {
3692                 AV_ZERO32(mv_dst - 1 +  0);
3693                 AV_ZERO32(mv_dst - 1 +  8);
3694                 AV_ZERO32(mv_dst - 1 + 16);
3695                 AV_ZERO32(mv_dst - 1 + 24);
3696                 ref_cache[-1 +  0] =
3697                 ref_cache[-1 +  8] =
3698                 ref_cache[-1 + 16] =
3699                 ref_cache[-1 + 24] = LIST_NOT_USED;
3700             }
3701         }
3702     }
3703
3704     if (!USES_LIST(mb_type, list)) {
3705         fill_rectangle(mv_dst, 4, 4, 8, pack16to32(0, 0), 4);
3706         AV_WN32A(&ref_cache[0 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
3707         AV_WN32A(&ref_cache[1 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
3708         AV_WN32A(&ref_cache[2 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
3709         AV_WN32A(&ref_cache[3 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
3710         return;
3711     }
3712
3713     {
3714         int8_t *ref = &s->current_picture.f.ref_index[list][4 * mb_xy];
3715         int (*ref2frm)[64] = (void*)(h->ref2frm[h->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF ? 20 : 2));
3716         uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]], ref2frm[list][ref[1]]) & 0x00FF00FF) * 0x0101;
3717         uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]], ref2frm[list][ref[3]]) & 0x00FF00FF) * 0x0101;
3718         AV_WN32A(&ref_cache[0 * 8], ref01);
3719         AV_WN32A(&ref_cache[1 * 8], ref01);
3720         AV_WN32A(&ref_cache[2 * 8], ref23);
3721         AV_WN32A(&ref_cache[3 * 8], ref23);
3722     }
3723
3724     {
3725         int16_t(*mv_src)[2] = &s->current_picture.f.motion_val[list][4 * s->mb_x + 4 * s->mb_y * b_stride];
3726         AV_COPY128(mv_dst + 8 * 0, mv_src + 0 * b_stride);
3727         AV_COPY128(mv_dst + 8 * 1, mv_src + 1 * b_stride);
3728         AV_COPY128(mv_dst + 8 * 2, mv_src + 2 * b_stride);
3729         AV_COPY128(mv_dst + 8 * 3, mv_src + 3 * b_stride);
3730     }
3731 }
3732
3733 /**
3734  *
3735  * @return non zero if the loop filter can be skipped
3736  */
3737 static int fill_filter_caches(H264Context *h, int mb_type)
3738 {
3739     MpegEncContext *const s = &h->s;
3740     const int mb_xy = h->mb_xy;
3741     int top_xy, left_xy[LEFT_MBS];
3742     int top_type, left_type[LEFT_MBS];
3743     uint8_t *nnz;
3744     uint8_t *nnz_cache;
3745
3746     top_xy = mb_xy - (s->mb_stride << MB_FIELD);
3747
3748     /* Wow, what a mess, why didn't they simplify the interlacing & intra
3749      * stuff, I can't imagine that these complex rules are worth it. */
3750
3751     left_xy[LBOT] = left_xy[LTOP] = mb_xy - 1;
3752     if (FRAME_MBAFF) {
3753         const int left_mb_field_flag = IS_INTERLACED(s->current_picture.f.mb_type[mb_xy - 1]);
3754         const int curr_mb_field_flag = IS_INTERLACED(mb_type);
3755         if (s->mb_y & 1) {
3756             if (left_mb_field_flag != curr_mb_field_flag)
3757                 left_xy[LTOP] -= s->mb_stride;
3758         } else {
3759             if (curr_mb_field_flag)
3760                 top_xy += s->mb_stride &
3761                     (((s->current_picture.f.mb_type[top_xy] >> 7) & 1) - 1);
3762             if (left_mb_field_flag != curr_mb_field_flag)
3763                 left_xy[LBOT] += s->mb_stride;
3764         }
3765     }
3766
3767     h->top_mb_xy        = top_xy;
3768     h->left_mb_xy[LTOP] = left_xy[LTOP];
3769     h->left_mb_xy[LBOT] = left_xy[LBOT];
3770     {
3771         /* For sufficiently low qp, filtering wouldn't do anything.
3772          * This is a conservative estimate: could also check beta_offset
3773          * and more accurate chroma_qp. */
3774         int qp_thresh = h->qp_thresh; // FIXME strictly we should store qp_thresh for each mb of a slice
3775         int qp        = s->current_picture.f.qscale_table[mb_xy];
3776         if (qp <= qp_thresh &&
3777             (left_xy[LTOP] < 0 ||
3778              ((qp + s->current_picture.f.qscale_table[left_xy[LTOP]] + 1) >> 1) <= qp_thresh) &&
3779             (top_xy < 0 ||
3780              ((qp + s->current_picture.f.qscale_table[top_xy] + 1) >> 1) <= qp_thresh)) {
3781             if (!FRAME_MBAFF)
3782                 return 1;
3783             if ((left_xy[LTOP] < 0 ||
3784                  ((qp + s->current_picture.f.qscale_table[left_xy[LBOT]] + 1) >> 1) <= qp_thresh) &&
3785                 (top_xy < s->mb_stride ||
3786                  ((qp + s->current_picture.f.qscale_table[top_xy - s->mb_stride] + 1) >> 1) <= qp_thresh))
3787                 return 1;
3788         }
3789     }
3790
3791     top_type        = s->current_picture.f.mb_type[top_xy];
3792     left_type[LTOP] = s->current_picture.f.mb_type[left_xy[LTOP]];
3793     left_type[LBOT] = s->current_picture.f.mb_type[left_xy[LBOT]];
3794     if (h->deblocking_filter == 2) {
3795         if (h->slice_table[top_xy] != h->slice_num)
3796             top_type = 0;
3797         if (h->slice_table[left_xy[LBOT]] != h->slice_num)
3798             left_type[LTOP] = left_type[LBOT] = 0;
3799     } else {
3800         if (h->slice_table[top_xy] == 0xFFFF)
3801             top_type = 0;
3802         if (h->slice_table[left_xy[LBOT]] == 0xFFFF)
3803             left_type[LTOP] = left_type[LBOT] = 0;
3804     }
3805     h->top_type        = top_type;
3806     h->left_type[LTOP] = left_type[LTOP];
3807     h->left_type[LBOT] = left_type[LBOT];
3808
3809     if (IS_INTRA(mb_type))
3810         return 0;
3811
3812     fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy,
3813                              top_type, left_type, mb_xy, 0);
3814     if (h->list_count == 2)
3815         fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy,
3816                                  top_type, left_type, mb_xy, 1);
3817
3818     nnz       = h->non_zero_count[mb_xy];
3819     nnz_cache = h->non_zero_count_cache;
3820     AV_COPY32(&nnz_cache[4 + 8 * 1], &nnz[0]);
3821     AV_COPY32(&nnz_cache[4 + 8 * 2], &nnz[4]);
3822     AV_COPY32(&nnz_cache[4 + 8 * 3], &nnz[8]);
3823     AV_COPY32(&nnz_cache[4 + 8 * 4], &nnz[12]);
3824     h->cbp = h->cbp_table[mb_xy];
3825
3826     if (top_type) {
3827         nnz = h->non_zero_count[top_xy];
3828         AV_COPY32(&nnz_cache[4 + 8 * 0], &nnz[3 * 4]);
3829     }
3830
3831     if (left_type[LTOP]) {
3832         nnz = h->non_zero_count[left_xy[LTOP]];
3833         nnz_cache[3 + 8 * 1] = nnz[3 + 0 * 4];
3834         nnz_cache[3 + 8 * 2] = nnz[3 + 1 * 4];
3835         nnz_cache[3 + 8 * 3] = nnz[3 + 2 * 4];
3836         nnz_cache[3 + 8 * 4] = nnz[3 + 3 * 4];
3837     }
3838
3839     /* CAVLC 8x8dct requires NNZ values for residual decoding that differ
3840      * from what the loop filter needs */
3841     if (!CABAC && h->pps.transform_8x8_mode) {
3842         if (IS_8x8DCT(top_type)) {
3843             nnz_cache[4 + 8 * 0]     =
3844                 nnz_cache[5 + 8 * 0] = (h->cbp_table[top_xy] & 0x4000) >> 12;
3845             nnz_cache[6 + 8 * 0]     =
3846                 nnz_cache[7 + 8 * 0] = (h->cbp_table[top_xy] & 0x8000) >> 12;
3847         }
3848         if (IS_8x8DCT(left_type[LTOP])) {
3849             nnz_cache[3 + 8 * 1]     =
3850                 nnz_cache[3 + 8 * 2] = (h->cbp_table[left_xy[LTOP]] & 0x2000) >> 12; // FIXME check MBAFF
3851         }
3852         if (IS_8x8DCT(left_type[LBOT])) {
3853             nnz_cache[3 + 8 * 3]     =
3854                 nnz_cache[3 + 8 * 4] = (h->cbp_table[left_xy[LBOT]] & 0x8000) >> 12; // FIXME check MBAFF
3855         }
3856
3857         if (IS_8x8DCT(mb_type)) {
3858             nnz_cache[scan8[0]] =
3859             nnz_cache[scan8[1]] =
3860             nnz_cache[scan8[2]] =
3861             nnz_cache[scan8[3]] = (h->cbp & 0x1000) >> 12;
3862
3863             nnz_cache[scan8[0 + 4]] =
3864             nnz_cache[scan8[1 + 4]] =
3865             nnz_cache[scan8[2 + 4]] =
3866             nnz_cache[scan8[3 + 4]] = (h->cbp & 0x2000) >> 12;
3867
3868             nnz_cache[scan8[0 + 8]] =
3869             nnz_cache[scan8[1 + 8]] =
3870             nnz_cache[scan8[2 + 8]] =
3871             nnz_cache[scan8[3 + 8]] = (h->cbp & 0x4000) >> 12;
3872
3873             nnz_cache[scan8[0 + 12]] =
3874             nnz_cache[scan8[1 + 12]] =
3875             nnz_cache[scan8[2 + 12]] =
3876             nnz_cache[scan8[3 + 12]] = (h->cbp & 0x8000) >> 12;
3877         }
3878     }
3879
3880     return 0;
3881 }
3882
3883 static void loop_filter(H264Context *h, int start_x, int end_x)
3884 {
3885     MpegEncContext *const s = &h->s;
3886     uint8_t *dest_y, *dest_cb, *dest_cr;
3887     int linesize, uvlinesize, mb_x, mb_y;
3888     const int end_mb_y       = s->mb_y + FRAME_MBAFF;
3889     const int old_slice_type = h->slice_type;
3890     const int pixel_shift    = h->pixel_shift;
3891     const int block_h        = 16 >> s->chroma_y_shift;
3892
3893     if (h->deblocking_filter) {
3894         for (mb_x = start_x; mb_x < end_x; mb_x++)
3895             for (mb_y = end_mb_y - FRAME_MBAFF; mb_y <= end_mb_y; mb_y++) {
3896                 int mb_xy, mb_type;
3897                 mb_xy         = h->mb_xy = mb_x + mb_y * s->mb_stride;
3898                 h->slice_num  = h->slice_table[mb_xy];
3899                 mb_type       = s->current_picture.f.mb_type[mb_xy];
3900                 h->list_count = h->list_counts[mb_xy];
3901
3902                 if (FRAME_MBAFF)
3903                     h->mb_mbaff               =
3904                     h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);
3905
3906                 s->mb_x = mb_x;
3907                 s->mb_y = mb_y;
3908                 dest_y  = s->current_picture.f.data[0] +
3909                           ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
3910                 dest_cb = s->current_picture.f.data[1] +
3911                           (mb_x << pixel_shift) * (8 << CHROMA444) +
3912                           mb_y * s->uvlinesize * block_h;
3913                 dest_cr = s->current_picture.f.data[2] +
3914                           (mb_x << pixel_shift) * (8 << CHROMA444) +
3915                           mb_y * s->uvlinesize * block_h;
3916                 // FIXME simplify above
3917
3918                 if (MB_FIELD) {
3919                     linesize   = h->mb_linesize   = s->linesize   * 2;
3920                     uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3921                     if (mb_y & 1) { // FIXME move out of this function?
3922                         dest_y  -= s->linesize   * 15;
3923                         dest_cb -= s->uvlinesize * (block_h - 1);
3924                         dest_cr -= s->uvlinesize * (block_h - 1);
3925                     }
3926                 } else {
3927                     linesize   = h->mb_linesize   = s->linesize;
3928                     uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3929                 }
3930                 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize,
3931                                  uvlinesize, 0);
3932                 if (fill_filter_caches(h, mb_type))
3933                     continue;
3934                 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.f.qscale_table[mb_xy]);
3935                 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.f.qscale_table[mb_xy]);
3936
3937                 if (FRAME_MBAFF) {
3938                     ff_h264_filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr,
3939                                       linesize, uvlinesize);
3940                 } else {
3941                     ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb,
3942                                            dest_cr, linesize, uvlinesize);
3943                 }
3944             }
3945     }
3946     h->slice_type   = old_slice_type;
3947     s->mb_x         = end_x;
3948     s->mb_y         = end_mb_y - FRAME_MBAFF;
3949     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3950     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3951 }
3952
3953 static void predict_field_decoding_flag(H264Context *h)
3954 {
3955     MpegEncContext *const s = &h->s;
3956     const int mb_xy = s->mb_x + s->mb_y * s->mb_stride;
3957     int mb_type     = (h->slice_table[mb_xy - 1] == h->slice_num) ?
3958                       s->current_picture.f.mb_type[mb_xy - 1] :
3959                       (h->slice_table[mb_xy - s->mb_stride] == h->slice_num) ?
3960                       s->current_picture.f.mb_type[mb_xy - s->mb_stride] : 0;
3961     h->mb_mbaff     = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
3962 }
3963
3964 /**
3965  * Draw edges and report progress for the last MB row.
3966  */
3967 static void decode_finish_row(H264Context *h)
3968 {
3969     MpegEncContext *const s = &h->s;
3970     int top            = 16 * (s->mb_y      >> FIELD_PICTURE);
3971     int pic_height     = 16 *  s->mb_height >> FIELD_PICTURE;
3972     int height         =  16      << FRAME_MBAFF;
3973     int deblock_border = (16 + 4) << FRAME_MBAFF;
3974
3975     if (h->deblocking_filter) {
3976         if ((top + height) >= pic_height)
3977             height += deblock_border;
3978         top -= deblock_border;
3979     }
3980
3981     if (top >= pic_height || (top + height) < h->emu_edge_height)
3982         return;
3983
3984     height = FFMIN(height, pic_height - top);
3985     if (top < h->emu_edge_height) {
3986         height = top + height;
3987         top    = 0;
3988     }
3989
3990     ff_draw_horiz_band(s, top, height);
3991
3992     if (s->dropable)
3993         return;
3994
3995     ff_thread_report_progress(&s->current_picture_ptr->f, top + height - 1,
3996                               s->picture_structure == PICT_BOTTOM_FIELD);
3997 }
3998
3999 static int decode_slice(struct AVCodecContext *avctx, void *arg)
4000 {
4001     H264Context *h = *(void **)arg;
4002     MpegEncContext *const s = &h->s;
4003     const int part_mask     = s->partitioned_frame ? (ER_AC_END | ER_AC_ERROR)
4004                                                    : 0x7F;
4005     int lf_x_start = s->mb_x;
4006
4007     s->mb_skip_run = -1;
4008
4009     h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME ||
4010                     s->codec_id != CODEC_ID_H264 ||
4011                     (CONFIG_GRAY && (s->flags & CODEC_FLAG_GRAY));
4012
4013     if (h->pps.cabac) {
4014         /* realign */
4015         align_get_bits(&s->gb);
4016
4017         /* init cabac */
4018         ff_init_cabac_states(&h->cabac);
4019         ff_init_cabac_decoder(&h->cabac,
4020                               s->gb.buffer + get_bits_count(&s->gb) / 8,
4021                               (get_bits_left(&s->gb) + 7) / 8);
4022
4023         ff_h264_init_cabac_states(h);
4024
4025         for (;;) {
4026             // START_TIMER
4027             int ret = ff_h264_decode_mb_cabac(h);
4028             int eos;
4029             // STOP_TIMER("decode_mb_cabac")
4030
4031             if (ret >= 0)
4032                 ff_h264_hl_decode_mb(h);
4033
4034             // FIXME optimal? or let mb_decode decode 16x32 ?
4035             if (ret >= 0 && FRAME_MBAFF) {
4036                 s->mb_y++;
4037
4038                 ret = ff_h264_decode_mb_cabac(h);
4039
4040                 if (ret >= 0)
4041                     ff_h264_hl_decode_mb(h);
4042                 s->mb_y--;
4043             }
4044             eos = get_cabac_terminate(&h->cabac);
4045
4046             if ((s->workaround_bugs & FF_BUG_TRUNCATED) &&
4047                 h->cabac.bytestream > h->cabac.bytestream_end + 2) {
4048                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x - 1,
4049                                 s->mb_y, ER_MB_END & part_mask);
4050                 if (s->mb_x >= lf_x_start)
4051                     loop_filter(h, lf_x_start, s->mb_x + 1);
4052                 return 0;
4053             }
4054             if (h->cabac.bytestream > h->cabac.bytestream_end + 2 )
4055                 av_log(h->s.avctx, AV_LOG_DEBUG, "bytestream overread %td\n", h->cabac.bytestream_end - h->cabac.bytestream);
4056             if (ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 4) {
4057                 av_log(h->s.avctx, AV_LOG_ERROR,
4058                        "error while decoding MB %d %d, bytestream (%td)\n",
4059                        s->mb_x, s->mb_y,
4060                        h->cabac.bytestream_end - h->cabac.bytestream);
4061                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x,
4062                                 s->mb_y, ER_MB_ERROR & part_mask);
4063                 return -1;
4064             }
4065
4066             if (++s->mb_x >= s->mb_width) {
4067                 loop_filter(h, lf_x_start, s->mb_x);
4068                 s->mb_x = lf_x_start = 0;
4069                 decode_finish_row(h);
4070                 ++s->mb_y;
4071                 if (FIELD_OR_MBAFF_PICTURE) {
4072                     ++s->mb_y;
4073                     if (FRAME_MBAFF && s->mb_y < s->mb_height)
4074                         predict_field_decoding_flag(h);
4075                 }
4076             }
4077
4078             if (eos || s->mb_y >= s->mb_height) {
4079                 tprintf(s->avctx, "slice end %d %d\n",
4080                         get_bits_count(&s->gb), s->gb.size_in_bits);
4081                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x - 1,
4082                                 s->mb_y, ER_MB_END & part_mask);
4083                 if (s->mb_x > lf_x_start)
4084                     loop_filter(h, lf_x_start, s->mb_x);
4085                 return 0;
4086             }
4087         }
4088     } else {
4089         for (;;) {
4090             int ret = ff_h264_decode_mb_cavlc(h);
4091
4092             if (ret >= 0)
4093                 ff_h264_hl_decode_mb(h);
4094
4095             // FIXME optimal? or let mb_decode decode 16x32 ?
4096             if (ret >= 0 && FRAME_MBAFF) {
4097                 s->mb_y++;
4098                 ret = ff_h264_decode_mb_cavlc(h);
4099
4100                 if (ret >= 0)
4101                     ff_h264_hl_decode_mb(h);
4102                 s->mb_y--;
4103             }
4104
4105             if (ret < 0) {
4106                 av_log(h->s.avctx, AV_LOG_ERROR,
4107                        "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
4108                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x,
4109                                 s->mb_y, ER_MB_ERROR & part_mask);
4110                 return -1;
4111             }
4112
4113             if (++s->mb_x >= s->mb_width) {
4114                 loop_filter(h, lf_x_start, s->mb_x);
4115                 s->mb_x = lf_x_start = 0;
4116                 decode_finish_row(h);
4117                 ++s->mb_y;
4118                 if (FIELD_OR_MBAFF_PICTURE) {
4119                     ++s->mb_y;
4120                     if (FRAME_MBAFF && s->mb_y < s->mb_height)
4121                         predict_field_decoding_flag(h);
4122                 }
4123                 if (s->mb_y >= s->mb_height) {
4124                     tprintf(s->avctx, "slice end %d %d\n",
4125                             get_bits_count(&s->gb), s->gb.size_in_bits);
4126
4127                     if (   get_bits_left(&s->gb) == 0
4128                         || get_bits_left(&s->gb) > 0 && !(s->avctx->err_recognition & AV_EF_AGGRESSIVE)) {
4129                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y,
4130                                         s->mb_x - 1, s->mb_y,
4131                                         ER_MB_END & part_mask);
4132
4133                         return 0;
4134                     } else {
4135                         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y,
4136                                         s->mb_x, s->mb_y,
4137                                         ER_MB_END & part_mask);
4138
4139                         return -1;
4140                     }
4141                 }
4142             }
4143
4144             if (get_bits_left(&s->gb) <= 0 && s->mb_skip_run <= 0) {
4145                 tprintf(s->avctx, "slice end %d %d\n",
4146                         get_bits_count(&s->gb), s->gb.size_in_bits);
4147                 if (get_bits_left(&s->gb) == 0) {
4148                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y,
4149                                     s->mb_x - 1, s->mb_y,
4150                                     ER_MB_END & part_mask);
4151                     if (s->mb_x > lf_x_start)
4152                         loop_filter(h, lf_x_start, s->mb_x);
4153
4154                     return 0;
4155                 } else {
4156                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x,
4157                                     s->mb_y, ER_MB_ERROR & part_mask);
4158
4159                     return -1;
4160                 }
4161             }
4162         }
4163     }
4164 }
4165
4166 /**
4167  * Call decode_slice() for each context.
4168  *
4169  * @param h h264 master context
4170  * @param context_count number of contexts to execute
4171  */
4172 static int execute_decode_slices(H264Context *h, int context_count)
4173 {
4174     MpegEncContext *const s     = &h->s;
4175     AVCodecContext *const avctx = s->avctx;
4176     H264Context *hx;
4177     int i;
4178
4179     if (s->avctx->hwaccel ||
4180         s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
4181         return 0;
4182     if (context_count == 1) {
4183         return decode_slice(avctx, &h);
4184     } else {
4185         for (i = 1; i < context_count; i++) {
4186             hx                    = h->thread_context[i];
4187             hx->s.err_recognition = avctx->err_recognition;
4188             hx->s.error_count     = 0;
4189             hx->x264_build        = h->x264_build;
4190         }
4191
4192         avctx->execute(avctx, decode_slice, h->thread_context,
4193                        NULL, context_count, sizeof(void *));
4194
4195         /* pull back stuff from slices to master context */
4196         hx                   = h->thread_context[context_count - 1];
4197         s->mb_x              = hx->s.mb_x;
4198         s->mb_y              = hx->s.mb_y;
4199         s->dropable          = hx->s.dropable;
4200         s->picture_structure = hx->s.picture_structure;
4201         for (i = 1; i < context_count; i++)
4202             h->s.error_count += h->thread_context[i]->s.error_count;
4203     }
4204
4205     return 0;
4206 }
4207
4208 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size)
4209 {
4210     MpegEncContext *const s     = &h->s;
4211     AVCodecContext *const avctx = s->avctx;
4212     H264Context *hx; ///< thread context
4213     int buf_index;
4214     int context_count;
4215     int next_avc;
4216     int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
4217     int nals_needed = 0; ///< number of NALs that need decoding before the next frame thread starts
4218     int nal_index;
4219
4220     h->nal_unit_type= 0;
4221
4222     if(!s->slice_context_count)
4223          s->slice_context_count= 1;
4224     h->max_contexts = s->slice_context_count;
4225     if (!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
4226         h->current_slice = 0;
4227         if (!s->first_field)
4228             s->current_picture_ptr = NULL;
4229         ff_h264_reset_sei(h);
4230     }
4231
4232     for (; pass <= 1; pass++) {
4233         buf_index     = 0;
4234         context_count = 0;
4235         next_avc      = h->is_avc ? 0 : buf_size;
4236         nal_index     = 0;
4237         for (;;) {
4238             int consumed;
4239             int dst_length;
4240             int bit_length;
4241             const uint8_t *ptr;
4242             int i, nalsize = 0;
4243             int err;
4244
4245             if (buf_index >= next_avc) {
4246                 if (buf_index >= buf_size - h->nal_length_size)
4247                     break;
4248                 nalsize = 0;
4249                 for (i = 0; i < h->nal_length_size; i++)
4250                     nalsize = (nalsize << 8) | buf[buf_index++];
4251                 if (nalsize <= 0 || nalsize > buf_size - buf_index) {
4252                     av_log(h->s.avctx, AV_LOG_ERROR,
4253                            "AVC: nal size %d\n", nalsize);
4254                     break;
4255                 }
4256                 next_avc = buf_index + nalsize;
4257             } else {
4258                 // start code prefix search
4259                 for (; buf_index + 3 < next_avc; buf_index++)
4260                     // This should always succeed in the first iteration.
4261                     if (buf[buf_index]     == 0 &&
4262                         buf[buf_index + 1] == 0 &&
4263                         buf[buf_index + 2] == 1)
4264                         break;
4265
4266                 if (buf_index + 3 >= buf_size)
4267                     break;
4268
4269                 buf_index += 3;
4270                 if (buf_index >= next_avc)
4271                     continue;
4272             }
4273
4274             hx = h->thread_context[context_count];
4275
4276             ptr = ff_h264_decode_nal(hx, buf + buf_index, &dst_length,
4277                                      &consumed, next_avc - buf_index);
4278             if (ptr == NULL || dst_length < 0) {
4279                 buf_index = -1;
4280                 goto end;
4281             }
4282             i = buf_index + consumed;
4283             if ((s->workaround_bugs & FF_BUG_AUTODETECT) && i + 3 < next_avc &&
4284                 buf[i]     == 0x00 && buf[i + 1] == 0x00 &&
4285                 buf[i + 2] == 0x01 && buf[i + 3] == 0xE0)
4286                 s->workaround_bugs |= FF_BUG_TRUNCATED;
4287
4288             if (!(s->workaround_bugs & FF_BUG_TRUNCATED))
4289                 while(dst_length > 0 && ptr[dst_length - 1] == 0)
4290                     dst_length--;
4291             bit_length = !dst_length ? 0
4292                                      : (8 * dst_length -
4293                                         decode_rbsp_trailing(h, ptr + dst_length - 1));
4294
4295             if (s->avctx->debug & FF_DEBUG_STARTCODE)
4296                 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d/%d at %d/%d length %d pass %d\n", hx->nal_unit_type, hx->nal_ref_idc, buf_index, buf_size, dst_length, pass);
4297
4298             if (h->is_avc && (nalsize != consumed) && nalsize)
4299                 av_log(h->s.avctx, AV_LOG_DEBUG,
4300                        "AVC: Consumed only %d bytes instead of %d\n",
4301                        consumed, nalsize);
4302
4303             buf_index += consumed;
4304             nal_index++;
4305
4306             if (pass == 0) {
4307                 /* packets can sometimes contain multiple PPS/SPS,
4308                  * e.g. two PAFF field pictures in one packet, or a demuxer
4309                  * which splits NALs strangely if so, when frame threading we
4310                  * can't start the next thread until we've read all of them */
4311                 switch (hx->nal_unit_type) {
4312                 case NAL_SPS:
4313                 case NAL_PPS:
4314                     nals_needed = nal_index;
4315                     break;
4316                 case NAL_IDR_SLICE:
4317                 case NAL_SLICE:
4318                     init_get_bits(&hx->s.gb, ptr, bit_length);
4319                     if (!get_ue_golomb(&hx->s.gb))
4320                         nals_needed = nal_index;
4321                 }
4322                 continue;
4323             }
4324
4325             // FIXME do not discard SEI id
4326             if (avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)
4327                 continue;
4328
4329 again:
4330             err = 0;
4331             switch (hx->nal_unit_type) {
4332             case NAL_IDR_SLICE:
4333                 if (h->nal_unit_type != NAL_IDR_SLICE) {
4334                     av_log(h->s.avctx, AV_LOG_ERROR,
4335                            "Invalid mix of idr and non-idr slices\n");
4336                     buf_index = -1;
4337                     goto end;
4338                 }
4339                 idr(h); // FIXME ensure we don't lose some frames if there is reordering
4340             case NAL_SLICE:
4341                 init_get_bits(&hx->s.gb, ptr, bit_length);
4342                 hx->intra_gb_ptr        =
4343                     hx->inter_gb_ptr    = &hx->s.gb;
4344                 hx->s.data_partitioning = 0;
4345
4346                 if ((err = decode_slice_header(hx, h)))
4347                     break;
4348
4349                 if (   h->sei_recovery_frame_cnt >= 0
4350                     && (   h->recovery_frame<0
4351                         || ((h->recovery_frame - h->frame_num) & ((1 << h->sps.log2_max_frame_num)-1)) > h->sei_recovery_frame_cnt)) {
4352                     h->recovery_frame = (h->frame_num + h->sei_recovery_frame_cnt) %
4353                                         (1 << h->sps.log2_max_frame_num);
4354                 }
4355
4356                 s->current_picture_ptr->f.key_frame |=
4357                         (hx->nal_unit_type == NAL_IDR_SLICE);
4358
4359                 if (h->recovery_frame == h->frame_num) {
4360                     s->current_picture_ptr->sync |= 1;
4361                     h->recovery_frame = -1;
4362                 }
4363
4364                 h->sync |= !!s->current_picture_ptr->f.key_frame;
4365                 h->sync |= 3*!!(s->flags2 & CODEC_FLAG2_SHOW_ALL);
4366                 s->current_picture_ptr->sync |= h->sync;
4367
4368                 if (h->current_slice == 1) {
4369                     if (!(s->flags2 & CODEC_FLAG2_CHUNKS))
4370                         decode_postinit(h, nal_index >= nals_needed);
4371
4372                     if (s->avctx->hwaccel &&
4373                         s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
4374                         return -1;
4375                     if (CONFIG_H264_VDPAU_DECODER &&
4376                         s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
4377                         ff_vdpau_h264_picture_start(s);
4378                 }
4379
4380                 if (hx->redundant_pic_count == 0 &&
4381                     (avctx->skip_frame < AVDISCARD_NONREF ||
4382                      hx->nal_ref_idc) &&
4383                     (avctx->skip_frame < AVDISCARD_BIDIR  ||
4384                      hx->slice_type_nos != AV_PICTURE_TYPE_B) &&
4385                     (avctx->skip_frame < AVDISCARD_NONKEY ||
4386                      hx->slice_type_nos == AV_PICTURE_TYPE_I) &&
4387                     avctx->skip_frame < AVDISCARD_ALL) {
4388                     if (avctx->hwaccel) {
4389                         if (avctx->hwaccel->decode_slice(avctx,
4390                                                          &buf[buf_index - consumed],
4391                                                          consumed) < 0)
4392                             return -1;
4393                     } else if (CONFIG_H264_VDPAU_DECODER &&
4394                                s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) {
4395                         static const uint8_t start_code[] = {
4396                             0x00, 0x00, 0x01 };
4397                         ff_vdpau_add_data_chunk(s, start_code,
4398                                                 sizeof(start_code));
4399                         ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed],
4400                                                 consumed);
4401                     } else
4402                         context_count++;
4403                 }
4404                 break;
4405             case NAL_DPA:
4406                 init_get_bits(&hx->s.gb, ptr, bit_length);
4407                 hx->intra_gb_ptr =
4408                 hx->inter_gb_ptr = NULL;
4409
4410                 if ((err = decode_slice_header(hx, h)) < 0)
4411                     break;
4412
4413                 hx->s.data_partitioning = 1;
4414                 break;
4415             case NAL_DPB:
4416                 init_get_bits(&hx->intra_gb, ptr, bit_length);
4417                 hx->intra_gb_ptr = &hx->intra_gb;
4418                 break;
4419             case NAL_DPC:
4420                 init_get_bits(&hx->inter_gb, ptr, bit_length);
4421                 hx->inter_gb_ptr = &hx->inter_gb;
4422
4423                 av_log(h->s.avctx, AV_LOG_ERROR, "Partitioned H.264 support is incomplete\n");
4424                 return AVERROR_PATCHWELCOME;
4425
4426                 if (hx->redundant_pic_count == 0 &&
4427                     hx->intra_gb_ptr &&
4428                     hx->s.data_partitioning &&
4429                     s->context_initialized &&
4430                     (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) &&
4431                     (avctx->skip_frame < AVDISCARD_BIDIR  ||
4432                      hx->slice_type_nos != AV_PICTURE_TYPE_B) &&
4433                     (avctx->skip_frame < AVDISCARD_NONKEY ||
4434                      hx->slice_type_nos == AV_PICTURE_TYPE_I) &&
4435                     avctx->skip_frame < AVDISCARD_ALL)
4436                     context_count++;
4437                 break;
4438             case NAL_SEI:
4439                 init_get_bits(&s->gb, ptr, bit_length);
4440                 ff_h264_decode_sei(h);
4441                 break;
4442             case NAL_SPS:
4443                 init_get_bits(&s->gb, ptr, bit_length);
4444                 if (ff_h264_decode_seq_parameter_set(h) < 0 && (h->is_avc ? (nalsize != consumed) && nalsize : 1)) {
4445                     av_log(h->s.avctx, AV_LOG_DEBUG,
4446                            "SPS decoding failure, trying alternative mode\n");
4447                     if (h->is_avc)
4448                         av_assert0(next_avc - buf_index + consumed == nalsize);
4449                     init_get_bits(&s->gb, &buf[buf_index + 1 - consumed],
4450                                   8*(next_avc - buf_index + consumed - 1));
4451                     ff_h264_decode_seq_parameter_set(h);
4452                 }
4453
4454                 if (s->flags & CODEC_FLAG_LOW_DELAY ||
4455                     (h->sps.bitstream_restriction_flag &&
4456                      !h->sps.num_reorder_frames))
4457                     s->low_delay = 1;
4458                 if (avctx->has_b_frames < 2)
4459                     avctx->has_b_frames = !s->low_delay;
4460                 break;
4461             case NAL_PPS:
4462                 init_get_bits(&s->gb, ptr, bit_length);
4463                 ff_h264_decode_picture_parameter_set(h, bit_length);
4464                 break;
4465             case NAL_AUD:
4466             case NAL_END_SEQUENCE:
4467             case NAL_END_STREAM:
4468             case NAL_FILLER_DATA:
4469             case NAL_SPS_EXT:
4470             case NAL_AUXILIARY_SLICE:
4471                 break;
4472             default:
4473                 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n",
4474                        hx->nal_unit_type, bit_length);
4475             }
4476
4477             if (context_count == h->max_contexts) {
4478                 execute_decode_slices(h, context_count);
4479                 context_count = 0;
4480             }
4481
4482             if (err < 0)
4483                 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
4484             else if (err == 1) {
4485                 /* Slice could not be decoded in parallel mode, copy down
4486                  * NAL unit stuff to context 0 and restart. Note that
4487                  * rbsp_buffer is not transferred, but since we no longer
4488                  * run in parallel mode this should not be an issue. */
4489                 h->nal_unit_type = hx->nal_unit_type;
4490                 h->nal_ref_idc   = hx->nal_ref_idc;
4491                 hx               = h;
4492                 goto again;
4493             }
4494         }
4495     }
4496     if (context_count)
4497         execute_decode_slices(h, context_count);
4498
4499 end:
4500     /* clean up */
4501     if (s->current_picture_ptr && s->current_picture_ptr->owner2 == s &&
4502         !s->dropable) {
4503         ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
4504                                   s->picture_structure == PICT_BOTTOM_FIELD);
4505     }
4506
4507     return buf_index;
4508 }
4509
4510 /**
4511  * Return the number of bytes consumed for building the current frame.
4512  */
4513 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size)
4514 {
4515     if (pos == 0)
4516         pos = 1;          // avoid infinite loops (i doubt that is needed but ...)
4517     if (pos + 10 > buf_size)
4518         pos = buf_size;                   // oops ;)
4519
4520     return pos;
4521 }
4522
4523 static int decode_frame(AVCodecContext *avctx, void *data,
4524                         int *data_size, AVPacket *avpkt)
4525 {
4526     const uint8_t *buf = avpkt->data;
4527     int buf_size       = avpkt->size;
4528     H264Context *h     = avctx->priv_data;
4529     MpegEncContext *s  = &h->s;
4530     AVFrame *pict      = data;
4531     int buf_index      = 0;
4532     Picture *out;
4533     int i, out_idx;
4534
4535     s->flags  = avctx->flags;
4536     s->flags2 = avctx->flags2;
4537
4538     /* end of stream, output what is still in the buffers */
4539     if (buf_size == 0) {
4540  out:
4541
4542         s->current_picture_ptr = NULL;
4543
4544         // FIXME factorize this with the output code below
4545         out     = h->delayed_pic[0];
4546         out_idx = 0;
4547         for (i = 1;
4548              h->delayed_pic[i] &&
4549              !h->delayed_pic[i]->f.key_frame &&
4550              !h->delayed_pic[i]->mmco_reset;
4551              i++)
4552             if (h->delayed_pic[i]->poc < out->poc) {
4553                 out     = h->delayed_pic[i];
4554                 out_idx = i;
4555             }
4556
4557         for (i = out_idx; h->delayed_pic[i]; i++)
4558             h->delayed_pic[i] = h->delayed_pic[i + 1];
4559
4560         if (out) {
4561             *data_size = sizeof(AVFrame);
4562             *pict      = out->f;
4563         }
4564
4565         return buf_index;
4566     }
4567     if(h->is_avc && buf_size >= 9 && buf[0]==1 && buf[2]==0 && (buf[4]&0xFC)==0xFC && (buf[5]&0x1F) && buf[8]==0x67){
4568         int cnt= buf[5]&0x1f;
4569         const uint8_t *p= buf+6;
4570         while(cnt--){
4571             int nalsize= AV_RB16(p) + 2;
4572             if(nalsize > buf_size - (p-buf) || p[2]!=0x67)
4573                 goto not_extra;
4574             p += nalsize;
4575         }
4576         cnt = *(p++);
4577         if(!cnt)
4578             goto not_extra;
4579         while(cnt--){
4580             int nalsize= AV_RB16(p) + 2;
4581             if(nalsize > buf_size - (p-buf) || p[2]!=0x68)
4582                 goto not_extra;
4583             p += nalsize;
4584         }
4585
4586         return ff_h264_decode_extradata(h, buf, buf_size);
4587     }
4588 not_extra:
4589
4590     buf_index = decode_nal_units(h, buf, buf_size);
4591     if (buf_index < 0)
4592         return -1;
4593
4594     if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
4595         av_assert0(buf_index <= buf_size);
4596         goto out;
4597     }
4598
4599     if (!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr) {
4600         if (avctx->skip_frame >= AVDISCARD_NONREF ||
4601             buf_size >= 4 && !memcmp("Q264", buf, 4))
4602             return buf_size;
4603         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
4604         return -1;
4605     }
4606
4607     if (!(s->flags2 & CODEC_FLAG2_CHUNKS) ||
4608         (s->mb_y >= s->mb_height && s->mb_height)) {
4609         if (s->flags2 & CODEC_FLAG2_CHUNKS)
4610             decode_postinit(h, 1);
4611
4612         field_end(h, 0);
4613
4614         /* Wait for second field. */
4615         *data_size = 0;
4616         if (h->next_output_pic && (h->next_output_pic->sync || h->sync>1)) {
4617             *data_size = sizeof(AVFrame);
4618             *pict      = h->next_output_pic->f;
4619         }
4620     }
4621
4622     assert(pict->data[0] || !*data_size);
4623     ff_print_debug_info(s, pict);
4624     // printf("out %d\n", (int)pict->data[0]);
4625
4626     return get_consumed_bytes(s, buf_index, buf_size);
4627 }
4628
4629 av_cold void ff_h264_free_context(H264Context *h)
4630 {
4631     int i;
4632
4633     free_tables(h, 1); // FIXME cleanup init stuff perhaps
4634
4635     for (i = 0; i < MAX_SPS_COUNT; i++)
4636         av_freep(h->sps_buffers + i);
4637
4638     for (i = 0; i < MAX_PPS_COUNT; i++)
4639         av_freep(h->pps_buffers + i);
4640 }
4641
4642 static av_cold int h264_decode_end(AVCodecContext *avctx)
4643 {
4644     H264Context *h    = avctx->priv_data;
4645     MpegEncContext *s = &h->s;
4646
4647     ff_h264_remove_all_refs(h);
4648     ff_h264_free_context(h);
4649
4650     ff_MPV_common_end(s);
4651
4652     // memset(h, 0, sizeof(H264Context));
4653
4654     return 0;
4655 }
4656
4657 static const AVProfile profiles[] = {
4658     { FF_PROFILE_H264_BASELINE,             "Baseline"              },
4659     { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
4660     { FF_PROFILE_H264_MAIN,                 "Main"                  },
4661     { FF_PROFILE_H264_EXTENDED,             "Extended"              },
4662     { FF_PROFILE_H264_HIGH,                 "High"                  },
4663     { FF_PROFILE_H264_HIGH_10,              "High 10"               },
4664     { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
4665     { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
4666     { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
4667     { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
4668     { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
4669     { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
4670     { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
4671     { FF_PROFILE_UNKNOWN },
4672 };
4673
4674 static const AVOption h264_options[] = {
4675     {"is_avc", "is avc", offsetof(H264Context, is_avc), FF_OPT_TYPE_INT, {.dbl = 0}, 0, 1, 0},
4676     {"nal_length_size", "nal_length_size", offsetof(H264Context, nal_length_size), FF_OPT_TYPE_INT, {.dbl = 0}, 0, 4, 0},
4677     {NULL}
4678 };
4679
4680 static const AVClass h264_class = {
4681     "H264 Decoder",
4682     av_default_item_name,
4683     h264_options,
4684     LIBAVUTIL_VERSION_INT,
4685 };
4686
4687 static const AVClass h264_vdpau_class = {
4688     "H264 VDPAU Decoder",
4689     av_default_item_name,
4690     h264_options,
4691     LIBAVUTIL_VERSION_INT,
4692 };
4693
4694 AVCodec ff_h264_decoder = {
4695     .name                  = "h264",
4696     .type                  = AVMEDIA_TYPE_VIDEO,
4697     .id                    = CODEC_ID_H264,
4698     .priv_data_size        = sizeof(H264Context),
4699     .init                  = ff_h264_decode_init,
4700     .close                 = h264_decode_end,
4701     .decode                = decode_frame,
4702     .capabilities          = /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 |
4703                              CODEC_CAP_DELAY | CODEC_CAP_SLICE_THREADS |
4704                              CODEC_CAP_FRAME_THREADS,
4705     .flush                 = flush_dpb,
4706     .long_name             = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
4707     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
4708     .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
4709     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
4710     .priv_class            = &h264_class,
4711 };
4712
#if CONFIG_H264_VDPAU_DECODER
/* Registration of the VDPAU variant; it reuses the software decoder's
 * callbacks and outputs PIX_FMT_VDPAU_H264 only. */
AVCodec ff_h264_vdpau_decoder = {
    /* identification */
    .name           = "h264_vdpau",
    .long_name      = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_H264,
    .profiles       = NULL_IF_CONFIG_SMALL(profiles),
    .pix_fmts       = (const enum PixelFormat[]) { PIX_FMT_VDPAU_H264,
                                                   PIX_FMT_NONE },

    /* private data */
    .priv_data_size = sizeof(H264Context),
    .priv_class     = &h264_vdpau_class,

    .capabilities   = CODEC_CAP_HWACCEL_VDPAU | CODEC_CAP_DR1 | CODEC_CAP_DELAY,

    /* callbacks */
    .init           = ff_h264_decode_init,
    .decode         = decode_frame,
    .flush          = flush_dpb,
    .close          = h264_decode_end,
};
#endif