]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c
hevc: Always consider VLC NALU type mismatch fatal
[ffmpeg] / libavcodec / hevc.c
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/internal.h"
29 #include "libavutil/md5.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/pixdesc.h"
32 #include "libavutil/stereo3d.h"
33
34 #include "bytestream.h"
35 #include "cabac_functions.h"
36 #include "dsputil.h"
37 #include "golomb.h"
38 #include "hevc.h"
39
/*
 * Per-index extra sample counts for the qpel code; for every index
 * ff_hevc_qpel_extra[i] == ff_hevc_qpel_extra_before[i] +
 *                          ff_hevc_qpel_extra_after[i].
 * NOTE(review): the index is presumably the quarter-sample fraction of the
 * motion vector -- confirm against the users of these tables.
 */
const uint8_t ff_hevc_qpel_extra_before[4] = { [0] = 0, [1] = 3, [2] = 3, [3] = 2 };
const uint8_t ff_hevc_qpel_extra_after[4]  = { [0] = 0, [1] = 3, [2] = 4, [3] = 4 };
const uint8_t ff_hevc_qpel_extra[4]        = { [0] = 0, [1] = 6, [2] = 7, [3] = 6 };
43
/* Scan of a block holding a single position. */
static const uint8_t scan_1x1[1] = { 0 };

/*
 * Horizontal (row-by-row) scans: entry i of the _x / _y tables is the
 * column / row visited at scan position i.
 */
static const uint8_t horiz_scan2x2_x[4] = {
    0, 1,
    0, 1,
};

static const uint8_t horiz_scan2x2_y[4] = {
    0, 0,
    1, 1,
};

static const uint8_t horiz_scan4x4_x[16] = {
    0, 1, 2, 3,    0, 1, 2, 3,
    0, 1, 2, 3,    0, 1, 2, 3,
};

static const uint8_t horiz_scan4x4_y[16] = {
    0, 0, 0, 0,    1, 1, 1, 1,
    2, 2, 2, 2,    3, 3, 3, 3,
};

/*
 * Inverse horizontal scan of an 8x8 block, indexed [y][x]: the four 4x4
 * quadrants are numbered top-left, top-right, bottom-left, bottom-right,
 * and each quadrant is numbered row by row.
 */
static const uint8_t horiz_scan8x8_inv[8][8] = {
    /* upper quadrants */
    {  0,  1,  2,  3,    16, 17, 18, 19 },
    {  4,  5,  6,  7,    20, 21, 22, 23 },
    {  8,  9, 10, 11,    24, 25, 26, 27 },
    { 12, 13, 14, 15,    28, 29, 30, 31 },
    /* lower quadrants */
    { 32, 33, 34, 35,    48, 49, 50, 51 },
    { 36, 37, 38, 39,    52, 53, 54, 55 },
    { 40, 41, 42, 43,    56, 57, 58, 59 },
    { 44, 45, 46, 47,    60, 61, 62, 63 },
};
74
/*
 * Diagonal scans.  Entry i of an _x / _y table is the column / row visited
 * at scan position i; the _inv tables are indexed [y][x] and give the scan
 * position of that coordinate.  The x/y tables below are laid out with one
 * anti-diagonal (constant x + y) per source line.
 */
static const uint8_t diag_scan2x2_x[4] = {
    0,
    0, 1,
    1,
};

static const uint8_t diag_scan2x2_y[4] = {
    0,
    1, 0,
    1,
};

static const uint8_t diag_scan2x2_inv[2][2] = {
    { 0, 2 },
    { 1, 3 },
};

const uint8_t ff_hevc_diag_scan4x4_x[16] = {
    0,
    0, 1,
    0, 1, 2,
    0, 1, 2, 3,
    1, 2, 3,
    2, 3,
    3,
};

const uint8_t ff_hevc_diag_scan4x4_y[16] = {
    0,
    1, 0,
    2, 1, 0,
    3, 2, 1, 0,
    3, 2, 1,
    3, 2,
    3,
};

static const uint8_t diag_scan4x4_inv[4][4] = {
    { 0,  2,  5,  9 },
    { 1,  4,  8, 12 },
    { 3,  7, 11, 14 },
    { 6, 10, 13, 15 },
};

const uint8_t ff_hevc_diag_scan8x8_x[64] = {
    0,
    0, 1,
    0, 1, 2,
    0, 1, 2, 3,
    0, 1, 2, 3, 4,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5, 6,
    0, 1, 2, 3, 4, 5, 6, 7,
    1, 2, 3, 4, 5, 6, 7,
    2, 3, 4, 5, 6, 7,
    3, 4, 5, 6, 7,
    4, 5, 6, 7,
    5, 6, 7,
    6, 7,
    7,
};

const uint8_t ff_hevc_diag_scan8x8_y[64] = {
    0,
    1, 0,
    2, 1, 0,
    3, 2, 1, 0,
    4, 3, 2, 1, 0,
    5, 4, 3, 2, 1, 0,
    6, 5, 4, 3, 2, 1, 0,
    7, 6, 5, 4, 3, 2, 1, 0,
    7, 6, 5, 4, 3, 2, 1,
    7, 6, 5, 4, 3, 2,
    7, 6, 5, 4, 3,
    7, 6, 5, 4,
    7, 6, 5,
    7, 6,
    7,
};

static const uint8_t diag_scan8x8_inv[8][8] = {
    {  0,  2,  5,  9, 14, 20, 27, 35 },
    {  1,  4,  8, 13, 19, 26, 34, 42 },
    {  3,  7, 12, 18, 25, 33, 41, 48 },
    {  6, 11, 17, 24, 32, 40, 47, 53 },
    { 10, 16, 23, 31, 39, 46, 52, 57 },
    { 15, 22, 30, 38, 45, 51, 56, 60 },
    { 21, 29, 37, 44, 50, 55, 59, 62 },
    { 28, 36, 43, 49, 54, 58, 61, 63 },
};
153
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
158
159 /**
160  * Section 5.7
161  */
162
163 /* free everything allocated  by pic_arrays_init() */
164 static void pic_arrays_free(HEVCContext *s)
165 {
166     av_freep(&s->sao);
167     av_freep(&s->deblock);
168     av_freep(&s->split_cu_flag);
169
170     av_freep(&s->skip_flag);
171     av_freep(&s->tab_ct_depth);
172
173     av_freep(&s->tab_ipm);
174     av_freep(&s->cbf_luma);
175     av_freep(&s->is_pcm);
176
177     av_freep(&s->qp_y_tab);
178     av_freep(&s->tab_slice_address);
179     av_freep(&s->filter_slice_edges);
180
181     av_freep(&s->horizontal_bs);
182     av_freep(&s->vertical_bs);
183
184     av_buffer_pool_uninit(&s->tab_mvf_pool);
185     av_buffer_pool_uninit(&s->rpl_tab_pool);
186 }
187
188 /* allocate arrays that depend on frame dimensions */
189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
190 {
191     int log2_min_cb_size = sps->log2_min_cb_size;
192     int width            = sps->width;
193     int height           = sps->height;
194     int pic_size         = width * height;
195     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
196                            ((height >> log2_min_cb_size) + 1);
197     int ctb_count        = sps->ctb_width * sps->ctb_height;
198     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
199
200     s->bs_width  = width  >> 3;
201     s->bs_height = height >> 3;
202
203     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
204     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
205     s->split_cu_flag = av_malloc(pic_size);
206     if (!s->sao || !s->deblock || !s->split_cu_flag)
207         goto fail;
208
209     s->skip_flag    = av_malloc(pic_size_in_ctb);
210     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
211     if (!s->skip_flag || !s->tab_ct_depth)
212         goto fail;
213
214     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
215     s->tab_ipm  = av_malloc(min_pu_size);
216     s->is_pcm   = av_malloc(min_pu_size);
217     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
218         goto fail;
219
220     s->filter_slice_edges = av_malloc(ctb_count);
221     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
222                                       sizeof(*s->tab_slice_address));
223     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
224                                       sizeof(*s->qp_y_tab));
225     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
226         goto fail;
227
228     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
229     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
230     if (!s->horizontal_bs || !s->vertical_bs)
231         goto fail;
232
233     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
234                                           av_buffer_alloc);
235     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
236                                           av_buffer_allocz);
237     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
238         goto fail;
239
240     return 0;
241
242 fail:
243     pic_arrays_free(s);
244     return AVERROR(ENOMEM);
245 }
246
247 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
248 {
249     int i = 0;
250     int j = 0;
251     uint8_t luma_weight_l0_flag[16];
252     uint8_t chroma_weight_l0_flag[16];
253     uint8_t luma_weight_l1_flag[16];
254     uint8_t chroma_weight_l1_flag[16];
255
256     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
257     if (s->sps->chroma_format_idc != 0) {
258         int delta = get_se_golomb(gb);
259         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
260     }
261
262     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
263         luma_weight_l0_flag[i] = get_bits1(gb);
264         if (!luma_weight_l0_flag[i]) {
265             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
266             s->sh.luma_offset_l0[i] = 0;
267         }
268     }
269     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
270         for (i = 0; i < s->sh.nb_refs[L0]; i++)
271             chroma_weight_l0_flag[i] = get_bits1(gb);
272     } else {
273         for (i = 0; i < s->sh.nb_refs[L0]; i++)
274             chroma_weight_l0_flag[i] = 0;
275     }
276     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
277         if (luma_weight_l0_flag[i]) {
278             int delta_luma_weight_l0 = get_se_golomb(gb);
279             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
280             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
281         }
282         if (chroma_weight_l0_flag[i]) {
283             for (j = 0; j < 2; j++) {
284                 int delta_chroma_weight_l0 = get_se_golomb(gb);
285                 int delta_chroma_offset_l0 = get_se_golomb(gb);
286                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
287                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
288                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
289             }
290         } else {
291             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
292             s->sh.chroma_offset_l0[i][0] = 0;
293             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
294             s->sh.chroma_offset_l0[i][1] = 0;
295         }
296     }
297     if (s->sh.slice_type == B_SLICE) {
298         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
299             luma_weight_l1_flag[i] = get_bits1(gb);
300             if (!luma_weight_l1_flag[i]) {
301                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
302                 s->sh.luma_offset_l1[i] = 0;
303             }
304         }
305         if (s->sps->chroma_format_idc != 0) {
306             for (i = 0; i < s->sh.nb_refs[L1]; i++)
307                 chroma_weight_l1_flag[i] = get_bits1(gb);
308         } else {
309             for (i = 0; i < s->sh.nb_refs[L1]; i++)
310                 chroma_weight_l1_flag[i] = 0;
311         }
312         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
313             if (luma_weight_l1_flag[i]) {
314                 int delta_luma_weight_l1 = get_se_golomb(gb);
315                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
316                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
317             }
318             if (chroma_weight_l1_flag[i]) {
319                 for (j = 0; j < 2; j++) {
320                     int delta_chroma_weight_l1 = get_se_golomb(gb);
321                     int delta_chroma_offset_l1 = get_se_golomb(gb);
322                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
323                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
324                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
325                 }
326             } else {
327                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
328                 s->sh.chroma_offset_l1[i][0] = 0;
329                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
330                 s->sh.chroma_offset_l1[i][1] = 0;
331             }
332         }
333     }
334 }
335
336 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
337 {
338     const HEVCSPS *sps = s->sps;
339     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
340     int prev_delta_msb = 0;
341     unsigned int nb_sps = 0, nb_sh;
342     int i;
343
344     rps->nb_refs = 0;
345     if (!sps->long_term_ref_pics_present_flag)
346         return 0;
347
348     if (sps->num_long_term_ref_pics_sps > 0)
349         nb_sps = get_ue_golomb_long(gb);
350     nb_sh = get_ue_golomb_long(gb);
351
352     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
353         return AVERROR_INVALIDDATA;
354
355     rps->nb_refs = nb_sh + nb_sps;
356
357     for (i = 0; i < rps->nb_refs; i++) {
358         uint8_t delta_poc_msb_present;
359
360         if (i < nb_sps) {
361             uint8_t lt_idx_sps = 0;
362
363             if (sps->num_long_term_ref_pics_sps > 1)
364                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
365
366             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
367             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
368         } else {
369             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
370             rps->used[i] = get_bits1(gb);
371         }
372
373         delta_poc_msb_present = get_bits1(gb);
374         if (delta_poc_msb_present) {
375             int delta = get_ue_golomb_long(gb);
376
377             if (i && i != nb_sps)
378                 delta += prev_delta_msb;
379
380             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
381             prev_delta_msb = delta;
382         }
383     }
384
385     return 0;
386 }
387
388 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
389 {
390     int ret;
391     int num = 0, den = 0;
392
393     pic_arrays_free(s);
394     ret = pic_arrays_init(s, sps);
395     if (ret < 0)
396         goto fail;
397
398     s->avctx->coded_width         = sps->width;
399     s->avctx->coded_height        = sps->height;
400     s->avctx->width               = sps->output_width;
401     s->avctx->height              = sps->output_height;
402     s->avctx->pix_fmt             = sps->pix_fmt;
403     s->avctx->sample_aspect_ratio = sps->vui.sar;
404     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
405
406     if (sps->vui.video_signal_type_present_flag)
407         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
408                                                                : AVCOL_RANGE_MPEG;
409     else
410         s->avctx->color_range = AVCOL_RANGE_MPEG;
411
412     if (sps->vui.colour_description_present_flag) {
413         s->avctx->color_primaries = sps->vui.colour_primaries;
414         s->avctx->color_trc       = sps->vui.transfer_characteristic;
415         s->avctx->colorspace      = sps->vui.matrix_coeffs;
416     } else {
417         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
418         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
419         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
420     }
421
422     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
423     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
424     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
425
426     if (sps->sao_enabled) {
427         av_frame_unref(s->tmp_frame);
428         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
429         if (ret < 0)
430             goto fail;
431         s->frame = s->tmp_frame;
432     }
433
434     s->sps = sps;
435     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
436
437     if (s->vps->vps_timing_info_present_flag) {
438         num = s->vps->vps_num_units_in_tick;
439         den = s->vps->vps_time_scale;
440     } else if (sps->vui.vui_timing_info_present_flag) {
441         num = sps->vui.vui_num_units_in_tick;
442         den = sps->vui.vui_time_scale;
443     }
444
445     if (num != 0 && den != 0)
446         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
447                   num, den, 1 << 30);
448
449     return 0;
450
451 fail:
452     pic_arrays_free(s);
453     s->sps = NULL;
454     return ret;
455 }
456
457 static int hls_slice_header(HEVCContext *s)
458 {
459     GetBitContext *gb = &s->HEVClc.gb;
460     SliceHeader *sh   = &s->sh;
461     int i, ret;
462
463     // Coded parameters
464     sh->first_slice_in_pic_flag = get_bits1(gb);
465     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
466         s->seq_decode = (s->seq_decode + 1) & 0xff;
467         s->max_ra     = INT_MAX;
468         if (IS_IDR(s))
469             ff_hevc_clear_refs(s);
470     }
471     if (s->nal_unit_type >= 16 && s->nal_unit_type <= 23)
472         sh->no_output_of_prior_pics_flag = get_bits1(gb);
473
474     sh->pps_id = get_ue_golomb_long(gb);
475     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
476         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
477         return AVERROR_INVALIDDATA;
478     }
479     if (!sh->first_slice_in_pic_flag &&
480         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
481         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
482         return AVERROR_INVALIDDATA;
483     }
484     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
485
486     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
487         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
488
489         ff_hevc_clear_refs(s);
490         ret = set_sps(s, s->sps);
491         if (ret < 0)
492             return ret;
493
494         s->seq_decode = (s->seq_decode + 1) & 0xff;
495         s->max_ra     = INT_MAX;
496     }
497
498     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
499     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
500
501     sh->dependent_slice_segment_flag = 0;
502     if (!sh->first_slice_in_pic_flag) {
503         int slice_address_length;
504
505         if (s->pps->dependent_slice_segments_enabled_flag)
506             sh->dependent_slice_segment_flag = get_bits1(gb);
507
508         slice_address_length = av_ceil_log2(s->sps->ctb_width *
509                                             s->sps->ctb_height);
510         sh->slice_segment_addr = get_bits(gb, slice_address_length);
511         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
512             av_log(s->avctx, AV_LOG_ERROR,
513                    "Invalid slice segment address: %u.\n",
514                    sh->slice_segment_addr);
515             return AVERROR_INVALIDDATA;
516         }
517
518         if (!sh->dependent_slice_segment_flag) {
519             sh->slice_addr = sh->slice_segment_addr;
520             s->slice_idx++;
521         }
522     } else {
523         sh->slice_segment_addr = sh->slice_addr = 0;
524         s->slice_idx           = 0;
525         s->slice_initialized   = 0;
526     }
527
528     if (!sh->dependent_slice_segment_flag) {
529         s->slice_initialized = 0;
530
531         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
532             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
533
534         sh->slice_type = get_ue_golomb_long(gb);
535         if (!(sh->slice_type == I_SLICE ||
536               sh->slice_type == P_SLICE ||
537               sh->slice_type == B_SLICE)) {
538             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
539                    sh->slice_type);
540             return AVERROR_INVALIDDATA;
541         }
542         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
543             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
544             return AVERROR_INVALIDDATA;
545         }
546
547         if (s->pps->output_flag_present_flag)
548             sh->pic_output_flag = get_bits1(gb);
549
550         if (s->sps->separate_colour_plane_flag)
551             sh->colour_plane_id = get_bits(gb, 2);
552
553         if (!IS_IDR(s)) {
554             int short_term_ref_pic_set_sps_flag, poc;
555
556             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
557             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
558             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
559                 av_log(s->avctx, AV_LOG_WARNING,
560                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
561                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
562                     return AVERROR_INVALIDDATA;
563                 poc = s->poc;
564             }
565             s->poc = poc;
566
567             short_term_ref_pic_set_sps_flag = get_bits1(gb);
568             if (!short_term_ref_pic_set_sps_flag) {
569                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
570                 if (ret < 0)
571                     return ret;
572
573                 sh->short_term_rps = &sh->slice_rps;
574             } else {
575                 int numbits, rps_idx;
576
577                 if (!s->sps->nb_st_rps) {
578                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
579                     return AVERROR_INVALIDDATA;
580                 }
581
582                 numbits = av_ceil_log2(s->sps->nb_st_rps);
583                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
584                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
585             }
586
587             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
588             if (ret < 0) {
589                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
590                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
591                     return AVERROR_INVALIDDATA;
592             }
593
594             if (s->sps->sps_temporal_mvp_enabled_flag)
595                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
596             else
597                 sh->slice_temporal_mvp_enabled_flag = 0;
598         } else {
599             s->sh.short_term_rps = NULL;
600             s->poc               = 0;
601         }
602
603         /* 8.3.1 */
604         if (s->temporal_id == 0 &&
605             s->nal_unit_type != NAL_TRAIL_N &&
606             s->nal_unit_type != NAL_TSA_N   &&
607             s->nal_unit_type != NAL_STSA_N  &&
608             s->nal_unit_type != NAL_RADL_N  &&
609             s->nal_unit_type != NAL_RADL_R  &&
610             s->nal_unit_type != NAL_RASL_N  &&
611             s->nal_unit_type != NAL_RASL_R)
612             s->pocTid0 = s->poc;
613
614         if (s->sps->sao_enabled) {
615             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
616             sh->slice_sample_adaptive_offset_flag[1] =
617             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
618         } else {
619             sh->slice_sample_adaptive_offset_flag[0] = 0;
620             sh->slice_sample_adaptive_offset_flag[1] = 0;
621             sh->slice_sample_adaptive_offset_flag[2] = 0;
622         }
623
624         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
625         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
626             int nb_refs;
627
628             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
629             if (sh->slice_type == B_SLICE)
630                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
631
632             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
633                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
634                 if (sh->slice_type == B_SLICE)
635                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
636             }
637             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
638                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
639                        sh->nb_refs[L0], sh->nb_refs[L1]);
640                 return AVERROR_INVALIDDATA;
641             }
642
643             sh->rpl_modification_flag[0] = 0;
644             sh->rpl_modification_flag[1] = 0;
645             nb_refs = ff_hevc_frame_nb_refs(s);
646             if (!nb_refs) {
647                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
648                 return AVERROR_INVALIDDATA;
649             }
650
651             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
652                 sh->rpl_modification_flag[0] = get_bits1(gb);
653                 if (sh->rpl_modification_flag[0]) {
654                     for (i = 0; i < sh->nb_refs[L0]; i++)
655                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
656                 }
657
658                 if (sh->slice_type == B_SLICE) {
659                     sh->rpl_modification_flag[1] = get_bits1(gb);
660                     if (sh->rpl_modification_flag[1] == 1)
661                         for (i = 0; i < sh->nb_refs[L1]; i++)
662                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
663                 }
664             }
665
666             if (sh->slice_type == B_SLICE)
667                 sh->mvd_l1_zero_flag = get_bits1(gb);
668
669             if (s->pps->cabac_init_present_flag)
670                 sh->cabac_init_flag = get_bits1(gb);
671             else
672                 sh->cabac_init_flag = 0;
673
674             sh->collocated_ref_idx = 0;
675             if (sh->slice_temporal_mvp_enabled_flag) {
676                 sh->collocated_list = L0;
677                 if (sh->slice_type == B_SLICE)
678                     sh->collocated_list = !get_bits1(gb);
679
680                 if (sh->nb_refs[sh->collocated_list] > 1) {
681                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
682                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
683                         av_log(s->avctx, AV_LOG_ERROR,
684                                "Invalid collocated_ref_idx: %d.\n",
685                                sh->collocated_ref_idx);
686                         return AVERROR_INVALIDDATA;
687                     }
688                 }
689             }
690
691             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
692                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
693                 pred_weight_table(s, gb);
694             }
695
696             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
697             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
698                 av_log(s->avctx, AV_LOG_ERROR,
699                        "Invalid number of merging MVP candidates: %d.\n",
700                        sh->max_num_merge_cand);
701                 return AVERROR_INVALIDDATA;
702             }
703         }
704
705         sh->slice_qp_delta = get_se_golomb(gb);
706
707         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
708             sh->slice_cb_qp_offset = get_se_golomb(gb);
709             sh->slice_cr_qp_offset = get_se_golomb(gb);
710         } else {
711             sh->slice_cb_qp_offset = 0;
712             sh->slice_cr_qp_offset = 0;
713         }
714
715         if (s->pps->deblocking_filter_control_present_flag) {
716             int deblocking_filter_override_flag = 0;
717
718             if (s->pps->deblocking_filter_override_enabled_flag)
719                 deblocking_filter_override_flag = get_bits1(gb);
720
721             if (deblocking_filter_override_flag) {
722                 sh->disable_deblocking_filter_flag = get_bits1(gb);
723                 if (!sh->disable_deblocking_filter_flag) {
724                     sh->beta_offset = get_se_golomb(gb) * 2;
725                     sh->tc_offset   = get_se_golomb(gb) * 2;
726                 }
727             } else {
728                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
729                 sh->beta_offset                    = s->pps->beta_offset;
730                 sh->tc_offset                      = s->pps->tc_offset;
731             }
732         } else {
733             sh->disable_deblocking_filter_flag = 0;
734             sh->beta_offset                    = 0;
735             sh->tc_offset                      = 0;
736         }
737
738         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
739             (sh->slice_sample_adaptive_offset_flag[0] ||
740              sh->slice_sample_adaptive_offset_flag[1] ||
741              !sh->disable_deblocking_filter_flag)) {
742             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
743         } else {
744             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
745         }
746     } else if (!s->slice_initialized) {
747         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
748         return AVERROR_INVALIDDATA;
749     }
750
751     sh->num_entry_point_offsets = 0;
752     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
753         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
754         if (sh->num_entry_point_offsets > 0) {
755             int offset_len = get_ue_golomb_long(gb) + 1;
756
757             for (i = 0; i < sh->num_entry_point_offsets; i++)
758                 skip_bits(gb, offset_len);
759         }
760     }
761
762     if (s->pps->slice_header_extension_present_flag) {
763         unsigned int length = get_ue_golomb_long(gb);
764         for (i = 0; i < length; i++)
765             skip_bits(gb, 8);  // slice_header_extension_data_byte
766     }
767
768     // Inferred parameters
769     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
770     if (sh->slice_qp > 51 ||
771         sh->slice_qp < -s->sps->qp_bd_offset) {
772         av_log(s->avctx, AV_LOG_ERROR,
773                "The slice_qp %d is outside the valid range "
774                "[%d, 51].\n",
775                sh->slice_qp,
776                -s->sps->qp_bd_offset);
777         return AVERROR_INVALIDDATA;
778     }
779
780     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
781
782     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
783         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
784         return AVERROR_INVALIDDATA;
785     }
786
787     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
788
789     if (!s->pps->cu_qp_delta_enabled_flag)
790         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
791                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
792
793     s->slice_initialized = 1;
794
795     return 0;
796 }
797
/* Element of a per-CTB table at CTB coordinates (x, y); expects a variable
 * named s with s->sps->ctb_width in scope. */
#define CTB(tab, x, y) ((tab)[(x) + (y) * s->sps->ctb_width])

/*
 * Assign one field of the current CTB's SAO parameters (sao->elem).
 * When merging, the field is copied from the left neighbour (which takes
 * priority) or from the upper neighbour; otherwise it is set to value,
 * which is only evaluated in that case.
 * Expects sao, s, rx, ry, sao_merge_left_flag and sao_merge_up_flag to be
 * in scope at the point of use.
 */
#define SET_SAO(elem, value)                            \
do {                                                    \
    if (sao_merge_left_flag)                            \
        sao->elem = CTB(s->sao, rx-1, ry).elem;         \
    else if (sao_merge_up_flag)                         \
        sao->elem = CTB(s->sao, rx, ry-1).elem;         \
    else                                                \
        sao->elem = value;                              \
} while (0)
811
812 static void hls_sao_param(HEVCContext *s, int rx, int ry)
813 {
814     HEVCLocalContext *lc    = &s->HEVClc;
815     int sao_merge_left_flag = 0;
816     int sao_merge_up_flag   = 0;
817     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
818     SAOParams *sao          = &CTB(s->sao, rx, ry);
819     int c_idx, i;
820
821     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
822         s->sh.slice_sample_adaptive_offset_flag[1]) {
823         if (rx > 0) {
824             if (lc->ctb_left_flag)
825                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
826         }
827         if (ry > 0 && !sao_merge_left_flag) {
828             if (lc->ctb_up_flag)
829                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
830         }
831     }
832
833     for (c_idx = 0; c_idx < 3; c_idx++) {
834         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
835             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
836             continue;
837         }
838
839         if (c_idx == 2) {
840             sao->type_idx[2] = sao->type_idx[1];
841             sao->eo_class[2] = sao->eo_class[1];
842         } else {
843             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
844         }
845
846         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
847             continue;
848
849         for (i = 0; i < 4; i++)
850             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
851
852         if (sao->type_idx[c_idx] == SAO_BAND) {
853             for (i = 0; i < 4; i++) {
854                 if (sao->offset_abs[c_idx][i]) {
855                     SET_SAO(offset_sign[c_idx][i],
856                             ff_hevc_sao_offset_sign_decode(s));
857                 } else {
858                     sao->offset_sign[c_idx][i] = 0;
859                 }
860             }
861             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
862         } else if (c_idx != 2) {
863             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
864         }
865
866         // Inferred parameters
867         sao->offset_val[c_idx][0] = 0;
868         for (i = 0; i < 4; i++) {
869             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
870             if (sao->type_idx[c_idx] == SAO_EDGE) {
871                 if (i > 1)
872                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
873             } else if (sao->offset_sign[c_idx][i]) {
874                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
875             }
876         }
877     }
878 }
879
880 #undef SET_SAO
881 #undef CTB
882
883 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
884                                 int log2_trafo_size, enum ScanType scan_idx,
885                                 int c_idx)
886 {
887 #define GET_COORD(offset, n)                                    \
888     do {                                                        \
889         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
890         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
891     } while (0)
892     HEVCLocalContext *lc    = &s->HEVClc;
893     int transform_skip_flag = 0;
894
895     int last_significant_coeff_x, last_significant_coeff_y;
896     int last_scan_pos;
897     int n_end;
898     int num_coeff    = 0;
899     int greater1_ctx = 1;
900
901     int num_last_subset;
902     int x_cg_last_sig, y_cg_last_sig;
903
904     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
905
906     ptrdiff_t stride = s->frame->linesize[c_idx];
907     int hshift       = s->sps->hshift[c_idx];
908     int vshift       = s->sps->vshift[c_idx];
909     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
910                                               ((x0 >> hshift) << s->sps->pixel_shift)];
911     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
912     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
913
914     int trafo_size = 1 << log2_trafo_size;
915     int i, qp, shift, add, scale, scale_m;
916     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
917     const uint8_t *scale_matrix;
918     uint8_t dc_scale;
919
920     // Derive QP for dequant
921     if (!lc->cu.cu_transquant_bypass_flag) {
922         static const int qp_c[] = {
923             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
924         };
925
926         static const uint8_t rem6[51 + 2 * 6 + 1] = {
927             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
928             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
929             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
930         };
931
932         static const uint8_t div6[51 + 2 * 6 + 1] = {
933             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
934             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
935             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
936         };
937         int qp_y = lc->qp_y;
938
939         if (c_idx == 0) {
940             qp = qp_y + s->sps->qp_bd_offset;
941         } else {
942             int qp_i, offset;
943
944             if (c_idx == 1)
945                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
946             else
947                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
948
949             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
950             if (qp_i < 30)
951                 qp = qp_i;
952             else if (qp_i > 43)
953                 qp = qp_i - 6;
954             else
955                 qp = qp_c[qp_i - 30];
956
957             qp += s->sps->qp_bd_offset;
958         }
959
960         shift    = s->sps->bit_depth + log2_trafo_size - 5;
961         add      = 1 << (shift - 1);
962         scale    = level_scale[rem6[qp]] << (div6[qp]);
963         scale_m  = 16; // default when no custom scaling lists.
964         dc_scale = 16;
965
966         if (s->sps->scaling_list_enable_flag) {
967             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
968                                     &s->pps->scaling_list : &s->sps->scaling_list;
969             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
970
971             if (log2_trafo_size != 5)
972                 matrix_id = 3 * matrix_id + c_idx;
973
974             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
975             if (log2_trafo_size >= 4)
976                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
977         }
978     }
979
980     if (s->pps->transform_skip_enabled_flag &&
981         !lc->cu.cu_transquant_bypass_flag   &&
982         log2_trafo_size == 2) {
983         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
984     }
985
986     last_significant_coeff_x =
987         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
988     last_significant_coeff_y =
989         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
990
991     if (last_significant_coeff_x > 3) {
992         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
993         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
994                                    (2 + (last_significant_coeff_x & 1)) +
995                                    suffix;
996     }
997
998     if (last_significant_coeff_y > 3) {
999         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1000         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1001                                    (2 + (last_significant_coeff_y & 1)) +
1002                                    suffix;
1003     }
1004
1005     if (scan_idx == SCAN_VERT)
1006         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1007
1008     x_cg_last_sig = last_significant_coeff_x >> 2;
1009     y_cg_last_sig = last_significant_coeff_y >> 2;
1010
1011     switch (scan_idx) {
1012     case SCAN_DIAG: {
1013         int last_x_c = last_significant_coeff_x & 3;
1014         int last_y_c = last_significant_coeff_y & 3;
1015
1016         scan_x_off = ff_hevc_diag_scan4x4_x;
1017         scan_y_off = ff_hevc_diag_scan4x4_y;
1018         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1019         if (trafo_size == 4) {
1020             scan_x_cg = scan_1x1;
1021             scan_y_cg = scan_1x1;
1022         } else if (trafo_size == 8) {
1023             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1024             scan_x_cg  = diag_scan2x2_x;
1025             scan_y_cg  = diag_scan2x2_y;
1026         } else if (trafo_size == 16) {
1027             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1028             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1029             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1030         } else { // trafo_size == 32
1031             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1032             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1033             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1034         }
1035         break;
1036     }
1037     case SCAN_HORIZ:
1038         scan_x_cg  = horiz_scan2x2_x;
1039         scan_y_cg  = horiz_scan2x2_y;
1040         scan_x_off = horiz_scan4x4_x;
1041         scan_y_off = horiz_scan4x4_y;
1042         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1043         break;
1044     default: //SCAN_VERT
1045         scan_x_cg  = horiz_scan2x2_y;
1046         scan_y_cg  = horiz_scan2x2_x;
1047         scan_x_off = horiz_scan4x4_y;
1048         scan_y_off = horiz_scan4x4_x;
1049         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1050         break;
1051     }
1052     num_coeff++;
1053     num_last_subset = (num_coeff - 1) >> 4;
1054
1055     for (i = num_last_subset; i >= 0; i--) {
1056         int n, m;
1057         int x_cg, y_cg, x_c, y_c;
1058         int implicit_non_zero_coeff = 0;
1059         int64_t trans_coeff_level;
1060         int prev_sig = 0;
1061         int offset   = i << 4;
1062
1063         uint8_t significant_coeff_flag_idx[16];
1064         uint8_t nb_significant_coeff_flag = 0;
1065
1066         x_cg = scan_x_cg[i];
1067         y_cg = scan_y_cg[i];
1068
1069         if (i < num_last_subset && i > 0) {
1070             int ctx_cg = 0;
1071             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1072                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1073             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1074                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1075
1076             significant_coeff_group_flag[x_cg][y_cg] =
1077                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1078             implicit_non_zero_coeff = 1;
1079         } else {
1080             significant_coeff_group_flag[x_cg][y_cg] =
1081                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1082                  (x_cg == 0 && y_cg == 0));
1083         }
1084
1085         last_scan_pos = num_coeff - offset - 1;
1086
1087         if (i == num_last_subset) {
1088             n_end                         = last_scan_pos - 1;
1089             significant_coeff_flag_idx[0] = last_scan_pos;
1090             nb_significant_coeff_flag     = 1;
1091         } else {
1092             n_end = 15;
1093         }
1094
1095         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1096             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1097         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1098             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1099
1100         for (n = n_end; n >= 0; n--) {
1101             GET_COORD(offset, n);
1102
1103             if (significant_coeff_group_flag[x_cg][y_cg] &&
1104                 (n > 0 || implicit_non_zero_coeff == 0)) {
1105                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1106                                                           log2_trafo_size,
1107                                                           scan_idx,
1108                                                           prev_sig) == 1) {
1109                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1110                     nb_significant_coeff_flag++;
1111                     implicit_non_zero_coeff = 0;
1112                 }
1113             } else {
1114                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1115                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1116                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1117                     nb_significant_coeff_flag++;
1118                 }
1119             }
1120         }
1121
1122         n_end = nb_significant_coeff_flag;
1123
1124         if (n_end) {
1125             int first_nz_pos_in_cg = 16;
1126             int last_nz_pos_in_cg = -1;
1127             int c_rice_param = 0;
1128             int first_greater1_coeff_idx = -1;
1129             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1130             uint16_t coeff_sign_flag;
1131             int sum_abs = 0;
1132             int sign_hidden = 0;
1133
1134             // initialize first elem of coeff_bas_level_greater1_flag
1135             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1136
1137             if (!(i == num_last_subset) && greater1_ctx == 0)
1138                 ctx_set++;
1139             greater1_ctx      = 1;
1140             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1141
1142             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1143                 int n_idx = significant_coeff_flag_idx[m];
1144                 int inc   = (ctx_set << 2) + greater1_ctx;
1145                 coeff_abs_level_greater1_flag[n_idx] =
1146                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1147                 if (coeff_abs_level_greater1_flag[n_idx]) {
1148                     greater1_ctx = 0;
1149                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1150                     greater1_ctx++;
1151                 }
1152
1153                 if (coeff_abs_level_greater1_flag[n_idx] &&
1154                     first_greater1_coeff_idx == -1)
1155                     first_greater1_coeff_idx = n_idx;
1156             }
1157             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1158             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1159                                  !lc->cu.cu_transquant_bypass_flag;
1160
1161             if (first_greater1_coeff_idx != -1) {
1162                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1163             }
1164             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1165                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1166             } else {
1167                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1168             }
1169
1170             for (m = 0; m < n_end; m++) {
1171                 n = significant_coeff_flag_idx[m];
1172                 GET_COORD(offset, n);
1173                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1174                 if (trans_coeff_level == ((m < 8) ?
1175                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1176                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1177
1178                     trans_coeff_level += last_coeff_abs_level_remaining;
1179                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1180                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1181                 }
1182                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1183                     sum_abs += trans_coeff_level;
1184                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1185                         trans_coeff_level = -trans_coeff_level;
1186                 }
1187                 if (coeff_sign_flag >> 15)
1188                     trans_coeff_level = -trans_coeff_level;
1189                 coeff_sign_flag <<= 1;
1190                 if (!lc->cu.cu_transquant_bypass_flag) {
1191                     if (s->sps->scaling_list_enable_flag) {
1192                         if (y_c || x_c || log2_trafo_size < 4) {
1193                             int pos;
1194                             switch (log2_trafo_size) {
1195                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1196                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1197                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1198                             default: pos = (y_c        << 2) +  x_c;
1199                             }
1200                             scale_m = scale_matrix[pos];
1201                         } else {
1202                             scale_m = dc_scale;
1203                         }
1204                     }
1205                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1206                     if(trans_coeff_level < 0) {
1207                         if((~trans_coeff_level) & 0xFffffffffff8000)
1208                             trans_coeff_level = -32768;
1209                     } else {
1210                         if (trans_coeff_level & 0xffffffffffff8000)
1211                             trans_coeff_level = 32767;
1212                     }
1213                 }
1214                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1215             }
1216         }
1217     }
1218
1219     if (lc->cu.cu_transquant_bypass_flag) {
1220         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1221     } else {
1222         if (transform_skip_flag)
1223             s->hevcdsp.transform_skip(dst, coeffs, stride);
1224         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1225                  log2_trafo_size == 2)
1226             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1227         else
1228             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1229     }
1230 }
1231
1232 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1233                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1234                               int log2_cb_size, int log2_trafo_size,
1235                               int trafo_depth, int blk_idx)
1236 {
1237     HEVCLocalContext *lc = &s->HEVClc;
1238
1239     if (lc->cu.pred_mode == MODE_INTRA) {
1240         int trafo_size = 1 << log2_trafo_size;
1241         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1242
1243         s->hpc.intra_pred(s, x0, y0, log2_trafo_size, 0);
1244         if (log2_trafo_size > 2) {
1245             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1246             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1247             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 1);
1248             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 2);
1249         } else if (blk_idx == 3) {
1250             trafo_size = trafo_size << s->sps->hshift[1];
1251             ff_hevc_set_neighbour_available(s, xBase, yBase,
1252                                             trafo_size, trafo_size);
1253             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 1);
1254             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 2);
1255         }
1256     }
1257
1258     if (lc->tt.cbf_luma ||
1259         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1260         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1261         int scan_idx   = SCAN_DIAG;
1262         int scan_idx_c = SCAN_DIAG;
1263
1264         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1265             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1266             if (lc->tu.cu_qp_delta != 0)
1267                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1268                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1269             lc->tu.is_cu_qp_delta_coded = 1;
1270
1271             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
1272                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
1273                 av_log(s->avctx, AV_LOG_ERROR,
1274                        "The cu_qp_delta %d is outside the valid range "
1275                        "[%d, %d].\n",
1276                        lc->tu.cu_qp_delta,
1277                        -(26 + s->sps->qp_bd_offset / 2),
1278                         (25 + s->sps->qp_bd_offset / 2));
1279                 return AVERROR_INVALIDDATA;
1280             }
1281
1282             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1283         }
1284
1285         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1286             if (lc->tu.cur_intra_pred_mode >= 6 &&
1287                 lc->tu.cur_intra_pred_mode <= 14) {
1288                 scan_idx = SCAN_VERT;
1289             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1290                        lc->tu.cur_intra_pred_mode <= 30) {
1291                 scan_idx = SCAN_HORIZ;
1292             }
1293
1294             if (lc->pu.intra_pred_mode_c >=  6 &&
1295                 lc->pu.intra_pred_mode_c <= 14) {
1296                 scan_idx_c = SCAN_VERT;
1297             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1298                        lc->pu.intra_pred_mode_c <= 30) {
1299                 scan_idx_c = SCAN_HORIZ;
1300             }
1301         }
1302
1303         if (lc->tt.cbf_luma)
1304             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1305         if (log2_trafo_size > 2) {
1306             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0))
1307                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1308             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0))
1309                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1310         } else if (blk_idx == 3) {
1311             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase))
1312                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1313             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase))
1314                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1315         }
1316     }
1317     return 0;
1318 }
1319
1320 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1321 {
1322     int cb_size          = 1 << log2_cb_size;
1323     int log2_min_pu_size = s->sps->log2_min_pu_size;
1324
1325     int min_pu_width     = s->sps->min_pu_width;
1326     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1327     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1328     int i, j;
1329
1330     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1331         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1332             s->is_pcm[i + j * min_pu_width] = 2;
1333 }
1334
1335 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1336                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1337                               int log2_cb_size, int log2_trafo_size,
1338                               int trafo_depth, int blk_idx)
1339 {
1340     HEVCLocalContext *lc = &s->HEVClc;
1341     uint8_t split_transform_flag;
1342     int ret;
1343
1344     if (trafo_depth > 0 && log2_trafo_size == 2) {
1345         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1346             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1347         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1348             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1349     } else {
1350         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1351         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1352     }
1353
1354     if (lc->cu.intra_split_flag) {
1355         if (trafo_depth == 1)
1356             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1357     } else {
1358         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1359     }
1360
1361     lc->tt.cbf_luma = 1;
1362
1363     lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1364                               lc->cu.pred_mode == MODE_INTER &&
1365                               lc->cu.part_mode != PART_2Nx2N &&
1366                               trafo_depth == 0;
1367
1368     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1369         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1370         trafo_depth     < lc->cu.max_trafo_depth       &&
1371         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1372         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1373     } else {
1374         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1375                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1376                                lc->tt.inter_split_flag;
1377     }
1378
1379     if (log2_trafo_size > 2) {
1380         if (trafo_depth == 0 ||
1381             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1382             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1383                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1384         }
1385
1386         if (trafo_depth == 0 ||
1387             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1388             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1389                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1390         }
1391     }
1392
1393     if (split_transform_flag) {
1394         int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1395         int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1396
1397         ret = hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase,
1398                                  log2_cb_size, log2_trafo_size - 1,
1399                                  trafo_depth + 1, 0);
1400         if (ret < 0)
1401             return ret;
1402         ret = hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase,
1403                                  log2_cb_size, log2_trafo_size - 1,
1404                                  trafo_depth + 1, 1);
1405         if (ret < 0)
1406             return ret;
1407         ret = hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase,
1408                                  log2_cb_size, log2_trafo_size - 1,
1409                                  trafo_depth + 1, 2);
1410         if (ret < 0)
1411             return ret;
1412         ret = hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase,
1413                                  log2_cb_size, log2_trafo_size - 1,
1414                                  trafo_depth + 1, 3);
1415         if (ret < 0)
1416             return ret;
1417     } else {
1418         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1419         int log2_min_tu_size = s->sps->log2_min_tb_size;
1420         int min_tu_width     = s->sps->min_tb_width;
1421
1422         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1423             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1424             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1425             lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1426         }
1427
1428         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1429                                  log2_cb_size, log2_trafo_size, trafo_depth,
1430                                  blk_idx);
1431         if (ret < 0)
1432             return ret;
1433         // TODO: store cbf_luma somewhere else
1434         if (lc->tt.cbf_luma) {
1435             int i, j;
1436             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1437                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1438                     int x_tu = (x0 + j) >> log2_min_tu_size;
1439                     int y_tu = (y0 + i) >> log2_min_tu_size;
1440                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1441                 }
1442         }
1443         if (!s->sh.disable_deblocking_filter_flag) {
1444             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1445                                                   lc->slice_or_tiles_up_boundary,
1446                                                   lc->slice_or_tiles_left_boundary);
1447             if (s->pps->transquant_bypass_enable_flag &&
1448                 lc->cu.cu_transquant_bypass_flag)
1449                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1450         }
1451     }
1452     return 0;
1453 }
1454
1455 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1456 {
1457     //TODO: non-4:2:0 support
1458     HEVCLocalContext *lc = &s->HEVClc;
1459     GetBitContext gb;
1460     int cb_size   = 1 << log2_cb_size;
1461     int stride0   = s->frame->linesize[0];
1462     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1463     int   stride1 = s->frame->linesize[1];
1464     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1465     int   stride2 = s->frame->linesize[2];
1466     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1467
1468     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1469     const uint8_t *pcm = skip_bytes(&s->HEVClc.cc, (length + 7) >> 3);
1470     int ret;
1471
1472     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1473                                           lc->slice_or_tiles_up_boundary,
1474                                           lc->slice_or_tiles_left_boundary);
1475
1476     ret = init_get_bits(&gb, pcm, length);
1477     if (ret < 0)
1478         return ret;
1479
1480     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1481     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1482     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1483     return 0;
1484 }
1485
1486 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1487 {
1488     HEVCLocalContext *lc = &s->HEVClc;
1489     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1490     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1491
1492     if (x)
1493         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1494     if (y)
1495         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1496
1497     switch (x) {
1498     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1499     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1500     case 0: lc->pu.mvd.x = 0;                               break;
1501     }
1502
1503     switch (y) {
1504     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1505     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1506     case 0: lc->pu.mvd.y = 0;                               break;
1507     }
1508 }
1509
1510 /**
1511  * 8.5.3.2.2.1 Luma sample interpolation process
1512  *
1513  * @param s HEVC decoding context
1514  * @param dst target buffer for block data at block position
1515  * @param dststride stride of the dst buffer
1516  * @param ref reference picture buffer at origin (0, 0)
1517  * @param mv motion vector (relative to block position) to get pixel data from
1518  * @param x_off horizontal position of block from origin (0, 0)
1519  * @param y_off vertical position of block from origin (0, 0)
1520  * @param block_w width of block
1521  * @param block_h height of block
1522  */
1523 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1524                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1525                     int block_w, int block_h)
1526 {
1527     HEVCLocalContext *lc = &s->HEVClc;
1528     uint8_t *src         = ref->data[0];
1529     ptrdiff_t srcstride  = ref->linesize[0];
1530     int pic_width        = s->sps->width;
1531     int pic_height       = s->sps->height;
1532
1533     int mx         = mv->x & 3;
1534     int my         = mv->y & 3;
1535     int extra_left = ff_hevc_qpel_extra_before[mx];
1536     int extra_top  = ff_hevc_qpel_extra_before[my];
1537
1538     x_off += mv->x >> 2;
1539     y_off += mv->y >> 2;
1540     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1541
1542     if (x_off < extra_left || y_off < extra_top ||
1543         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1544         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1545         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1546         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1547         int buf_offset = extra_top *
1548                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1549
1550         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1551                                  edge_emu_stride, srcstride,
1552                                  block_w + ff_hevc_qpel_extra[mx],
1553                                  block_h + ff_hevc_qpel_extra[my],
1554                                  x_off - extra_left, y_off - extra_top,
1555                                  pic_width, pic_height);
1556         src = lc->edge_emu_buffer + buf_offset;
1557         srcstride = edge_emu_stride;
1558     }
1559     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1560                                      block_h, lc->mc_buffer);
1561 }
1562
1563 /**
1564  * 8.5.3.2.2.2 Chroma sample interpolation process
1565  *
1566  * @param s HEVC decoding context
1567  * @param dst1 target buffer for block data at block position (U plane)
1568  * @param dst2 target buffer for block data at block position (V plane)
1569  * @param dststride stride of the dst1 and dst2 buffers
1570  * @param ref reference picture buffer at origin (0, 0)
1571  * @param mv motion vector (relative to block position) to get pixel data from
1572  * @param x_off horizontal position of block from origin (0, 0)
1573  * @param y_off vertical position of block from origin (0, 0)
1574  * @param block_w width of block
1575  * @param block_h height of block
1576  */
1577 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1578                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1579                       int x_off, int y_off, int block_w, int block_h)
1580 {
1581     HEVCLocalContext *lc = &s->HEVClc;
1582     uint8_t *src1        = ref->data[1];
1583     uint8_t *src2        = ref->data[2];
1584     ptrdiff_t src1stride = ref->linesize[1];
1585     ptrdiff_t src2stride = ref->linesize[2];
1586     int pic_width        = s->sps->width >> 1;
1587     int pic_height       = s->sps->height >> 1;
1588
1589     int mx = mv->x & 7;
1590     int my = mv->y & 7;
1591
1592     x_off += mv->x >> 3;
1593     y_off += mv->y >> 3;
1594     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1595     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1596
1597     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1598         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1599         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1600         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1601         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1602         int buf_offset1 = EPEL_EXTRA_BEFORE *
1603                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1604         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1605         int buf_offset2 = EPEL_EXTRA_BEFORE *
1606                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1607
1608         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1609                                  edge_emu_stride, src1stride,
1610                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1611                                  x_off - EPEL_EXTRA_BEFORE,
1612                                  y_off - EPEL_EXTRA_BEFORE,
1613                                  pic_width, pic_height);
1614
1615         src1 = lc->edge_emu_buffer + buf_offset1;
1616         src1stride = edge_emu_stride;
1617         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1618                                              block_w, block_h, mx, my, lc->mc_buffer);
1619
1620         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1621                                  edge_emu_stride, src2stride,
1622                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1623                                  x_off - EPEL_EXTRA_BEFORE,
1624                                  y_off - EPEL_EXTRA_BEFORE,
1625                                  pic_width, pic_height);
1626         src2 = lc->edge_emu_buffer + buf_offset2;
1627         src2stride = edge_emu_stride;
1628
1629         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1630                                              block_w, block_h, mx, my,
1631                                              lc->mc_buffer);
1632     } else {
1633         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1634                                              block_w, block_h, mx, my,
1635                                              lc->mc_buffer);
1636         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1637                                              block_w, block_h, mx, my,
1638                                              lc->mc_buffer);
1639     }
1640 }
1641
1642 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1643                                 const Mv *mv, int y0, int height)
1644 {
1645     int y = (mv->y >> 2) + y0 + height + 9;
1646     ff_thread_await_progress(&ref->tf, y, 0);
1647 }
1648
1649 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1650                                 int nPbW, int nPbH,
1651                                 int log2_cb_size, int partIdx)
1652 {
1653 #define POS(c_idx, x, y)                                                              \
1654     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1655                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1656     HEVCLocalContext *lc = &s->HEVClc;
1657     int merge_idx = 0;
1658     struct MvField current_mv = {{{ 0 }}};
1659
1660     int min_pu_width = s->sps->min_pu_width;
1661
1662     MvField *tab_mvf = s->ref->tab_mvf;
1663     RefPicList  *refPicList = s->ref->refPicList;
1664     HEVCFrame *ref0, *ref1;
1665
1666     int tmpstride = MAX_PB_SIZE;
1667
1668     uint8_t *dst0 = POS(0, x0, y0);
1669     uint8_t *dst1 = POS(1, x0, y0);
1670     uint8_t *dst2 = POS(2, x0, y0);
1671     int log2_min_cb_size = s->sps->log2_min_cb_size;
1672     int min_cb_width     = s->sps->min_cb_width;
1673     int x_cb             = x0 >> log2_min_cb_size;
1674     int y_cb             = y0 >> log2_min_cb_size;
1675     int ref_idx[2];
1676     int mvp_flag[2];
1677     int x_pu, y_pu;
1678     int i, j;
1679
1680     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1681         if (s->sh.max_num_merge_cand > 1)
1682             merge_idx = ff_hevc_merge_idx_decode(s);
1683         else
1684             merge_idx = 0;
1685
1686         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1687                                    1 << log2_cb_size,
1688                                    1 << log2_cb_size,
1689                                    log2_cb_size, partIdx,
1690                                    merge_idx, &current_mv);
1691         x_pu = x0 >> s->sps->log2_min_pu_size;
1692         y_pu = y0 >> s->sps->log2_min_pu_size;
1693
1694         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1695             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1696                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1697     } else { /* MODE_INTER */
1698         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1699         if (lc->pu.merge_flag) {
1700             if (s->sh.max_num_merge_cand > 1)
1701                 merge_idx = ff_hevc_merge_idx_decode(s);
1702             else
1703                 merge_idx = 0;
1704
1705             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1706                                        partIdx, merge_idx, &current_mv);
1707             x_pu = x0 >> s->sps->log2_min_pu_size;
1708             y_pu = y0 >> s->sps->log2_min_pu_size;
1709
1710             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1711                 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1712                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1713         } else {
1714             enum InterPredIdc inter_pred_idc = PRED_L0;
1715             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1716             if (s->sh.slice_type == B_SLICE)
1717                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1718
1719             if (inter_pred_idc != PRED_L1) {
1720                 if (s->sh.nb_refs[L0]) {
1721                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1722                     current_mv.ref_idx[0] = ref_idx[0];
1723                 }
1724                 current_mv.pred_flag[0] = 1;
1725                 hls_mvd_coding(s, x0, y0, 0);
1726                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1727                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1728                                          partIdx, merge_idx, &current_mv,
1729                                          mvp_flag[0], 0);
1730                 current_mv.mv[0].x += lc->pu.mvd.x;
1731                 current_mv.mv[0].y += lc->pu.mvd.y;
1732             }
1733
1734             if (inter_pred_idc != PRED_L0) {
1735                 if (s->sh.nb_refs[L1]) {
1736                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1737                     current_mv.ref_idx[1] = ref_idx[1];
1738                 }
1739
1740                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1741                     lc->pu.mvd.x = 0;
1742                     lc->pu.mvd.y = 0;
1743                 } else {
1744                     hls_mvd_coding(s, x0, y0, 1);
1745                 }
1746
1747                 current_mv.pred_flag[1] = 1;
1748                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1749                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1750                                          partIdx, merge_idx, &current_mv,
1751                                          mvp_flag[1], 1);
1752                 current_mv.mv[1].x += lc->pu.mvd.x;
1753                 current_mv.mv[1].y += lc->pu.mvd.y;
1754             }
1755
1756             x_pu = x0 >> s->sps->log2_min_pu_size;
1757             y_pu = y0 >> s->sps->log2_min_pu_size;
1758
1759             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1760                 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1761                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1762         }
1763     }
1764
1765     if (current_mv.pred_flag[0]) {
1766         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1767         if (!ref0)
1768             return;
1769         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1770     }
1771     if (current_mv.pred_flag[1]) {
1772         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1773         if (!ref1)
1774             return;
1775         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1776     }
1777
1778     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1779         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1780         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1781
1782         luma_mc(s, tmp, tmpstride, ref0->frame,
1783                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1784
1785         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1786             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1787             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1788                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1789                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1790                                      dst0, s->frame->linesize[0], tmp,
1791                                      tmpstride, nPbW, nPbH);
1792         } else {
1793             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1794         }
1795         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1796                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1797
1798         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1799             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1800             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1801                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1802                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1803                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1804                                      nPbW / 2, nPbH / 2);
1805             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1806                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1807                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1808                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1809                                      nPbW / 2, nPbH / 2);
1810         } else {
1811             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1812             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1813         }
1814     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1815         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1816         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1817
1818         if (!ref1)
1819             return;
1820
1821         luma_mc(s, tmp, tmpstride, ref1->frame,
1822                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1823
1824         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1825             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1826             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1827                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1828                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1829                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1830                                       nPbW, nPbH);
1831         } else {
1832             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1833         }
1834
1835         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1836                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1837
1838         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1839             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1840             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1841                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1842                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1843                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1844             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1845                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1846                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1847                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1848         } else {
1849             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1850             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1851         }
1852     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1853         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1854         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1855         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1856         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1857         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1858         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1859
1860         if (!ref0 || !ref1)
1861             return;
1862
1863         luma_mc(s, tmp, tmpstride, ref0->frame,
1864                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1865         luma_mc(s, tmp2, tmpstride, ref1->frame,
1866                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1867
1868         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1869             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1870             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1871                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1872                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1873                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1874                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1875                                          dst0, s->frame->linesize[0],
1876                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1877         } else {
1878             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1879                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1880         }
1881
1882         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1883                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1884         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1885                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1886
1887         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1888             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1889             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1890                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1891                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1892                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1893                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1894                                          dst1, s->frame->linesize[1], tmp, tmp3,
1895                                          tmpstride, nPbW / 2, nPbH / 2);
1896             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1897                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1898                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1899                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1900                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1901                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1902                                          tmpstride, nPbW / 2, nPbH / 2);
1903         } else {
1904             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1905             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1906         }
1907     }
1908 }
1909
1910 /**
1911  * 8.4.1
1912  */
1913 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1914                                 int prev_intra_luma_pred_flag)
1915 {
1916     HEVCLocalContext *lc = &s->HEVClc;
1917     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1918     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1919     int min_pu_width     = s->sps->min_pu_width;
1920     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1921     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1922     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1923
1924     int cand_up   = (lc->ctb_up_flag || y0b) ?
1925                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1926     int cand_left = (lc->ctb_left_flag || x0b) ?
1927                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1928
1929     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1930
1931     MvField *tab_mvf = s->ref->tab_mvf;
1932     int intra_pred_mode;
1933     int candidate[3];
1934     int i, j;
1935
1936     // intra_pred_mode prediction does not cross vertical CTB boundaries
1937     if ((y0 - 1) < y_ctb)
1938         cand_up = INTRA_DC;
1939
1940     if (cand_left == cand_up) {
1941         if (cand_left < 2) {
1942             candidate[0] = INTRA_PLANAR;
1943             candidate[1] = INTRA_DC;
1944             candidate[2] = INTRA_ANGULAR_26;
1945         } else {
1946             candidate[0] = cand_left;
1947             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1948             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1949         }
1950     } else {
1951         candidate[0] = cand_left;
1952         candidate[1] = cand_up;
1953         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1954             candidate[2] = INTRA_PLANAR;
1955         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1956             candidate[2] = INTRA_DC;
1957         } else {
1958             candidate[2] = INTRA_ANGULAR_26;
1959         }
1960     }
1961
1962     if (prev_intra_luma_pred_flag) {
1963         intra_pred_mode = candidate[lc->pu.mpm_idx];
1964     } else {
1965         if (candidate[0] > candidate[1])
1966             FFSWAP(uint8_t, candidate[0], candidate[1]);
1967         if (candidate[0] > candidate[2])
1968             FFSWAP(uint8_t, candidate[0], candidate[2]);
1969         if (candidate[1] > candidate[2])
1970             FFSWAP(uint8_t, candidate[1], candidate[2]);
1971
1972         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1973         for (i = 0; i < 3; i++)
1974             if (intra_pred_mode >= candidate[i])
1975                 intra_pred_mode++;
1976     }
1977
1978     /* write the intra prediction units into the mv array */
1979     if (!size_in_pus)
1980         size_in_pus = 1;
1981     for (i = 0; i < size_in_pus; i++) {
1982         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1983                intra_pred_mode, size_in_pus);
1984
1985         for (j = 0; j < size_in_pus; j++) {
1986             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1987             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1988             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1989             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1990             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1991             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1992             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1993             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1994             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1995         }
1996     }
1997
1998     return intra_pred_mode;
1999 }
2000
2001 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
2002                                           int log2_cb_size, int ct_depth)
2003 {
2004     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
2005     int x_cb   = x0 >> s->sps->log2_min_cb_size;
2006     int y_cb   = y0 >> s->sps->log2_min_cb_size;
2007     int y;
2008
2009     for (y = 0; y < length; y++)
2010         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
2011                ct_depth, length);
2012 }
2013
2014 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
2015                                   int log2_cb_size)
2016 {
2017     HEVCLocalContext *lc = &s->HEVClc;
2018     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
2019     uint8_t prev_intra_luma_pred_flag[4];
2020     int split   = lc->cu.part_mode == PART_NxN;
2021     int pb_size = (1 << log2_cb_size) >> split;
2022     int side    = split + 1;
2023     int chroma_mode;
2024     int i, j;
2025
2026     for (i = 0; i < side; i++)
2027         for (j = 0; j < side; j++)
2028             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2029
2030     for (i = 0; i < side; i++) {
2031         for (j = 0; j < side; j++) {
2032             if (prev_intra_luma_pred_flag[2 * i + j])
2033                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2034             else
2035                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2036
2037             lc->pu.intra_pred_mode[2 * i + j] =
2038                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2039                                      prev_intra_luma_pred_flag[2 * i + j]);
2040         }
2041     }
2042
2043     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2044     if (chroma_mode != 4) {
2045         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2046             lc->pu.intra_pred_mode_c = 34;
2047         else
2048             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2049     } else {
2050         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2051     }
2052 }
2053
2054 static void intra_prediction_unit_default_value(HEVCContext *s,
2055                                                 int x0, int y0,
2056                                                 int log2_cb_size)
2057 {
2058     HEVCLocalContext *lc = &s->HEVClc;
2059     int pb_size          = 1 << log2_cb_size;
2060     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2061     int min_pu_width     = s->sps->min_pu_width;
2062     MvField *tab_mvf     = s->ref->tab_mvf;
2063     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2064     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2065     int j, k;
2066
2067     if (size_in_pus == 0)
2068         size_in_pus = 1;
2069     for (j = 0; j < size_in_pus; j++) {
2070         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2071         for (k = 0; k < size_in_pus; k++)
2072             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2073     }
2074 }
2075
2076 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2077 {
2078     int cb_size          = 1 << log2_cb_size;
2079     HEVCLocalContext *lc = &s->HEVClc;
2080     int log2_min_cb_size = s->sps->log2_min_cb_size;
2081     int length           = cb_size >> log2_min_cb_size;
2082     int min_cb_width     = s->sps->min_cb_width;
2083     int x_cb             = x0 >> log2_min_cb_size;
2084     int y_cb             = y0 >> log2_min_cb_size;
2085     int x, y, ret;
2086
2087     lc->cu.x                = x0;
2088     lc->cu.y                = y0;
2089     lc->cu.rqt_root_cbf     = 1;
2090     lc->cu.pred_mode        = MODE_INTRA;
2091     lc->cu.part_mode        = PART_2Nx2N;
2092     lc->cu.intra_split_flag = 0;
2093     lc->cu.pcm_flag         = 0;
2094
2095     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2096     for (x = 0; x < 4; x++)
2097         lc->pu.intra_pred_mode[x] = 1;
2098     if (s->pps->transquant_bypass_enable_flag) {
2099         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2100         if (lc->cu.cu_transquant_bypass_flag)
2101             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2102     } else
2103         lc->cu.cu_transquant_bypass_flag = 0;
2104
2105     if (s->sh.slice_type != I_SLICE) {
2106         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2107
2108         lc->cu.pred_mode = MODE_SKIP;
2109         x = y_cb * min_cb_width + x_cb;
2110         for (y = 0; y < length; y++) {
2111             memset(&s->skip_flag[x], skip_flag, length);
2112             x += min_cb_width;
2113         }
2114         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2115     }
2116
2117     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2118         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2119         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2120
2121         if (!s->sh.disable_deblocking_filter_flag)
2122             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2123                                                   lc->slice_or_tiles_up_boundary,
2124                                                   lc->slice_or_tiles_left_boundary);
2125     } else {
2126         if (s->sh.slice_type != I_SLICE)
2127             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2128         if (lc->cu.pred_mode != MODE_INTRA ||
2129             log2_cb_size == s->sps->log2_min_cb_size) {
2130             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2131             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2132                                       lc->cu.pred_mode == MODE_INTRA;
2133         }
2134
2135         if (lc->cu.pred_mode == MODE_INTRA) {
2136             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2137                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2138                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2139                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2140             }
2141             if (lc->cu.pcm_flag) {
2142                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2143                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2144                 if (s->sps->pcm.loop_filter_disable_flag)
2145                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2146
2147                 if (ret < 0)
2148                     return ret;
2149             } else {
2150                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2151             }
2152         } else {
2153             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2154             switch (lc->cu.part_mode) {
2155             case PART_2Nx2N:
2156                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2157                 break;
2158             case PART_2NxN:
2159                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2160                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2161                 break;
2162             case PART_Nx2N:
2163                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2164                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2165                 break;
2166             case PART_2NxnU:
2167                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2168                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2169                 break;
2170             case PART_2NxnD:
2171                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2172                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2173                 break;
2174             case PART_nLx2N:
2175                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2176                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2177                 break;
2178             case PART_nRx2N:
2179                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2180                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2181                 break;
2182             case PART_NxN:
2183                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2184                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2185                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2186                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2187                 break;
2188             }
2189         }
2190
2191         if (!lc->cu.pcm_flag) {
2192             if (lc->cu.pred_mode != MODE_INTRA &&
2193                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2194                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2195             }
2196             if (lc->cu.rqt_root_cbf) {
2197                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2198                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2199                                          s->sps->max_transform_hierarchy_depth_inter;
2200                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2201                                          log2_cb_size,
2202                                          log2_cb_size, 0, 0);
2203                 if (ret < 0)
2204                     return ret;
2205             } else {
2206                 if (!s->sh.disable_deblocking_filter_flag)
2207                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2208                                                           lc->slice_or_tiles_up_boundary,
2209                                                           lc->slice_or_tiles_left_boundary);
2210             }
2211         }
2212     }
2213
2214     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2215         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2216
2217     x = y_cb * min_cb_width + x_cb;
2218     for (y = 0; y < length; y++) {
2219         memset(&s->qp_y_tab[x], lc->qp_y, length);
2220         x += min_cb_width;
2221     }
2222
2223     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2224
2225     return 0;
2226 }
2227
2228 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2229                                int log2_cb_size, int cb_depth)
2230 {
2231     HEVCLocalContext *lc = &s->HEVClc;
2232     const int cb_size    = 1 << log2_cb_size;
2233
2234     lc->ct.depth = cb_depth;
2235     if (x0 + cb_size <= s->sps->width  &&
2236         y0 + cb_size <= s->sps->height &&
2237         log2_cb_size > s->sps->log2_min_cb_size) {
2238         SAMPLE(s->split_cu_flag, x0, y0) =
2239             ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2240     } else {
2241         SAMPLE(s->split_cu_flag, x0, y0) =
2242             (log2_cb_size > s->sps->log2_min_cb_size);
2243     }
2244     if (s->pps->cu_qp_delta_enabled_flag &&
2245         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2246         lc->tu.is_cu_qp_delta_coded = 0;
2247         lc->tu.cu_qp_delta          = 0;
2248     }
2249
2250     if (SAMPLE(s->split_cu_flag, x0, y0)) {
2251         const int cb_size_split = cb_size >> 1;
2252         const int x1 = x0 + cb_size_split;
2253         const int y1 = y0 + cb_size_split;
2254
2255         log2_cb_size--;
2256         cb_depth++;
2257
2258 #define SUBDIVIDE(x, y)                                                \
2259 do {                                                                   \
2260     if (x < s->sps->width && y < s->sps->height) {                     \
2261         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2262         if (ret < 0)                                                   \
2263             return ret;                                                \
2264     }                                                                  \
2265 } while (0)
2266
2267         SUBDIVIDE(x0, y0);
2268         SUBDIVIDE(x1, y0);
2269         SUBDIVIDE(x0, y1);
2270         SUBDIVIDE(x1, y1);
2271     } else {
2272         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2273         if (ret < 0)
2274             return ret;
2275     }
2276
2277     return 0;
2278 }
2279
2280 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2281                                  int ctb_addr_ts)
2282 {
2283     HEVCLocalContext *lc  = &s->HEVClc;
2284     int ctb_size          = 1 << s->sps->log2_ctb_size;
2285     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2286     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2287
2288     int tile_left_boundary, tile_up_boundary;
2289     int slice_left_boundary, slice_up_boundary;
2290
2291     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2292
2293     if (s->pps->entropy_coding_sync_enabled_flag) {
2294         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2295             lc->first_qp_group = 1;
2296         lc->end_of_tiles_x = s->sps->width;
2297     } else if (s->pps->tiles_enabled_flag) {
2298         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2299             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2300             lc->start_of_tiles_x = x_ctb;
2301             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2302             lc->first_qp_group   = 1;
2303         }
2304     } else {
2305         lc->end_of_tiles_x = s->sps->width;
2306     }
2307
2308     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2309
2310     if (s->pps->tiles_enabled_flag) {
2311         tile_left_boundary  = x_ctb > 0 &&
2312                               s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2313         slice_left_boundary = x_ctb > 0 &&
2314                               s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2315         tile_up_boundary  = y_ctb > 0 &&
2316                             s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2317         slice_up_boundary = y_ctb > 0 &&
2318                             s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2319     } else {
2320         tile_left_boundary  =
2321         tile_up_boundary    = 1;
2322         slice_left_boundary = ctb_addr_in_slice > 0;
2323         slice_up_boundary   = ctb_addr_in_slice >= s->sps->ctb_width;
2324     }
2325     lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2326     lc->slice_or_tiles_up_boundary   = (!slice_up_boundary + (!tile_up_boundary << 1));
2327     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2328     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2329     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2330     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2331 }
2332
2333 static int hls_slice_data(HEVCContext *s)
2334 {
2335     int ctb_size    = 1 << s->sps->log2_ctb_size;
2336     int more_data   = 1;
2337     int x_ctb       = 0;
2338     int y_ctb       = 0;
2339     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2340     int ret;
2341
2342     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2343         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2344
2345         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2346         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2347         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2348
2349         ff_hevc_cabac_init(s, ctb_addr_ts);
2350
2351         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2352
2353         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2354         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2355         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2356
2357         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2358         if (ret < 0)
2359             return ret;
2360         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2361
2362         ctb_addr_ts++;
2363         ff_hevc_save_states(s, ctb_addr_ts);
2364         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2365     }
2366
2367     if (x_ctb + ctb_size >= s->sps->width &&
2368         y_ctb + ctb_size >= s->sps->height)
2369         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2370
2371     return ctb_addr_ts;
2372 }
2373
2374 /**
2375  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2376  * 0 if the unit should be skipped, 1 otherwise
2377  */
2378 static int hls_nal_unit(HEVCContext *s)
2379 {
2380     GetBitContext *gb = &s->HEVClc.gb;
2381     int nuh_layer_id;
2382
2383     if (get_bits1(gb) != 0)
2384         return AVERROR_INVALIDDATA;
2385
2386     s->nal_unit_type = get_bits(gb, 6);
2387
2388     nuh_layer_id   = get_bits(gb, 6);
2389     s->temporal_id = get_bits(gb, 3) - 1;
2390     if (s->temporal_id < 0)
2391         return AVERROR_INVALIDDATA;
2392
2393     av_log(s->avctx, AV_LOG_DEBUG,
2394            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2395            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2396
2397     return nuh_layer_id == 0;
2398 }
2399
2400 static void restore_tqb_pixels(HEVCContext *s)
2401 {
2402     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2403     int x, y, c_idx;
2404
2405     for (c_idx = 0; c_idx < 3; c_idx++) {
2406         ptrdiff_t stride = s->frame->linesize[c_idx];
2407         int hshift       = s->sps->hshift[c_idx];
2408         int vshift       = s->sps->vshift[c_idx];
2409         for (y = 0; y < s->sps->min_pu_height; y++) {
2410             for (x = 0; x < s->sps->min_pu_width; x++) {
2411                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2412                     int n;
2413                     int len      = min_pu_size >> hshift;
2414                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2415                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2416                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2417                         memcpy(dst, src, len);
2418                         src += stride;
2419                         dst += stride;
2420                     }
2421                 }
2422             }
2423         }
2424     }
2425 }
2426
2427 static int set_side_data(HEVCContext *s)
2428 {
2429     AVFrame *out = s->ref->frame;
2430
2431     if (s->sei_frame_packing_present &&
2432         s->frame_packing_arrangement_type >= 3 &&
2433         s->frame_packing_arrangement_type <= 5 &&
2434         s->content_interpretation_type > 0 &&
2435         s->content_interpretation_type < 3) {
2436         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2437         if (!stereo)
2438             return AVERROR(ENOMEM);
2439
2440         switch (s->frame_packing_arrangement_type) {
2441         case 3:
2442             if (s->quincunx_subsampling)
2443                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2444             else
2445                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2446             break;
2447         case 4:
2448             stereo->type = AV_STEREO3D_TOPBOTTOM;
2449             break;
2450         case 5:
2451             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2452             break;
2453         }
2454
2455         if (s->content_interpretation_type == 2)
2456             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2457     }
2458
2459     return 0;
2460 }
2461
2462 static int hevc_frame_start(HEVCContext *s)
2463 {
2464     HEVCLocalContext *lc = &s->HEVClc;
2465     int ret;
2466
2467     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2468     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2469     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2470     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2471
2472     lc->start_of_tiles_x = 0;
2473     s->is_decoded        = 0;
2474     s->first_nal_type    = s->nal_unit_type;
2475
2476     if (s->pps->tiles_enabled_flag)
2477         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2478
2479     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2480                               s->poc);
2481     if (ret < 0)
2482         goto fail;
2483
2484     ret = ff_hevc_frame_rps(s);
2485     if (ret < 0) {
2486         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2487         goto fail;
2488     }
2489
2490     ret = set_side_data(s);
2491     if (ret < 0)
2492         goto fail;
2493
2494     av_frame_unref(s->output_frame);
2495     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2496     if (ret < 0)
2497         goto fail;
2498
2499     ff_thread_finish_setup(s->avctx);
2500
2501     return 0;
2502
2503 fail:
2504     if (s->ref)
2505         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2506     s->ref = NULL;
2507     return ret;
2508 }
2509
2510 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2511 {
2512     HEVCLocalContext *lc = &s->HEVClc;
2513     GetBitContext *gb    = &lc->gb;
2514     int ctb_addr_ts, ret;
2515
2516     ret = init_get_bits8(gb, nal, length);
2517     if (ret < 0)
2518         return ret;
2519
2520     ret = hls_nal_unit(s);
2521     if (ret < 0) {
2522         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2523                s->nal_unit_type);
2524         goto fail;
2525     } else if (!ret)
2526         return 0;
2527
2528     switch (s->nal_unit_type) {
2529     case NAL_VPS:
2530         ret = ff_hevc_decode_nal_vps(s);
2531         if (ret < 0)
2532             goto fail;
2533         break;
2534     case NAL_SPS:
2535         ret = ff_hevc_decode_nal_sps(s);
2536         if (ret < 0)
2537             goto fail;
2538         break;
2539     case NAL_PPS:
2540         ret = ff_hevc_decode_nal_pps(s);
2541         if (ret < 0)
2542             goto fail;
2543         break;
2544     case NAL_SEI_PREFIX:
2545     case NAL_SEI_SUFFIX:
2546         ret = ff_hevc_decode_nal_sei(s);
2547         if (ret < 0)
2548             goto fail;
2549         break;
2550     case NAL_TRAIL_R:
2551     case NAL_TRAIL_N:
2552     case NAL_TSA_N:
2553     case NAL_TSA_R:
2554     case NAL_STSA_N:
2555     case NAL_STSA_R:
2556     case NAL_BLA_W_LP:
2557     case NAL_BLA_W_RADL:
2558     case NAL_BLA_N_LP:
2559     case NAL_IDR_W_RADL:
2560     case NAL_IDR_N_LP:
2561     case NAL_CRA_NUT:
2562     case NAL_RADL_N:
2563     case NAL_RADL_R:
2564     case NAL_RASL_N:
2565     case NAL_RASL_R:
2566         ret = hls_slice_header(s);
2567         if (ret < 0)
2568             return ret;
2569
2570         if (s->max_ra == INT_MAX) {
2571             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2572                 s->max_ra = s->poc;
2573             } else {
2574                 if (IS_IDR(s))
2575                     s->max_ra = INT_MIN;
2576             }
2577         }
2578
2579         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2580             s->poc <= s->max_ra) {
2581             s->is_decoded = 0;
2582             break;
2583         } else {
2584             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2585                 s->max_ra = INT_MIN;
2586         }
2587
2588         if (s->sh.first_slice_in_pic_flag) {
2589             ret = hevc_frame_start(s);
2590             if (ret < 0)
2591                 return ret;
2592         } else if (!s->ref) {
2593             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2594             goto fail;
2595         }
2596
2597         if (s->nal_unit_type != s->first_nal_type) {
2598             av_log(s->avctx, AV_LOG_ERROR,
2599                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2600                    s->first_nal_type, s->nal_unit_type);
2601             return AVERROR_INVALIDDATA;
2602         }
2603
2604         if (!s->sh.dependent_slice_segment_flag &&
2605             s->sh.slice_type != I_SLICE) {
2606             ret = ff_hevc_slice_rpl(s);
2607             if (ret < 0) {
2608                 av_log(s->avctx, AV_LOG_WARNING,
2609                        "Error constructing the reference lists for the current slice.\n");
2610                 goto fail;
2611             }
2612         }
2613
2614         ctb_addr_ts = hls_slice_data(s);
2615         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2616             s->is_decoded = 1;
2617             if ((s->pps->transquant_bypass_enable_flag ||
2618                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2619                 s->sps->sao_enabled)
2620                 restore_tqb_pixels(s);
2621         }
2622
2623         if (ctb_addr_ts < 0) {
2624             ret = ctb_addr_ts;
2625             goto fail;
2626         }
2627         break;
2628     case NAL_EOS_NUT:
2629     case NAL_EOB_NUT:
2630         s->seq_decode = (s->seq_decode + 1) & 0xff;
2631         s->max_ra     = INT_MAX;
2632         break;
2633     case NAL_AUD:
2634     case NAL_FD_NUT:
2635         break;
2636     default:
2637         av_log(s->avctx, AV_LOG_INFO,
2638                "Skipping NAL unit %d\n", s->nal_unit_type);
2639     }
2640
2641     return 0;
2642 fail:
2643     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2644         return ret;
2645     return 0;
2646 }
2647
2648 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2649  * between these functions would be nice. */
2650 static int extract_rbsp(const uint8_t *src, int length,
2651                         HEVCNAL *nal)
2652 {
2653     int i, si, di;
2654     uint8_t *dst;
2655
2656 #define STARTCODE_TEST                                                  \
2657         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2658             if (src[i + 2] != 3) {                                      \
2659                 /* startcode, so we must be past the end */             \
2660                 length = i;                                             \
2661             }                                                           \
2662             break;                                                      \
2663         }
2664 #if HAVE_FAST_UNALIGNED
2665 #define FIND_FIRST_ZERO                                                 \
2666         if (i > 0 && !src[i])                                           \
2667             i--;                                                        \
2668         while (src[i])                                                  \
2669             i++
2670 #if HAVE_FAST_64BIT
2671     for (i = 0; i + 1 < length; i += 9) {
2672         if (!((~AV_RN64A(src + i) &
2673                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2674               0x8000800080008080ULL))
2675             continue;
2676         FIND_FIRST_ZERO;
2677         STARTCODE_TEST;
2678         i -= 7;
2679     }
2680 #else
2681     for (i = 0; i + 1 < length; i += 5) {
2682         if (!((~AV_RN32A(src + i) &
2683                (AV_RN32A(src + i) - 0x01000101U)) &
2684               0x80008080U))
2685             continue;
2686         FIND_FIRST_ZERO;
2687         STARTCODE_TEST;
2688         i -= 3;
2689     }
2690 #endif /* HAVE_FAST_64BIT */
2691 #else
2692     for (i = 0; i + 1 < length; i += 2) {
2693         if (src[i])
2694             continue;
2695         if (i > 0 && src[i - 1] == 0)
2696             i--;
2697         STARTCODE_TEST;
2698     }
2699 #endif /* HAVE_FAST_UNALIGNED */
2700
2701     if (i >= length - 1) { // no escaped 0
2702         nal->data = src;
2703         nal->size = length;
2704         return length;
2705     }
2706
2707     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2708                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2709     if (!nal->rbsp_buffer)
2710         return AVERROR(ENOMEM);
2711
2712     dst = nal->rbsp_buffer;
2713
2714     memcpy(dst, src, i);
2715     si = di = i;
2716     while (si + 2 < length) {
2717         // remove escapes (very rare 1:2^22)
2718         if (src[si + 2] > 3) {
2719             dst[di++] = src[si++];
2720             dst[di++] = src[si++];
2721         } else if (src[si] == 0 && src[si + 1] == 0) {
2722             if (src[si + 2] == 3) { // escape
2723                 dst[di++] = 0;
2724                 dst[di++] = 0;
2725                 si       += 3;
2726
2727                 continue;
2728             } else // next start code
2729                 goto nsc;
2730         }
2731
2732         dst[di++] = src[si++];
2733     }
2734     while (si < length)
2735         dst[di++] = src[si++];
2736
2737 nsc:
2738     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2739
2740     nal->data = dst;
2741     nal->size = di;
2742     return si;
2743 }
2744
2745 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2746 {
2747     int i, consumed, ret = 0;
2748
2749     s->ref = NULL;
2750     s->eos = 0;
2751
2752     /* split the input packet into NAL units, so we know the upper bound on the
2753      * number of slices in the frame */
2754     s->nb_nals = 0;
2755     while (length >= 4) {
2756         HEVCNAL *nal;
2757         int extract_length = 0;
2758
2759         if (s->is_nalff) {
2760             int i;
2761             for (i = 0; i < s->nal_length_size; i++)
2762                 extract_length = (extract_length << 8) | buf[i];
2763             buf    += s->nal_length_size;
2764             length -= s->nal_length_size;
2765
2766             if (extract_length > length) {
2767                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2768                 ret = AVERROR_INVALIDDATA;
2769                 goto fail;
2770             }
2771         } else {
2772             if (buf[2] == 0) {
2773                 length--;
2774                 buf++;
2775                 continue;
2776             }
2777             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2778                 ret = AVERROR_INVALIDDATA;
2779                 goto fail;
2780             }
2781
2782             buf           += 3;
2783             length        -= 3;
2784             extract_length = length;
2785         }
2786
2787         if (s->nals_allocated < s->nb_nals + 1) {
2788             int new_size = s->nals_allocated + 1;
2789             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2790             if (!tmp) {
2791                 ret = AVERROR(ENOMEM);
2792                 goto fail;
2793             }
2794             s->nals = tmp;
2795             memset(s->nals + s->nals_allocated, 0,
2796                    (new_size - s->nals_allocated) * sizeof(*tmp));
2797             s->nals_allocated = new_size;
2798         }
2799         nal = &s->nals[s->nb_nals++];
2800
2801         consumed = extract_rbsp(buf, extract_length, nal);
2802         if (consumed < 0) {
2803             ret = consumed;
2804             goto fail;
2805         }
2806
2807         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2808         if (ret < 0)
2809             goto fail;
2810         hls_nal_unit(s);
2811
2812         if (s->nal_unit_type == NAL_EOB_NUT ||
2813             s->nal_unit_type == NAL_EOS_NUT)
2814             s->eos = 1;
2815
2816         buf    += consumed;
2817         length -= consumed;
2818     }
2819
2820     /* parse the NAL units */
2821     for (i = 0; i < s->nb_nals; i++) {
2822         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2823         if (ret < 0) {
2824             av_log(s->avctx, AV_LOG_WARNING,
2825                    "Error parsing NAL unit #%d.\n", i);
2826             goto fail;
2827         }
2828     }
2829
2830 fail:
2831     if (s->ref)
2832         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2833
2834     return ret;
2835 }
2836
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits.
 *
 * @param log_ctx context passed to av_log()
 * @param level   log level to print at
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *cur = md5;
    const uint8_t *end = md5 + 16;

    while (cur < end)
        av_log(log_ctx, level, "%02"PRIx8, *cur++);
}
2843
2844 static int verify_md5(HEVCContext *s, AVFrame *frame)
2845 {
2846     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2847     int pixel_shift;
2848     int i, j;
2849
2850     if (!desc)
2851         return AVERROR(EINVAL);
2852
2853     pixel_shift = desc->comp[0].depth_minus1 > 7;
2854
2855     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2856            s->poc);
2857
2858     /* the checksums are LE, so we have to byteswap for >8bpp formats
2859      * on BE arches */
2860 #if HAVE_BIGENDIAN
2861     if (pixel_shift && !s->checksum_buf) {
2862         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2863                        FFMAX3(frame->linesize[0], frame->linesize[1],
2864                               frame->linesize[2]));
2865         if (!s->checksum_buf)
2866             return AVERROR(ENOMEM);
2867     }
2868 #endif
2869
2870     for (i = 0; frame->data[i]; i++) {
2871         int width  = s->avctx->coded_width;
2872         int height = s->avctx->coded_height;
2873         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2874         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2875         uint8_t md5[16];
2876
2877         av_md5_init(s->md5_ctx);
2878         for (j = 0; j < h; j++) {
2879             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2880 #if HAVE_BIGENDIAN
2881             if (pixel_shift) {
2882                 s->dsp.bswap16_buf((uint16_t*)s->checksum_buf,
2883                                    (const uint16_t*)src, w);
2884                 src = s->checksum_buf;
2885             }
2886 #endif
2887             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2888         }
2889         av_md5_final(s->md5_ctx, md5);
2890
2891         if (!memcmp(md5, s->md5[i], 16)) {
2892             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2893             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2894             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2895         } else {
2896             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2897             print_md5(s->avctx, AV_LOG_ERROR, md5);
2898             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2899             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2900             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2901             return AVERROR_INVALIDDATA;
2902         }
2903     }
2904
2905     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2906
2907     return 0;
2908 }
2909
2910 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2911                              AVPacket *avpkt)
2912 {
2913     int ret;
2914     HEVCContext *s = avctx->priv_data;
2915
2916     if (!avpkt->size) {
2917         ret = ff_hevc_output_frame(s, data, 1);
2918         if (ret < 0)
2919             return ret;
2920
2921         *got_output = ret;
2922         return 0;
2923     }
2924
2925     s->ref = NULL;
2926     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2927     if (ret < 0)
2928         return ret;
2929
2930     /* verify the SEI checksum */
2931     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2932         s->is_md5) {
2933         ret = verify_md5(s, s->ref->frame);
2934         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2935             ff_hevc_unref_frame(s, s->ref, ~0);
2936             return ret;
2937         }
2938     }
2939     s->is_md5 = 0;
2940
2941     if (s->is_decoded) {
2942         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2943         s->is_decoded = 0;
2944     }
2945
2946     if (s->output_frame->buf[0]) {
2947         av_frame_move_ref(data, s->output_frame);
2948         *got_output = 1;
2949     }
2950
2951     return avpkt->size;
2952 }
2953
2954 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2955 {
2956     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2957     if (ret < 0)
2958         return ret;
2959
2960     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2961     if (!dst->tab_mvf_buf)
2962         goto fail;
2963     dst->tab_mvf = src->tab_mvf;
2964
2965     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2966     if (!dst->rpl_tab_buf)
2967         goto fail;
2968     dst->rpl_tab = src->rpl_tab;
2969
2970     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2971     if (!dst->rpl_buf)
2972         goto fail;
2973
2974     dst->poc        = src->poc;
2975     dst->ctb_count  = src->ctb_count;
2976     dst->window     = src->window;
2977     dst->flags      = src->flags;
2978     dst->sequence   = src->sequence;
2979
2980     return 0;
2981 fail:
2982     ff_hevc_unref_frame(s, dst, ~0);
2983     return AVERROR(ENOMEM);
2984 }
2985
2986 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2987 {
2988     HEVCContext       *s = avctx->priv_data;
2989     int i;
2990
2991     pic_arrays_free(s);
2992
2993     av_freep(&s->md5_ctx);
2994
2995     av_frame_free(&s->tmp_frame);
2996     av_frame_free(&s->output_frame);
2997
2998     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2999         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3000         av_frame_free(&s->DPB[i].frame);
3001     }
3002
3003     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3004         av_buffer_unref(&s->vps_list[i]);
3005     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3006         av_buffer_unref(&s->sps_list[i]);
3007     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3008         av_buffer_unref(&s->pps_list[i]);
3009
3010     for (i = 0; i < s->nals_allocated; i++)
3011         av_freep(&s->nals[i].rbsp_buffer);
3012     av_freep(&s->nals);
3013     s->nals_allocated = 0;
3014
3015     return 0;
3016 }
3017
3018 static av_cold int hevc_init_context(AVCodecContext *avctx)
3019 {
3020     HEVCContext *s = avctx->priv_data;
3021     int i;
3022
3023     s->avctx = avctx;
3024
3025     s->tmp_frame = av_frame_alloc();
3026     if (!s->tmp_frame)
3027         goto fail;
3028
3029     s->output_frame = av_frame_alloc();
3030     if (!s->output_frame)
3031         goto fail;
3032
3033     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3034         s->DPB[i].frame = av_frame_alloc();
3035         if (!s->DPB[i].frame)
3036             goto fail;
3037         s->DPB[i].tf.f = s->DPB[i].frame;
3038     }
3039
3040     s->max_ra = INT_MAX;
3041
3042     s->md5_ctx = av_md5_alloc();
3043     if (!s->md5_ctx)
3044         goto fail;
3045
3046     ff_dsputil_init(&s->dsp, avctx);
3047
3048     s->context_initialized = 1;
3049
3050     return 0;
3051
3052 fail:
3053     hevc_decode_free(avctx);
3054     return AVERROR(ENOMEM);
3055 }
3056
3057 static int hevc_update_thread_context(AVCodecContext *dst,
3058                                       const AVCodecContext *src)
3059 {
3060     HEVCContext *s  = dst->priv_data;
3061     HEVCContext *s0 = src->priv_data;
3062     int i, ret;
3063
3064     if (!s->context_initialized) {
3065         ret = hevc_init_context(dst);
3066         if (ret < 0)
3067             return ret;
3068     }
3069
3070     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3071         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3072         if (s0->DPB[i].frame->buf[0]) {
3073             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3074             if (ret < 0)
3075                 return ret;
3076         }
3077     }
3078
3079     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3080         av_buffer_unref(&s->vps_list[i]);
3081         if (s0->vps_list[i]) {
3082             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3083             if (!s->vps_list[i])
3084                 return AVERROR(ENOMEM);
3085         }
3086     }
3087
3088     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3089         av_buffer_unref(&s->sps_list[i]);
3090         if (s0->sps_list[i]) {
3091             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3092             if (!s->sps_list[i])
3093                 return AVERROR(ENOMEM);
3094         }
3095     }
3096
3097     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3098         av_buffer_unref(&s->pps_list[i]);
3099         if (s0->pps_list[i]) {
3100             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3101             if (!s->pps_list[i])
3102                 return AVERROR(ENOMEM);
3103         }
3104     }
3105
3106     if (s->sps != s0->sps)
3107         ret = set_sps(s, s0->sps);
3108
3109     s->seq_decode = s0->seq_decode;
3110     s->seq_output = s0->seq_output;
3111     s->pocTid0    = s0->pocTid0;
3112     s->max_ra     = s0->max_ra;
3113
3114     s->is_nalff        = s0->is_nalff;
3115     s->nal_length_size = s0->nal_length_size;
3116
3117     if (s0->eos) {
3118         s->seq_decode = (s->seq_decode + 1) & 0xff;
3119         s->max_ra = INT_MAX;
3120     }
3121
3122     return 0;
3123 }
3124
3125 static int hevc_decode_extradata(HEVCContext *s)
3126 {
3127     AVCodecContext *avctx = s->avctx;
3128     GetByteContext gb;
3129     int ret;
3130
3131     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3132
3133     if (avctx->extradata_size > 3 &&
3134         (avctx->extradata[0] || avctx->extradata[1] ||
3135          avctx->extradata[2] > 1)) {
3136         /* It seems the extradata is encoded as hvcC format.
3137          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3138          * is finalized. When finalized, configurationVersion will be 1 and we
3139          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3140         int i, j, num_arrays, nal_len_size;
3141
3142         s->is_nalff = 1;
3143
3144         bytestream2_skip(&gb, 21);
3145         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3146         num_arrays   = bytestream2_get_byte(&gb);
3147
3148         /* nal units in the hvcC always have length coded with 2 bytes,
3149          * so put a fake nal_length_size = 2 while parsing them */
3150         s->nal_length_size = 2;
3151
3152         /* Decode nal units from hvcC. */
3153         for (i = 0; i < num_arrays; i++) {
3154             int type = bytestream2_get_byte(&gb) & 0x3f;
3155             int cnt  = bytestream2_get_be16(&gb);
3156
3157             for (j = 0; j < cnt; j++) {
3158                 // +2 for the nal size field
3159                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3160                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3161                     av_log(s->avctx, AV_LOG_ERROR,
3162                            "Invalid NAL unit size in extradata.\n");
3163                     return AVERROR_INVALIDDATA;
3164                 }
3165
3166                 ret = decode_nal_units(s, gb.buffer, nalsize);
3167                 if (ret < 0) {
3168                     av_log(avctx, AV_LOG_ERROR,
3169                            "Decoding nal unit %d %d from hvcC failed\n",
3170                            type, i);
3171                     return ret;
3172                 }
3173                 bytestream2_skip(&gb, nalsize);
3174             }
3175         }
3176
3177         /* Now store right nal length size, that will be used to parse
3178          * all other nals */
3179         s->nal_length_size = nal_len_size;
3180     } else {
3181         s->is_nalff = 0;
3182         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3183         if (ret < 0)
3184             return ret;
3185     }
3186     return 0;
3187 }
3188
3189 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3190 {
3191     HEVCContext *s = avctx->priv_data;
3192     int ret;
3193
3194     ff_init_cabac_states();
3195
3196     avctx->internal->allocate_progress = 1;
3197
3198     ret = hevc_init_context(avctx);
3199     if (ret < 0)
3200         return ret;
3201
3202     if (avctx->extradata_size > 0 && avctx->extradata) {
3203         ret = hevc_decode_extradata(s);
3204         if (ret < 0) {
3205             hevc_decode_free(avctx);
3206             return ret;
3207         }
3208     }
3209
3210     return 0;
3211 }
3212
3213 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3214 {
3215     HEVCContext *s = avctx->priv_data;
3216     int ret;
3217
3218     memset(s, 0, sizeof(*s));
3219
3220     ret = hevc_init_context(avctx);
3221     if (ret < 0)
3222         return ret;
3223
3224     return 0;
3225 }
3226
3227 static void hevc_decode_flush(AVCodecContext *avctx)
3228 {
3229     HEVCContext *s = avctx->priv_data;
3230     ff_hevc_flush_dpb(s);
3231     s->max_ra = INT_MAX;
3232 }
3233
/* Byte offset of a HEVCContext member, for the AVOption table below. */
#define OFFSET(field) offsetof(HEVCContext, field)
/* Option flags shared by every entry: video decoding parameter. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3236
3237 static const AVProfile profiles[] = {
3238     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3239     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3240     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3241     { FF_PROFILE_UNKNOWN },
3242 };
3243
3244 static const AVOption options[] = {
3245     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3246         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3247     { NULL },
3248 };
3249
3250 static const AVClass hevc_decoder_class = {
3251     .class_name = "HEVC decoder",
3252     .item_name  = av_default_item_name,
3253     .option     = options,
3254     .version    = LIBAVUTIL_VERSION_INT,
3255 };
3256
3257 AVCodec ff_hevc_decoder = {
3258     .name                  = "hevc",
3259     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3260     .type                  = AVMEDIA_TYPE_VIDEO,
3261     .id                    = AV_CODEC_ID_HEVC,
3262     .priv_data_size        = sizeof(HEVCContext),
3263     .priv_class            = &hevc_decoder_class,
3264     .init                  = hevc_decode_init,
3265     .close                 = hevc_decode_free,
3266     .decode                = hevc_decode_frame,
3267     .flush                 = hevc_decode_flush,
3268     .update_thread_context = hevc_update_thread_context,
3269     .init_thread_copy      = hevc_init_thread_copy,
3270     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3271                              CODEC_CAP_FRAME_THREADS,
3272     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3273 };