]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c
Add av_image_check_sar() and use it to validate SAR
[ffmpeg] / libavcodec / hevc.c
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/internal.h"
29 #include "libavutil/md5.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/pixdesc.h"
32 #include "libavutil/stereo3d.h"
33
34 #include "bytestream.h"
35 #include "cabac_functions.h"
36 #include "dsputil.h"
37 #include "golomb.h"
38 #include "hevc.h"
39
/*
 * Quarter-pel margin tables, each indexed by a value in 0..3.
 * NOTE(review): presumably the index is the fractional part of a motion
 * vector component and the values are the extra source samples required
 * before/after the block -- confirm against the users of these tables.
 *
 * For every index i: ff_hevc_qpel_extra[i] ==
 *     ff_hevc_qpel_extra_before[i] + ff_hevc_qpel_extra_after[i].
 */
const uint8_t ff_hevc_qpel_extra_before[4] = {
    0, 3, 3, 2,
};

const uint8_t ff_hevc_qpel_extra_after[4] = {
    0, 3, 4, 4,
};

const uint8_t ff_hevc_qpel_extra[4] = {
    0, 6, 7, 6,
};
43
/* Scan of a single position: the only entry is 0. */
static const uint8_t scan_1x1[1] = {
    0,
};

/*
 * Horizontal (raster) scan of a 2x2 area: entry i holds the x resp. y
 * coordinate of the i-th scanned position, i.e. x = i & 1, y = i >> 1.
 */
static const uint8_t horiz_scan2x2_x[4] = {
    0, 1,
    0, 1,
};

static const uint8_t horiz_scan2x2_y[4] = {
    0, 0,
    1, 1,
};

/*
 * Horizontal (raster) scan of a 4x4 area: x = i & 3, y = i >> 2.
 */
static const uint8_t horiz_scan4x4_x[16] = {
    0, 1, 2, 3, 0, 1, 2, 3,
    0, 1, 2, 3, 0, 1, 2, 3,
};

static const uint8_t horiz_scan4x4_y[16] = {
    0, 0, 0, 0, 1, 1, 1, 1,
    2, 2, 2, 2, 3, 3, 3, 3,
};

/*
 * Inverse horizontal scan of an 8x8 area, indexed [y][x].
 * The area is split into four 4x4 quadrants numbered in raster order
 * (top-left, top-right, bottom-left, bottom-right); positions inside a
 * quadrant are numbered in raster order as well, so that
 *   inv[y][x] = 16 * (2 * (y >> 2) + (x >> 2)) + 4 * (y & 3) + (x & 3).
 */
static const uint8_t horiz_scan8x8_inv[8][8] = {
    {  0,  1,  2,  3,   16, 17, 18, 19 },
    {  4,  5,  6,  7,   20, 21, 22, 23 },
    {  8,  9, 10, 11,   24, 25, 26, 27 },
    { 12, 13, 14, 15,   28, 29, 30, 31 },

    { 32, 33, 34, 35,   48, 49, 50, 51 },
    { 36, 37, 38, 39,   52, 53, 54, 55 },
    { 40, 41, 42, 43,   56, 57, 58, 59 },
    { 44, 45, 46, 47,   60, 61, 62, 63 },
};
74
/*
 * Diagonal scan of a 2x2 area: entry i of the _x/_y tables holds the
 * coordinates of the i-th scanned position.
 */
static const uint8_t diag_scan2x2_x[4] = {
    0, 0,
    1, 1,
};

static const uint8_t diag_scan2x2_y[4] = {
    0, 1,
    0, 1,
};

/*
 * Inverse of the 2x2 diagonal scan, indexed [y][x]:
 * diag_scan2x2_inv[diag_scan2x2_y[i]][diag_scan2x2_x[i]] == i.
 */
static const uint8_t diag_scan2x2_inv[2][2] = {
    { 0, 2 },
    { 1, 3 },
};
83
/*
 * Diagonal scan of a 4x4 area: entry i of the _x/_y tables holds the
 * coordinates of the i-th scanned position. Positions are visited one
 * anti-diagonal (constant x + y) after another, each one starting at x = 0.
 */
const uint8_t ff_hevc_diag_scan4x4_x[16] = {
    0, 0, 1, 0, 1, 2, 0, 1,
    2, 3, 1, 2, 3, 2, 3, 3,
};

const uint8_t ff_hevc_diag_scan4x4_y[16] = {
    0, 1, 0, 2, 1, 0, 3, 2,
    1, 0, 3, 2, 1, 3, 2, 3,
};

/*
 * Inverse of the 4x4 diagonal scan, indexed [y][x]:
 * diag_scan4x4_inv[ff_hevc_diag_scan4x4_y[i]][ff_hevc_diag_scan4x4_x[i]] == i.
 */
static const uint8_t diag_scan4x4_inv[4][4] = {
    { 0,  2,  5,  9 },
    { 1,  4,  8, 12 },
    { 3,  7, 11, 14 },
    { 6, 10, 13, 15 },
};
104
/*
 * Diagonal scan of an 8x8 area: entry i of the _x/_y tables holds the
 * coordinates of the i-th scanned position. Positions are visited one
 * anti-diagonal (constant x + y) after another, each one starting at the
 * smallest x that lies inside the area.
 */
const uint8_t ff_hevc_diag_scan8x8_x[64] = {
    0, 0, 1, 0, 1, 2, 0, 1,
    2, 3, 0, 1, 2, 3, 4, 0,
    1, 2, 3, 4, 5, 0, 1, 2,
    3, 4, 5, 6, 0, 1, 2, 3,
    4, 5, 6, 7, 1, 2, 3, 4,
    5, 6, 7, 2, 3, 4, 5, 6,
    7, 3, 4, 5, 6, 7, 4, 5,
    6, 7, 5, 6, 7, 6, 7, 7,
};

const uint8_t ff_hevc_diag_scan8x8_y[64] = {
    0, 1, 0, 2, 1, 0, 3, 2,
    1, 0, 4, 3, 2, 1, 0, 5,
    4, 3, 2, 1, 0, 6, 5, 4,
    3, 2, 1, 0, 7, 6, 5, 4,
    3, 2, 1, 0, 7, 6, 5, 4,
    3, 2, 1, 7, 6, 5, 4, 3,
    2, 7, 6, 5, 4, 3, 7, 6,
    5, 4, 7, 6, 5, 7, 6, 7,
};

/*
 * Inverse of the 8x8 diagonal scan, indexed [y][x]:
 * diag_scan8x8_inv[ff_hevc_diag_scan8x8_y[i]][ff_hevc_diag_scan8x8_x[i]] == i.
 */
static const uint8_t diag_scan8x8_inv[8][8] = {
    {  0,  2,  5,  9, 14, 20, 27, 35 },
    {  1,  4,  8, 13, 19, 26, 34, 42 },
    {  3,  7, 12, 18, 25, 33, 41, 48 },
    {  6, 11, 17, 24, 32, 40, 47, 53 },
    { 10, 16, 23, 31, 39, 46, 52, 57 },
    { 15, 22, 30, 38, 45, 51, 56, 60 },
    { 21, 29, 37, 44, 50, 55, 59, 62 },
    { 28, 36, 43, 49, 54, 58, 61, 63 },
};
153
154 /**
155  * NOTE: Each function hls_foo corresponds to the function foo in the
156  * specification (HLS stands for High Level Syntax).
157  */
158
159 /**
160  * Section 5.7
161  */
162
163 /* free everything allocated  by pic_arrays_init() */
164 static void pic_arrays_free(HEVCContext *s)
165 {
166     av_freep(&s->sao);
167     av_freep(&s->deblock);
168     av_freep(&s->split_cu_flag);
169
170     av_freep(&s->skip_flag);
171     av_freep(&s->tab_ct_depth);
172
173     av_freep(&s->tab_ipm);
174     av_freep(&s->cbf_luma);
175     av_freep(&s->is_pcm);
176
177     av_freep(&s->qp_y_tab);
178     av_freep(&s->tab_slice_address);
179     av_freep(&s->filter_slice_edges);
180
181     av_freep(&s->horizontal_bs);
182     av_freep(&s->vertical_bs);
183
184     av_buffer_pool_uninit(&s->tab_mvf_pool);
185     av_buffer_pool_uninit(&s->rpl_tab_pool);
186 }
187
188 /* allocate arrays that depend on frame dimensions */
189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
190 {
191     int log2_min_cb_size = sps->log2_min_cb_size;
192     int width            = sps->width;
193     int height           = sps->height;
194     int pic_size         = width * height;
195     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
196                            ((height >> log2_min_cb_size) + 1);
197     int ctb_count        = sps->ctb_width * sps->ctb_height;
198     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
199
200     s->bs_width  = width  >> 3;
201     s->bs_height = height >> 3;
202
203     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
204     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
205     s->split_cu_flag = av_malloc(pic_size);
206     if (!s->sao || !s->deblock || !s->split_cu_flag)
207         goto fail;
208
209     s->skip_flag    = av_malloc(pic_size_in_ctb);
210     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
211     if (!s->skip_flag || !s->tab_ct_depth)
212         goto fail;
213
214     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
215     s->tab_ipm  = av_mallocz(min_pu_size);
216     s->is_pcm   = av_malloc(min_pu_size);
217     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
218         goto fail;
219
220     s->filter_slice_edges = av_malloc(ctb_count);
221     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
222                                       sizeof(*s->tab_slice_address));
223     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
224                                       sizeof(*s->qp_y_tab));
225     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
226         goto fail;
227
228     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
229     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
230     if (!s->horizontal_bs || !s->vertical_bs)
231         goto fail;
232
233     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
234                                           av_buffer_alloc);
235     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
236                                           av_buffer_allocz);
237     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
238         goto fail;
239
240     return 0;
241
242 fail:
243     pic_arrays_free(s);
244     return AVERROR(ENOMEM);
245 }
246
247 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
248 {
249     int i = 0;
250     int j = 0;
251     uint8_t luma_weight_l0_flag[16];
252     uint8_t chroma_weight_l0_flag[16];
253     uint8_t luma_weight_l1_flag[16];
254     uint8_t chroma_weight_l1_flag[16];
255
256     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
257     if (s->sps->chroma_format_idc != 0) {
258         int delta = get_se_golomb(gb);
259         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
260     }
261
262     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
263         luma_weight_l0_flag[i] = get_bits1(gb);
264         if (!luma_weight_l0_flag[i]) {
265             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
266             s->sh.luma_offset_l0[i] = 0;
267         }
268     }
269     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
270         for (i = 0; i < s->sh.nb_refs[L0]; i++)
271             chroma_weight_l0_flag[i] = get_bits1(gb);
272     } else {
273         for (i = 0; i < s->sh.nb_refs[L0]; i++)
274             chroma_weight_l0_flag[i] = 0;
275     }
276     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
277         if (luma_weight_l0_flag[i]) {
278             int delta_luma_weight_l0 = get_se_golomb(gb);
279             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
280             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
281         }
282         if (chroma_weight_l0_flag[i]) {
283             for (j = 0; j < 2; j++) {
284                 int delta_chroma_weight_l0 = get_se_golomb(gb);
285                 int delta_chroma_offset_l0 = get_se_golomb(gb);
286                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
287                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
288                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
289             }
290         } else {
291             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
292             s->sh.chroma_offset_l0[i][0] = 0;
293             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
294             s->sh.chroma_offset_l0[i][1] = 0;
295         }
296     }
297     if (s->sh.slice_type == B_SLICE) {
298         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
299             luma_weight_l1_flag[i] = get_bits1(gb);
300             if (!luma_weight_l1_flag[i]) {
301                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
302                 s->sh.luma_offset_l1[i] = 0;
303             }
304         }
305         if (s->sps->chroma_format_idc != 0) {
306             for (i = 0; i < s->sh.nb_refs[L1]; i++)
307                 chroma_weight_l1_flag[i] = get_bits1(gb);
308         } else {
309             for (i = 0; i < s->sh.nb_refs[L1]; i++)
310                 chroma_weight_l1_flag[i] = 0;
311         }
312         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
313             if (luma_weight_l1_flag[i]) {
314                 int delta_luma_weight_l1 = get_se_golomb(gb);
315                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
316                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
317             }
318             if (chroma_weight_l1_flag[i]) {
319                 for (j = 0; j < 2; j++) {
320                     int delta_chroma_weight_l1 = get_se_golomb(gb);
321                     int delta_chroma_offset_l1 = get_se_golomb(gb);
322                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
323                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
324                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
325                 }
326             } else {
327                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
328                 s->sh.chroma_offset_l1[i][0] = 0;
329                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
330                 s->sh.chroma_offset_l1[i][1] = 0;
331             }
332         }
333     }
334 }
335
336 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
337 {
338     const HEVCSPS *sps = s->sps;
339     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
340     int prev_delta_msb = 0;
341     unsigned int nb_sps = 0, nb_sh;
342     int i;
343
344     rps->nb_refs = 0;
345     if (!sps->long_term_ref_pics_present_flag)
346         return 0;
347
348     if (sps->num_long_term_ref_pics_sps > 0)
349         nb_sps = get_ue_golomb_long(gb);
350     nb_sh = get_ue_golomb_long(gb);
351
352     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
353         return AVERROR_INVALIDDATA;
354
355     rps->nb_refs = nb_sh + nb_sps;
356
357     for (i = 0; i < rps->nb_refs; i++) {
358         uint8_t delta_poc_msb_present;
359
360         if (i < nb_sps) {
361             uint8_t lt_idx_sps = 0;
362
363             if (sps->num_long_term_ref_pics_sps > 1)
364                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
365
366             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
367             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
368         } else {
369             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
370             rps->used[i] = get_bits1(gb);
371         }
372
373         delta_poc_msb_present = get_bits1(gb);
374         if (delta_poc_msb_present) {
375             int delta = get_ue_golomb_long(gb);
376
377             if (i && i != nb_sps)
378                 delta += prev_delta_msb;
379
380             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
381             prev_delta_msb = delta;
382         }
383     }
384
385     return 0;
386 }
387
388 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
389 {
390     int ret;
391     unsigned int num = 0, den = 0;
392
393     pic_arrays_free(s);
394     ret = pic_arrays_init(s, sps);
395     if (ret < 0)
396         goto fail;
397
398     s->avctx->coded_width         = sps->width;
399     s->avctx->coded_height        = sps->height;
400     s->avctx->width               = sps->output_width;
401     s->avctx->height              = sps->output_height;
402     s->avctx->pix_fmt             = sps->pix_fmt;
403     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
404
405     ff_set_sar(s->avctx, sps->vui.sar);
406
407     if (sps->vui.video_signal_type_present_flag)
408         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
409                                                                : AVCOL_RANGE_MPEG;
410     else
411         s->avctx->color_range = AVCOL_RANGE_MPEG;
412
413     if (sps->vui.colour_description_present_flag) {
414         s->avctx->color_primaries = sps->vui.colour_primaries;
415         s->avctx->color_trc       = sps->vui.transfer_characteristic;
416         s->avctx->colorspace      = sps->vui.matrix_coeffs;
417     } else {
418         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
419         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
420         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
421     }
422
423     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
424     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
425     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
426
427     if (sps->sao_enabled) {
428         av_frame_unref(s->tmp_frame);
429         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
430         if (ret < 0)
431             goto fail;
432         s->frame = s->tmp_frame;
433     }
434
435     s->sps = sps;
436     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
437
438     if (s->vps->vps_timing_info_present_flag) {
439         num = s->vps->vps_num_units_in_tick;
440         den = s->vps->vps_time_scale;
441     } else if (sps->vui.vui_timing_info_present_flag) {
442         num = sps->vui.vui_num_units_in_tick;
443         den = sps->vui.vui_time_scale;
444     }
445
446     if (num != 0 && den != 0)
447         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
448                   num, den, 1 << 30);
449
450     return 0;
451
452 fail:
453     pic_arrays_free(s);
454     s->sps = NULL;
455     return ret;
456 }
457
458 static int hls_slice_header(HEVCContext *s)
459 {
460     GetBitContext *gb = &s->HEVClc.gb;
461     SliceHeader *sh   = &s->sh;
462     int i, ret;
463
464     // Coded parameters
465     sh->first_slice_in_pic_flag = get_bits1(gb);
466     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
467         s->seq_decode = (s->seq_decode + 1) & 0xff;
468         s->max_ra     = INT_MAX;
469         if (IS_IDR(s))
470             ff_hevc_clear_refs(s);
471     }
472     if (s->nal_unit_type >= 16 && s->nal_unit_type <= 23)
473         sh->no_output_of_prior_pics_flag = get_bits1(gb);
474
475     sh->pps_id = get_ue_golomb_long(gb);
476     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
477         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
478         return AVERROR_INVALIDDATA;
479     }
480     if (!sh->first_slice_in_pic_flag &&
481         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
482         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
483         return AVERROR_INVALIDDATA;
484     }
485     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
486
487     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
488         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
489
490         ff_hevc_clear_refs(s);
491         ret = set_sps(s, s->sps);
492         if (ret < 0)
493             return ret;
494
495         s->seq_decode = (s->seq_decode + 1) & 0xff;
496         s->max_ra     = INT_MAX;
497     }
498
499     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
500     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
501
502     sh->dependent_slice_segment_flag = 0;
503     if (!sh->first_slice_in_pic_flag) {
504         int slice_address_length;
505
506         if (s->pps->dependent_slice_segments_enabled_flag)
507             sh->dependent_slice_segment_flag = get_bits1(gb);
508
509         slice_address_length = av_ceil_log2(s->sps->ctb_width *
510                                             s->sps->ctb_height);
511         sh->slice_segment_addr = get_bits(gb, slice_address_length);
512         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
513             av_log(s->avctx, AV_LOG_ERROR,
514                    "Invalid slice segment address: %u.\n",
515                    sh->slice_segment_addr);
516             return AVERROR_INVALIDDATA;
517         }
518
519         if (!sh->dependent_slice_segment_flag) {
520             sh->slice_addr = sh->slice_segment_addr;
521             s->slice_idx++;
522         }
523     } else {
524         sh->slice_segment_addr = sh->slice_addr = 0;
525         s->slice_idx           = 0;
526         s->slice_initialized   = 0;
527     }
528
529     if (!sh->dependent_slice_segment_flag) {
530         s->slice_initialized = 0;
531
532         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
533             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
534
535         sh->slice_type = get_ue_golomb_long(gb);
536         if (!(sh->slice_type == I_SLICE ||
537               sh->slice_type == P_SLICE ||
538               sh->slice_type == B_SLICE)) {
539             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
540                    sh->slice_type);
541             return AVERROR_INVALIDDATA;
542         }
543         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
544             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
545             return AVERROR_INVALIDDATA;
546         }
547
548         if (s->pps->output_flag_present_flag)
549             sh->pic_output_flag = get_bits1(gb);
550
551         if (s->sps->separate_colour_plane_flag)
552             sh->colour_plane_id = get_bits(gb, 2);
553
554         if (!IS_IDR(s)) {
555             int short_term_ref_pic_set_sps_flag, poc;
556
557             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
558             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
559             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
560                 av_log(s->avctx, AV_LOG_WARNING,
561                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
562                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
563                     return AVERROR_INVALIDDATA;
564                 poc = s->poc;
565             }
566             s->poc = poc;
567
568             short_term_ref_pic_set_sps_flag = get_bits1(gb);
569             if (!short_term_ref_pic_set_sps_flag) {
570                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
571                 if (ret < 0)
572                     return ret;
573
574                 sh->short_term_rps = &sh->slice_rps;
575             } else {
576                 int numbits, rps_idx;
577
578                 if (!s->sps->nb_st_rps) {
579                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
580                     return AVERROR_INVALIDDATA;
581                 }
582
583                 numbits = av_ceil_log2(s->sps->nb_st_rps);
584                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
585                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
586             }
587
588             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
589             if (ret < 0) {
590                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
591                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
592                     return AVERROR_INVALIDDATA;
593             }
594
595             if (s->sps->sps_temporal_mvp_enabled_flag)
596                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
597             else
598                 sh->slice_temporal_mvp_enabled_flag = 0;
599         } else {
600             s->sh.short_term_rps = NULL;
601             s->poc               = 0;
602         }
603
604         /* 8.3.1 */
605         if (s->temporal_id == 0 &&
606             s->nal_unit_type != NAL_TRAIL_N &&
607             s->nal_unit_type != NAL_TSA_N   &&
608             s->nal_unit_type != NAL_STSA_N  &&
609             s->nal_unit_type != NAL_RADL_N  &&
610             s->nal_unit_type != NAL_RADL_R  &&
611             s->nal_unit_type != NAL_RASL_N  &&
612             s->nal_unit_type != NAL_RASL_R)
613             s->pocTid0 = s->poc;
614
615         if (s->sps->sao_enabled) {
616             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
617             sh->slice_sample_adaptive_offset_flag[1] =
618             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
619         } else {
620             sh->slice_sample_adaptive_offset_flag[0] = 0;
621             sh->slice_sample_adaptive_offset_flag[1] = 0;
622             sh->slice_sample_adaptive_offset_flag[2] = 0;
623         }
624
625         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
626         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
627             int nb_refs;
628
629             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
630             if (sh->slice_type == B_SLICE)
631                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
632
633             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
634                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
635                 if (sh->slice_type == B_SLICE)
636                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
637             }
638             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
639                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
640                        sh->nb_refs[L0], sh->nb_refs[L1]);
641                 return AVERROR_INVALIDDATA;
642             }
643
644             sh->rpl_modification_flag[0] = 0;
645             sh->rpl_modification_flag[1] = 0;
646             nb_refs = ff_hevc_frame_nb_refs(s);
647             if (!nb_refs) {
648                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
649                 return AVERROR_INVALIDDATA;
650             }
651
652             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
653                 sh->rpl_modification_flag[0] = get_bits1(gb);
654                 if (sh->rpl_modification_flag[0]) {
655                     for (i = 0; i < sh->nb_refs[L0]; i++)
656                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
657                 }
658
659                 if (sh->slice_type == B_SLICE) {
660                     sh->rpl_modification_flag[1] = get_bits1(gb);
661                     if (sh->rpl_modification_flag[1] == 1)
662                         for (i = 0; i < sh->nb_refs[L1]; i++)
663                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
664                 }
665             }
666
667             if (sh->slice_type == B_SLICE)
668                 sh->mvd_l1_zero_flag = get_bits1(gb);
669
670             if (s->pps->cabac_init_present_flag)
671                 sh->cabac_init_flag = get_bits1(gb);
672             else
673                 sh->cabac_init_flag = 0;
674
675             sh->collocated_ref_idx = 0;
676             if (sh->slice_temporal_mvp_enabled_flag) {
677                 sh->collocated_list = L0;
678                 if (sh->slice_type == B_SLICE)
679                     sh->collocated_list = !get_bits1(gb);
680
681                 if (sh->nb_refs[sh->collocated_list] > 1) {
682                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
683                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
684                         av_log(s->avctx, AV_LOG_ERROR,
685                                "Invalid collocated_ref_idx: %d.\n",
686                                sh->collocated_ref_idx);
687                         return AVERROR_INVALIDDATA;
688                     }
689                 }
690             }
691
692             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
693                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
694                 pred_weight_table(s, gb);
695             }
696
697             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
698             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
699                 av_log(s->avctx, AV_LOG_ERROR,
700                        "Invalid number of merging MVP candidates: %d.\n",
701                        sh->max_num_merge_cand);
702                 return AVERROR_INVALIDDATA;
703             }
704         }
705
706         sh->slice_qp_delta = get_se_golomb(gb);
707
708         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
709             sh->slice_cb_qp_offset = get_se_golomb(gb);
710             sh->slice_cr_qp_offset = get_se_golomb(gb);
711         } else {
712             sh->slice_cb_qp_offset = 0;
713             sh->slice_cr_qp_offset = 0;
714         }
715
716         if (s->pps->deblocking_filter_control_present_flag) {
717             int deblocking_filter_override_flag = 0;
718
719             if (s->pps->deblocking_filter_override_enabled_flag)
720                 deblocking_filter_override_flag = get_bits1(gb);
721
722             if (deblocking_filter_override_flag) {
723                 sh->disable_deblocking_filter_flag = get_bits1(gb);
724                 if (!sh->disable_deblocking_filter_flag) {
725                     sh->beta_offset = get_se_golomb(gb) * 2;
726                     sh->tc_offset   = get_se_golomb(gb) * 2;
727                 }
728             } else {
729                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
730                 sh->beta_offset                    = s->pps->beta_offset;
731                 sh->tc_offset                      = s->pps->tc_offset;
732             }
733         } else {
734             sh->disable_deblocking_filter_flag = 0;
735             sh->beta_offset                    = 0;
736             sh->tc_offset                      = 0;
737         }
738
739         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
740             (sh->slice_sample_adaptive_offset_flag[0] ||
741              sh->slice_sample_adaptive_offset_flag[1] ||
742              !sh->disable_deblocking_filter_flag)) {
743             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
744         } else {
745             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
746         }
747     } else if (!s->slice_initialized) {
748         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
749         return AVERROR_INVALIDDATA;
750     }
751
752     sh->num_entry_point_offsets = 0;
753     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
754         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
755         if (sh->num_entry_point_offsets > 0) {
756             int offset_len = get_ue_golomb_long(gb) + 1;
757
758             for (i = 0; i < sh->num_entry_point_offsets; i++)
759                 skip_bits(gb, offset_len);
760         }
761     }
762
763     if (s->pps->slice_header_extension_present_flag) {
764         unsigned int length = get_ue_golomb_long(gb);
765         for (i = 0; i < length; i++)
766             skip_bits(gb, 8);  // slice_header_extension_data_byte
767     }
768
769     // Inferred parameters
770     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
771     if (sh->slice_qp > 51 ||
772         sh->slice_qp < -s->sps->qp_bd_offset) {
773         av_log(s->avctx, AV_LOG_ERROR,
774                "The slice_qp %d is outside the valid range "
775                "[%d, 51].\n",
776                sh->slice_qp,
777                -s->sps->qp_bd_offset);
778         return AVERROR_INVALIDDATA;
779     }
780
781     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
782
783     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
784         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
785         return AVERROR_INVALIDDATA;
786     }
787
788     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
789
790     if (!s->pps->cu_qp_delta_enabled_flag)
791         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
792                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
793
794     s->slice_initialized = 1;
795
796     return 0;
797 }
798
/* Element (x, y) of a table holding one entry per CTB in raster order;
 * expects a HEVCContext pointer named s in the expanding scope. */
#define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])

/*
 * Set one field of the current SAO parameters (sao->elem).
 *
 * When merging with the left CTB the field is copied from (rx - 1, ry),
 * otherwise when merging with the upper CTB from (rx, ry - 1); merging left
 * takes precedence. Only when neither merge flag is set is `value`
 * evaluated and stored.
 *
 * Expects s, sao, rx, ry, sao_merge_left_flag and sao_merge_up_flag in the
 * expanding scope.
 */
#define SET_SAO(elem, value)                            \
do {                                                    \
    if (sao_merge_left_flag)                            \
        sao->elem = CTB(s->sao, rx-1, ry).elem;         \
    else if (sao_merge_up_flag)                         \
        sao->elem = CTB(s->sao, rx, ry-1).elem;         \
    else                                                \
        sao->elem = value;                              \
} while (0)
812
813 static void hls_sao_param(HEVCContext *s, int rx, int ry)
814 {
815     HEVCLocalContext *lc    = &s->HEVClc;
816     int sao_merge_left_flag = 0;
817     int sao_merge_up_flag   = 0;
818     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
819     SAOParams *sao          = &CTB(s->sao, rx, ry);
820     int c_idx, i;
821
822     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
823         s->sh.slice_sample_adaptive_offset_flag[1]) {
824         if (rx > 0) {
825             if (lc->ctb_left_flag)
826                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
827         }
828         if (ry > 0 && !sao_merge_left_flag) {
829             if (lc->ctb_up_flag)
830                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
831         }
832     }
833
834     for (c_idx = 0; c_idx < 3; c_idx++) {
835         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
836             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
837             continue;
838         }
839
840         if (c_idx == 2) {
841             sao->type_idx[2] = sao->type_idx[1];
842             sao->eo_class[2] = sao->eo_class[1];
843         } else {
844             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
845         }
846
847         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
848             continue;
849
850         for (i = 0; i < 4; i++)
851             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
852
853         if (sao->type_idx[c_idx] == SAO_BAND) {
854             for (i = 0; i < 4; i++) {
855                 if (sao->offset_abs[c_idx][i]) {
856                     SET_SAO(offset_sign[c_idx][i],
857                             ff_hevc_sao_offset_sign_decode(s));
858                 } else {
859                     sao->offset_sign[c_idx][i] = 0;
860                 }
861             }
862             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
863         } else if (c_idx != 2) {
864             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
865         }
866
867         // Inferred parameters
868         sao->offset_val[c_idx][0] = 0;
869         for (i = 0; i < 4; i++) {
870             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
871             if (sao->type_idx[c_idx] == SAO_EDGE) {
872                 if (i > 1)
873                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
874             } else if (sao->offset_sign[c_idx][i]) {
875                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
876             }
877         }
878     }
879 }
880
881 #undef SET_SAO
882 #undef CTB
883
884 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
885                                 int log2_trafo_size, enum ScanType scan_idx,
886                                 int c_idx)
887 {
888 #define GET_COORD(offset, n)                                    \
889     do {                                                        \
890         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
891         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
892     } while (0)
893     HEVCLocalContext *lc    = &s->HEVClc;
894     int transform_skip_flag = 0;
895
896     int last_significant_coeff_x, last_significant_coeff_y;
897     int last_scan_pos;
898     int n_end;
899     int num_coeff    = 0;
900     int greater1_ctx = 1;
901
902     int num_last_subset;
903     int x_cg_last_sig, y_cg_last_sig;
904
905     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
906
907     ptrdiff_t stride = s->frame->linesize[c_idx];
908     int hshift       = s->sps->hshift[c_idx];
909     int vshift       = s->sps->vshift[c_idx];
910     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
911                                               ((x0 >> hshift) << s->sps->pixel_shift)];
912     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
913     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
914
915     int trafo_size = 1 << log2_trafo_size;
916     int i, qp, shift, add, scale, scale_m;
917     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
918     const uint8_t *scale_matrix;
919     uint8_t dc_scale;
920
921     // Derive QP for dequant
922     if (!lc->cu.cu_transquant_bypass_flag) {
923         static const int qp_c[] = {
924             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
925         };
926
927         static const uint8_t rem6[51 + 2 * 6 + 1] = {
928             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
929             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
930             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
931         };
932
933         static const uint8_t div6[51 + 2 * 6 + 1] = {
934             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
935             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
936             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
937         };
938         int qp_y = lc->qp_y;
939
940         if (c_idx == 0) {
941             qp = qp_y + s->sps->qp_bd_offset;
942         } else {
943             int qp_i, offset;
944
945             if (c_idx == 1)
946                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
947             else
948                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
949
950             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
951             if (qp_i < 30)
952                 qp = qp_i;
953             else if (qp_i > 43)
954                 qp = qp_i - 6;
955             else
956                 qp = qp_c[qp_i - 30];
957
958             qp += s->sps->qp_bd_offset;
959         }
960
961         shift    = s->sps->bit_depth + log2_trafo_size - 5;
962         add      = 1 << (shift - 1);
963         scale    = level_scale[rem6[qp]] << (div6[qp]);
964         scale_m  = 16; // default when no custom scaling lists.
965         dc_scale = 16;
966
967         if (s->sps->scaling_list_enable_flag) {
968             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
969                                     &s->pps->scaling_list : &s->sps->scaling_list;
970             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
971
972             if (log2_trafo_size != 5)
973                 matrix_id = 3 * matrix_id + c_idx;
974
975             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
976             if (log2_trafo_size >= 4)
977                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
978         }
979     }
980
981     if (s->pps->transform_skip_enabled_flag &&
982         !lc->cu.cu_transquant_bypass_flag   &&
983         log2_trafo_size == 2) {
984         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
985     }
986
987     last_significant_coeff_x =
988         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
989     last_significant_coeff_y =
990         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
991
992     if (last_significant_coeff_x > 3) {
993         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
994         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
995                                    (2 + (last_significant_coeff_x & 1)) +
996                                    suffix;
997     }
998
999     if (last_significant_coeff_y > 3) {
1000         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1001         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1002                                    (2 + (last_significant_coeff_y & 1)) +
1003                                    suffix;
1004     }
1005
1006     if (scan_idx == SCAN_VERT)
1007         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1008
1009     x_cg_last_sig = last_significant_coeff_x >> 2;
1010     y_cg_last_sig = last_significant_coeff_y >> 2;
1011
1012     switch (scan_idx) {
1013     case SCAN_DIAG: {
1014         int last_x_c = last_significant_coeff_x & 3;
1015         int last_y_c = last_significant_coeff_y & 3;
1016
1017         scan_x_off = ff_hevc_diag_scan4x4_x;
1018         scan_y_off = ff_hevc_diag_scan4x4_y;
1019         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1020         if (trafo_size == 4) {
1021             scan_x_cg = scan_1x1;
1022             scan_y_cg = scan_1x1;
1023         } else if (trafo_size == 8) {
1024             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1025             scan_x_cg  = diag_scan2x2_x;
1026             scan_y_cg  = diag_scan2x2_y;
1027         } else if (trafo_size == 16) {
1028             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1029             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1030             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1031         } else { // trafo_size == 32
1032             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1033             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1034             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1035         }
1036         break;
1037     }
1038     case SCAN_HORIZ:
1039         scan_x_cg  = horiz_scan2x2_x;
1040         scan_y_cg  = horiz_scan2x2_y;
1041         scan_x_off = horiz_scan4x4_x;
1042         scan_y_off = horiz_scan4x4_y;
1043         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1044         break;
1045     default: //SCAN_VERT
1046         scan_x_cg  = horiz_scan2x2_y;
1047         scan_y_cg  = horiz_scan2x2_x;
1048         scan_x_off = horiz_scan4x4_y;
1049         scan_y_off = horiz_scan4x4_x;
1050         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1051         break;
1052     }
1053     num_coeff++;
1054     num_last_subset = (num_coeff - 1) >> 4;
1055
1056     for (i = num_last_subset; i >= 0; i--) {
1057         int n, m;
1058         int x_cg, y_cg, x_c, y_c;
1059         int implicit_non_zero_coeff = 0;
1060         int64_t trans_coeff_level;
1061         int prev_sig = 0;
1062         int offset   = i << 4;
1063
1064         uint8_t significant_coeff_flag_idx[16];
1065         uint8_t nb_significant_coeff_flag = 0;
1066
1067         x_cg = scan_x_cg[i];
1068         y_cg = scan_y_cg[i];
1069
1070         if (i < num_last_subset && i > 0) {
1071             int ctx_cg = 0;
1072             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1073                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1074             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1075                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1076
1077             significant_coeff_group_flag[x_cg][y_cg] =
1078                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1079             implicit_non_zero_coeff = 1;
1080         } else {
1081             significant_coeff_group_flag[x_cg][y_cg] =
1082                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1083                  (x_cg == 0 && y_cg == 0));
1084         }
1085
1086         last_scan_pos = num_coeff - offset - 1;
1087
1088         if (i == num_last_subset) {
1089             n_end                         = last_scan_pos - 1;
1090             significant_coeff_flag_idx[0] = last_scan_pos;
1091             nb_significant_coeff_flag     = 1;
1092         } else {
1093             n_end = 15;
1094         }
1095
1096         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1097             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1098         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1099             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1100
1101         for (n = n_end; n >= 0; n--) {
1102             GET_COORD(offset, n);
1103
1104             if (significant_coeff_group_flag[x_cg][y_cg] &&
1105                 (n > 0 || implicit_non_zero_coeff == 0)) {
1106                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1107                                                           log2_trafo_size,
1108                                                           scan_idx,
1109                                                           prev_sig) == 1) {
1110                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1111                     nb_significant_coeff_flag++;
1112                     implicit_non_zero_coeff = 0;
1113                 }
1114             } else {
1115                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1116                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1117                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1118                     nb_significant_coeff_flag++;
1119                 }
1120             }
1121         }
1122
1123         n_end = nb_significant_coeff_flag;
1124
1125         if (n_end) {
1126             int first_nz_pos_in_cg = 16;
1127             int last_nz_pos_in_cg = -1;
1128             int c_rice_param = 0;
1129             int first_greater1_coeff_idx = -1;
1130             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1131             uint16_t coeff_sign_flag;
1132             int sum_abs = 0;
1133             int sign_hidden = 0;
1134
1135             // initialize first elem of coeff_bas_level_greater1_flag
1136             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1137
1138             if (!(i == num_last_subset) && greater1_ctx == 0)
1139                 ctx_set++;
1140             greater1_ctx      = 1;
1141             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1142
1143             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1144                 int n_idx = significant_coeff_flag_idx[m];
1145                 int inc   = (ctx_set << 2) + greater1_ctx;
1146                 coeff_abs_level_greater1_flag[n_idx] =
1147                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1148                 if (coeff_abs_level_greater1_flag[n_idx]) {
1149                     greater1_ctx = 0;
1150                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1151                     greater1_ctx++;
1152                 }
1153
1154                 if (coeff_abs_level_greater1_flag[n_idx] &&
1155                     first_greater1_coeff_idx == -1)
1156                     first_greater1_coeff_idx = n_idx;
1157             }
1158             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1159             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1160                                  !lc->cu.cu_transquant_bypass_flag;
1161
1162             if (first_greater1_coeff_idx != -1) {
1163                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1164             }
1165             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1166                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1167             } else {
1168                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1169             }
1170
1171             for (m = 0; m < n_end; m++) {
1172                 n = significant_coeff_flag_idx[m];
1173                 GET_COORD(offset, n);
1174                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1175                 if (trans_coeff_level == ((m < 8) ?
1176                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1177                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1178
1179                     trans_coeff_level += last_coeff_abs_level_remaining;
1180                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1181                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1182                 }
1183                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1184                     sum_abs += trans_coeff_level;
1185                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1186                         trans_coeff_level = -trans_coeff_level;
1187                 }
1188                 if (coeff_sign_flag >> 15)
1189                     trans_coeff_level = -trans_coeff_level;
1190                 coeff_sign_flag <<= 1;
1191                 if (!lc->cu.cu_transquant_bypass_flag) {
1192                     if (s->sps->scaling_list_enable_flag) {
1193                         if (y_c || x_c || log2_trafo_size < 4) {
1194                             int pos;
1195                             switch (log2_trafo_size) {
1196                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1197                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1198                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1199                             default: pos = (y_c        << 2) +  x_c;
1200                             }
1201                             scale_m = scale_matrix[pos];
1202                         } else {
1203                             scale_m = dc_scale;
1204                         }
1205                     }
1206                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1207                     if(trans_coeff_level < 0) {
1208                         if((~trans_coeff_level) & 0xFffffffffff8000)
1209                             trans_coeff_level = -32768;
1210                     } else {
1211                         if (trans_coeff_level & 0xffffffffffff8000)
1212                             trans_coeff_level = 32767;
1213                     }
1214                 }
1215                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1216             }
1217         }
1218     }
1219
1220     if (lc->cu.cu_transquant_bypass_flag) {
1221         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1222     } else {
1223         if (transform_skip_flag)
1224             s->hevcdsp.transform_skip(dst, coeffs, stride);
1225         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1226                  log2_trafo_size == 2)
1227             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1228         else
1229             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1230     }
1231 }
1232
1233 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1234                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1235                               int log2_cb_size, int log2_trafo_size,
1236                               int trafo_depth, int blk_idx)
1237 {
1238     HEVCLocalContext *lc = &s->HEVClc;
1239
1240     if (lc->cu.pred_mode == MODE_INTRA) {
1241         int trafo_size = 1 << log2_trafo_size;
1242         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1243
1244         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1245         if (log2_trafo_size > 2) {
1246             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1247             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1248             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1249             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1250         } else if (blk_idx == 3) {
1251             trafo_size = trafo_size << s->sps->hshift[1];
1252             ff_hevc_set_neighbour_available(s, xBase, yBase,
1253                                             trafo_size, trafo_size);
1254             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1255             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1256         }
1257     }
1258
1259     if (lc->tt.cbf_luma ||
1260         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1261         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1262         int scan_idx   = SCAN_DIAG;
1263         int scan_idx_c = SCAN_DIAG;
1264
1265         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1266             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1267             if (lc->tu.cu_qp_delta != 0)
1268                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1269                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1270             lc->tu.is_cu_qp_delta_coded = 1;
1271
1272             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
1273                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
1274                 av_log(s->avctx, AV_LOG_ERROR,
1275                        "The cu_qp_delta %d is outside the valid range "
1276                        "[%d, %d].\n",
1277                        lc->tu.cu_qp_delta,
1278                        -(26 + s->sps->qp_bd_offset / 2),
1279                         (25 + s->sps->qp_bd_offset / 2));
1280                 return AVERROR_INVALIDDATA;
1281             }
1282
1283             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1284         }
1285
1286         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1287             if (lc->tu.cur_intra_pred_mode >= 6 &&
1288                 lc->tu.cur_intra_pred_mode <= 14) {
1289                 scan_idx = SCAN_VERT;
1290             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1291                        lc->tu.cur_intra_pred_mode <= 30) {
1292                 scan_idx = SCAN_HORIZ;
1293             }
1294
1295             if (lc->pu.intra_pred_mode_c >=  6 &&
1296                 lc->pu.intra_pred_mode_c <= 14) {
1297                 scan_idx_c = SCAN_VERT;
1298             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1299                        lc->pu.intra_pred_mode_c <= 30) {
1300                 scan_idx_c = SCAN_HORIZ;
1301             }
1302         }
1303
1304         if (lc->tt.cbf_luma)
1305             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1306         if (log2_trafo_size > 2) {
1307             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0))
1308                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1309             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0))
1310                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1311         } else if (blk_idx == 3) {
1312             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase))
1313                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1314             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase))
1315                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1316         }
1317     }
1318     return 0;
1319 }
1320
1321 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1322 {
1323     int cb_size          = 1 << log2_cb_size;
1324     int log2_min_pu_size = s->sps->log2_min_pu_size;
1325
1326     int min_pu_width     = s->sps->min_pu_width;
1327     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1328     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1329     int i, j;
1330
1331     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1332         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1333             s->is_pcm[i + j * min_pu_width] = 2;
1334 }
1335
1336 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1337                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1338                               int log2_cb_size, int log2_trafo_size,
1339                               int trafo_depth, int blk_idx)
1340 {
1341     HEVCLocalContext *lc = &s->HEVClc;
1342     uint8_t split_transform_flag;
1343     int ret;
1344
1345     if (trafo_depth > 0 && log2_trafo_size == 2) {
1346         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1347             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1348         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1349             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1350     } else {
1351         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1352         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1353     }
1354
1355     if (lc->cu.intra_split_flag) {
1356         if (trafo_depth == 1)
1357             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1358     } else {
1359         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1360     }
1361
1362     lc->tt.cbf_luma = 1;
1363
1364     lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1365                               lc->cu.pred_mode == MODE_INTER &&
1366                               lc->cu.part_mode != PART_2Nx2N &&
1367                               trafo_depth == 0;
1368
1369     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1370         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1371         trafo_depth     < lc->cu.max_trafo_depth       &&
1372         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1373         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1374     } else {
1375         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1376                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1377                                lc->tt.inter_split_flag;
1378     }
1379
1380     if (log2_trafo_size > 2) {
1381         if (trafo_depth == 0 ||
1382             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1383             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1384                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1385         }
1386
1387         if (trafo_depth == 0 ||
1388             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1389             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1390                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1391         }
1392     }
1393
1394     if (split_transform_flag) {
1395         int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1396         int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1397
1398         ret = hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase,
1399                                  log2_cb_size, log2_trafo_size - 1,
1400                                  trafo_depth + 1, 0);
1401         if (ret < 0)
1402             return ret;
1403         ret = hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase,
1404                                  log2_cb_size, log2_trafo_size - 1,
1405                                  trafo_depth + 1, 1);
1406         if (ret < 0)
1407             return ret;
1408         ret = hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase,
1409                                  log2_cb_size, log2_trafo_size - 1,
1410                                  trafo_depth + 1, 2);
1411         if (ret < 0)
1412             return ret;
1413         ret = hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase,
1414                                  log2_cb_size, log2_trafo_size - 1,
1415                                  trafo_depth + 1, 3);
1416         if (ret < 0)
1417             return ret;
1418     } else {
1419         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1420         int log2_min_tu_size = s->sps->log2_min_tb_size;
1421         int min_tu_width     = s->sps->min_tb_width;
1422
1423         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1424             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1425             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1426             lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1427         }
1428
1429         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1430                                  log2_cb_size, log2_trafo_size, trafo_depth,
1431                                  blk_idx);
1432         if (ret < 0)
1433             return ret;
1434         // TODO: store cbf_luma somewhere else
1435         if (lc->tt.cbf_luma) {
1436             int i, j;
1437             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1438                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1439                     int x_tu = (x0 + j) >> log2_min_tu_size;
1440                     int y_tu = (y0 + i) >> log2_min_tu_size;
1441                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1442                 }
1443         }
1444         if (!s->sh.disable_deblocking_filter_flag) {
1445             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1446                                                   lc->slice_or_tiles_up_boundary,
1447                                                   lc->slice_or_tiles_left_boundary);
1448             if (s->pps->transquant_bypass_enable_flag &&
1449                 lc->cu.cu_transquant_bypass_flag)
1450                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1451         }
1452     }
1453     return 0;
1454 }
1455
1456 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1457 {
1458     //TODO: non-4:2:0 support
1459     HEVCLocalContext *lc = &s->HEVClc;
1460     GetBitContext gb;
1461     int cb_size   = 1 << log2_cb_size;
1462     int stride0   = s->frame->linesize[0];
1463     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1464     int   stride1 = s->frame->linesize[1];
1465     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1466     int   stride2 = s->frame->linesize[2];
1467     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1468
1469     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1470     const uint8_t *pcm = skip_bytes(&s->HEVClc.cc, (length + 7) >> 3);
1471     int ret;
1472
1473     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1474                                           lc->slice_or_tiles_up_boundary,
1475                                           lc->slice_or_tiles_left_boundary);
1476
1477     ret = init_get_bits(&gb, pcm, length);
1478     if (ret < 0)
1479         return ret;
1480
1481     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1482     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1483     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1484     return 0;
1485 }
1486
1487 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1488 {
1489     HEVCLocalContext *lc = &s->HEVClc;
1490     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1491     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1492
1493     if (x)
1494         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1495     if (y)
1496         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1497
1498     switch (x) {
1499     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1500     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1501     case 0: lc->pu.mvd.x = 0;                               break;
1502     }
1503
1504     switch (y) {
1505     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1506     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1507     case 0: lc->pu.mvd.y = 0;                               break;
1508     }
1509 }
1510
1511 /**
1512  * 8.5.3.2.2.1 Luma sample interpolation process
1513  *
1514  * @param s HEVC decoding context
1515  * @param dst target buffer for block data at block position
1516  * @param dststride stride of the dst buffer
1517  * @param ref reference picture buffer at origin (0, 0)
1518  * @param mv motion vector (relative to block position) to get pixel data from
1519  * @param x_off horizontal position of block from origin (0, 0)
1520  * @param y_off vertical position of block from origin (0, 0)
1521  * @param block_w width of block
1522  * @param block_h height of block
1523  */
1524 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1525                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1526                     int block_w, int block_h)
1527 {
1528     HEVCLocalContext *lc = &s->HEVClc;
1529     uint8_t *src         = ref->data[0];
1530     ptrdiff_t srcstride  = ref->linesize[0];
1531     int pic_width        = s->sps->width;
1532     int pic_height       = s->sps->height;
1533
1534     int mx         = mv->x & 3;
1535     int my         = mv->y & 3;
1536     int extra_left = ff_hevc_qpel_extra_before[mx];
1537     int extra_top  = ff_hevc_qpel_extra_before[my];
1538
1539     x_off += mv->x >> 2;
1540     y_off += mv->y >> 2;
1541     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1542
1543     if (x_off < extra_left || y_off < extra_top ||
1544         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1545         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1546         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1547         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1548         int buf_offset = extra_top *
1549                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1550
1551         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1552                                  edge_emu_stride, srcstride,
1553                                  block_w + ff_hevc_qpel_extra[mx],
1554                                  block_h + ff_hevc_qpel_extra[my],
1555                                  x_off - extra_left, y_off - extra_top,
1556                                  pic_width, pic_height);
1557         src = lc->edge_emu_buffer + buf_offset;
1558         srcstride = edge_emu_stride;
1559     }
1560     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1561                                      block_h, lc->mc_buffer);
1562 }
1563
1564 /**
1565  * 8.5.3.2.2.2 Chroma sample interpolation process
1566  *
1567  * @param s HEVC decoding context
1568  * @param dst1 target buffer for block data at block position (U plane)
1569  * @param dst2 target buffer for block data at block position (V plane)
1570  * @param dststride stride of the dst1 and dst2 buffers
1571  * @param ref reference picture buffer at origin (0, 0)
1572  * @param mv motion vector (relative to block position) to get pixel data from
1573  * @param x_off horizontal position of block from origin (0, 0)
1574  * @param y_off vertical position of block from origin (0, 0)
1575  * @param block_w width of block
1576  * @param block_h height of block
1577  */
1578 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1579                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1580                       int x_off, int y_off, int block_w, int block_h)
1581 {
1582     HEVCLocalContext *lc = &s->HEVClc;
1583     uint8_t *src1        = ref->data[1];
1584     uint8_t *src2        = ref->data[2];
1585     ptrdiff_t src1stride = ref->linesize[1];
1586     ptrdiff_t src2stride = ref->linesize[2];
1587     int pic_width        = s->sps->width >> 1;
1588     int pic_height       = s->sps->height >> 1;
1589
1590     int mx = mv->x & 7;
1591     int my = mv->y & 7;
1592
1593     x_off += mv->x >> 3;
1594     y_off += mv->y >> 3;
1595     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1596     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1597
1598     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1599         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1600         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1601         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1602         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1603         int buf_offset1 = EPEL_EXTRA_BEFORE *
1604                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1605         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1606         int buf_offset2 = EPEL_EXTRA_BEFORE *
1607                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1608
1609         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1610                                  edge_emu_stride, src1stride,
1611                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1612                                  x_off - EPEL_EXTRA_BEFORE,
1613                                  y_off - EPEL_EXTRA_BEFORE,
1614                                  pic_width, pic_height);
1615
1616         src1 = lc->edge_emu_buffer + buf_offset1;
1617         src1stride = edge_emu_stride;
1618         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1619                                              block_w, block_h, mx, my, lc->mc_buffer);
1620
1621         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1622                                  edge_emu_stride, src2stride,
1623                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1624                                  x_off - EPEL_EXTRA_BEFORE,
1625                                  y_off - EPEL_EXTRA_BEFORE,
1626                                  pic_width, pic_height);
1627         src2 = lc->edge_emu_buffer + buf_offset2;
1628         src2stride = edge_emu_stride;
1629
1630         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1631                                              block_w, block_h, mx, my,
1632                                              lc->mc_buffer);
1633     } else {
1634         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1635                                              block_w, block_h, mx, my,
1636                                              lc->mc_buffer);
1637         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1638                                              block_w, block_h, mx, my,
1639                                              lc->mc_buffer);
1640     }
1641 }
1642
1643 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1644                                 const Mv *mv, int y0, int height)
1645 {
1646     int y = (mv->y >> 2) + y0 + height + 9;
1647     ff_thread_await_progress(&ref->tf, y, 0);
1648 }
1649
1650 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1651                                 int nPbW, int nPbH,
1652                                 int log2_cb_size, int partIdx)
1653 {
1654 #define POS(c_idx, x, y)                                                              \
1655     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1656                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1657     HEVCLocalContext *lc = &s->HEVClc;
1658     int merge_idx = 0;
1659     struct MvField current_mv = {{{ 0 }}};
1660
1661     int min_pu_width = s->sps->min_pu_width;
1662
1663     MvField *tab_mvf = s->ref->tab_mvf;
1664     RefPicList  *refPicList = s->ref->refPicList;
1665     HEVCFrame *ref0, *ref1;
1666
1667     int tmpstride = MAX_PB_SIZE;
1668
1669     uint8_t *dst0 = POS(0, x0, y0);
1670     uint8_t *dst1 = POS(1, x0, y0);
1671     uint8_t *dst2 = POS(2, x0, y0);
1672     int log2_min_cb_size = s->sps->log2_min_cb_size;
1673     int min_cb_width     = s->sps->min_cb_width;
1674     int x_cb             = x0 >> log2_min_cb_size;
1675     int y_cb             = y0 >> log2_min_cb_size;
1676     int ref_idx[2];
1677     int mvp_flag[2];
1678     int x_pu, y_pu;
1679     int i, j;
1680
1681     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1682         if (s->sh.max_num_merge_cand > 1)
1683             merge_idx = ff_hevc_merge_idx_decode(s);
1684         else
1685             merge_idx = 0;
1686
1687         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1688                                    1 << log2_cb_size,
1689                                    1 << log2_cb_size,
1690                                    log2_cb_size, partIdx,
1691                                    merge_idx, &current_mv);
1692         x_pu = x0 >> s->sps->log2_min_pu_size;
1693         y_pu = y0 >> s->sps->log2_min_pu_size;
1694
1695         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1696             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1697                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1698     } else { /* MODE_INTER */
1699         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1700         if (lc->pu.merge_flag) {
1701             if (s->sh.max_num_merge_cand > 1)
1702                 merge_idx = ff_hevc_merge_idx_decode(s);
1703             else
1704                 merge_idx = 0;
1705
1706             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1707                                        partIdx, merge_idx, &current_mv);
1708             x_pu = x0 >> s->sps->log2_min_pu_size;
1709             y_pu = y0 >> s->sps->log2_min_pu_size;
1710
1711             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1712                 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1713                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1714         } else {
1715             enum InterPredIdc inter_pred_idc = PRED_L0;
1716             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1717             if (s->sh.slice_type == B_SLICE)
1718                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1719
1720             if (inter_pred_idc != PRED_L1) {
1721                 if (s->sh.nb_refs[L0]) {
1722                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1723                     current_mv.ref_idx[0] = ref_idx[0];
1724                 }
1725                 current_mv.pred_flag[0] = 1;
1726                 hls_mvd_coding(s, x0, y0, 0);
1727                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1728                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1729                                          partIdx, merge_idx, &current_mv,
1730                                          mvp_flag[0], 0);
1731                 current_mv.mv[0].x += lc->pu.mvd.x;
1732                 current_mv.mv[0].y += lc->pu.mvd.y;
1733             }
1734
1735             if (inter_pred_idc != PRED_L0) {
1736                 if (s->sh.nb_refs[L1]) {
1737                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1738                     current_mv.ref_idx[1] = ref_idx[1];
1739                 }
1740
1741                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1742                     lc->pu.mvd.x = 0;
1743                     lc->pu.mvd.y = 0;
1744                 } else {
1745                     hls_mvd_coding(s, x0, y0, 1);
1746                 }
1747
1748                 current_mv.pred_flag[1] = 1;
1749                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1750                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1751                                          partIdx, merge_idx, &current_mv,
1752                                          mvp_flag[1], 1);
1753                 current_mv.mv[1].x += lc->pu.mvd.x;
1754                 current_mv.mv[1].y += lc->pu.mvd.y;
1755             }
1756
1757             x_pu = x0 >> s->sps->log2_min_pu_size;
1758             y_pu = y0 >> s->sps->log2_min_pu_size;
1759
1760             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1761                 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1762                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1763         }
1764     }
1765
1766     if (current_mv.pred_flag[0]) {
1767         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1768         if (!ref0)
1769             return;
1770         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1771     }
1772     if (current_mv.pred_flag[1]) {
1773         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1774         if (!ref1)
1775             return;
1776         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1777     }
1778
1779     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1780         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1781         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1782
1783         luma_mc(s, tmp, tmpstride, ref0->frame,
1784                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1785
1786         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1787             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1788             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1789                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1790                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1791                                      dst0, s->frame->linesize[0], tmp,
1792                                      tmpstride, nPbW, nPbH);
1793         } else {
1794             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1795         }
1796         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1797                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1798
1799         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1800             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1801             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1802                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1803                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1804                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1805                                      nPbW / 2, nPbH / 2);
1806             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1807                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1808                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1809                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1810                                      nPbW / 2, nPbH / 2);
1811         } else {
1812             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1813             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1814         }
1815     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1816         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1817         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1818
1819         if (!ref1)
1820             return;
1821
1822         luma_mc(s, tmp, tmpstride, ref1->frame,
1823                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1824
1825         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1826             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1827             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1828                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1829                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1830                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1831                                       nPbW, nPbH);
1832         } else {
1833             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1834         }
1835
1836         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1837                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1838
1839         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1840             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1841             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1842                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1843                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1844                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1845             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1846                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1847                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1848                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1849         } else {
1850             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1851             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1852         }
1853     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1854         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1855         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1856         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1857         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1858         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1859         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1860
1861         if (!ref0 || !ref1)
1862             return;
1863
1864         luma_mc(s, tmp, tmpstride, ref0->frame,
1865                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1866         luma_mc(s, tmp2, tmpstride, ref1->frame,
1867                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1868
1869         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1870             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1871             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1872                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1873                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1874                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1875                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1876                                          dst0, s->frame->linesize[0],
1877                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1878         } else {
1879             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1880                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1881         }
1882
1883         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1884                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1885         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1886                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1887
1888         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1889             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1890             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1891                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1892                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1893                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1894                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1895                                          dst1, s->frame->linesize[1], tmp, tmp3,
1896                                          tmpstride, nPbW / 2, nPbH / 2);
1897             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1898                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1899                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1900                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1901                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1902                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1903                                          tmpstride, nPbW / 2, nPbH / 2);
1904         } else {
1905             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1906             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1907         }
1908     }
1909 }
1910
1911 /**
1912  * 8.4.1
1913  */
1914 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1915                                 int prev_intra_luma_pred_flag)
1916 {
1917     HEVCLocalContext *lc = &s->HEVClc;
1918     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1919     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1920     int min_pu_width     = s->sps->min_pu_width;
1921     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1922     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1923     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1924
1925     int cand_up   = (lc->ctb_up_flag || y0b) ?
1926                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1927     int cand_left = (lc->ctb_left_flag || x0b) ?
1928                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1929
1930     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1931
1932     MvField *tab_mvf = s->ref->tab_mvf;
1933     int intra_pred_mode;
1934     int candidate[3];
1935     int i, j;
1936
1937     // intra_pred_mode prediction does not cross vertical CTB boundaries
1938     if ((y0 - 1) < y_ctb)
1939         cand_up = INTRA_DC;
1940
1941     if (cand_left == cand_up) {
1942         if (cand_left < 2) {
1943             candidate[0] = INTRA_PLANAR;
1944             candidate[1] = INTRA_DC;
1945             candidate[2] = INTRA_ANGULAR_26;
1946         } else {
1947             candidate[0] = cand_left;
1948             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1949             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1950         }
1951     } else {
1952         candidate[0] = cand_left;
1953         candidate[1] = cand_up;
1954         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1955             candidate[2] = INTRA_PLANAR;
1956         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1957             candidate[2] = INTRA_DC;
1958         } else {
1959             candidate[2] = INTRA_ANGULAR_26;
1960         }
1961     }
1962
1963     if (prev_intra_luma_pred_flag) {
1964         intra_pred_mode = candidate[lc->pu.mpm_idx];
1965     } else {
1966         if (candidate[0] > candidate[1])
1967             FFSWAP(uint8_t, candidate[0], candidate[1]);
1968         if (candidate[0] > candidate[2])
1969             FFSWAP(uint8_t, candidate[0], candidate[2]);
1970         if (candidate[1] > candidate[2])
1971             FFSWAP(uint8_t, candidate[1], candidate[2]);
1972
1973         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1974         for (i = 0; i < 3; i++)
1975             if (intra_pred_mode >= candidate[i])
1976                 intra_pred_mode++;
1977     }
1978
1979     /* write the intra prediction units into the mv array */
1980     if (!size_in_pus)
1981         size_in_pus = 1;
1982     for (i = 0; i < size_in_pus; i++) {
1983         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1984                intra_pred_mode, size_in_pus);
1985
1986         for (j = 0; j < size_in_pus; j++) {
1987             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1988             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1989             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1990             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1991             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1992             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1993             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1994             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1995             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1996         }
1997     }
1998
1999     return intra_pred_mode;
2000 }
2001
2002 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
2003                                           int log2_cb_size, int ct_depth)
2004 {
2005     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
2006     int x_cb   = x0 >> s->sps->log2_min_cb_size;
2007     int y_cb   = y0 >> s->sps->log2_min_cb_size;
2008     int y;
2009
2010     for (y = 0; y < length; y++)
2011         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
2012                ct_depth, length);
2013 }
2014
2015 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
2016                                   int log2_cb_size)
2017 {
2018     HEVCLocalContext *lc = &s->HEVClc;
2019     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
2020     uint8_t prev_intra_luma_pred_flag[4];
2021     int split   = lc->cu.part_mode == PART_NxN;
2022     int pb_size = (1 << log2_cb_size) >> split;
2023     int side    = split + 1;
2024     int chroma_mode;
2025     int i, j;
2026
2027     for (i = 0; i < side; i++)
2028         for (j = 0; j < side; j++)
2029             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2030
2031     for (i = 0; i < side; i++) {
2032         for (j = 0; j < side; j++) {
2033             if (prev_intra_luma_pred_flag[2 * i + j])
2034                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2035             else
2036                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2037
2038             lc->pu.intra_pred_mode[2 * i + j] =
2039                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2040                                      prev_intra_luma_pred_flag[2 * i + j]);
2041         }
2042     }
2043
2044     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2045     if (chroma_mode != 4) {
2046         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2047             lc->pu.intra_pred_mode_c = 34;
2048         else
2049             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2050     } else {
2051         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2052     }
2053 }
2054
2055 static void intra_prediction_unit_default_value(HEVCContext *s,
2056                                                 int x0, int y0,
2057                                                 int log2_cb_size)
2058 {
2059     HEVCLocalContext *lc = &s->HEVClc;
2060     int pb_size          = 1 << log2_cb_size;
2061     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2062     int min_pu_width     = s->sps->min_pu_width;
2063     MvField *tab_mvf     = s->ref->tab_mvf;
2064     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2065     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2066     int j, k;
2067
2068     if (size_in_pus == 0)
2069         size_in_pus = 1;
2070     for (j = 0; j < size_in_pus; j++) {
2071         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2072         for (k = 0; k < size_in_pus; k++)
2073             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2074     }
2075 }
2076
2077 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2078 {
2079     int cb_size          = 1 << log2_cb_size;
2080     HEVCLocalContext *lc = &s->HEVClc;
2081     int log2_min_cb_size = s->sps->log2_min_cb_size;
2082     int length           = cb_size >> log2_min_cb_size;
2083     int min_cb_width     = s->sps->min_cb_width;
2084     int x_cb             = x0 >> log2_min_cb_size;
2085     int y_cb             = y0 >> log2_min_cb_size;
2086     int x, y, ret;
2087
2088     lc->cu.x                = x0;
2089     lc->cu.y                = y0;
2090     lc->cu.rqt_root_cbf     = 1;
2091     lc->cu.pred_mode        = MODE_INTRA;
2092     lc->cu.part_mode        = PART_2Nx2N;
2093     lc->cu.intra_split_flag = 0;
2094     lc->cu.pcm_flag         = 0;
2095
2096     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2097     for (x = 0; x < 4; x++)
2098         lc->pu.intra_pred_mode[x] = 1;
2099     if (s->pps->transquant_bypass_enable_flag) {
2100         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2101         if (lc->cu.cu_transquant_bypass_flag)
2102             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2103     } else
2104         lc->cu.cu_transquant_bypass_flag = 0;
2105
2106     if (s->sh.slice_type != I_SLICE) {
2107         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2108
2109         lc->cu.pred_mode = MODE_SKIP;
2110         x = y_cb * min_cb_width + x_cb;
2111         for (y = 0; y < length; y++) {
2112             memset(&s->skip_flag[x], skip_flag, length);
2113             x += min_cb_width;
2114         }
2115         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2116     }
2117
2118     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2119         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2120         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2121
2122         if (!s->sh.disable_deblocking_filter_flag)
2123             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2124                                                   lc->slice_or_tiles_up_boundary,
2125                                                   lc->slice_or_tiles_left_boundary);
2126     } else {
2127         if (s->sh.slice_type != I_SLICE)
2128             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2129         if (lc->cu.pred_mode != MODE_INTRA ||
2130             log2_cb_size == s->sps->log2_min_cb_size) {
2131             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2132             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2133                                       lc->cu.pred_mode == MODE_INTRA;
2134         }
2135
2136         if (lc->cu.pred_mode == MODE_INTRA) {
2137             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2138                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2139                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2140                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2141             }
2142             if (lc->cu.pcm_flag) {
2143                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2144                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2145                 if (s->sps->pcm.loop_filter_disable_flag)
2146                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2147
2148                 if (ret < 0)
2149                     return ret;
2150             } else {
2151                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2152             }
2153         } else {
2154             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2155             switch (lc->cu.part_mode) {
2156             case PART_2Nx2N:
2157                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2158                 break;
2159             case PART_2NxN:
2160                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2161                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2162                 break;
2163             case PART_Nx2N:
2164                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2165                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2166                 break;
2167             case PART_2NxnU:
2168                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2169                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2170                 break;
2171             case PART_2NxnD:
2172                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2173                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2174                 break;
2175             case PART_nLx2N:
2176                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2177                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2178                 break;
2179             case PART_nRx2N:
2180                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2181                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2182                 break;
2183             case PART_NxN:
2184                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2185                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2186                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2187                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2188                 break;
2189             }
2190         }
2191
2192         if (!lc->cu.pcm_flag) {
2193             if (lc->cu.pred_mode != MODE_INTRA &&
2194                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2195                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2196             }
2197             if (lc->cu.rqt_root_cbf) {
2198                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2199                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2200                                          s->sps->max_transform_hierarchy_depth_inter;
2201                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2202                                          log2_cb_size,
2203                                          log2_cb_size, 0, 0);
2204                 if (ret < 0)
2205                     return ret;
2206             } else {
2207                 if (!s->sh.disable_deblocking_filter_flag)
2208                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2209                                                           lc->slice_or_tiles_up_boundary,
2210                                                           lc->slice_or_tiles_left_boundary);
2211             }
2212         }
2213     }
2214
2215     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2216         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2217
2218     x = y_cb * min_cb_width + x_cb;
2219     for (y = 0; y < length; y++) {
2220         memset(&s->qp_y_tab[x], lc->qp_y, length);
2221         x += min_cb_width;
2222     }
2223
2224     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2225
2226     return 0;
2227 }
2228
2229 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2230                                int log2_cb_size, int cb_depth)
2231 {
2232     HEVCLocalContext *lc = &s->HEVClc;
2233     const int cb_size    = 1 << log2_cb_size;
2234
2235     lc->ct.depth = cb_depth;
2236     if (x0 + cb_size <= s->sps->width  &&
2237         y0 + cb_size <= s->sps->height &&
2238         log2_cb_size > s->sps->log2_min_cb_size) {
2239         SAMPLE(s->split_cu_flag, x0, y0) =
2240             ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2241     } else {
2242         SAMPLE(s->split_cu_flag, x0, y0) =
2243             (log2_cb_size > s->sps->log2_min_cb_size);
2244     }
2245     if (s->pps->cu_qp_delta_enabled_flag &&
2246         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2247         lc->tu.is_cu_qp_delta_coded = 0;
2248         lc->tu.cu_qp_delta          = 0;
2249     }
2250
2251     if (SAMPLE(s->split_cu_flag, x0, y0)) {
2252         const int cb_size_split = cb_size >> 1;
2253         const int x1 = x0 + cb_size_split;
2254         const int y1 = y0 + cb_size_split;
2255
2256         log2_cb_size--;
2257         cb_depth++;
2258
2259 #define SUBDIVIDE(x, y)                                                \
2260 do {                                                                   \
2261     if (x < s->sps->width && y < s->sps->height) {                     \
2262         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2263         if (ret < 0)                                                   \
2264             return ret;                                                \
2265     }                                                                  \
2266 } while (0)
2267
2268         SUBDIVIDE(x0, y0);
2269         SUBDIVIDE(x1, y0);
2270         SUBDIVIDE(x0, y1);
2271         SUBDIVIDE(x1, y1);
2272     } else {
2273         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2274         if (ret < 0)
2275             return ret;
2276     }
2277
2278     return 0;
2279 }
2280
2281 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2282                                  int ctb_addr_ts)
2283 {
2284     HEVCLocalContext *lc  = &s->HEVClc;
2285     int ctb_size          = 1 << s->sps->log2_ctb_size;
2286     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2287     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2288
2289     int tile_left_boundary, tile_up_boundary;
2290     int slice_left_boundary, slice_up_boundary;
2291
2292     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2293
2294     if (s->pps->entropy_coding_sync_enabled_flag) {
2295         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2296             lc->first_qp_group = 1;
2297         lc->end_of_tiles_x = s->sps->width;
2298     } else if (s->pps->tiles_enabled_flag) {
2299         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2300             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2301             lc->start_of_tiles_x = x_ctb;
2302             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2303             lc->first_qp_group   = 1;
2304         }
2305     } else {
2306         lc->end_of_tiles_x = s->sps->width;
2307     }
2308
2309     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2310
2311     if (s->pps->tiles_enabled_flag) {
2312         tile_left_boundary  = x_ctb > 0 &&
2313                               s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2314         slice_left_boundary = x_ctb > 0 &&
2315                               s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2316         tile_up_boundary  = y_ctb > 0 &&
2317                             s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2318         slice_up_boundary = y_ctb > 0 &&
2319                             s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2320     } else {
2321         tile_left_boundary  =
2322         tile_up_boundary    = 1;
2323         slice_left_boundary = ctb_addr_in_slice > 0;
2324         slice_up_boundary   = ctb_addr_in_slice >= s->sps->ctb_width;
2325     }
2326     lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2327     lc->slice_or_tiles_up_boundary   = (!slice_up_boundary + (!tile_up_boundary << 1));
2328     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2329     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2330     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2331     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2332 }
2333
2334 static int hls_slice_data(HEVCContext *s)
2335 {
2336     int ctb_size    = 1 << s->sps->log2_ctb_size;
2337     int more_data   = 1;
2338     int x_ctb       = 0;
2339     int y_ctb       = 0;
2340     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2341     int ret;
2342
2343     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2344         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2345
2346         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2347         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2348         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2349
2350         ff_hevc_cabac_init(s, ctb_addr_ts);
2351
2352         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2353
2354         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2355         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2356         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2357
2358         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2359         if (ret < 0)
2360             return ret;
2361         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2362
2363         ctb_addr_ts++;
2364         ff_hevc_save_states(s, ctb_addr_ts);
2365         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2366     }
2367
2368     if (x_ctb + ctb_size >= s->sps->width &&
2369         y_ctb + ctb_size >= s->sps->height)
2370         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2371
2372     return ctb_addr_ts;
2373 }
2374
2375 /**
2376  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2377  * 0 if the unit should be skipped, 1 otherwise
2378  */
2379 static int hls_nal_unit(HEVCContext *s)
2380 {
2381     GetBitContext *gb = &s->HEVClc.gb;
2382     int nuh_layer_id;
2383
2384     if (get_bits1(gb) != 0)
2385         return AVERROR_INVALIDDATA;
2386
2387     s->nal_unit_type = get_bits(gb, 6);
2388
2389     nuh_layer_id   = get_bits(gb, 6);
2390     s->temporal_id = get_bits(gb, 3) - 1;
2391     if (s->temporal_id < 0)
2392         return AVERROR_INVALIDDATA;
2393
2394     av_log(s->avctx, AV_LOG_DEBUG,
2395            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2396            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2397
2398     return nuh_layer_id == 0;
2399 }
2400
2401 static void restore_tqb_pixels(HEVCContext *s)
2402 {
2403     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2404     int x, y, c_idx;
2405
2406     for (c_idx = 0; c_idx < 3; c_idx++) {
2407         ptrdiff_t stride = s->frame->linesize[c_idx];
2408         int hshift       = s->sps->hshift[c_idx];
2409         int vshift       = s->sps->vshift[c_idx];
2410         for (y = 0; y < s->sps->min_pu_height; y++) {
2411             for (x = 0; x < s->sps->min_pu_width; x++) {
2412                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2413                     int n;
2414                     int len      = min_pu_size >> hshift;
2415                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2416                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2417                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2418                         memcpy(dst, src, len);
2419                         src += stride;
2420                         dst += stride;
2421                     }
2422                 }
2423             }
2424         }
2425     }
2426 }
2427
2428 static int set_side_data(HEVCContext *s)
2429 {
2430     AVFrame *out = s->ref->frame;
2431
2432     if (s->sei_frame_packing_present &&
2433         s->frame_packing_arrangement_type >= 3 &&
2434         s->frame_packing_arrangement_type <= 5 &&
2435         s->content_interpretation_type > 0 &&
2436         s->content_interpretation_type < 3) {
2437         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2438         if (!stereo)
2439             return AVERROR(ENOMEM);
2440
2441         switch (s->frame_packing_arrangement_type) {
2442         case 3:
2443             if (s->quincunx_subsampling)
2444                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2445             else
2446                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2447             break;
2448         case 4:
2449             stereo->type = AV_STEREO3D_TOPBOTTOM;
2450             break;
2451         case 5:
2452             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2453             break;
2454         }
2455
2456         if (s->content_interpretation_type == 2)
2457             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2458     }
2459
2460     return 0;
2461 }
2462
2463 static int hevc_frame_start(HEVCContext *s)
2464 {
2465     HEVCLocalContext *lc = &s->HEVClc;
2466     int ret;
2467
2468     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2469     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2470     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2471     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2472
2473     lc->start_of_tiles_x = 0;
2474     s->is_decoded        = 0;
2475     s->first_nal_type    = s->nal_unit_type;
2476
2477     if (s->pps->tiles_enabled_flag)
2478         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2479
2480     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2481                               s->poc);
2482     if (ret < 0)
2483         goto fail;
2484
2485     ret = ff_hevc_frame_rps(s);
2486     if (ret < 0) {
2487         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2488         goto fail;
2489     }
2490
2491     ret = set_side_data(s);
2492     if (ret < 0)
2493         goto fail;
2494
2495     av_frame_unref(s->output_frame);
2496     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2497     if (ret < 0)
2498         goto fail;
2499
2500     ff_thread_finish_setup(s->avctx);
2501
2502     return 0;
2503
2504 fail:
2505     if (s->ref)
2506         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2507     s->ref = NULL;
2508     return ret;
2509 }
2510
2511 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2512 {
2513     HEVCLocalContext *lc = &s->HEVClc;
2514     GetBitContext *gb    = &lc->gb;
2515     int ctb_addr_ts, ret;
2516
2517     ret = init_get_bits8(gb, nal, length);
2518     if (ret < 0)
2519         return ret;
2520
2521     ret = hls_nal_unit(s);
2522     if (ret < 0) {
2523         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2524                s->nal_unit_type);
2525         goto fail;
2526     } else if (!ret)
2527         return 0;
2528
2529     switch (s->nal_unit_type) {
2530     case NAL_VPS:
2531         ret = ff_hevc_decode_nal_vps(s);
2532         if (ret < 0)
2533             goto fail;
2534         break;
2535     case NAL_SPS:
2536         ret = ff_hevc_decode_nal_sps(s);
2537         if (ret < 0)
2538             goto fail;
2539         break;
2540     case NAL_PPS:
2541         ret = ff_hevc_decode_nal_pps(s);
2542         if (ret < 0)
2543             goto fail;
2544         break;
2545     case NAL_SEI_PREFIX:
2546     case NAL_SEI_SUFFIX:
2547         ret = ff_hevc_decode_nal_sei(s);
2548         if (ret < 0)
2549             goto fail;
2550         break;
2551     case NAL_TRAIL_R:
2552     case NAL_TRAIL_N:
2553     case NAL_TSA_N:
2554     case NAL_TSA_R:
2555     case NAL_STSA_N:
2556     case NAL_STSA_R:
2557     case NAL_BLA_W_LP:
2558     case NAL_BLA_W_RADL:
2559     case NAL_BLA_N_LP:
2560     case NAL_IDR_W_RADL:
2561     case NAL_IDR_N_LP:
2562     case NAL_CRA_NUT:
2563     case NAL_RADL_N:
2564     case NAL_RADL_R:
2565     case NAL_RASL_N:
2566     case NAL_RASL_R:
2567         ret = hls_slice_header(s);
2568         if (ret < 0)
2569             return ret;
2570
2571         if (s->max_ra == INT_MAX) {
2572             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2573                 s->max_ra = s->poc;
2574             } else {
2575                 if (IS_IDR(s))
2576                     s->max_ra = INT_MIN;
2577             }
2578         }
2579
2580         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2581             s->poc <= s->max_ra) {
2582             s->is_decoded = 0;
2583             break;
2584         } else {
2585             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2586                 s->max_ra = INT_MIN;
2587         }
2588
2589         if (s->sh.first_slice_in_pic_flag) {
2590             ret = hevc_frame_start(s);
2591             if (ret < 0)
2592                 return ret;
2593         } else if (!s->ref) {
2594             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2595             goto fail;
2596         }
2597
2598         if (s->nal_unit_type != s->first_nal_type) {
2599             av_log(s->avctx, AV_LOG_ERROR,
2600                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2601                    s->first_nal_type, s->nal_unit_type);
2602             return AVERROR_INVALIDDATA;
2603         }
2604
2605         if (!s->sh.dependent_slice_segment_flag &&
2606             s->sh.slice_type != I_SLICE) {
2607             ret = ff_hevc_slice_rpl(s);
2608             if (ret < 0) {
2609                 av_log(s->avctx, AV_LOG_WARNING,
2610                        "Error constructing the reference lists for the current slice.\n");
2611                 goto fail;
2612             }
2613         }
2614
2615         ctb_addr_ts = hls_slice_data(s);
2616         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2617             s->is_decoded = 1;
2618             if ((s->pps->transquant_bypass_enable_flag ||
2619                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2620                 s->sps->sao_enabled)
2621                 restore_tqb_pixels(s);
2622         }
2623
2624         if (ctb_addr_ts < 0) {
2625             ret = ctb_addr_ts;
2626             goto fail;
2627         }
2628         break;
2629     case NAL_EOS_NUT:
2630     case NAL_EOB_NUT:
2631         s->seq_decode = (s->seq_decode + 1) & 0xff;
2632         s->max_ra     = INT_MAX;
2633         break;
2634     case NAL_AUD:
2635     case NAL_FD_NUT:
2636         break;
2637     default:
2638         av_log(s->avctx, AV_LOG_INFO,
2639                "Skipping NAL unit %d\n", s->nal_unit_type);
2640     }
2641
2642     return 0;
2643 fail:
2644     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2645         return ret;
2646     return 0;
2647 }
2648
2649 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2650  * between these functions would be nice. */
2651 static int extract_rbsp(const uint8_t *src, int length,
2652                         HEVCNAL *nal)
2653 {
2654     int i, si, di;
2655     uint8_t *dst;
2656
2657 #define STARTCODE_TEST                                                  \
2658         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2659             if (src[i + 2] != 3) {                                      \
2660                 /* startcode, so we must be past the end */             \
2661                 length = i;                                             \
2662             }                                                           \
2663             break;                                                      \
2664         }
2665 #if HAVE_FAST_UNALIGNED
2666 #define FIND_FIRST_ZERO                                                 \
2667         if (i > 0 && !src[i])                                           \
2668             i--;                                                        \
2669         while (src[i])                                                  \
2670             i++
2671 #if HAVE_FAST_64BIT
2672     for (i = 0; i + 1 < length; i += 9) {
2673         if (!((~AV_RN64A(src + i) &
2674                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2675               0x8000800080008080ULL))
2676             continue;
2677         FIND_FIRST_ZERO;
2678         STARTCODE_TEST;
2679         i -= 7;
2680     }
2681 #else
2682     for (i = 0; i + 1 < length; i += 5) {
2683         if (!((~AV_RN32A(src + i) &
2684                (AV_RN32A(src + i) - 0x01000101U)) &
2685               0x80008080U))
2686             continue;
2687         FIND_FIRST_ZERO;
2688         STARTCODE_TEST;
2689         i -= 3;
2690     }
2691 #endif /* HAVE_FAST_64BIT */
2692 #else
2693     for (i = 0; i + 1 < length; i += 2) {
2694         if (src[i])
2695             continue;
2696         if (i > 0 && src[i - 1] == 0)
2697             i--;
2698         STARTCODE_TEST;
2699     }
2700 #endif /* HAVE_FAST_UNALIGNED */
2701
2702     if (i >= length - 1) { // no escaped 0
2703         nal->data = src;
2704         nal->size = length;
2705         return length;
2706     }
2707
2708     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2709                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2710     if (!nal->rbsp_buffer)
2711         return AVERROR(ENOMEM);
2712
2713     dst = nal->rbsp_buffer;
2714
2715     memcpy(dst, src, i);
2716     si = di = i;
2717     while (si + 2 < length) {
2718         // remove escapes (very rare 1:2^22)
2719         if (src[si + 2] > 3) {
2720             dst[di++] = src[si++];
2721             dst[di++] = src[si++];
2722         } else if (src[si] == 0 && src[si + 1] == 0) {
2723             if (src[si + 2] == 3) { // escape
2724                 dst[di++] = 0;
2725                 dst[di++] = 0;
2726                 si       += 3;
2727
2728                 continue;
2729             } else // next start code
2730                 goto nsc;
2731         }
2732
2733         dst[di++] = src[si++];
2734     }
2735     while (si < length)
2736         dst[di++] = src[si++];
2737
2738 nsc:
2739     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2740
2741     nal->data = dst;
2742     nal->size = di;
2743     return si;
2744 }
2745
2746 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2747 {
2748     int i, consumed, ret = 0;
2749
2750     s->ref = NULL;
2751     s->eos = 0;
2752
2753     /* split the input packet into NAL units, so we know the upper bound on the
2754      * number of slices in the frame */
2755     s->nb_nals = 0;
2756     while (length >= 4) {
2757         HEVCNAL *nal;
2758         int extract_length = 0;
2759
2760         if (s->is_nalff) {
2761             int i;
2762             for (i = 0; i < s->nal_length_size; i++)
2763                 extract_length = (extract_length << 8) | buf[i];
2764             buf    += s->nal_length_size;
2765             length -= s->nal_length_size;
2766
2767             if (extract_length > length) {
2768                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2769                 ret = AVERROR_INVALIDDATA;
2770                 goto fail;
2771             }
2772         } else {
2773             if (buf[2] == 0) {
2774                 length--;
2775                 buf++;
2776                 continue;
2777             }
2778             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2779                 ret = AVERROR_INVALIDDATA;
2780                 goto fail;
2781             }
2782
2783             buf           += 3;
2784             length        -= 3;
2785             extract_length = length;
2786         }
2787
2788         if (s->nals_allocated < s->nb_nals + 1) {
2789             int new_size = s->nals_allocated + 1;
2790             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2791             if (!tmp) {
2792                 ret = AVERROR(ENOMEM);
2793                 goto fail;
2794             }
2795             s->nals = tmp;
2796             memset(s->nals + s->nals_allocated, 0,
2797                    (new_size - s->nals_allocated) * sizeof(*tmp));
2798             s->nals_allocated = new_size;
2799         }
2800         nal = &s->nals[s->nb_nals++];
2801
2802         consumed = extract_rbsp(buf, extract_length, nal);
2803         if (consumed < 0) {
2804             ret = consumed;
2805             goto fail;
2806         }
2807
2808         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2809         if (ret < 0)
2810             goto fail;
2811         hls_nal_unit(s);
2812
2813         if (s->nal_unit_type == NAL_EOB_NUT ||
2814             s->nal_unit_type == NAL_EOS_NUT)
2815             s->eos = 1;
2816
2817         buf    += consumed;
2818         length -= consumed;
2819     }
2820
2821     /* parse the NAL units */
2822     for (i = 0; i < s->nb_nals; i++) {
2823         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2824         if (ret < 0) {
2825             av_log(s->avctx, AV_LOG_WARNING,
2826                    "Error parsing NAL unit #%d.\n", i);
2827             goto fail;
2828         }
2829     }
2830
2831 fail:
2832     if (s->ref)
2833         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2834
2835     return ret;
2836 }
2837
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits, without any
 * separator or trailing newline.
 *
 * @param log_ctx context passed through to av_log()
 * @param level   log level to print at
 * @param md5     the digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end) {
        av_log(log_ctx, level, "%02"PRIx8, *byte);
        byte++;
    }
}
2844
2845 static int verify_md5(HEVCContext *s, AVFrame *frame)
2846 {
2847     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2848     int pixel_shift;
2849     int i, j;
2850
2851     if (!desc)
2852         return AVERROR(EINVAL);
2853
2854     pixel_shift = desc->comp[0].depth_minus1 > 7;
2855
2856     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2857            s->poc);
2858
2859     /* the checksums are LE, so we have to byteswap for >8bpp formats
2860      * on BE arches */
2861 #if HAVE_BIGENDIAN
2862     if (pixel_shift && !s->checksum_buf) {
2863         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2864                        FFMAX3(frame->linesize[0], frame->linesize[1],
2865                               frame->linesize[2]));
2866         if (!s->checksum_buf)
2867             return AVERROR(ENOMEM);
2868     }
2869 #endif
2870
2871     for (i = 0; frame->data[i]; i++) {
2872         int width  = s->avctx->coded_width;
2873         int height = s->avctx->coded_height;
2874         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2875         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2876         uint8_t md5[16];
2877
2878         av_md5_init(s->md5_ctx);
2879         for (j = 0; j < h; j++) {
2880             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2881 #if HAVE_BIGENDIAN
2882             if (pixel_shift) {
2883                 s->dsp.bswap16_buf((uint16_t*)s->checksum_buf,
2884                                    (const uint16_t*)src, w);
2885                 src = s->checksum_buf;
2886             }
2887 #endif
2888             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2889         }
2890         av_md5_final(s->md5_ctx, md5);
2891
2892         if (!memcmp(md5, s->md5[i], 16)) {
2893             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2894             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2895             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2896         } else {
2897             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2898             print_md5(s->avctx, AV_LOG_ERROR, md5);
2899             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2900             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2901             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2902             return AVERROR_INVALIDDATA;
2903         }
2904     }
2905
2906     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2907
2908     return 0;
2909 }
2910
2911 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2912                              AVPacket *avpkt)
2913 {
2914     int ret;
2915     HEVCContext *s = avctx->priv_data;
2916
2917     if (!avpkt->size) {
2918         ret = ff_hevc_output_frame(s, data, 1);
2919         if (ret < 0)
2920             return ret;
2921
2922         *got_output = ret;
2923         return 0;
2924     }
2925
2926     s->ref = NULL;
2927     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2928     if (ret < 0)
2929         return ret;
2930
2931     /* verify the SEI checksum */
2932     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2933         s->is_md5) {
2934         ret = verify_md5(s, s->ref->frame);
2935         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2936             ff_hevc_unref_frame(s, s->ref, ~0);
2937             return ret;
2938         }
2939     }
2940     s->is_md5 = 0;
2941
2942     if (s->is_decoded) {
2943         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2944         s->is_decoded = 0;
2945     }
2946
2947     if (s->output_frame->buf[0]) {
2948         av_frame_move_ref(data, s->output_frame);
2949         *got_output = 1;
2950     }
2951
2952     return avpkt->size;
2953 }
2954
2955 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2956 {
2957     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2958     if (ret < 0)
2959         return ret;
2960
2961     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2962     if (!dst->tab_mvf_buf)
2963         goto fail;
2964     dst->tab_mvf = src->tab_mvf;
2965
2966     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2967     if (!dst->rpl_tab_buf)
2968         goto fail;
2969     dst->rpl_tab = src->rpl_tab;
2970
2971     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2972     if (!dst->rpl_buf)
2973         goto fail;
2974
2975     dst->poc        = src->poc;
2976     dst->ctb_count  = src->ctb_count;
2977     dst->window     = src->window;
2978     dst->flags      = src->flags;
2979     dst->sequence   = src->sequence;
2980
2981     return 0;
2982 fail:
2983     ff_hevc_unref_frame(s, dst, ~0);
2984     return AVERROR(ENOMEM);
2985 }
2986
2987 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2988 {
2989     HEVCContext       *s = avctx->priv_data;
2990     int i;
2991
2992     pic_arrays_free(s);
2993
2994     av_freep(&s->md5_ctx);
2995
2996     av_frame_free(&s->tmp_frame);
2997     av_frame_free(&s->output_frame);
2998
2999     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3000         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3001         av_frame_free(&s->DPB[i].frame);
3002     }
3003
3004     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3005         av_buffer_unref(&s->vps_list[i]);
3006     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3007         av_buffer_unref(&s->sps_list[i]);
3008     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3009         av_buffer_unref(&s->pps_list[i]);
3010
3011     for (i = 0; i < s->nals_allocated; i++)
3012         av_freep(&s->nals[i].rbsp_buffer);
3013     av_freep(&s->nals);
3014     s->nals_allocated = 0;
3015
3016     return 0;
3017 }
3018
3019 static av_cold int hevc_init_context(AVCodecContext *avctx)
3020 {
3021     HEVCContext *s = avctx->priv_data;
3022     int i;
3023
3024     s->avctx = avctx;
3025
3026     s->tmp_frame = av_frame_alloc();
3027     if (!s->tmp_frame)
3028         goto fail;
3029
3030     s->output_frame = av_frame_alloc();
3031     if (!s->output_frame)
3032         goto fail;
3033
3034     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3035         s->DPB[i].frame = av_frame_alloc();
3036         if (!s->DPB[i].frame)
3037             goto fail;
3038         s->DPB[i].tf.f = s->DPB[i].frame;
3039     }
3040
3041     s->max_ra = INT_MAX;
3042
3043     s->md5_ctx = av_md5_alloc();
3044     if (!s->md5_ctx)
3045         goto fail;
3046
3047     ff_dsputil_init(&s->dsp, avctx);
3048
3049     s->context_initialized = 1;
3050
3051     return 0;
3052
3053 fail:
3054     hevc_decode_free(avctx);
3055     return AVERROR(ENOMEM);
3056 }
3057
3058 static int hevc_update_thread_context(AVCodecContext *dst,
3059                                       const AVCodecContext *src)
3060 {
3061     HEVCContext *s  = dst->priv_data;
3062     HEVCContext *s0 = src->priv_data;
3063     int i, ret;
3064
3065     if (!s->context_initialized) {
3066         ret = hevc_init_context(dst);
3067         if (ret < 0)
3068             return ret;
3069     }
3070
3071     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3072         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3073         if (s0->DPB[i].frame->buf[0]) {
3074             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3075             if (ret < 0)
3076                 return ret;
3077         }
3078     }
3079
3080     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3081         av_buffer_unref(&s->vps_list[i]);
3082         if (s0->vps_list[i]) {
3083             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3084             if (!s->vps_list[i])
3085                 return AVERROR(ENOMEM);
3086         }
3087     }
3088
3089     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3090         av_buffer_unref(&s->sps_list[i]);
3091         if (s0->sps_list[i]) {
3092             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3093             if (!s->sps_list[i])
3094                 return AVERROR(ENOMEM);
3095         }
3096     }
3097
3098     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3099         av_buffer_unref(&s->pps_list[i]);
3100         if (s0->pps_list[i]) {
3101             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3102             if (!s->pps_list[i])
3103                 return AVERROR(ENOMEM);
3104         }
3105     }
3106
3107     if (s->sps != s0->sps)
3108         ret = set_sps(s, s0->sps);
3109
3110     s->seq_decode = s0->seq_decode;
3111     s->seq_output = s0->seq_output;
3112     s->pocTid0    = s0->pocTid0;
3113     s->max_ra     = s0->max_ra;
3114
3115     s->is_nalff        = s0->is_nalff;
3116     s->nal_length_size = s0->nal_length_size;
3117
3118     if (s0->eos) {
3119         s->seq_decode = (s->seq_decode + 1) & 0xff;
3120         s->max_ra = INT_MAX;
3121     }
3122
3123     return 0;
3124 }
3125
3126 static int hevc_decode_extradata(HEVCContext *s)
3127 {
3128     AVCodecContext *avctx = s->avctx;
3129     GetByteContext gb;
3130     int ret;
3131
3132     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3133
3134     if (avctx->extradata_size > 3 &&
3135         (avctx->extradata[0] || avctx->extradata[1] ||
3136          avctx->extradata[2] > 1)) {
3137         /* It seems the extradata is encoded as hvcC format.
3138          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3139          * is finalized. When finalized, configurationVersion will be 1 and we
3140          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3141         int i, j, num_arrays, nal_len_size;
3142
3143         s->is_nalff = 1;
3144
3145         bytestream2_skip(&gb, 21);
3146         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3147         num_arrays   = bytestream2_get_byte(&gb);
3148
3149         /* nal units in the hvcC always have length coded with 2 bytes,
3150          * so put a fake nal_length_size = 2 while parsing them */
3151         s->nal_length_size = 2;
3152
3153         /* Decode nal units from hvcC. */
3154         for (i = 0; i < num_arrays; i++) {
3155             int type = bytestream2_get_byte(&gb) & 0x3f;
3156             int cnt  = bytestream2_get_be16(&gb);
3157
3158             for (j = 0; j < cnt; j++) {
3159                 // +2 for the nal size field
3160                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3161                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3162                     av_log(s->avctx, AV_LOG_ERROR,
3163                            "Invalid NAL unit size in extradata.\n");
3164                     return AVERROR_INVALIDDATA;
3165                 }
3166
3167                 ret = decode_nal_units(s, gb.buffer, nalsize);
3168                 if (ret < 0) {
3169                     av_log(avctx, AV_LOG_ERROR,
3170                            "Decoding nal unit %d %d from hvcC failed\n",
3171                            type, i);
3172                     return ret;
3173                 }
3174                 bytestream2_skip(&gb, nalsize);
3175             }
3176         }
3177
3178         /* Now store right nal length size, that will be used to parse
3179          * all other nals */
3180         s->nal_length_size = nal_len_size;
3181     } else {
3182         s->is_nalff = 0;
3183         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3184         if (ret < 0)
3185             return ret;
3186     }
3187     return 0;
3188 }
3189
3190 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3191 {
3192     HEVCContext *s = avctx->priv_data;
3193     int ret;
3194
3195     ff_init_cabac_states();
3196
3197     avctx->internal->allocate_progress = 1;
3198
3199     ret = hevc_init_context(avctx);
3200     if (ret < 0)
3201         return ret;
3202
3203     if (avctx->extradata_size > 0 && avctx->extradata) {
3204         ret = hevc_decode_extradata(s);
3205         if (ret < 0) {
3206             hevc_decode_free(avctx);
3207             return ret;
3208         }
3209     }
3210
3211     return 0;
3212 }
3213
3214 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3215 {
3216     HEVCContext *s = avctx->priv_data;
3217     int ret;
3218
3219     memset(s, 0, sizeof(*s));
3220
3221     ret = hevc_init_context(avctx);
3222     if (ret < 0)
3223         return ret;
3224
3225     return 0;
3226 }
3227
3228 static void hevc_decode_flush(AVCodecContext *avctx)
3229 {
3230     HEVCContext *s = avctx->priv_data;
3231     ff_hevc_flush_dpb(s);
3232     s->max_ra = INT_MAX;
3233 }
3234
/* Byte offset of a field inside HEVCContext, used by the option table. */
#define OFFSET(x) offsetof(HEVCContext, x)
/* Flags shared by all options below: video decoding parameters. */
#define PAR       (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3237
3238 static const AVProfile profiles[] = {
3239     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3240     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3241     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3242     { FF_PROFILE_UNKNOWN },
3243 };
3244
3245 static const AVOption options[] = {
3246     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3247         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3248     { NULL },
3249 };
3250
3251 static const AVClass hevc_decoder_class = {
3252     .class_name = "HEVC decoder",
3253     .item_name  = av_default_item_name,
3254     .option     = options,
3255     .version    = LIBAVUTIL_VERSION_INT,
3256 };
3257
3258 AVCodec ff_hevc_decoder = {
3259     .name                  = "hevc",
3260     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3261     .type                  = AVMEDIA_TYPE_VIDEO,
3262     .id                    = AV_CODEC_ID_HEVC,
3263     .priv_data_size        = sizeof(HEVCContext),
3264     .priv_class            = &hevc_decoder_class,
3265     .init                  = hevc_decode_init,
3266     .close                 = hevc_decode_free,
3267     .decode                = hevc_decode_frame,
3268     .flush                 = hevc_decode_flush,
3269     .update_thread_context = hevc_update_thread_context,
3270     .init_thread_copy      = hevc_init_thread_copy,
3271     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3272                              CODEC_CAP_FRAME_THREADS,
3273     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3274 };