]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c
hevc: do not store the transform inter_split flag in the context
[ffmpeg] / libavcodec / hevc.c
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/md5.h"
31 #include "libavutil/opt.h"
32 #include "libavutil/pixdesc.h"
33 #include "libavutil/stereo3d.h"
34
35 #include "bswapdsp.h"
36 #include "bytestream.h"
37 #include "cabac_functions.h"
38 #include "golomb.h"
39 #include "hevc.h"
40
/*
 * Quarter-pel margin tables, four entries each. For every index i,
 * ff_hevc_qpel_extra[i] == ff_hevc_qpel_extra_before[i] +
 *                          ff_hevc_qpel_extra_after[i].
 * NOTE(review): the index is presumably the quarter-sample fraction of the
 * motion vector -- confirm against the users of these tables.
 */
const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };

/* Trivial scan of a single position. */
static const uint8_t scan_1x1[1] = { 0 };

/* Row-major ("horizontal") scan of a 2x2 grid: x/y of each scan position. */
static const uint8_t horiz_scan2x2_x[4] = { 0, 1,
                                            0, 1 };

static const uint8_t horiz_scan2x2_y[4] = { 0, 0,
                                            1, 1 };

/* Row-major ("horizontal") scan of a 4x4 grid: x/y of each scan position. */
static const uint8_t horiz_scan4x4_x[16] = {
    0, 1, 2, 3,    0, 1, 2, 3,
    0, 1, 2, 3,    0, 1, 2, 3,
};

static const uint8_t horiz_scan4x4_y[16] = {
    0, 0, 0, 0,    1, 1, 1, 1,
    2, 2, 2, 2,    3, 3, 3, 3,
};

/*
 * Position lookup for an 8x8 grid, indexed [row][column]. Each 4x4 quadrant
 * holds 16 consecutive positions in row-major order; the quadrants themselves
 * are numbered top-left, top-right, bottom-left, bottom-right.
 */
static const uint8_t horiz_scan8x8_inv[8][8] = {
    {  0,  1,  2,  3,   16, 17, 18, 19 },
    {  4,  5,  6,  7,   20, 21, 22, 23 },
    {  8,  9, 10, 11,   24, 25, 26, 27 },
    { 12, 13, 14, 15,   28, 29, 30, 31 },
    { 32, 33, 34, 35,   48, 49, 50, 51 },
    { 36, 37, 38, 39,   52, 53, 54, 55 },
    { 40, 41, 42, 43,   56, 57, 58, 59 },
    { 44, 45, 46, 47,   60, 61, 62, 63 },
};

/*
 * Diagonal scans. The *_x / *_y tables give the coordinates of every scan
 * position; the initializers below are written one anti-diagonal per line.
 * The matching *_inv tables map coordinates back to the scan position:
 *     inv[y[i]][x[i]] == i
 */
static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };

static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };

static const uint8_t diag_scan2x2_inv[2][2] = {
    { 0, 2 },
    { 1, 3 },
};

const uint8_t ff_hevc_diag_scan4x4_x[16] = {
    0,
    0, 1,
    0, 1, 2,
    0, 1, 2, 3,
    1, 2, 3,
    2, 3,
    3,
};

const uint8_t ff_hevc_diag_scan4x4_y[16] = {
    0,
    1, 0,
    2, 1, 0,
    3, 2, 1, 0,
    3, 2, 1,
    3, 2,
    3,
};

static const uint8_t diag_scan4x4_inv[4][4] = {
    { 0,  2,  5,  9 },
    { 1,  4,  8, 12 },
    { 3,  7, 11, 14 },
    { 6, 10, 13, 15 },
};

const uint8_t ff_hevc_diag_scan8x8_x[64] = {
    0,
    0, 1,
    0, 1, 2,
    0, 1, 2, 3,
    0, 1, 2, 3, 4,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5, 6,
    0, 1, 2, 3, 4, 5, 6, 7,
    1, 2, 3, 4, 5, 6, 7,
    2, 3, 4, 5, 6, 7,
    3, 4, 5, 6, 7,
    4, 5, 6, 7,
    5, 6, 7,
    6, 7,
    7,
};

const uint8_t ff_hevc_diag_scan8x8_y[64] = {
    0,
    1, 0,
    2, 1, 0,
    3, 2, 1, 0,
    4, 3, 2, 1, 0,
    5, 4, 3, 2, 1, 0,
    6, 5, 4, 3, 2, 1, 0,
    7, 6, 5, 4, 3, 2, 1, 0,
    7, 6, 5, 4, 3, 2, 1,
    7, 6, 5, 4, 3, 2,
    7, 6, 5, 4, 3,
    7, 6, 5, 4,
    7, 6, 5,
    7, 6,
    7,
};

static const uint8_t diag_scan8x8_inv[8][8] = {
    {  0,  2,  5,  9, 14, 20, 27, 35 },
    {  1,  4,  8, 13, 19, 26, 34, 42 },
    {  3,  7, 12, 18, 25, 33, 41, 48 },
    {  6, 11, 17, 24, 32, 40, 47, 53 },
    { 10, 16, 23, 31, 39, 46, 52, 57 },
    { 15, 22, 30, 38, 45, 51, 56, 60 },
    { 21, 29, 37, 44, 50, 55, 59, 62 },
    { 28, 36, 43, 49, 54, 58, 61, 63 },
};
154
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
159
160 /**
161  * Section 5.7
162  */
163
164 /* free everything allocated  by pic_arrays_init() */
165 static void pic_arrays_free(HEVCContext *s)
166 {
167     av_freep(&s->sao);
168     av_freep(&s->deblock);
169
170     av_freep(&s->skip_flag);
171     av_freep(&s->tab_ct_depth);
172
173     av_freep(&s->tab_ipm);
174     av_freep(&s->cbf_luma);
175     av_freep(&s->is_pcm);
176
177     av_freep(&s->qp_y_tab);
178     av_freep(&s->tab_slice_address);
179     av_freep(&s->filter_slice_edges);
180
181     av_freep(&s->horizontal_bs);
182     av_freep(&s->vertical_bs);
183
184     av_buffer_pool_uninit(&s->tab_mvf_pool);
185     av_buffer_pool_uninit(&s->rpl_tab_pool);
186 }
187
188 /* allocate arrays that depend on frame dimensions */
189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
190 {
191     int log2_min_cb_size = sps->log2_min_cb_size;
192     int width            = sps->width;
193     int height           = sps->height;
194     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
195                            ((height >> log2_min_cb_size) + 1);
196     int ctb_count        = sps->ctb_width * sps->ctb_height;
197     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
198
199     s->bs_width  = width  >> 3;
200     s->bs_height = height >> 3;
201
202     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
203     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
204     if (!s->sao || !s->deblock)
205         goto fail;
206
207     s->skip_flag    = av_malloc(pic_size_in_ctb);
208     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
209     if (!s->skip_flag || !s->tab_ct_depth)
210         goto fail;
211
212     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
213     s->tab_ipm  = av_mallocz(min_pu_size);
214     s->is_pcm   = av_malloc(min_pu_size);
215     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
216         goto fail;
217
218     s->filter_slice_edges = av_malloc(ctb_count);
219     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
220                                       sizeof(*s->tab_slice_address));
221     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
222                                       sizeof(*s->qp_y_tab));
223     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
224         goto fail;
225
226     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
227     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
228     if (!s->horizontal_bs || !s->vertical_bs)
229         goto fail;
230
231     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
232                                           av_buffer_alloc);
233     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
234                                           av_buffer_allocz);
235     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
236         goto fail;
237
238     return 0;
239
240 fail:
241     pic_arrays_free(s);
242     return AVERROR(ENOMEM);
243 }
244
245 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
246 {
247     int i = 0;
248     int j = 0;
249     uint8_t luma_weight_l0_flag[16];
250     uint8_t chroma_weight_l0_flag[16];
251     uint8_t luma_weight_l1_flag[16];
252     uint8_t chroma_weight_l1_flag[16];
253
254     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
255     if (s->sps->chroma_format_idc != 0) {
256         int delta = get_se_golomb(gb);
257         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
258     }
259
260     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
261         luma_weight_l0_flag[i] = get_bits1(gb);
262         if (!luma_weight_l0_flag[i]) {
263             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
264             s->sh.luma_offset_l0[i] = 0;
265         }
266     }
267     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
268         for (i = 0; i < s->sh.nb_refs[L0]; i++)
269             chroma_weight_l0_flag[i] = get_bits1(gb);
270     } else {
271         for (i = 0; i < s->sh.nb_refs[L0]; i++)
272             chroma_weight_l0_flag[i] = 0;
273     }
274     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
275         if (luma_weight_l0_flag[i]) {
276             int delta_luma_weight_l0 = get_se_golomb(gb);
277             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
278             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
279         }
280         if (chroma_weight_l0_flag[i]) {
281             for (j = 0; j < 2; j++) {
282                 int delta_chroma_weight_l0 = get_se_golomb(gb);
283                 int delta_chroma_offset_l0 = get_se_golomb(gb);
284                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
285                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
286                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
287             }
288         } else {
289             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
290             s->sh.chroma_offset_l0[i][0] = 0;
291             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
292             s->sh.chroma_offset_l0[i][1] = 0;
293         }
294     }
295     if (s->sh.slice_type == B_SLICE) {
296         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
297             luma_weight_l1_flag[i] = get_bits1(gb);
298             if (!luma_weight_l1_flag[i]) {
299                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
300                 s->sh.luma_offset_l1[i] = 0;
301             }
302         }
303         if (s->sps->chroma_format_idc != 0) {
304             for (i = 0; i < s->sh.nb_refs[L1]; i++)
305                 chroma_weight_l1_flag[i] = get_bits1(gb);
306         } else {
307             for (i = 0; i < s->sh.nb_refs[L1]; i++)
308                 chroma_weight_l1_flag[i] = 0;
309         }
310         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
311             if (luma_weight_l1_flag[i]) {
312                 int delta_luma_weight_l1 = get_se_golomb(gb);
313                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
314                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
315             }
316             if (chroma_weight_l1_flag[i]) {
317                 for (j = 0; j < 2; j++) {
318                     int delta_chroma_weight_l1 = get_se_golomb(gb);
319                     int delta_chroma_offset_l1 = get_se_golomb(gb);
320                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
321                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
322                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
323                 }
324             } else {
325                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
326                 s->sh.chroma_offset_l1[i][0] = 0;
327                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
328                 s->sh.chroma_offset_l1[i][1] = 0;
329             }
330         }
331     }
332 }
333
334 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
335 {
336     const HEVCSPS *sps = s->sps;
337     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
338     int prev_delta_msb = 0;
339     unsigned int nb_sps = 0, nb_sh;
340     int i;
341
342     rps->nb_refs = 0;
343     if (!sps->long_term_ref_pics_present_flag)
344         return 0;
345
346     if (sps->num_long_term_ref_pics_sps > 0)
347         nb_sps = get_ue_golomb_long(gb);
348     nb_sh = get_ue_golomb_long(gb);
349
350     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
351         return AVERROR_INVALIDDATA;
352
353     rps->nb_refs = nb_sh + nb_sps;
354
355     for (i = 0; i < rps->nb_refs; i++) {
356         uint8_t delta_poc_msb_present;
357
358         if (i < nb_sps) {
359             uint8_t lt_idx_sps = 0;
360
361             if (sps->num_long_term_ref_pics_sps > 1)
362                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
363
364             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
365             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
366         } else {
367             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
368             rps->used[i] = get_bits1(gb);
369         }
370
371         delta_poc_msb_present = get_bits1(gb);
372         if (delta_poc_msb_present) {
373             int delta = get_ue_golomb_long(gb);
374
375             if (i && i != nb_sps)
376                 delta += prev_delta_msb;
377
378             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
379             prev_delta_msb = delta;
380         }
381     }
382
383     return 0;
384 }
385
386 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
387 {
388     int ret;
389     unsigned int num = 0, den = 0;
390
391     pic_arrays_free(s);
392     ret = pic_arrays_init(s, sps);
393     if (ret < 0)
394         goto fail;
395
396     s->avctx->coded_width         = sps->width;
397     s->avctx->coded_height        = sps->height;
398     s->avctx->width               = sps->output_width;
399     s->avctx->height              = sps->output_height;
400     s->avctx->pix_fmt             = sps->pix_fmt;
401     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
402
403     ff_set_sar(s->avctx, sps->vui.sar);
404
405     if (sps->vui.video_signal_type_present_flag)
406         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
407                                                                : AVCOL_RANGE_MPEG;
408     else
409         s->avctx->color_range = AVCOL_RANGE_MPEG;
410
411     if (sps->vui.colour_description_present_flag) {
412         s->avctx->color_primaries = sps->vui.colour_primaries;
413         s->avctx->color_trc       = sps->vui.transfer_characteristic;
414         s->avctx->colorspace      = sps->vui.matrix_coeffs;
415     } else {
416         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
417         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
418         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
419     }
420
421     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
422     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
423     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
424
425     if (sps->sao_enabled) {
426         av_frame_unref(s->tmp_frame);
427         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
428         if (ret < 0)
429             goto fail;
430         s->frame = s->tmp_frame;
431     }
432
433     s->sps = sps;
434     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
435
436     if (s->vps->vps_timing_info_present_flag) {
437         num = s->vps->vps_num_units_in_tick;
438         den = s->vps->vps_time_scale;
439     } else if (sps->vui.vui_timing_info_present_flag) {
440         num = sps->vui.vui_num_units_in_tick;
441         den = sps->vui.vui_time_scale;
442     }
443
444     if (num != 0 && den != 0)
445         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
446                   num, den, 1 << 30);
447
448     return 0;
449
450 fail:
451     pic_arrays_free(s);
452     s->sps = NULL;
453     return ret;
454 }
455
456 static int hls_slice_header(HEVCContext *s)
457 {
458     GetBitContext *gb = &s->HEVClc.gb;
459     SliceHeader *sh   = &s->sh;
460     int i, ret;
461
462     // Coded parameters
463     sh->first_slice_in_pic_flag = get_bits1(gb);
464     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
465         s->seq_decode = (s->seq_decode + 1) & 0xff;
466         s->max_ra     = INT_MAX;
467         if (IS_IDR(s))
468             ff_hevc_clear_refs(s);
469     }
470     if (IS_IRAP(s))
471         sh->no_output_of_prior_pics_flag = get_bits1(gb);
472
473     sh->pps_id = get_ue_golomb_long(gb);
474     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
475         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
476         return AVERROR_INVALIDDATA;
477     }
478     if (!sh->first_slice_in_pic_flag &&
479         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
480         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
481         return AVERROR_INVALIDDATA;
482     }
483     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
484
485     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
486         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
487
488         ff_hevc_clear_refs(s);
489         ret = set_sps(s, s->sps);
490         if (ret < 0)
491             return ret;
492
493         s->seq_decode = (s->seq_decode + 1) & 0xff;
494         s->max_ra     = INT_MAX;
495     }
496
497     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
498     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
499
500     sh->dependent_slice_segment_flag = 0;
501     if (!sh->first_slice_in_pic_flag) {
502         int slice_address_length;
503
504         if (s->pps->dependent_slice_segments_enabled_flag)
505             sh->dependent_slice_segment_flag = get_bits1(gb);
506
507         slice_address_length = av_ceil_log2(s->sps->ctb_width *
508                                             s->sps->ctb_height);
509         sh->slice_segment_addr = get_bits(gb, slice_address_length);
510         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
511             av_log(s->avctx, AV_LOG_ERROR,
512                    "Invalid slice segment address: %u.\n",
513                    sh->slice_segment_addr);
514             return AVERROR_INVALIDDATA;
515         }
516
517         if (!sh->dependent_slice_segment_flag) {
518             sh->slice_addr = sh->slice_segment_addr;
519             s->slice_idx++;
520         }
521     } else {
522         sh->slice_segment_addr = sh->slice_addr = 0;
523         s->slice_idx           = 0;
524         s->slice_initialized   = 0;
525     }
526
527     if (!sh->dependent_slice_segment_flag) {
528         s->slice_initialized = 0;
529
530         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
531             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
532
533         sh->slice_type = get_ue_golomb_long(gb);
534         if (!(sh->slice_type == I_SLICE ||
535               sh->slice_type == P_SLICE ||
536               sh->slice_type == B_SLICE)) {
537             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
538                    sh->slice_type);
539             return AVERROR_INVALIDDATA;
540         }
541         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
542             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
543             return AVERROR_INVALIDDATA;
544         }
545
546         // when flag is not present, picture is inferred to be output
547         sh->pic_output_flag = 1;
548         if (s->pps->output_flag_present_flag)
549             sh->pic_output_flag = get_bits1(gb);
550
551         if (s->sps->separate_colour_plane_flag)
552             sh->colour_plane_id = get_bits(gb, 2);
553
554         if (!IS_IDR(s)) {
555             int short_term_ref_pic_set_sps_flag, poc;
556
557             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
558             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
559             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
560                 av_log(s->avctx, AV_LOG_WARNING,
561                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
562                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
563                     return AVERROR_INVALIDDATA;
564                 poc = s->poc;
565             }
566             s->poc = poc;
567
568             short_term_ref_pic_set_sps_flag = get_bits1(gb);
569             if (!short_term_ref_pic_set_sps_flag) {
570                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
571                 if (ret < 0)
572                     return ret;
573
574                 sh->short_term_rps = &sh->slice_rps;
575             } else {
576                 int numbits, rps_idx;
577
578                 if (!s->sps->nb_st_rps) {
579                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
580                     return AVERROR_INVALIDDATA;
581                 }
582
583                 numbits = av_ceil_log2(s->sps->nb_st_rps);
584                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
585                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
586             }
587
588             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
589             if (ret < 0) {
590                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
591                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
592                     return AVERROR_INVALIDDATA;
593             }
594
595             if (s->sps->sps_temporal_mvp_enabled_flag)
596                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
597             else
598                 sh->slice_temporal_mvp_enabled_flag = 0;
599         } else {
600             s->sh.short_term_rps = NULL;
601             s->poc               = 0;
602         }
603
604         /* 8.3.1 */
605         if (s->temporal_id == 0 &&
606             s->nal_unit_type != NAL_TRAIL_N &&
607             s->nal_unit_type != NAL_TSA_N   &&
608             s->nal_unit_type != NAL_STSA_N  &&
609             s->nal_unit_type != NAL_RADL_N  &&
610             s->nal_unit_type != NAL_RADL_R  &&
611             s->nal_unit_type != NAL_RASL_N  &&
612             s->nal_unit_type != NAL_RASL_R)
613             s->pocTid0 = s->poc;
614
615         if (s->sps->sao_enabled) {
616             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
617             sh->slice_sample_adaptive_offset_flag[1] =
618             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
619         } else {
620             sh->slice_sample_adaptive_offset_flag[0] = 0;
621             sh->slice_sample_adaptive_offset_flag[1] = 0;
622             sh->slice_sample_adaptive_offset_flag[2] = 0;
623         }
624
625         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
626         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
627             int nb_refs;
628
629             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
630             if (sh->slice_type == B_SLICE)
631                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
632
633             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
634                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
635                 if (sh->slice_type == B_SLICE)
636                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
637             }
638             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
639                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
640                        sh->nb_refs[L0], sh->nb_refs[L1]);
641                 return AVERROR_INVALIDDATA;
642             }
643
644             sh->rpl_modification_flag[0] = 0;
645             sh->rpl_modification_flag[1] = 0;
646             nb_refs = ff_hevc_frame_nb_refs(s);
647             if (!nb_refs) {
648                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
649                 return AVERROR_INVALIDDATA;
650             }
651
652             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
653                 sh->rpl_modification_flag[0] = get_bits1(gb);
654                 if (sh->rpl_modification_flag[0]) {
655                     for (i = 0; i < sh->nb_refs[L0]; i++)
656                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
657                 }
658
659                 if (sh->slice_type == B_SLICE) {
660                     sh->rpl_modification_flag[1] = get_bits1(gb);
661                     if (sh->rpl_modification_flag[1] == 1)
662                         for (i = 0; i < sh->nb_refs[L1]; i++)
663                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
664                 }
665             }
666
667             if (sh->slice_type == B_SLICE)
668                 sh->mvd_l1_zero_flag = get_bits1(gb);
669
670             if (s->pps->cabac_init_present_flag)
671                 sh->cabac_init_flag = get_bits1(gb);
672             else
673                 sh->cabac_init_flag = 0;
674
675             sh->collocated_ref_idx = 0;
676             if (sh->slice_temporal_mvp_enabled_flag) {
677                 sh->collocated_list = L0;
678                 if (sh->slice_type == B_SLICE)
679                     sh->collocated_list = !get_bits1(gb);
680
681                 if (sh->nb_refs[sh->collocated_list] > 1) {
682                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
683                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
684                         av_log(s->avctx, AV_LOG_ERROR,
685                                "Invalid collocated_ref_idx: %d.\n",
686                                sh->collocated_ref_idx);
687                         return AVERROR_INVALIDDATA;
688                     }
689                 }
690             }
691
692             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
693                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
694                 pred_weight_table(s, gb);
695             }
696
697             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
698             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
699                 av_log(s->avctx, AV_LOG_ERROR,
700                        "Invalid number of merging MVP candidates: %d.\n",
701                        sh->max_num_merge_cand);
702                 return AVERROR_INVALIDDATA;
703             }
704         }
705
706         sh->slice_qp_delta = get_se_golomb(gb);
707
708         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
709             sh->slice_cb_qp_offset = get_se_golomb(gb);
710             sh->slice_cr_qp_offset = get_se_golomb(gb);
711         } else {
712             sh->slice_cb_qp_offset = 0;
713             sh->slice_cr_qp_offset = 0;
714         }
715
716         if (s->pps->deblocking_filter_control_present_flag) {
717             int deblocking_filter_override_flag = 0;
718
719             if (s->pps->deblocking_filter_override_enabled_flag)
720                 deblocking_filter_override_flag = get_bits1(gb);
721
722             if (deblocking_filter_override_flag) {
723                 sh->disable_deblocking_filter_flag = get_bits1(gb);
724                 if (!sh->disable_deblocking_filter_flag) {
725                     sh->beta_offset = get_se_golomb(gb) * 2;
726                     sh->tc_offset   = get_se_golomb(gb) * 2;
727                 }
728             } else {
729                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
730                 sh->beta_offset                    = s->pps->beta_offset;
731                 sh->tc_offset                      = s->pps->tc_offset;
732             }
733         } else {
734             sh->disable_deblocking_filter_flag = 0;
735             sh->beta_offset                    = 0;
736             sh->tc_offset                      = 0;
737         }
738
739         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
740             (sh->slice_sample_adaptive_offset_flag[0] ||
741              sh->slice_sample_adaptive_offset_flag[1] ||
742              !sh->disable_deblocking_filter_flag)) {
743             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
744         } else {
745             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
746         }
747     } else if (!s->slice_initialized) {
748         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
749         return AVERROR_INVALIDDATA;
750     }
751
752     sh->num_entry_point_offsets = 0;
753     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
754         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
755         if (sh->num_entry_point_offsets > 0) {
756             int offset_len = get_ue_golomb_long(gb) + 1;
757
758             for (i = 0; i < sh->num_entry_point_offsets; i++)
759                 skip_bits(gb, offset_len);
760         }
761     }
762
763     if (s->pps->slice_header_extension_present_flag) {
764         unsigned int length = get_ue_golomb_long(gb);
765         for (i = 0; i < length; i++)
766             skip_bits(gb, 8);  // slice_header_extension_data_byte
767     }
768
769     // Inferred parameters
770     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
771     if (sh->slice_qp > 51 ||
772         sh->slice_qp < -s->sps->qp_bd_offset) {
773         av_log(s->avctx, AV_LOG_ERROR,
774                "The slice_qp %d is outside the valid range "
775                "[%d, 51].\n",
776                sh->slice_qp,
777                -s->sps->qp_bd_offset);
778         return AVERROR_INVALIDDATA;
779     }
780
781     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
782
783     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
784         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
785         return AVERROR_INVALIDDATA;
786     }
787
788     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
789
790     if (!s->pps->cu_qp_delta_enabled_flag)
791         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
792                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
793
794     s->slice_initialized = 1;
795
796     return 0;
797 }
798
/*
 * Access the entry of a per-CTB table at CTB coordinates (x, y).
 * Expects a variable `s` in scope whose s->sps->ctb_width is the row stride.
 */
#define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])

/*
 * Assign one SAO parameter of the current CTB.
 *
 * Expects `s`, `sao`, `rx`, `ry`, `sao_merge_left_flag` and
 * `sao_merge_up_flag` in scope. With the left merge flag set the parameter
 * is copied from the CTB to the left, otherwise with the up merge flag set
 * from the CTB above; only when neither flag is set is `value` evaluated
 * and stored, so a decoding call passed as `value` runs only in that case.
 */
#define SET_SAO(elem, value)                            \
do {                                                    \
    if (sao_merge_left_flag)                            \
        sao->elem = CTB(s->sao, rx-1, ry).elem;         \
    else if (sao_merge_up_flag)                         \
        sao->elem = CTB(s->sao, rx, ry-1).elem;         \
    else                                                \
        sao->elem = value;                              \
} while (0)
812
813 static void hls_sao_param(HEVCContext *s, int rx, int ry)
814 {
815     HEVCLocalContext *lc    = &s->HEVClc;
816     int sao_merge_left_flag = 0;
817     int sao_merge_up_flag   = 0;
818     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
819     SAOParams *sao          = &CTB(s->sao, rx, ry);
820     int c_idx, i;
821
822     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
823         s->sh.slice_sample_adaptive_offset_flag[1]) {
824         if (rx > 0) {
825             if (lc->ctb_left_flag)
826                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
827         }
828         if (ry > 0 && !sao_merge_left_flag) {
829             if (lc->ctb_up_flag)
830                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
831         }
832     }
833
834     for (c_idx = 0; c_idx < 3; c_idx++) {
835         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
836             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
837             continue;
838         }
839
840         if (c_idx == 2) {
841             sao->type_idx[2] = sao->type_idx[1];
842             sao->eo_class[2] = sao->eo_class[1];
843         } else {
844             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
845         }
846
847         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
848             continue;
849
850         for (i = 0; i < 4; i++)
851             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
852
853         if (sao->type_idx[c_idx] == SAO_BAND) {
854             for (i = 0; i < 4; i++) {
855                 if (sao->offset_abs[c_idx][i]) {
856                     SET_SAO(offset_sign[c_idx][i],
857                             ff_hevc_sao_offset_sign_decode(s));
858                 } else {
859                     sao->offset_sign[c_idx][i] = 0;
860                 }
861             }
862             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
863         } else if (c_idx != 2) {
864             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
865         }
866
867         // Inferred parameters
868         sao->offset_val[c_idx][0] = 0;
869         for (i = 0; i < 4; i++) {
870             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
871             if (sao->type_idx[c_idx] == SAO_EDGE) {
872                 if (i > 1)
873                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
874             } else if (sao->offset_sign[c_idx][i]) {
875                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
876             }
877         }
878     }
879 }
880
881 #undef SET_SAO
882 #undef CTB
883
884 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
885                                 int log2_trafo_size, enum ScanType scan_idx,
886                                 int c_idx)
887 {
888 #define GET_COORD(offset, n)                                    \
889     do {                                                        \
890         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
891         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
892     } while (0)
893     HEVCLocalContext *lc    = &s->HEVClc;
894     int transform_skip_flag = 0;
895
896     int last_significant_coeff_x, last_significant_coeff_y;
897     int last_scan_pos;
898     int n_end;
899     int num_coeff    = 0;
900     int greater1_ctx = 1;
901
902     int num_last_subset;
903     int x_cg_last_sig, y_cg_last_sig;
904
905     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
906
907     ptrdiff_t stride = s->frame->linesize[c_idx];
908     int hshift       = s->sps->hshift[c_idx];
909     int vshift       = s->sps->vshift[c_idx];
910     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
911                                               ((x0 >> hshift) << s->sps->pixel_shift)];
912     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
913     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
914
915     int trafo_size = 1 << log2_trafo_size;
916     int i, qp, shift, add, scale, scale_m;
917     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
918     const uint8_t *scale_matrix;
919     uint8_t dc_scale;
920
921     // Derive QP for dequant
922     if (!lc->cu.cu_transquant_bypass_flag) {
923         static const int qp_c[] = {
924             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
925         };
926
927         static const uint8_t rem6[51 + 2 * 6 + 1] = {
928             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
929             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
930             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
931         };
932
933         static const uint8_t div6[51 + 2 * 6 + 1] = {
934             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
935             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
936             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
937         };
938         int qp_y = lc->qp_y;
939
940         if (c_idx == 0) {
941             qp = qp_y + s->sps->qp_bd_offset;
942         } else {
943             int qp_i, offset;
944
945             if (c_idx == 1)
946                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
947             else
948                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
949
950             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
951             if (qp_i < 30)
952                 qp = qp_i;
953             else if (qp_i > 43)
954                 qp = qp_i - 6;
955             else
956                 qp = qp_c[qp_i - 30];
957
958             qp += s->sps->qp_bd_offset;
959         }
960
961         shift    = s->sps->bit_depth + log2_trafo_size - 5;
962         add      = 1 << (shift - 1);
963         scale    = level_scale[rem6[qp]] << (div6[qp]);
964         scale_m  = 16; // default when no custom scaling lists.
965         dc_scale = 16;
966
967         if (s->sps->scaling_list_enable_flag) {
968             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
969                                     &s->pps->scaling_list : &s->sps->scaling_list;
970             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
971
972             if (log2_trafo_size != 5)
973                 matrix_id = 3 * matrix_id + c_idx;
974
975             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
976             if (log2_trafo_size >= 4)
977                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
978         }
979     }
980
981     if (s->pps->transform_skip_enabled_flag &&
982         !lc->cu.cu_transquant_bypass_flag   &&
983         log2_trafo_size == 2) {
984         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
985     }
986
987     last_significant_coeff_x =
988         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
989     last_significant_coeff_y =
990         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
991
992     if (last_significant_coeff_x > 3) {
993         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
994         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
995                                    (2 + (last_significant_coeff_x & 1)) +
996                                    suffix;
997     }
998
999     if (last_significant_coeff_y > 3) {
1000         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1001         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1002                                    (2 + (last_significant_coeff_y & 1)) +
1003                                    suffix;
1004     }
1005
1006     if (scan_idx == SCAN_VERT)
1007         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1008
1009     x_cg_last_sig = last_significant_coeff_x >> 2;
1010     y_cg_last_sig = last_significant_coeff_y >> 2;
1011
1012     switch (scan_idx) {
1013     case SCAN_DIAG: {
1014         int last_x_c = last_significant_coeff_x & 3;
1015         int last_y_c = last_significant_coeff_y & 3;
1016
1017         scan_x_off = ff_hevc_diag_scan4x4_x;
1018         scan_y_off = ff_hevc_diag_scan4x4_y;
1019         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1020         if (trafo_size == 4) {
1021             scan_x_cg = scan_1x1;
1022             scan_y_cg = scan_1x1;
1023         } else if (trafo_size == 8) {
1024             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1025             scan_x_cg  = diag_scan2x2_x;
1026             scan_y_cg  = diag_scan2x2_y;
1027         } else if (trafo_size == 16) {
1028             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1029             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1030             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1031         } else { // trafo_size == 32
1032             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1033             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1034             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1035         }
1036         break;
1037     }
1038     case SCAN_HORIZ:
1039         scan_x_cg  = horiz_scan2x2_x;
1040         scan_y_cg  = horiz_scan2x2_y;
1041         scan_x_off = horiz_scan4x4_x;
1042         scan_y_off = horiz_scan4x4_y;
1043         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1044         break;
1045     default: //SCAN_VERT
1046         scan_x_cg  = horiz_scan2x2_y;
1047         scan_y_cg  = horiz_scan2x2_x;
1048         scan_x_off = horiz_scan4x4_y;
1049         scan_y_off = horiz_scan4x4_x;
1050         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1051         break;
1052     }
1053     num_coeff++;
1054     num_last_subset = (num_coeff - 1) >> 4;
1055
1056     for (i = num_last_subset; i >= 0; i--) {
1057         int n, m;
1058         int x_cg, y_cg, x_c, y_c;
1059         int implicit_non_zero_coeff = 0;
1060         int64_t trans_coeff_level;
1061         int prev_sig = 0;
1062         int offset   = i << 4;
1063
1064         uint8_t significant_coeff_flag_idx[16];
1065         uint8_t nb_significant_coeff_flag = 0;
1066
1067         x_cg = scan_x_cg[i];
1068         y_cg = scan_y_cg[i];
1069
1070         if (i < num_last_subset && i > 0) {
1071             int ctx_cg = 0;
1072             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1073                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1074             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1075                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1076
1077             significant_coeff_group_flag[x_cg][y_cg] =
1078                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1079             implicit_non_zero_coeff = 1;
1080         } else {
1081             significant_coeff_group_flag[x_cg][y_cg] =
1082                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1083                  (x_cg == 0 && y_cg == 0));
1084         }
1085
1086         last_scan_pos = num_coeff - offset - 1;
1087
1088         if (i == num_last_subset) {
1089             n_end                         = last_scan_pos - 1;
1090             significant_coeff_flag_idx[0] = last_scan_pos;
1091             nb_significant_coeff_flag     = 1;
1092         } else {
1093             n_end = 15;
1094         }
1095
1096         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1097             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1098         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1099             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1100
1101         for (n = n_end; n >= 0; n--) {
1102             GET_COORD(offset, n);
1103
1104             if (significant_coeff_group_flag[x_cg][y_cg] &&
1105                 (n > 0 || implicit_non_zero_coeff == 0)) {
1106                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1107                                                           log2_trafo_size,
1108                                                           scan_idx,
1109                                                           prev_sig) == 1) {
1110                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1111                     nb_significant_coeff_flag++;
1112                     implicit_non_zero_coeff = 0;
1113                 }
1114             } else {
1115                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1116                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1117                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1118                     nb_significant_coeff_flag++;
1119                 }
1120             }
1121         }
1122
1123         n_end = nb_significant_coeff_flag;
1124
1125         if (n_end) {
1126             int first_nz_pos_in_cg = 16;
1127             int last_nz_pos_in_cg = -1;
1128             int c_rice_param = 0;
1129             int first_greater1_coeff_idx = -1;
1130             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1131             uint16_t coeff_sign_flag;
1132             int sum_abs = 0;
1133             int sign_hidden = 0;
1134
1135             // initialize first elem of coeff_bas_level_greater1_flag
1136             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1137
1138             if (!(i == num_last_subset) && greater1_ctx == 0)
1139                 ctx_set++;
1140             greater1_ctx      = 1;
1141             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1142
1143             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1144                 int n_idx = significant_coeff_flag_idx[m];
1145                 int inc   = (ctx_set << 2) + greater1_ctx;
1146                 coeff_abs_level_greater1_flag[n_idx] =
1147                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1148                 if (coeff_abs_level_greater1_flag[n_idx]) {
1149                     greater1_ctx = 0;
1150                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1151                     greater1_ctx++;
1152                 }
1153
1154                 if (coeff_abs_level_greater1_flag[n_idx] &&
1155                     first_greater1_coeff_idx == -1)
1156                     first_greater1_coeff_idx = n_idx;
1157             }
1158             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1159             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1160                                  !lc->cu.cu_transquant_bypass_flag;
1161
1162             if (first_greater1_coeff_idx != -1) {
1163                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1164             }
1165             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1166                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1167             } else {
1168                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1169             }
1170
1171             for (m = 0; m < n_end; m++) {
1172                 n = significant_coeff_flag_idx[m];
1173                 GET_COORD(offset, n);
1174                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1175                 if (trans_coeff_level == ((m < 8) ?
1176                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1177                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1178
1179                     trans_coeff_level += last_coeff_abs_level_remaining;
1180                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1181                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1182                 }
1183                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1184                     sum_abs += trans_coeff_level;
1185                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1186                         trans_coeff_level = -trans_coeff_level;
1187                 }
1188                 if (coeff_sign_flag >> 15)
1189                     trans_coeff_level = -trans_coeff_level;
1190                 coeff_sign_flag <<= 1;
1191                 if (!lc->cu.cu_transquant_bypass_flag) {
1192                     if (s->sps->scaling_list_enable_flag) {
1193                         if (y_c || x_c || log2_trafo_size < 4) {
1194                             int pos;
1195                             switch (log2_trafo_size) {
1196                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1197                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1198                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1199                             default: pos = (y_c        << 2) +  x_c;
1200                             }
1201                             scale_m = scale_matrix[pos];
1202                         } else {
1203                             scale_m = dc_scale;
1204                         }
1205                     }
1206                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1207                     if(trans_coeff_level < 0) {
1208                         if((~trans_coeff_level) & 0xFffffffffff8000)
1209                             trans_coeff_level = -32768;
1210                     } else {
1211                         if (trans_coeff_level & 0xffffffffffff8000)
1212                             trans_coeff_level = 32767;
1213                     }
1214                 }
1215                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1216             }
1217         }
1218     }
1219
1220     if (lc->cu.cu_transquant_bypass_flag) {
1221         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1222     } else {
1223         if (transform_skip_flag)
1224             s->hevcdsp.transform_skip(dst, coeffs, stride);
1225         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1226                  log2_trafo_size == 2)
1227             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1228         else
1229             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1230     }
1231 }
1232
1233 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1234                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1235                               int log2_cb_size, int log2_trafo_size,
1236                               int trafo_depth, int blk_idx)
1237 {
1238     HEVCLocalContext *lc = &s->HEVClc;
1239
1240     if (lc->cu.pred_mode == MODE_INTRA) {
1241         int trafo_size = 1 << log2_trafo_size;
1242         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1243
1244         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1245         if (log2_trafo_size > 2) {
1246             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1247             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1248             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1249             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1250         } else if (blk_idx == 3) {
1251             trafo_size = trafo_size << s->sps->hshift[1];
1252             ff_hevc_set_neighbour_available(s, xBase, yBase,
1253                                             trafo_size, trafo_size);
1254             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1255             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1256         }
1257     }
1258
1259     if (lc->tt.cbf_luma ||
1260         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1261         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1262         int scan_idx   = SCAN_DIAG;
1263         int scan_idx_c = SCAN_DIAG;
1264
1265         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1266             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1267             if (lc->tu.cu_qp_delta != 0)
1268                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1269                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1270             lc->tu.is_cu_qp_delta_coded = 1;
1271
1272             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
1273                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
1274                 av_log(s->avctx, AV_LOG_ERROR,
1275                        "The cu_qp_delta %d is outside the valid range "
1276                        "[%d, %d].\n",
1277                        lc->tu.cu_qp_delta,
1278                        -(26 + s->sps->qp_bd_offset / 2),
1279                         (25 + s->sps->qp_bd_offset / 2));
1280                 return AVERROR_INVALIDDATA;
1281             }
1282
1283             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1284         }
1285
1286         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1287             if (lc->tu.cur_intra_pred_mode >= 6 &&
1288                 lc->tu.cur_intra_pred_mode <= 14) {
1289                 scan_idx = SCAN_VERT;
1290             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1291                        lc->tu.cur_intra_pred_mode <= 30) {
1292                 scan_idx = SCAN_HORIZ;
1293             }
1294
1295             if (lc->pu.intra_pred_mode_c >=  6 &&
1296                 lc->pu.intra_pred_mode_c <= 14) {
1297                 scan_idx_c = SCAN_VERT;
1298             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1299                        lc->pu.intra_pred_mode_c <= 30) {
1300                 scan_idx_c = SCAN_HORIZ;
1301             }
1302         }
1303
1304         if (lc->tt.cbf_luma)
1305             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1306         if (log2_trafo_size > 2) {
1307             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0))
1308                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1309             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0))
1310                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1311         } else if (blk_idx == 3) {
1312             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase))
1313                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1314             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase))
1315                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1316         }
1317     }
1318     return 0;
1319 }
1320
1321 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1322 {
1323     int cb_size          = 1 << log2_cb_size;
1324     int log2_min_pu_size = s->sps->log2_min_pu_size;
1325
1326     int min_pu_width     = s->sps->min_pu_width;
1327     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1328     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1329     int i, j;
1330
1331     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1332         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1333             s->is_pcm[i + j * min_pu_width] = 2;
1334 }
1335
1336 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1337                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1338                               int log2_cb_size, int log2_trafo_size,
1339                               int trafo_depth, int blk_idx)
1340 {
1341     HEVCLocalContext *lc = &s->HEVClc;
1342     uint8_t split_transform_flag;
1343     int ret;
1344
1345     if (trafo_depth > 0 && log2_trafo_size == 2) {
1346         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1347             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1348         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1349             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1350     } else {
1351         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1352         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1353     }
1354
1355     if (lc->cu.intra_split_flag) {
1356         if (trafo_depth == 1)
1357             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1358     } else {
1359         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1360     }
1361
1362     lc->tt.cbf_luma = 1;
1363
1364     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1365         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1366         trafo_depth     < lc->cu.max_trafo_depth       &&
1367         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1368         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1369     } else {
1370         int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1371                           lc->cu.pred_mode == MODE_INTER &&
1372                           lc->cu.part_mode != PART_2Nx2N &&
1373                           trafo_depth == 0;
1374
1375         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1376                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1377                                inter_split;
1378     }
1379
1380     if (log2_trafo_size > 2) {
1381         if (trafo_depth == 0 ||
1382             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1383             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1384                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1385         }
1386
1387         if (trafo_depth == 0 ||
1388             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1389             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1390                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1391         }
1392     }
1393
1394     if (split_transform_flag) {
1395         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1396         const int x1 = x0 + trafo_size_split;
1397         const int y1 = y0 + trafo_size_split;
1398
1399 #define SUBDIVIDE(x, y, idx)                                                    \
1400 do {                                                                            \
1401     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1402                              log2_trafo_size - 1, trafo_depth + 1, idx);        \
1403     if (ret < 0)                                                                \
1404         return ret;                                                             \
1405 } while (0)
1406
1407         SUBDIVIDE(x0, y0, 0);
1408         SUBDIVIDE(x1, y0, 1);
1409         SUBDIVIDE(x0, y1, 2);
1410         SUBDIVIDE(x1, y1, 3);
1411
1412 #undef SUBDIVIDE
1413     } else {
1414         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1415         int log2_min_tu_size = s->sps->log2_min_tb_size;
1416         int min_tu_width     = s->sps->min_tb_width;
1417
1418         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1419             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1420             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1421             lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1422         }
1423
1424         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1425                                  log2_cb_size, log2_trafo_size, trafo_depth,
1426                                  blk_idx);
1427         if (ret < 0)
1428             return ret;
1429         // TODO: store cbf_luma somewhere else
1430         if (lc->tt.cbf_luma) {
1431             int i, j;
1432             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1433                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1434                     int x_tu = (x0 + j) >> log2_min_tu_size;
1435                     int y_tu = (y0 + i) >> log2_min_tu_size;
1436                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1437                 }
1438         }
1439         if (!s->sh.disable_deblocking_filter_flag) {
1440             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1441                                                   lc->slice_or_tiles_up_boundary,
1442                                                   lc->slice_or_tiles_left_boundary);
1443             if (s->pps->transquant_bypass_enable_flag &&
1444                 lc->cu.cu_transquant_bypass_flag)
1445                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1446         }
1447     }
1448     return 0;
1449 }
1450
1451 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1452 {
1453     //TODO: non-4:2:0 support
1454     HEVCLocalContext *lc = &s->HEVClc;
1455     GetBitContext gb;
1456     int cb_size   = 1 << log2_cb_size;
1457     int stride0   = s->frame->linesize[0];
1458     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1459     int   stride1 = s->frame->linesize[1];
1460     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1461     int   stride2 = s->frame->linesize[2];
1462     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1463
1464     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1465     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1466     int ret;
1467
1468     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1469                                           lc->slice_or_tiles_up_boundary,
1470                                           lc->slice_or_tiles_left_boundary);
1471
1472     ret = init_get_bits(&gb, pcm, length);
1473     if (ret < 0)
1474         return ret;
1475
1476     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1477     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1478     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1479     return 0;
1480 }
1481
1482 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1483 {
1484     HEVCLocalContext *lc = &s->HEVClc;
1485     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1486     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1487
1488     if (x)
1489         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1490     if (y)
1491         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1492
1493     switch (x) {
1494     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1495     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1496     case 0: lc->pu.mvd.x = 0;                               break;
1497     }
1498
1499     switch (y) {
1500     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1501     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1502     case 0: lc->pu.mvd.y = 0;                               break;
1503     }
1504 }
1505
1506 /**
1507  * 8.5.3.2.2.1 Luma sample interpolation process
1508  *
1509  * @param s HEVC decoding context
1510  * @param dst target buffer for block data at block position
1511  * @param dststride stride of the dst buffer
1512  * @param ref reference picture buffer at origin (0, 0)
1513  * @param mv motion vector (relative to block position) to get pixel data from
1514  * @param x_off horizontal position of block from origin (0, 0)
1515  * @param y_off vertical position of block from origin (0, 0)
1516  * @param block_w width of block
1517  * @param block_h height of block
1518  */
1519 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1520                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1521                     int block_w, int block_h)
1522 {
1523     HEVCLocalContext *lc = &s->HEVClc;
1524     uint8_t *src         = ref->data[0];
1525     ptrdiff_t srcstride  = ref->linesize[0];
1526     int pic_width        = s->sps->width;
1527     int pic_height       = s->sps->height;
1528
1529     int mx         = mv->x & 3;
1530     int my         = mv->y & 3;
1531     int extra_left = ff_hevc_qpel_extra_before[mx];
1532     int extra_top  = ff_hevc_qpel_extra_before[my];
1533
1534     x_off += mv->x >> 2;
1535     y_off += mv->y >> 2;
1536     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1537
1538     if (x_off < extra_left || y_off < extra_top ||
1539         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1540         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1541         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1542         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1543         int buf_offset = extra_top *
1544                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1545
1546         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1547                                  edge_emu_stride, srcstride,
1548                                  block_w + ff_hevc_qpel_extra[mx],
1549                                  block_h + ff_hevc_qpel_extra[my],
1550                                  x_off - extra_left, y_off - extra_top,
1551                                  pic_width, pic_height);
1552         src = lc->edge_emu_buffer + buf_offset;
1553         srcstride = edge_emu_stride;
1554     }
1555     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1556                                      block_h, lc->mc_buffer);
1557 }
1558
1559 /**
1560  * 8.5.3.2.2.2 Chroma sample interpolation process
1561  *
1562  * @param s HEVC decoding context
1563  * @param dst1 target buffer for block data at block position (U plane)
1564  * @param dst2 target buffer for block data at block position (V plane)
1565  * @param dststride stride of the dst1 and dst2 buffers
1566  * @param ref reference picture buffer at origin (0, 0)
1567  * @param mv motion vector (relative to block position) to get pixel data from
1568  * @param x_off horizontal position of block from origin (0, 0)
1569  * @param y_off vertical position of block from origin (0, 0)
1570  * @param block_w width of block
1571  * @param block_h height of block
1572  */
1573 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1574                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1575                       int x_off, int y_off, int block_w, int block_h)
1576 {
1577     HEVCLocalContext *lc = &s->HEVClc;
1578     uint8_t *src1        = ref->data[1];
1579     uint8_t *src2        = ref->data[2];
1580     ptrdiff_t src1stride = ref->linesize[1];
1581     ptrdiff_t src2stride = ref->linesize[2];
1582     int pic_width        = s->sps->width >> 1;
1583     int pic_height       = s->sps->height >> 1;
1584
1585     int mx = mv->x & 7;
1586     int my = mv->y & 7;
1587
1588     x_off += mv->x >> 3;
1589     y_off += mv->y >> 3;
1590     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1591     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1592
1593     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1594         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1595         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1596         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1597         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1598         int buf_offset1 = EPEL_EXTRA_BEFORE *
1599                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1600         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1601         int buf_offset2 = EPEL_EXTRA_BEFORE *
1602                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1603
1604         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1605                                  edge_emu_stride, src1stride,
1606                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1607                                  x_off - EPEL_EXTRA_BEFORE,
1608                                  y_off - EPEL_EXTRA_BEFORE,
1609                                  pic_width, pic_height);
1610
1611         src1 = lc->edge_emu_buffer + buf_offset1;
1612         src1stride = edge_emu_stride;
1613         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1614                                              block_w, block_h, mx, my, lc->mc_buffer);
1615
1616         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1617                                  edge_emu_stride, src2stride,
1618                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1619                                  x_off - EPEL_EXTRA_BEFORE,
1620                                  y_off - EPEL_EXTRA_BEFORE,
1621                                  pic_width, pic_height);
1622         src2 = lc->edge_emu_buffer + buf_offset2;
1623         src2stride = edge_emu_stride;
1624
1625         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1626                                              block_w, block_h, mx, my,
1627                                              lc->mc_buffer);
1628     } else {
1629         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1630                                              block_w, block_h, mx, my,
1631                                              lc->mc_buffer);
1632         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1633                                              block_w, block_h, mx, my,
1634                                              lc->mc_buffer);
1635     }
1636 }
1637
1638 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1639                                 const Mv *mv, int y0, int height)
1640 {
1641     int y = (mv->y >> 2) + y0 + height + 9;
1642     ff_thread_await_progress(&ref->tf, y, 0);
1643 }
1644
1645 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1646                                 int nPbW, int nPbH,
1647                                 int log2_cb_size, int partIdx)
1648 {
1649 #define POS(c_idx, x, y)                                                              \
1650     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1651                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1652     HEVCLocalContext *lc = &s->HEVClc;
1653     int merge_idx = 0;
1654     struct MvField current_mv = {{{ 0 }}};
1655
1656     int min_pu_width = s->sps->min_pu_width;
1657
1658     MvField *tab_mvf = s->ref->tab_mvf;
1659     RefPicList  *refPicList = s->ref->refPicList;
1660     HEVCFrame *ref0, *ref1;
1661
1662     int tmpstride = MAX_PB_SIZE;
1663
1664     uint8_t *dst0 = POS(0, x0, y0);
1665     uint8_t *dst1 = POS(1, x0, y0);
1666     uint8_t *dst2 = POS(2, x0, y0);
1667     int log2_min_cb_size = s->sps->log2_min_cb_size;
1668     int min_cb_width     = s->sps->min_cb_width;
1669     int x_cb             = x0 >> log2_min_cb_size;
1670     int y_cb             = y0 >> log2_min_cb_size;
1671     int ref_idx[2];
1672     int mvp_flag[2];
1673     int x_pu, y_pu;
1674     int i, j;
1675
1676     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1677         if (s->sh.max_num_merge_cand > 1)
1678             merge_idx = ff_hevc_merge_idx_decode(s);
1679         else
1680             merge_idx = 0;
1681
1682         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1683                                    1 << log2_cb_size,
1684                                    1 << log2_cb_size,
1685                                    log2_cb_size, partIdx,
1686                                    merge_idx, &current_mv);
1687         x_pu = x0 >> s->sps->log2_min_pu_size;
1688         y_pu = y0 >> s->sps->log2_min_pu_size;
1689
1690         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1691             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1692                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1693     } else { /* MODE_INTER */
1694         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1695         if (lc->pu.merge_flag) {
1696             if (s->sh.max_num_merge_cand > 1)
1697                 merge_idx = ff_hevc_merge_idx_decode(s);
1698             else
1699                 merge_idx = 0;
1700
1701             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1702                                        partIdx, merge_idx, &current_mv);
1703             x_pu = x0 >> s->sps->log2_min_pu_size;
1704             y_pu = y0 >> s->sps->log2_min_pu_size;
1705
1706             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1707                 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1708                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1709         } else {
1710             enum InterPredIdc inter_pred_idc = PRED_L0;
1711             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1712             if (s->sh.slice_type == B_SLICE)
1713                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1714
1715             if (inter_pred_idc != PRED_L1) {
1716                 if (s->sh.nb_refs[L0]) {
1717                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1718                     current_mv.ref_idx[0] = ref_idx[0];
1719                 }
1720                 current_mv.pred_flag[0] = 1;
1721                 hls_mvd_coding(s, x0, y0, 0);
1722                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1723                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1724                                          partIdx, merge_idx, &current_mv,
1725                                          mvp_flag[0], 0);
1726                 current_mv.mv[0].x += lc->pu.mvd.x;
1727                 current_mv.mv[0].y += lc->pu.mvd.y;
1728             }
1729
1730             if (inter_pred_idc != PRED_L0) {
1731                 if (s->sh.nb_refs[L1]) {
1732                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1733                     current_mv.ref_idx[1] = ref_idx[1];
1734                 }
1735
1736                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1737                     lc->pu.mvd.x = 0;
1738                     lc->pu.mvd.y = 0;
1739                 } else {
1740                     hls_mvd_coding(s, x0, y0, 1);
1741                 }
1742
1743                 current_mv.pred_flag[1] = 1;
1744                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1745                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1746                                          partIdx, merge_idx, &current_mv,
1747                                          mvp_flag[1], 1);
1748                 current_mv.mv[1].x += lc->pu.mvd.x;
1749                 current_mv.mv[1].y += lc->pu.mvd.y;
1750             }
1751
1752             x_pu = x0 >> s->sps->log2_min_pu_size;
1753             y_pu = y0 >> s->sps->log2_min_pu_size;
1754
1755             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1756                 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1757                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1758         }
1759     }
1760
1761     if (current_mv.pred_flag[0]) {
1762         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1763         if (!ref0)
1764             return;
1765         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1766     }
1767     if (current_mv.pred_flag[1]) {
1768         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1769         if (!ref1)
1770             return;
1771         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1772     }
1773
1774     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1775         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1776         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1777
1778         luma_mc(s, tmp, tmpstride, ref0->frame,
1779                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1780
1781         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1782             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1783             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1784                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1785                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1786                                      dst0, s->frame->linesize[0], tmp,
1787                                      tmpstride, nPbW, nPbH);
1788         } else {
1789             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1790         }
1791         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1792                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1793
1794         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1795             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1796             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1797                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1798                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1799                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1800                                      nPbW / 2, nPbH / 2);
1801             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1802                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1803                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1804                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1805                                      nPbW / 2, nPbH / 2);
1806         } else {
1807             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1808             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1809         }
1810     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1811         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1812         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1813
1814         if (!ref1)
1815             return;
1816
1817         luma_mc(s, tmp, tmpstride, ref1->frame,
1818                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1819
1820         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1821             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1822             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1823                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1824                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1825                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1826                                       nPbW, nPbH);
1827         } else {
1828             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1829         }
1830
1831         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1832                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1833
1834         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1835             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1836             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1837                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1838                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1839                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1840             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1841                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1842                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1843                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1844         } else {
1845             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1846             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1847         }
1848     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1849         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1850         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1851         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1852         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1853         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1854         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1855
1856         if (!ref0 || !ref1)
1857             return;
1858
1859         luma_mc(s, tmp, tmpstride, ref0->frame,
1860                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1861         luma_mc(s, tmp2, tmpstride, ref1->frame,
1862                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1863
1864         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1865             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1866             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1867                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1868                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1869                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1870                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1871                                          dst0, s->frame->linesize[0],
1872                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1873         } else {
1874             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1875                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1876         }
1877
1878         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1879                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1880         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1881                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1882
1883         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1884             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1885             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1886                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1887                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1888                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1889                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1890                                          dst1, s->frame->linesize[1], tmp, tmp3,
1891                                          tmpstride, nPbW / 2, nPbH / 2);
1892             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1893                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1894                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1895                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1896                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1897                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1898                                          tmpstride, nPbW / 2, nPbH / 2);
1899         } else {
1900             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1901             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1902         }
1903     }
1904 }
1905
1906 /**
1907  * 8.4.1
1908  */
1909 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1910                                 int prev_intra_luma_pred_flag)
1911 {
1912     HEVCLocalContext *lc = &s->HEVClc;
1913     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1914     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1915     int min_pu_width     = s->sps->min_pu_width;
1916     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1917     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1918     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1919
1920     int cand_up   = (lc->ctb_up_flag || y0b) ?
1921                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1922     int cand_left = (lc->ctb_left_flag || x0b) ?
1923                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1924
1925     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1926
1927     MvField *tab_mvf = s->ref->tab_mvf;
1928     int intra_pred_mode;
1929     int candidate[3];
1930     int i, j;
1931
1932     // intra_pred_mode prediction does not cross vertical CTB boundaries
1933     if ((y0 - 1) < y_ctb)
1934         cand_up = INTRA_DC;
1935
1936     if (cand_left == cand_up) {
1937         if (cand_left < 2) {
1938             candidate[0] = INTRA_PLANAR;
1939             candidate[1] = INTRA_DC;
1940             candidate[2] = INTRA_ANGULAR_26;
1941         } else {
1942             candidate[0] = cand_left;
1943             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1944             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1945         }
1946     } else {
1947         candidate[0] = cand_left;
1948         candidate[1] = cand_up;
1949         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1950             candidate[2] = INTRA_PLANAR;
1951         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1952             candidate[2] = INTRA_DC;
1953         } else {
1954             candidate[2] = INTRA_ANGULAR_26;
1955         }
1956     }
1957
1958     if (prev_intra_luma_pred_flag) {
1959         intra_pred_mode = candidate[lc->pu.mpm_idx];
1960     } else {
1961         if (candidate[0] > candidate[1])
1962             FFSWAP(uint8_t, candidate[0], candidate[1]);
1963         if (candidate[0] > candidate[2])
1964             FFSWAP(uint8_t, candidate[0], candidate[2]);
1965         if (candidate[1] > candidate[2])
1966             FFSWAP(uint8_t, candidate[1], candidate[2]);
1967
1968         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1969         for (i = 0; i < 3; i++)
1970             if (intra_pred_mode >= candidate[i])
1971                 intra_pred_mode++;
1972     }
1973
1974     /* write the intra prediction units into the mv array */
1975     if (!size_in_pus)
1976         size_in_pus = 1;
1977     for (i = 0; i < size_in_pus; i++) {
1978         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1979                intra_pred_mode, size_in_pus);
1980
1981         for (j = 0; j < size_in_pus; j++) {
1982             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1983             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1984             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1985             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1986             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1987             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1988             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1989             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1990             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1991         }
1992     }
1993
1994     return intra_pred_mode;
1995 }
1996
1997 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1998                                           int log2_cb_size, int ct_depth)
1999 {
2000     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
2001     int x_cb   = x0 >> s->sps->log2_min_cb_size;
2002     int y_cb   = y0 >> s->sps->log2_min_cb_size;
2003     int y;
2004
2005     for (y = 0; y < length; y++)
2006         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
2007                ct_depth, length);
2008 }
2009
2010 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
2011                                   int log2_cb_size)
2012 {
2013     HEVCLocalContext *lc = &s->HEVClc;
2014     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
2015     uint8_t prev_intra_luma_pred_flag[4];
2016     int split   = lc->cu.part_mode == PART_NxN;
2017     int pb_size = (1 << log2_cb_size) >> split;
2018     int side    = split + 1;
2019     int chroma_mode;
2020     int i, j;
2021
2022     for (i = 0; i < side; i++)
2023         for (j = 0; j < side; j++)
2024             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2025
2026     for (i = 0; i < side; i++) {
2027         for (j = 0; j < side; j++) {
2028             if (prev_intra_luma_pred_flag[2 * i + j])
2029                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2030             else
2031                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2032
2033             lc->pu.intra_pred_mode[2 * i + j] =
2034                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2035                                      prev_intra_luma_pred_flag[2 * i + j]);
2036         }
2037     }
2038
2039     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2040     if (chroma_mode != 4) {
2041         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2042             lc->pu.intra_pred_mode_c = 34;
2043         else
2044             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2045     } else {
2046         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2047     }
2048 }
2049
2050 static void intra_prediction_unit_default_value(HEVCContext *s,
2051                                                 int x0, int y0,
2052                                                 int log2_cb_size)
2053 {
2054     HEVCLocalContext *lc = &s->HEVClc;
2055     int pb_size          = 1 << log2_cb_size;
2056     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2057     int min_pu_width     = s->sps->min_pu_width;
2058     MvField *tab_mvf     = s->ref->tab_mvf;
2059     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2060     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2061     int j, k;
2062
2063     if (size_in_pus == 0)
2064         size_in_pus = 1;
2065     for (j = 0; j < size_in_pus; j++) {
2066         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2067         for (k = 0; k < size_in_pus; k++)
2068             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2069     }
2070 }
2071
2072 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2073 {
2074     int cb_size          = 1 << log2_cb_size;
2075     HEVCLocalContext *lc = &s->HEVClc;
2076     int log2_min_cb_size = s->sps->log2_min_cb_size;
2077     int length           = cb_size >> log2_min_cb_size;
2078     int min_cb_width     = s->sps->min_cb_width;
2079     int x_cb             = x0 >> log2_min_cb_size;
2080     int y_cb             = y0 >> log2_min_cb_size;
2081     int x, y, ret;
2082
2083     lc->cu.x                = x0;
2084     lc->cu.y                = y0;
2085     lc->cu.rqt_root_cbf     = 1;
2086     lc->cu.pred_mode        = MODE_INTRA;
2087     lc->cu.part_mode        = PART_2Nx2N;
2088     lc->cu.intra_split_flag = 0;
2089     lc->cu.pcm_flag         = 0;
2090
2091     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2092     for (x = 0; x < 4; x++)
2093         lc->pu.intra_pred_mode[x] = 1;
2094     if (s->pps->transquant_bypass_enable_flag) {
2095         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2096         if (lc->cu.cu_transquant_bypass_flag)
2097             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2098     } else
2099         lc->cu.cu_transquant_bypass_flag = 0;
2100
2101     if (s->sh.slice_type != I_SLICE) {
2102         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2103
2104         lc->cu.pred_mode = MODE_SKIP;
2105         x = y_cb * min_cb_width + x_cb;
2106         for (y = 0; y < length; y++) {
2107             memset(&s->skip_flag[x], skip_flag, length);
2108             x += min_cb_width;
2109         }
2110         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2111     }
2112
2113     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2114         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2115         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2116
2117         if (!s->sh.disable_deblocking_filter_flag)
2118             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2119                                                   lc->slice_or_tiles_up_boundary,
2120                                                   lc->slice_or_tiles_left_boundary);
2121     } else {
2122         if (s->sh.slice_type != I_SLICE)
2123             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2124         if (lc->cu.pred_mode != MODE_INTRA ||
2125             log2_cb_size == s->sps->log2_min_cb_size) {
2126             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2127             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2128                                       lc->cu.pred_mode == MODE_INTRA;
2129         }
2130
2131         if (lc->cu.pred_mode == MODE_INTRA) {
2132             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2133                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2134                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2135                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2136             }
2137             if (lc->cu.pcm_flag) {
2138                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2139                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2140                 if (s->sps->pcm.loop_filter_disable_flag)
2141                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2142
2143                 if (ret < 0)
2144                     return ret;
2145             } else {
2146                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2147             }
2148         } else {
2149             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2150             switch (lc->cu.part_mode) {
2151             case PART_2Nx2N:
2152                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2153                 break;
2154             case PART_2NxN:
2155                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2156                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2157                 break;
2158             case PART_Nx2N:
2159                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2160                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2161                 break;
2162             case PART_2NxnU:
2163                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2164                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2165                 break;
2166             case PART_2NxnD:
2167                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2168                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2169                 break;
2170             case PART_nLx2N:
2171                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2172                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2173                 break;
2174             case PART_nRx2N:
2175                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2176                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2177                 break;
2178             case PART_NxN:
2179                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2180                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2181                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2182                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2183                 break;
2184             }
2185         }
2186
2187         if (!lc->cu.pcm_flag) {
2188             if (lc->cu.pred_mode != MODE_INTRA &&
2189                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2190                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2191             }
2192             if (lc->cu.rqt_root_cbf) {
2193                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2194                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2195                                          s->sps->max_transform_hierarchy_depth_inter;
2196                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2197                                          log2_cb_size,
2198                                          log2_cb_size, 0, 0);
2199                 if (ret < 0)
2200                     return ret;
2201             } else {
2202                 if (!s->sh.disable_deblocking_filter_flag)
2203                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2204                                                           lc->slice_or_tiles_up_boundary,
2205                                                           lc->slice_or_tiles_left_boundary);
2206             }
2207         }
2208     }
2209
2210     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2211         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2212
2213     x = y_cb * min_cb_width + x_cb;
2214     for (y = 0; y < length; y++) {
2215         memset(&s->qp_y_tab[x], lc->qp_y, length);
2216         x += min_cb_width;
2217     }
2218
2219     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2220
2221     return 0;
2222 }
2223
2224 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2225                                int log2_cb_size, int cb_depth)
2226 {
2227     HEVCLocalContext *lc = &s->HEVClc;
2228     const int cb_size    = 1 << log2_cb_size;
2229     int split_cu;
2230
2231     lc->ct.depth = cb_depth;
2232     if (x0 + cb_size <= s->sps->width  &&
2233         y0 + cb_size <= s->sps->height &&
2234         log2_cb_size > s->sps->log2_min_cb_size) {
2235         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2236     } else {
2237         split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
2238     }
2239     if (s->pps->cu_qp_delta_enabled_flag &&
2240         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2241         lc->tu.is_cu_qp_delta_coded = 0;
2242         lc->tu.cu_qp_delta          = 0;
2243     }
2244
2245     if (split_cu) {
2246         const int cb_size_split = cb_size >> 1;
2247         const int x1 = x0 + cb_size_split;
2248         const int y1 = y0 + cb_size_split;
2249
2250         log2_cb_size--;
2251         cb_depth++;
2252
2253 #define SUBDIVIDE(x, y)                                                \
2254 do {                                                                   \
2255     if (x < s->sps->width && y < s->sps->height) {                     \
2256         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2257         if (ret < 0)                                                   \
2258             return ret;                                                \
2259     }                                                                  \
2260 } while (0)
2261
2262         SUBDIVIDE(x0, y0);
2263         SUBDIVIDE(x1, y0);
2264         SUBDIVIDE(x0, y1);
2265         SUBDIVIDE(x1, y1);
2266     } else {
2267         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2268         if (ret < 0)
2269             return ret;
2270     }
2271
2272     return 0;
2273 }
2274
/**
 * Set up the per-CTB neighbourhood state in the local context before the CTB
 * at position (x_ctb, y_ctb) is parsed.
 *
 * Records the current slice address for this CTB in s->tab_slice_address,
 * updates the tile extent fields (start_of_tiles_x, end_of_tiles_x,
 * end_of_tiles_y) and first_qp_group, and derives the slice/tile boundary
 * masks and the ctb_{left,up,up_right,up_left}_flag values.
 *
 * @param x_ctb       horizontal position of the CTB, compared against sps->width
 * @param y_ctb       vertical position of the CTB, compared against sps->height
 * @param ctb_addr_ts CTB address used to index pps->ctb_addr_ts_to_rs and
 *                    pps->tile_id
 */
2275 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2276                                  int ctb_addr_ts)
2277 {
2278     HEVCLocalContext *lc  = &s->HEVClc;
2279     int ctb_size          = 1 << s->sps->log2_ctb_size;
2280     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2281     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2282
2283     int tile_left_boundary, tile_up_boundary;
2284     int slice_left_boundary, slice_up_boundary;
2285
2286     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2287
         /* horizontal extent of the current tile; with entropy coding sync or
          * without tiles it is the whole picture width */
2288     if (s->pps->entropy_coding_sync_enabled_flag) {
2289         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2290             lc->first_qp_group = 1;
2291         lc->end_of_tiles_x = s->sps->width;
2292     } else if (s->pps->tiles_enabled_flag) {
             /* only updated when this CTB starts a different tile than the
              * previous CTB in ts order */
2293         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2294             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2295             lc->start_of_tiles_x = x_ctb;
2296             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2297             lc->first_qp_group   = 1;
2298         }
2299     } else {
2300         lc->end_of_tiles_x = s->sps->width;
2301     }
2302
2303     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2304
         /* NOTE: despite their names, in the tiles branch the *_boundary
          * variables are 1 when the left/upper CTB exists and belongs to the
          * SAME tile/slice as the current one, i.e. when there is no boundary */
2305     if (s->pps->tiles_enabled_flag) {
2306         tile_left_boundary  = x_ctb > 0 &&
2307                               s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2308         slice_left_boundary = x_ctb > 0 &&
2309                               s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2310         tile_up_boundary  = y_ctb > 0 &&
2311                             s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2312         slice_up_boundary = y_ctb > 0 &&
2313                             s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2314     } else {
             /* without tiles the slice test is based on how far into the
              * slice (in raster order) the current CTB is */
2315         tile_left_boundary  =
2316         tile_up_boundary    = 1;
2317         slice_left_boundary = ctb_addr_in_slice > 0;
2318         slice_up_boundary   = ctb_addr_in_slice >= s->sps->ctb_width;
2319     }
         /* boundary masks: bit 0 is set when the slice variable above is 0,
          * bit 1 when the tile variable above is 0 */
2320     lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2321     lc->slice_or_tiles_up_boundary   = (!slice_up_boundary + (!tile_up_boundary << 1));
2322     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2323     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2324     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2325     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2326 }
2327
2328 static int hls_slice_data(HEVCContext *s)
2329 {
2330     int ctb_size    = 1 << s->sps->log2_ctb_size;
2331     int more_data   = 1;
2332     int x_ctb       = 0;
2333     int y_ctb       = 0;
2334     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2335     int ret;
2336
2337     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2338         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2339
2340         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2341         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2342         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2343
2344         ff_hevc_cabac_init(s, ctb_addr_ts);
2345
2346         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2347
2348         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2349         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2350         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2351
2352         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2353         if (ret < 0)
2354             return ret;
2355         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2356
2357         ctb_addr_ts++;
2358         ff_hevc_save_states(s, ctb_addr_ts);
2359         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2360     }
2361
2362     if (x_ctb + ctb_size >= s->sps->width &&
2363         y_ctb + ctb_size >= s->sps->height)
2364         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2365
2366     return ctb_addr_ts;
2367 }
2368
2369 /**
2370  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2371  * 0 if the unit should be skipped, 1 otherwise
2372  */
2373 static int hls_nal_unit(HEVCContext *s)
2374 {
2375     GetBitContext *gb = &s->HEVClc.gb;
2376     int nuh_layer_id;
2377
2378     if (get_bits1(gb) != 0)
2379         return AVERROR_INVALIDDATA;
2380
2381     s->nal_unit_type = get_bits(gb, 6);
2382
2383     nuh_layer_id   = get_bits(gb, 6);
2384     s->temporal_id = get_bits(gb, 3) - 1;
2385     if (s->temporal_id < 0)
2386         return AVERROR_INVALIDDATA;
2387
2388     av_log(s->avctx, AV_LOG_DEBUG,
2389            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2390            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2391
2392     return nuh_layer_id == 0;
2393 }
2394
2395 static void restore_tqb_pixels(HEVCContext *s)
2396 {
2397     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2398     int x, y, c_idx;
2399
2400     for (c_idx = 0; c_idx < 3; c_idx++) {
2401         ptrdiff_t stride = s->frame->linesize[c_idx];
2402         int hshift       = s->sps->hshift[c_idx];
2403         int vshift       = s->sps->vshift[c_idx];
2404         for (y = 0; y < s->sps->min_pu_height; y++) {
2405             for (x = 0; x < s->sps->min_pu_width; x++) {
2406                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2407                     int n;
2408                     int len      = min_pu_size >> hshift;
2409                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2410                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2411                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2412                         memcpy(dst, src, len);
2413                         src += stride;
2414                         dst += stride;
2415                     }
2416                 }
2417             }
2418         }
2419     }
2420 }
2421
2422 static int set_side_data(HEVCContext *s)
2423 {
2424     AVFrame *out = s->ref->frame;
2425
2426     if (s->sei_frame_packing_present &&
2427         s->frame_packing_arrangement_type >= 3 &&
2428         s->frame_packing_arrangement_type <= 5 &&
2429         s->content_interpretation_type > 0 &&
2430         s->content_interpretation_type < 3) {
2431         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2432         if (!stereo)
2433             return AVERROR(ENOMEM);
2434
2435         switch (s->frame_packing_arrangement_type) {
2436         case 3:
2437             if (s->quincunx_subsampling)
2438                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2439             else
2440                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2441             break;
2442         case 4:
2443             stereo->type = AV_STEREO3D_TOPBOTTOM;
2444             break;
2445         case 5:
2446             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2447             break;
2448         }
2449
2450         if (s->content_interpretation_type == 2)
2451             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2452     }
2453
2454     if (s->sei_display_orientation_present &&
2455         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2456         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2457         AVFrameSideData *rotation = av_frame_new_side_data(out,
2458                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2459                                                            sizeof(int32_t) * 9);
2460         if (!rotation)
2461             return AVERROR(ENOMEM);
2462
2463         av_display_rotation_set((int32_t *)rotation->data, angle);
2464         av_display_matrix_flip((int32_t *)rotation->data,
2465                                s->sei_vflip, s->sei_hflip);
2466     }
2467
2468     return 0;
2469 }
2470
2471 static int hevc_frame_start(HEVCContext *s)
2472 {
2473     HEVCLocalContext *lc = &s->HEVClc;
2474     int ret;
2475
2476     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2477     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2478     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2479     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2480
2481     lc->start_of_tiles_x = 0;
2482     s->is_decoded        = 0;
2483     s->first_nal_type    = s->nal_unit_type;
2484
2485     if (s->pps->tiles_enabled_flag)
2486         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2487
2488     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2489                               s->poc);
2490     if (ret < 0)
2491         goto fail;
2492
2493     ret = ff_hevc_frame_rps(s);
2494     if (ret < 0) {
2495         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2496         goto fail;
2497     }
2498
2499     s->ref->frame->key_frame = IS_IRAP(s);
2500
2501     ret = set_side_data(s);
2502     if (ret < 0)
2503         goto fail;
2504
2505     av_frame_unref(s->output_frame);
2506     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2507     if (ret < 0)
2508         goto fail;
2509
2510     ff_thread_finish_setup(s->avctx);
2511
2512     return 0;
2513
2514 fail:
2515     if (s->ref)
2516         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2517     s->ref = NULL;
2518     return ret;
2519 }
2520
/**
 * Decode a single NAL unit.
 *
 * Parses the NAL unit header and dispatches on s->nal_unit_type: parameter
 * sets and SEI messages go to their respective parsers, VCL NAL units get
 * their slice header parsed and, unless the picture is skipped, their slice
 * data decoded.
 *
 * Error policy: failures that reach the fail label are returned to the caller
 * only when AV_EF_EXPLODE is set in err_recognition, otherwise 0 is returned.
 * Failures of hls_slice_header() and hevc_frame_start(), and mismatching VCL
 * NAL types, are returned directly regardless of that flag.
 *
 * @param nal    start of the NAL unit data (header included)
 * @param length size of the NAL unit data in bytes
 * @return 0 on success or when the unit is skipped, a negative error code
 *         otherwise (subject to the policy above)
 */
2521 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2522 {
2523     HEVCLocalContext *lc = &s->HEVClc;
2524     GetBitContext *gb    = &lc->gb;
2525     int ctb_addr_ts, ret;
2526
2527     ret = init_get_bits8(gb, nal, length);
2528     if (ret < 0)
2529         return ret;
2530
2531     ret = hls_nal_unit(s);
2532     if (ret < 0) {
2533         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2534                s->nal_unit_type);
2535         goto fail;
2536     } else if (!ret)
2537         return 0;
2538
2539     switch (s->nal_unit_type) {
2540     case NAL_VPS:
2541         ret = ff_hevc_decode_nal_vps(s);
2542         if (ret < 0)
2543             goto fail;
2544         break;
2545     case NAL_SPS:
2546         ret = ff_hevc_decode_nal_sps(s);
2547         if (ret < 0)
2548             goto fail;
2549         break;
2550     case NAL_PPS:
2551         ret = ff_hevc_decode_nal_pps(s);
2552         if (ret < 0)
2553             goto fail;
2554         break;
2555     case NAL_SEI_PREFIX:
2556     case NAL_SEI_SUFFIX:
2557         ret = ff_hevc_decode_nal_sei(s);
2558         if (ret < 0)
2559             goto fail;
2560         break;
2561     case NAL_TRAIL_R:
2562     case NAL_TRAIL_N:
2563     case NAL_TSA_N:
2564     case NAL_TSA_R:
2565     case NAL_STSA_N:
2566     case NAL_STSA_R:
2567     case NAL_BLA_W_LP:
2568     case NAL_BLA_W_RADL:
2569     case NAL_BLA_N_LP:
2570     case NAL_IDR_W_RADL:
2571     case NAL_IDR_N_LP:
2572     case NAL_CRA_NUT:
2573     case NAL_RADL_N:
2574     case NAL_RADL_R:
2575     case NAL_RASL_N:
2576     case NAL_RASL_R:
2577         ret = hls_slice_header(s);
2578         if (ret < 0)
2579             return ret;
2580
             /* max_ra == INT_MAX means no threshold has been chosen yet; pick
              * one from the first CRA/BLA (its POC) or IDR (INT_MIN) seen */
2581         if (s->max_ra == INT_MAX) {
2582             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2583                 s->max_ra = s->poc;
2584             } else {
2585                 if (IS_IDR(s))
2586                     s->max_ra = INT_MIN;
2587             }
2588         }
2589
             /* RASL pictures whose POC does not exceed max_ra are not decoded */
2590         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2591             s->poc <= s->max_ra) {
2592             s->is_decoded = 0;
2593             break;
2594         } else {
2595             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2596                 s->max_ra = INT_MIN;
2597         }
2598
2599         if (s->sh.first_slice_in_pic_flag) {
2600             ret = hevc_frame_start(s);
2601             if (ret < 0)
2602                 return ret;
2603         } else if (!s->ref) {
2604             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2605             goto fail;
2606         }
2607
             /* first_nal_type is recorded by hevc_frame_start() */
2608         if (s->nal_unit_type != s->first_nal_type) {
2609             av_log(s->avctx, AV_LOG_ERROR,
2610                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2611                    s->first_nal_type, s->nal_unit_type);
2612             return AVERROR_INVALIDDATA;
2613         }
2614
2615         if (!s->sh.dependent_slice_segment_flag &&
2616             s->sh.slice_type != I_SLICE) {
2617             ret = ff_hevc_slice_rpl(s);
2618             if (ret < 0) {
2619                 av_log(s->avctx, AV_LOG_WARNING,
2620                        "Error constructing the reference lists for the current slice.\n");
2621                 goto fail;
2622             }
2623         }
2624
2625         ctb_addr_ts = hls_slice_data(s);
             /* the picture is complete once the returned address covers every
              * CTB of the picture */
2626         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2627             s->is_decoded = 1;
2628             if ((s->pps->transquant_bypass_enable_flag ||
2629                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2630                 s->sps->sao_enabled)
2631                 restore_tqb_pixels(s);
2632         }
2633
             /* a negative value from hls_slice_data() is an error code */
2634         if (ctb_addr_ts < 0) {
2635             ret = ctb_addr_ts;
2636             goto fail;
2637         }
2638         break;
2639     case NAL_EOS_NUT:
2640     case NAL_EOB_NUT:
2641         s->seq_decode = (s->seq_decode + 1) & 0xff;
2642         s->max_ra     = INT_MAX;
2643         break;
2644     case NAL_AUD:
2645     case NAL_FD_NUT:
2646         break;
2647     default:
2648         av_log(s->avctx, AV_LOG_INFO,
2649                "Skipping NAL unit %d\n", s->nal_unit_type);
2650     }
2651
2652     return 0;
2653 fail:
         /* errors are only reported to the caller when AV_EF_EXPLODE is set */
2654     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2655         return ret;
2656     return 0;
2657 }
2658
2659 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2660  * between these functions would be nice. */
/**
 * Extract the payload of one NAL unit from src, removing escape bytes.
 *
 * The input is first scanned for a 00 00 0x sequence with x <= 3. If x is not
 * 3 the sequence is treated as the next start code and length is truncated
 * there. When no escape is found, nal->data points directly into src and
 * nothing is copied. Otherwise the data is copied into nal->rbsp_buffer
 * (grown with av_fast_malloc()), dropping the 03 byte of every 00 00 03
 * sequence, and FF_INPUT_BUFFER_PADDING_SIZE zero bytes are appended.
 *
 * @param src    input bytes following the start code / length prefix
 * @param length number of bytes available at src
 * @param nal    receives the data pointer and size of the extracted payload
 * @return the number of bytes of src that were consumed, or AVERROR(ENOMEM)
 *         if the buffer could not be allocated
 */
2661 static int extract_rbsp(const uint8_t *src, int length,
2662                         HEVCNAL *nal)
2663 {
2664     int i, si, di;
2665     uint8_t *dst;
2666
2667 #define STARTCODE_TEST                                                  \
2668         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2669             if (src[i + 2] != 3) {                                      \
2670                 /* startcode, so we must be past the end */             \
2671                 length = i;                                             \
2672             }                                                           \
2673             break;                                                      \
2674         }
2675 #if HAVE_FAST_UNALIGNED
2676 #define FIND_FIRST_ZERO                                                 \
2677         if (i > 0 && !src[i])                                           \
2678             i--;                                                        \
2679         while (src[i])                                                  \
2680             i++
     /* word-at-a-time scan: words in which the bit test below finds no
      * candidate zero byte are skipped as a whole */
2681 #if HAVE_FAST_64BIT
2682     for (i = 0; i + 1 < length; i += 9) {
2683         if (!((~AV_RN64A(src + i) &
2684                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2685               0x8000800080008080ULL))
2686             continue;
2687         FIND_FIRST_ZERO;
2688         STARTCODE_TEST;
2689         i -= 7;
2690     }
2691 #else
2692     for (i = 0; i + 1 < length; i += 5) {
2693         if (!((~AV_RN32A(src + i) &
2694                (AV_RN32A(src + i) - 0x01000101U)) &
2695               0x80008080U))
2696             continue;
2697         FIND_FIRST_ZERO;
2698         STARTCODE_TEST;
2699         i -= 3;
2700     }
2701 #endif /* HAVE_FAST_64BIT */
2702 #else
     /* portable scan: look at every second byte and step back by one when
      * the preceding byte is zero as well */
2703     for (i = 0; i + 1 < length; i += 2) {
2704         if (src[i])
2705             continue;
2706         if (i > 0 && src[i - 1] == 0)
2707             i--;
2708         STARTCODE_TEST;
2709     }
2710 #endif /* HAVE_FAST_UNALIGNED */
2711
     /* fast path: the input can be used in place, without a copy */
2712     if (i >= length - 1) { // no escaped 0
2713         nal->data = src;
2714         nal->size = length;
2715         return length;
2716     }
2717
2718     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2719                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2720     if (!nal->rbsp_buffer)
2721         return AVERROR(ENOMEM);
2722
2723     dst = nal->rbsp_buffer;
2724
     /* everything before the first candidate sequence is copied verbatim;
      * si indexes the source, di the destination */
2725     memcpy(dst, src, i);
2726     si = di = i;
2727     while (si + 2 < length) {
2728         // remove escapes (very rare 1:2^22)
2729         if (src[si + 2] > 3) {
2730             dst[di++] = src[si++];
2731             dst[di++] = src[si++];
2732         } else if (src[si] == 0 && src[si + 1] == 0) {
2733             if (src[si + 2] == 3) { // escape
2734                 dst[di++] = 0;
2735                 dst[di++] = 0;
2736                 si       += 3;
2737
2738                 continue;
2739             } else // next start code
2740                 goto nsc;
2741         }
2742
2743         dst[di++] = src[si++];
2744     }
     /* copy the bytes too close to the end to hold an escape sequence */
2745     while (si < length)
2746         dst[di++] = src[si++];
2747
2748 nsc:
2749     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2750
2751     nal->data = dst;
2752     nal->size = di;
2753     return si;
2754 }
2755
2756 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2757 {
2758     int i, consumed, ret = 0;
2759
2760     s->ref = NULL;
2761     s->eos = 0;
2762
2763     /* split the input packet into NAL units, so we know the upper bound on the
2764      * number of slices in the frame */
2765     s->nb_nals = 0;
2766     while (length >= 4) {
2767         HEVCNAL *nal;
2768         int extract_length = 0;
2769
2770         if (s->is_nalff) {
2771             int i;
2772             for (i = 0; i < s->nal_length_size; i++)
2773                 extract_length = (extract_length << 8) | buf[i];
2774             buf    += s->nal_length_size;
2775             length -= s->nal_length_size;
2776
2777             if (extract_length > length) {
2778                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2779                 ret = AVERROR_INVALIDDATA;
2780                 goto fail;
2781             }
2782         } else {
2783             if (buf[2] == 0) {
2784                 length--;
2785                 buf++;
2786                 continue;
2787             }
2788             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2789                 ret = AVERROR_INVALIDDATA;
2790                 goto fail;
2791             }
2792
2793             buf           += 3;
2794             length        -= 3;
2795             extract_length = length;
2796         }
2797
2798         if (s->nals_allocated < s->nb_nals + 1) {
2799             int new_size = s->nals_allocated + 1;
2800             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2801             if (!tmp) {
2802                 ret = AVERROR(ENOMEM);
2803                 goto fail;
2804             }
2805             s->nals = tmp;
2806             memset(s->nals + s->nals_allocated, 0,
2807                    (new_size - s->nals_allocated) * sizeof(*tmp));
2808             s->nals_allocated = new_size;
2809         }
2810         nal = &s->nals[s->nb_nals++];
2811
2812         consumed = extract_rbsp(buf, extract_length, nal);
2813         if (consumed < 0) {
2814             ret = consumed;
2815             goto fail;
2816         }
2817
2818         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2819         if (ret < 0)
2820             goto fail;
2821         hls_nal_unit(s);
2822
2823         if (s->nal_unit_type == NAL_EOB_NUT ||
2824             s->nal_unit_type == NAL_EOS_NUT)
2825             s->eos = 1;
2826
2827         buf    += consumed;
2828         length -= consumed;
2829     }
2830
2831     /* parse the NAL units */
2832     for (i = 0; i < s->nb_nals; i++) {
2833         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2834         if (ret < 0) {
2835             av_log(s->avctx, AV_LOG_WARNING,
2836                    "Error parsing NAL unit #%d.\n", i);
2837             goto fail;
2838         }
2839     }
2840
2841 fail:
2842     if (s->ref)
2843         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2844
2845     return ret;
2846 }
2847
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits, without any
 * separator or trailing newline.
 *
 * @param log_ctx context passed on to av_log()
 * @param level   log level passed on to av_log()
 * @param md5     the digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end) {
        av_log(log_ctx, level, "%02"PRIx8, *byte);
        byte++;
    }
}
2854
2855 static int verify_md5(HEVCContext *s, AVFrame *frame)
2856 {
2857     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2858     int pixel_shift;
2859     int i, j;
2860
2861     if (!desc)
2862         return AVERROR(EINVAL);
2863
2864     pixel_shift = desc->comp[0].depth_minus1 > 7;
2865
2866     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2867            s->poc);
2868
2869     /* the checksums are LE, so we have to byteswap for >8bpp formats
2870      * on BE arches */
2871 #if HAVE_BIGENDIAN
2872     if (pixel_shift && !s->checksum_buf) {
2873         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2874                        FFMAX3(frame->linesize[0], frame->linesize[1],
2875                               frame->linesize[2]));
2876         if (!s->checksum_buf)
2877             return AVERROR(ENOMEM);
2878     }
2879 #endif
2880
2881     for (i = 0; frame->data[i]; i++) {
2882         int width  = s->avctx->coded_width;
2883         int height = s->avctx->coded_height;
2884         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2885         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2886         uint8_t md5[16];
2887
2888         av_md5_init(s->md5_ctx);
2889         for (j = 0; j < h; j++) {
2890             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2891 #if HAVE_BIGENDIAN
2892             if (pixel_shift) {
2893                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2894                                     (const uint16_t *) src, w);
2895                 src = s->checksum_buf;
2896             }
2897 #endif
2898             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2899         }
2900         av_md5_final(s->md5_ctx, md5);
2901
2902         if (!memcmp(md5, s->md5[i], 16)) {
2903             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2904             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2905             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2906         } else {
2907             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2908             print_md5(s->avctx, AV_LOG_ERROR, md5);
2909             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2910             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2911             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2912             return AVERROR_INVALIDDATA;
2913         }
2914     }
2915
2916     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2917
2918     return 0;
2919 }
2920
2921 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2922                              AVPacket *avpkt)
2923 {
2924     int ret;
2925     HEVCContext *s = avctx->priv_data;
2926
2927     if (!avpkt->size) {
2928         ret = ff_hevc_output_frame(s, data, 1);
2929         if (ret < 0)
2930             return ret;
2931
2932         *got_output = ret;
2933         return 0;
2934     }
2935
2936     s->ref = NULL;
2937     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2938     if (ret < 0)
2939         return ret;
2940
2941     /* verify the SEI checksum */
2942     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2943         s->is_md5) {
2944         ret = verify_md5(s, s->ref->frame);
2945         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2946             ff_hevc_unref_frame(s, s->ref, ~0);
2947             return ret;
2948         }
2949     }
2950     s->is_md5 = 0;
2951
2952     if (s->is_decoded) {
2953         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2954         s->is_decoded = 0;
2955     }
2956
2957     if (s->output_frame->buf[0]) {
2958         av_frame_move_ref(data, s->output_frame);
2959         *got_output = 1;
2960     }
2961
2962     return avpkt->size;
2963 }
2964
2965 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2966 {
2967     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2968     if (ret < 0)
2969         return ret;
2970
2971     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2972     if (!dst->tab_mvf_buf)
2973         goto fail;
2974     dst->tab_mvf = src->tab_mvf;
2975
2976     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2977     if (!dst->rpl_tab_buf)
2978         goto fail;
2979     dst->rpl_tab = src->rpl_tab;
2980
2981     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2982     if (!dst->rpl_buf)
2983         goto fail;
2984
2985     dst->poc        = src->poc;
2986     dst->ctb_count  = src->ctb_count;
2987     dst->window     = src->window;
2988     dst->flags      = src->flags;
2989     dst->sequence   = src->sequence;
2990
2991     return 0;
2992 fail:
2993     ff_hevc_unref_frame(s, dst, ~0);
2994     return AVERROR(ENOMEM);
2995 }
2996
2997 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2998 {
2999     HEVCContext       *s = avctx->priv_data;
3000     int i;
3001
3002     pic_arrays_free(s);
3003
3004     av_freep(&s->md5_ctx);
3005
3006     av_frame_free(&s->tmp_frame);
3007     av_frame_free(&s->output_frame);
3008
3009     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3010         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3011         av_frame_free(&s->DPB[i].frame);
3012     }
3013
3014     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3015         av_buffer_unref(&s->vps_list[i]);
3016     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3017         av_buffer_unref(&s->sps_list[i]);
3018     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3019         av_buffer_unref(&s->pps_list[i]);
3020
3021     for (i = 0; i < s->nals_allocated; i++)
3022         av_freep(&s->nals[i].rbsp_buffer);
3023     av_freep(&s->nals);
3024     s->nals_allocated = 0;
3025
3026     return 0;
3027 }
3028
3029 static av_cold int hevc_init_context(AVCodecContext *avctx)
3030 {
3031     HEVCContext *s = avctx->priv_data;
3032     int i;
3033
3034     s->avctx = avctx;
3035
3036     s->tmp_frame = av_frame_alloc();
3037     if (!s->tmp_frame)
3038         goto fail;
3039
3040     s->output_frame = av_frame_alloc();
3041     if (!s->output_frame)
3042         goto fail;
3043
3044     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3045         s->DPB[i].frame = av_frame_alloc();
3046         if (!s->DPB[i].frame)
3047             goto fail;
3048         s->DPB[i].tf.f = s->DPB[i].frame;
3049     }
3050
3051     s->max_ra = INT_MAX;
3052
3053     s->md5_ctx = av_md5_alloc();
3054     if (!s->md5_ctx)
3055         goto fail;
3056
3057     ff_bswapdsp_init(&s->bdsp);
3058
3059     s->context_initialized = 1;
3060
3061     return 0;
3062
3063 fail:
3064     hevc_decode_free(avctx);
3065     return AVERROR(ENOMEM);
3066 }
3067
3068 static int hevc_update_thread_context(AVCodecContext *dst,
3069                                       const AVCodecContext *src)
3070 {
3071     HEVCContext *s  = dst->priv_data;
3072     HEVCContext *s0 = src->priv_data;
3073     int i, ret;
3074
3075     if (!s->context_initialized) {
3076         ret = hevc_init_context(dst);
3077         if (ret < 0)
3078             return ret;
3079     }
3080
3081     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3082         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3083         if (s0->DPB[i].frame->buf[0]) {
3084             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3085             if (ret < 0)
3086                 return ret;
3087         }
3088     }
3089
3090     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3091         av_buffer_unref(&s->vps_list[i]);
3092         if (s0->vps_list[i]) {
3093             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3094             if (!s->vps_list[i])
3095                 return AVERROR(ENOMEM);
3096         }
3097     }
3098
3099     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3100         av_buffer_unref(&s->sps_list[i]);
3101         if (s0->sps_list[i]) {
3102             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3103             if (!s->sps_list[i])
3104                 return AVERROR(ENOMEM);
3105         }
3106     }
3107
3108     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3109         av_buffer_unref(&s->pps_list[i]);
3110         if (s0->pps_list[i]) {
3111             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3112             if (!s->pps_list[i])
3113                 return AVERROR(ENOMEM);
3114         }
3115     }
3116
3117     if (s->sps != s0->sps)
3118         ret = set_sps(s, s0->sps);
3119
3120     s->seq_decode = s0->seq_decode;
3121     s->seq_output = s0->seq_output;
3122     s->pocTid0    = s0->pocTid0;
3123     s->max_ra     = s0->max_ra;
3124
3125     s->is_nalff        = s0->is_nalff;
3126     s->nal_length_size = s0->nal_length_size;
3127
3128     if (s0->eos) {
3129         s->seq_decode = (s->seq_decode + 1) & 0xff;
3130         s->max_ra = INT_MAX;
3131     }
3132
3133     return 0;
3134 }
3135
3136 static int hevc_decode_extradata(HEVCContext *s)
3137 {
3138     AVCodecContext *avctx = s->avctx;
3139     GetByteContext gb;
3140     int ret;
3141
3142     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3143
3144     if (avctx->extradata_size > 3 &&
3145         (avctx->extradata[0] || avctx->extradata[1] ||
3146          avctx->extradata[2] > 1)) {
3147         /* It seems the extradata is encoded as hvcC format.
3148          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3149          * is finalized. When finalized, configurationVersion will be 1 and we
3150          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3151         int i, j, num_arrays, nal_len_size;
3152
3153         s->is_nalff = 1;
3154
3155         bytestream2_skip(&gb, 21);
3156         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3157         num_arrays   = bytestream2_get_byte(&gb);
3158
3159         /* nal units in the hvcC always have length coded with 2 bytes,
3160          * so put a fake nal_length_size = 2 while parsing them */
3161         s->nal_length_size = 2;
3162
3163         /* Decode nal units from hvcC. */
3164         for (i = 0; i < num_arrays; i++) {
3165             int type = bytestream2_get_byte(&gb) & 0x3f;
3166             int cnt  = bytestream2_get_be16(&gb);
3167
3168             for (j = 0; j < cnt; j++) {
3169                 // +2 for the nal size field
3170                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3171                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3172                     av_log(s->avctx, AV_LOG_ERROR,
3173                            "Invalid NAL unit size in extradata.\n");
3174                     return AVERROR_INVALIDDATA;
3175                 }
3176
3177                 ret = decode_nal_units(s, gb.buffer, nalsize);
3178                 if (ret < 0) {
3179                     av_log(avctx, AV_LOG_ERROR,
3180                            "Decoding nal unit %d %d from hvcC failed\n",
3181                            type, i);
3182                     return ret;
3183                 }
3184                 bytestream2_skip(&gb, nalsize);
3185             }
3186         }
3187
3188         /* Now store right nal length size, that will be used to parse
3189          * all other nals */
3190         s->nal_length_size = nal_len_size;
3191     } else {
3192         s->is_nalff = 0;
3193         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3194         if (ret < 0)
3195             return ret;
3196     }
3197     return 0;
3198 }
3199
3200 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3201 {
3202     HEVCContext *s = avctx->priv_data;
3203     int ret;
3204
3205     ff_init_cabac_states();
3206
3207     avctx->internal->allocate_progress = 1;
3208
3209     ret = hevc_init_context(avctx);
3210     if (ret < 0)
3211         return ret;
3212
3213     if (avctx->extradata_size > 0 && avctx->extradata) {
3214         ret = hevc_decode_extradata(s);
3215         if (ret < 0) {
3216             hevc_decode_free(avctx);
3217             return ret;
3218         }
3219     }
3220
3221     return 0;
3222 }
3223
3224 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3225 {
3226     HEVCContext *s = avctx->priv_data;
3227     int ret;
3228
3229     memset(s, 0, sizeof(*s));
3230
3231     ret = hevc_init_context(avctx);
3232     if (ret < 0)
3233         return ret;
3234
3235     return 0;
3236 }
3237
3238 static void hevc_decode_flush(AVCodecContext *avctx)
3239 {
3240     HEVCContext *s = avctx->priv_data;
3241     ff_hevc_flush_dpb(s);
3242     s->max_ra = INT_MAX;
3243 }
3244
/* Byte offset of a HEVCContext member, for use in the AVOption table. */
#define OFFSET(field) offsetof(HEVCContext, field)
/* Flag set shared by the options: decoding-side video parameters. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3247
3248 static const AVProfile profiles[] = {
3249     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3250     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3251     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3252     { FF_PROFILE_UNKNOWN },
3253 };
3254
3255 static const AVOption options[] = {
3256     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3257         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3258     { NULL },
3259 };
3260
3261 static const AVClass hevc_decoder_class = {
3262     .class_name = "HEVC decoder",
3263     .item_name  = av_default_item_name,
3264     .option     = options,
3265     .version    = LIBAVUTIL_VERSION_INT,
3266 };
3267
3268 AVCodec ff_hevc_decoder = {
3269     .name                  = "hevc",
3270     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3271     .type                  = AVMEDIA_TYPE_VIDEO,
3272     .id                    = AV_CODEC_ID_HEVC,
3273     .priv_data_size        = sizeof(HEVCContext),
3274     .priv_class            = &hevc_decoder_class,
3275     .init                  = hevc_decode_init,
3276     .close                 = hevc_decode_free,
3277     .decode                = hevc_decode_frame,
3278     .flush                 = hevc_decode_flush,
3279     .update_thread_context = hevc_update_thread_context,
3280     .init_thread_copy      = hevc_init_thread_copy,
3281     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3282                              CODEC_CAP_FRAME_THREADS,
3283     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3284 };