]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c
hevc: reduce code duplication in hls_prediction_unit()
[ffmpeg] / libavcodec / hevc.c
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/md5.h"
31 #include "libavutil/opt.h"
32 #include "libavutil/pixdesc.h"
33 #include "libavutil/stereo3d.h"
34
35 #include "bswapdsp.h"
36 #include "bytestream.h"
37 #include "cabac_functions.h"
38 #include "golomb.h"
39 #include "hevc.h"
40
/*
 * Small lookup tables with four entries each, shared with other files
 * (non-static). For every index, ff_hevc_qpel_extra[] holds the sum of the
 * matching ff_hevc_qpel_extra_before[] and ff_hevc_qpel_extra_after[]
 * entries.
 * NOTE(review): presumably the number of extra samples needed around a
 * block for each quarter-pel fraction -- confirm against the users.
 */
const uint8_t ff_hevc_qpel_extra_before[4] = {
    0, 3, 3, 2,
};

const uint8_t ff_hevc_qpel_extra_after[4] = {
    0, 3, 4, 4,
};

const uint8_t ff_hevc_qpel_extra[4] = {
    0, 6, 7, 6,
};
44
/* Scan table for a block holding a single position. */
static const uint8_t scan_1x1[1] = {
    0,
};

/*
 * Row-by-row scan of a 2x2 block: entry i of the _x table is i % 2 and
 * entry i of the _y table is i / 2.
 */
static const uint8_t horiz_scan2x2_x[4] = {
    0, 1,
    0, 1,
};

static const uint8_t horiz_scan2x2_y[4] = {
    0, 0,
    1, 1,
};

/*
 * Row-by-row scan of a 4x4 block: entry i of the _x table is i % 4 and
 * entry i of the _y table is i / 4.
 */
static const uint8_t horiz_scan4x4_x[16] = {
    0, 1, 2, 3,   0, 1, 2, 3,
    0, 1, 2, 3,   0, 1, 2, 3,
};

static const uint8_t horiz_scan4x4_y[16] = {
    0, 0, 0, 0,   1, 1, 1, 1,
    2, 2, 2, 2,   3, 3, 3, 3,
};

/*
 * Position lookup for an 8x8 block handled as four 4x4 quadrants of
 * 16 entries each. With a = first index and b = second index the value is
 *     32 * (a >> 2) + 16 * (b >> 2) + 4 * (a & 3) + (b & 3).
 */
static const uint8_t horiz_scan8x8_inv[8][8] = {
    [0] = {  0,  1,  2,  3,   16, 17, 18, 19 },
    [1] = {  4,  5,  6,  7,   20, 21, 22, 23 },
    [2] = {  8,  9, 10, 11,   24, 25, 26, 27 },
    [3] = { 12, 13, 14, 15,   28, 29, 30, 31 },
    [4] = { 32, 33, 34, 35,   48, 49, 50, 51 },
    [5] = { 36, 37, 38, 39,   52, 53, 54, 55 },
    [6] = { 40, 41, 42, 43,   56, 57, 58, 59 },
    [7] = { 44, 45, 46, 47,   60, 61, 62, 63 },
};
75
/*
 * Diagonal scan tables.
 *
 * The *_x / *_y tables give the coordinates of the i-th scan position.
 * They are laid out below with one anti-diagonal (constant x + y) per
 * source row; within a diagonal x increases while y decreases.
 *
 * The *_inv tables are the reverse mapping: *_inv[y][x] is the scan
 * position i for which the _x table holds x and the _y table holds y.
 */

static const uint8_t diag_scan2x2_x[4] = {
    0,
    0, 1,
    1,
};

static const uint8_t diag_scan2x2_y[4] = {
    0,
    1, 0,
    1,
};

static const uint8_t diag_scan2x2_inv[2][2] = {
    [0] = { 0, 2 },
    [1] = { 1, 3 },
};

const uint8_t ff_hevc_diag_scan4x4_x[16] = {
    0,
    0, 1,
    0, 1, 2,
    0, 1, 2, 3,
    1, 2, 3,
    2, 3,
    3,
};

const uint8_t ff_hevc_diag_scan4x4_y[16] = {
    0,
    1, 0,
    2, 1, 0,
    3, 2, 1, 0,
    3, 2, 1,
    3, 2,
    3,
};

static const uint8_t diag_scan4x4_inv[4][4] = {
    [0] = { 0,  2,  5,  9 },
    [1] = { 1,  4,  8, 12 },
    [2] = { 3,  7, 11, 14 },
    [3] = { 6, 10, 13, 15 },
};

const uint8_t ff_hevc_diag_scan8x8_x[64] = {
    0,
    0, 1,
    0, 1, 2,
    0, 1, 2, 3,
    0, 1, 2, 3, 4,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5, 6,
    0, 1, 2, 3, 4, 5, 6, 7,
    1, 2, 3, 4, 5, 6, 7,
    2, 3, 4, 5, 6, 7,
    3, 4, 5, 6, 7,
    4, 5, 6, 7,
    5, 6, 7,
    6, 7,
    7,
};

const uint8_t ff_hevc_diag_scan8x8_y[64] = {
    0,
    1, 0,
    2, 1, 0,
    3, 2, 1, 0,
    4, 3, 2, 1, 0,
    5, 4, 3, 2, 1, 0,
    6, 5, 4, 3, 2, 1, 0,
    7, 6, 5, 4, 3, 2, 1, 0,
    7, 6, 5, 4, 3, 2, 1,
    7, 6, 5, 4, 3, 2,
    7, 6, 5, 4, 3,
    7, 6, 5, 4,
    7, 6, 5,
    7, 6,
    7,
};

static const uint8_t diag_scan8x8_inv[8][8] = {
    [0] = {  0,  2,  5,  9, 14, 20, 27, 35 },
    [1] = {  1,  4,  8, 13, 19, 26, 34, 42 },
    [2] = {  3,  7, 12, 18, 25, 33, 41, 48 },
    [3] = {  6, 11, 17, 24, 32, 40, 47, 53 },
    [4] = { 10, 16, 23, 31, 39, 46, 52, 57 },
    [5] = { 15, 22, 30, 38, 45, 51, 56, 60 },
    [6] = { 21, 29, 37, 44, 50, 55, 59, 62 },
    [7] = { 28, 36, 43, 49, 54, 58, 61, 63 },
};
154
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
159
160 /**
161  * Section 5.7
162  */
163
164 /* free everything allocated  by pic_arrays_init() */
165 static void pic_arrays_free(HEVCContext *s)
166 {
167     av_freep(&s->sao);
168     av_freep(&s->deblock);
169
170     av_freep(&s->skip_flag);
171     av_freep(&s->tab_ct_depth);
172
173     av_freep(&s->tab_ipm);
174     av_freep(&s->cbf_luma);
175     av_freep(&s->is_pcm);
176
177     av_freep(&s->qp_y_tab);
178     av_freep(&s->tab_slice_address);
179     av_freep(&s->filter_slice_edges);
180
181     av_freep(&s->horizontal_bs);
182     av_freep(&s->vertical_bs);
183
184     av_buffer_pool_uninit(&s->tab_mvf_pool);
185     av_buffer_pool_uninit(&s->rpl_tab_pool);
186 }
187
188 /* allocate arrays that depend on frame dimensions */
189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
190 {
191     int log2_min_cb_size = sps->log2_min_cb_size;
192     int width            = sps->width;
193     int height           = sps->height;
194     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
195                            ((height >> log2_min_cb_size) + 1);
196     int ctb_count        = sps->ctb_width * sps->ctb_height;
197     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
198
199     s->bs_width  = width  >> 3;
200     s->bs_height = height >> 3;
201
202     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
203     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
204     if (!s->sao || !s->deblock)
205         goto fail;
206
207     s->skip_flag    = av_malloc(pic_size_in_ctb);
208     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
209     if (!s->skip_flag || !s->tab_ct_depth)
210         goto fail;
211
212     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
213     s->tab_ipm  = av_mallocz(min_pu_size);
214     s->is_pcm   = av_malloc(min_pu_size);
215     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
216         goto fail;
217
218     s->filter_slice_edges = av_malloc(ctb_count);
219     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
220                                       sizeof(*s->tab_slice_address));
221     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
222                                       sizeof(*s->qp_y_tab));
223     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
224         goto fail;
225
226     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
227     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
228     if (!s->horizontal_bs || !s->vertical_bs)
229         goto fail;
230
231     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
232                                           av_buffer_alloc);
233     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
234                                           av_buffer_allocz);
235     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
236         goto fail;
237
238     return 0;
239
240 fail:
241     pic_arrays_free(s);
242     return AVERROR(ENOMEM);
243 }
244
245 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
246 {
247     int i = 0;
248     int j = 0;
249     uint8_t luma_weight_l0_flag[16];
250     uint8_t chroma_weight_l0_flag[16];
251     uint8_t luma_weight_l1_flag[16];
252     uint8_t chroma_weight_l1_flag[16];
253
254     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
255     if (s->sps->chroma_format_idc != 0) {
256         int delta = get_se_golomb(gb);
257         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
258     }
259
260     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
261         luma_weight_l0_flag[i] = get_bits1(gb);
262         if (!luma_weight_l0_flag[i]) {
263             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
264             s->sh.luma_offset_l0[i] = 0;
265         }
266     }
267     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
268         for (i = 0; i < s->sh.nb_refs[L0]; i++)
269             chroma_weight_l0_flag[i] = get_bits1(gb);
270     } else {
271         for (i = 0; i < s->sh.nb_refs[L0]; i++)
272             chroma_weight_l0_flag[i] = 0;
273     }
274     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
275         if (luma_weight_l0_flag[i]) {
276             int delta_luma_weight_l0 = get_se_golomb(gb);
277             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
278             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
279         }
280         if (chroma_weight_l0_flag[i]) {
281             for (j = 0; j < 2; j++) {
282                 int delta_chroma_weight_l0 = get_se_golomb(gb);
283                 int delta_chroma_offset_l0 = get_se_golomb(gb);
284                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
285                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
286                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
287             }
288         } else {
289             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
290             s->sh.chroma_offset_l0[i][0] = 0;
291             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
292             s->sh.chroma_offset_l0[i][1] = 0;
293         }
294     }
295     if (s->sh.slice_type == B_SLICE) {
296         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
297             luma_weight_l1_flag[i] = get_bits1(gb);
298             if (!luma_weight_l1_flag[i]) {
299                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
300                 s->sh.luma_offset_l1[i] = 0;
301             }
302         }
303         if (s->sps->chroma_format_idc != 0) {
304             for (i = 0; i < s->sh.nb_refs[L1]; i++)
305                 chroma_weight_l1_flag[i] = get_bits1(gb);
306         } else {
307             for (i = 0; i < s->sh.nb_refs[L1]; i++)
308                 chroma_weight_l1_flag[i] = 0;
309         }
310         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
311             if (luma_weight_l1_flag[i]) {
312                 int delta_luma_weight_l1 = get_se_golomb(gb);
313                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
314                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
315             }
316             if (chroma_weight_l1_flag[i]) {
317                 for (j = 0; j < 2; j++) {
318                     int delta_chroma_weight_l1 = get_se_golomb(gb);
319                     int delta_chroma_offset_l1 = get_se_golomb(gb);
320                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
321                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
322                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
323                 }
324             } else {
325                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
326                 s->sh.chroma_offset_l1[i][0] = 0;
327                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
328                 s->sh.chroma_offset_l1[i][1] = 0;
329             }
330         }
331     }
332 }
333
334 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
335 {
336     const HEVCSPS *sps = s->sps;
337     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
338     int prev_delta_msb = 0;
339     unsigned int nb_sps = 0, nb_sh;
340     int i;
341
342     rps->nb_refs = 0;
343     if (!sps->long_term_ref_pics_present_flag)
344         return 0;
345
346     if (sps->num_long_term_ref_pics_sps > 0)
347         nb_sps = get_ue_golomb_long(gb);
348     nb_sh = get_ue_golomb_long(gb);
349
350     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
351         return AVERROR_INVALIDDATA;
352
353     rps->nb_refs = nb_sh + nb_sps;
354
355     for (i = 0; i < rps->nb_refs; i++) {
356         uint8_t delta_poc_msb_present;
357
358         if (i < nb_sps) {
359             uint8_t lt_idx_sps = 0;
360
361             if (sps->num_long_term_ref_pics_sps > 1)
362                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
363
364             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
365             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
366         } else {
367             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
368             rps->used[i] = get_bits1(gb);
369         }
370
371         delta_poc_msb_present = get_bits1(gb);
372         if (delta_poc_msb_present) {
373             int delta = get_ue_golomb_long(gb);
374
375             if (i && i != nb_sps)
376                 delta += prev_delta_msb;
377
378             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
379             prev_delta_msb = delta;
380         }
381     }
382
383     return 0;
384 }
385
386 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
387 {
388     int ret;
389     unsigned int num = 0, den = 0;
390
391     pic_arrays_free(s);
392     ret = pic_arrays_init(s, sps);
393     if (ret < 0)
394         goto fail;
395
396     s->avctx->coded_width         = sps->width;
397     s->avctx->coded_height        = sps->height;
398     s->avctx->width               = sps->output_width;
399     s->avctx->height              = sps->output_height;
400     s->avctx->pix_fmt             = sps->pix_fmt;
401     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
402
403     ff_set_sar(s->avctx, sps->vui.sar);
404
405     if (sps->vui.video_signal_type_present_flag)
406         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
407                                                                : AVCOL_RANGE_MPEG;
408     else
409         s->avctx->color_range = AVCOL_RANGE_MPEG;
410
411     if (sps->vui.colour_description_present_flag) {
412         s->avctx->color_primaries = sps->vui.colour_primaries;
413         s->avctx->color_trc       = sps->vui.transfer_characteristic;
414         s->avctx->colorspace      = sps->vui.matrix_coeffs;
415     } else {
416         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
417         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
418         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
419     }
420
421     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
422     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
423     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
424
425     if (sps->sao_enabled) {
426         av_frame_unref(s->tmp_frame);
427         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
428         if (ret < 0)
429             goto fail;
430         s->frame = s->tmp_frame;
431     }
432
433     s->sps = sps;
434     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
435
436     if (s->vps->vps_timing_info_present_flag) {
437         num = s->vps->vps_num_units_in_tick;
438         den = s->vps->vps_time_scale;
439     } else if (sps->vui.vui_timing_info_present_flag) {
440         num = sps->vui.vui_num_units_in_tick;
441         den = sps->vui.vui_time_scale;
442     }
443
444     if (num != 0 && den != 0)
445         av_reduce(&s->avctx->framerate.den, &s->avctx->framerate.num,
446                   num, den, 1 << 30);
447
448     return 0;
449
450 fail:
451     pic_arrays_free(s);
452     s->sps = NULL;
453     return ret;
454 }
455
456 static int hls_slice_header(HEVCContext *s)
457 {
458     GetBitContext *gb = &s->HEVClc.gb;
459     SliceHeader *sh   = &s->sh;
460     int i, ret;
461
462     // Coded parameters
463     sh->first_slice_in_pic_flag = get_bits1(gb);
464     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
465         s->seq_decode = (s->seq_decode + 1) & 0xff;
466         s->max_ra     = INT_MAX;
467         if (IS_IDR(s))
468             ff_hevc_clear_refs(s);
469     }
470     if (IS_IRAP(s))
471         sh->no_output_of_prior_pics_flag = get_bits1(gb);
472
473     sh->pps_id = get_ue_golomb_long(gb);
474     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
475         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
476         return AVERROR_INVALIDDATA;
477     }
478     if (!sh->first_slice_in_pic_flag &&
479         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
480         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
481         return AVERROR_INVALIDDATA;
482     }
483     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
484
485     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
486         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
487
488         ff_hevc_clear_refs(s);
489         ret = set_sps(s, s->sps);
490         if (ret < 0)
491             return ret;
492
493         s->seq_decode = (s->seq_decode + 1) & 0xff;
494         s->max_ra     = INT_MAX;
495     }
496
497     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
498     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
499
500     sh->dependent_slice_segment_flag = 0;
501     if (!sh->first_slice_in_pic_flag) {
502         int slice_address_length;
503
504         if (s->pps->dependent_slice_segments_enabled_flag)
505             sh->dependent_slice_segment_flag = get_bits1(gb);
506
507         slice_address_length = av_ceil_log2(s->sps->ctb_width *
508                                             s->sps->ctb_height);
509         sh->slice_segment_addr = get_bits(gb, slice_address_length);
510         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
511             av_log(s->avctx, AV_LOG_ERROR,
512                    "Invalid slice segment address: %u.\n",
513                    sh->slice_segment_addr);
514             return AVERROR_INVALIDDATA;
515         }
516
517         if (!sh->dependent_slice_segment_flag) {
518             sh->slice_addr = sh->slice_segment_addr;
519             s->slice_idx++;
520         }
521     } else {
522         sh->slice_segment_addr = sh->slice_addr = 0;
523         s->slice_idx           = 0;
524         s->slice_initialized   = 0;
525     }
526
527     if (!sh->dependent_slice_segment_flag) {
528         s->slice_initialized = 0;
529
530         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
531             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
532
533         sh->slice_type = get_ue_golomb_long(gb);
534         if (!(sh->slice_type == I_SLICE ||
535               sh->slice_type == P_SLICE ||
536               sh->slice_type == B_SLICE)) {
537             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
538                    sh->slice_type);
539             return AVERROR_INVALIDDATA;
540         }
541         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
542             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
543             return AVERROR_INVALIDDATA;
544         }
545
546         // when flag is not present, picture is inferred to be output
547         sh->pic_output_flag = 1;
548         if (s->pps->output_flag_present_flag)
549             sh->pic_output_flag = get_bits1(gb);
550
551         if (s->sps->separate_colour_plane_flag)
552             sh->colour_plane_id = get_bits(gb, 2);
553
554         if (!IS_IDR(s)) {
555             int short_term_ref_pic_set_sps_flag, poc;
556
557             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
558             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
559             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
560                 av_log(s->avctx, AV_LOG_WARNING,
561                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
562                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
563                     return AVERROR_INVALIDDATA;
564                 poc = s->poc;
565             }
566             s->poc = poc;
567
568             short_term_ref_pic_set_sps_flag = get_bits1(gb);
569             if (!short_term_ref_pic_set_sps_flag) {
570                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
571                 if (ret < 0)
572                     return ret;
573
574                 sh->short_term_rps = &sh->slice_rps;
575             } else {
576                 int numbits, rps_idx;
577
578                 if (!s->sps->nb_st_rps) {
579                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
580                     return AVERROR_INVALIDDATA;
581                 }
582
583                 numbits = av_ceil_log2(s->sps->nb_st_rps);
584                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
585                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
586             }
587
588             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
589             if (ret < 0) {
590                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
591                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
592                     return AVERROR_INVALIDDATA;
593             }
594
595             if (s->sps->sps_temporal_mvp_enabled_flag)
596                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
597             else
598                 sh->slice_temporal_mvp_enabled_flag = 0;
599         } else {
600             s->sh.short_term_rps = NULL;
601             s->poc               = 0;
602         }
603
604         /* 8.3.1 */
605         if (s->temporal_id == 0 &&
606             s->nal_unit_type != NAL_TRAIL_N &&
607             s->nal_unit_type != NAL_TSA_N   &&
608             s->nal_unit_type != NAL_STSA_N  &&
609             s->nal_unit_type != NAL_RADL_N  &&
610             s->nal_unit_type != NAL_RADL_R  &&
611             s->nal_unit_type != NAL_RASL_N  &&
612             s->nal_unit_type != NAL_RASL_R)
613             s->pocTid0 = s->poc;
614
615         if (s->sps->sao_enabled) {
616             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
617             sh->slice_sample_adaptive_offset_flag[1] =
618             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
619         } else {
620             sh->slice_sample_adaptive_offset_flag[0] = 0;
621             sh->slice_sample_adaptive_offset_flag[1] = 0;
622             sh->slice_sample_adaptive_offset_flag[2] = 0;
623         }
624
625         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
626         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
627             int nb_refs;
628
629             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
630             if (sh->slice_type == B_SLICE)
631                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
632
633             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
634                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
635                 if (sh->slice_type == B_SLICE)
636                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
637             }
638             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
639                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
640                        sh->nb_refs[L0], sh->nb_refs[L1]);
641                 return AVERROR_INVALIDDATA;
642             }
643
644             sh->rpl_modification_flag[0] = 0;
645             sh->rpl_modification_flag[1] = 0;
646             nb_refs = ff_hevc_frame_nb_refs(s);
647             if (!nb_refs) {
648                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
649                 return AVERROR_INVALIDDATA;
650             }
651
652             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
653                 sh->rpl_modification_flag[0] = get_bits1(gb);
654                 if (sh->rpl_modification_flag[0]) {
655                     for (i = 0; i < sh->nb_refs[L0]; i++)
656                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
657                 }
658
659                 if (sh->slice_type == B_SLICE) {
660                     sh->rpl_modification_flag[1] = get_bits1(gb);
661                     if (sh->rpl_modification_flag[1] == 1)
662                         for (i = 0; i < sh->nb_refs[L1]; i++)
663                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
664                 }
665             }
666
667             if (sh->slice_type == B_SLICE)
668                 sh->mvd_l1_zero_flag = get_bits1(gb);
669
670             if (s->pps->cabac_init_present_flag)
671                 sh->cabac_init_flag = get_bits1(gb);
672             else
673                 sh->cabac_init_flag = 0;
674
675             sh->collocated_ref_idx = 0;
676             if (sh->slice_temporal_mvp_enabled_flag) {
677                 sh->collocated_list = L0;
678                 if (sh->slice_type == B_SLICE)
679                     sh->collocated_list = !get_bits1(gb);
680
681                 if (sh->nb_refs[sh->collocated_list] > 1) {
682                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
683                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
684                         av_log(s->avctx, AV_LOG_ERROR,
685                                "Invalid collocated_ref_idx: %d.\n",
686                                sh->collocated_ref_idx);
687                         return AVERROR_INVALIDDATA;
688                     }
689                 }
690             }
691
692             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
693                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
694                 pred_weight_table(s, gb);
695             }
696
697             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
698             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
699                 av_log(s->avctx, AV_LOG_ERROR,
700                        "Invalid number of merging MVP candidates: %d.\n",
701                        sh->max_num_merge_cand);
702                 return AVERROR_INVALIDDATA;
703             }
704         }
705
706         sh->slice_qp_delta = get_se_golomb(gb);
707
708         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
709             sh->slice_cb_qp_offset = get_se_golomb(gb);
710             sh->slice_cr_qp_offset = get_se_golomb(gb);
711         } else {
712             sh->slice_cb_qp_offset = 0;
713             sh->slice_cr_qp_offset = 0;
714         }
715
716         if (s->pps->deblocking_filter_control_present_flag) {
717             int deblocking_filter_override_flag = 0;
718
719             if (s->pps->deblocking_filter_override_enabled_flag)
720                 deblocking_filter_override_flag = get_bits1(gb);
721
722             if (deblocking_filter_override_flag) {
723                 sh->disable_deblocking_filter_flag = get_bits1(gb);
724                 if (!sh->disable_deblocking_filter_flag) {
725                     sh->beta_offset = get_se_golomb(gb) * 2;
726                     sh->tc_offset   = get_se_golomb(gb) * 2;
727                 }
728             } else {
729                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
730                 sh->beta_offset                    = s->pps->beta_offset;
731                 sh->tc_offset                      = s->pps->tc_offset;
732             }
733         } else {
734             sh->disable_deblocking_filter_flag = 0;
735             sh->beta_offset                    = 0;
736             sh->tc_offset                      = 0;
737         }
738
739         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
740             (sh->slice_sample_adaptive_offset_flag[0] ||
741              sh->slice_sample_adaptive_offset_flag[1] ||
742              !sh->disable_deblocking_filter_flag)) {
743             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
744         } else {
745             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
746         }
747     } else if (!s->slice_initialized) {
748         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
749         return AVERROR_INVALIDDATA;
750     }
751
752     sh->num_entry_point_offsets = 0;
753     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
754         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
755         if (sh->num_entry_point_offsets > 0) {
756             int offset_len = get_ue_golomb_long(gb) + 1;
757
758             for (i = 0; i < sh->num_entry_point_offsets; i++)
759                 skip_bits(gb, offset_len);
760         }
761     }
762
763     if (s->pps->slice_header_extension_present_flag) {
764         unsigned int length = get_ue_golomb_long(gb);
765         for (i = 0; i < length; i++)
766             skip_bits(gb, 8);  // slice_header_extension_data_byte
767     }
768
769     // Inferred parameters
770     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
771     if (sh->slice_qp > 51 ||
772         sh->slice_qp < -s->sps->qp_bd_offset) {
773         av_log(s->avctx, AV_LOG_ERROR,
774                "The slice_qp %d is outside the valid range "
775                "[%d, 51].\n",
776                sh->slice_qp,
777                -s->sps->qp_bd_offset);
778         return AVERROR_INVALIDDATA;
779     }
780
781     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
782
783     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
784         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
785         return AVERROR_INVALIDDATA;
786     }
787
788     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
789
790     if (!s->pps->cu_qp_delta_enabled_flag)
791         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
792                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
793
794     s->slice_initialized = 1;
795
796     return 0;
797 }
798
/*
 * Element (x, y) of a per-CTB table stored row by row.
 * Relies on a variable named s (with s->sps->ctb_width) being in scope at
 * the point of use.
 */
#define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])

/*
 * Assign field elem of the SAO parameters pointed to by sao:
 *   - merge-left set:  copy the field from the CTB at (rx - 1, ry);
 *   - merge-up set:    copy the field from the CTB at (rx, ry - 1);
 *   - neither set:     store value.
 * Merge-left wins when both flags are set. value is evaluated only in the
 * last case. Expects sao, s, rx, ry, sao_merge_left_flag and
 * sao_merge_up_flag to be in scope.
 */
#define SET_SAO(elem, value)                            \
do {                                                    \
    if (sao_merge_left_flag)                            \
        sao->elem = CTB(s->sao, rx - 1, ry).elem;       \
    else if (sao_merge_up_flag)                         \
        sao->elem = CTB(s->sao, rx, ry - 1).elem;       \
    else                                                \
        sao->elem = (value);                            \
} while (0)
812
813 static void hls_sao_param(HEVCContext *s, int rx, int ry)
814 {
815     HEVCLocalContext *lc    = &s->HEVClc;
816     int sao_merge_left_flag = 0;
817     int sao_merge_up_flag   = 0;
818     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
819     SAOParams *sao          = &CTB(s->sao, rx, ry);
820     int c_idx, i;
821
822     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
823         s->sh.slice_sample_adaptive_offset_flag[1]) {
824         if (rx > 0) {
825             if (lc->ctb_left_flag)
826                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
827         }
828         if (ry > 0 && !sao_merge_left_flag) {
829             if (lc->ctb_up_flag)
830                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
831         }
832     }
833
834     for (c_idx = 0; c_idx < 3; c_idx++) {
835         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
836             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
837             continue;
838         }
839
840         if (c_idx == 2) {
841             sao->type_idx[2] = sao->type_idx[1];
842             sao->eo_class[2] = sao->eo_class[1];
843         } else {
844             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
845         }
846
847         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
848             continue;
849
850         for (i = 0; i < 4; i++)
851             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
852
853         if (sao->type_idx[c_idx] == SAO_BAND) {
854             for (i = 0; i < 4; i++) {
855                 if (sao->offset_abs[c_idx][i]) {
856                     SET_SAO(offset_sign[c_idx][i],
857                             ff_hevc_sao_offset_sign_decode(s));
858                 } else {
859                     sao->offset_sign[c_idx][i] = 0;
860                 }
861             }
862             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
863         } else if (c_idx != 2) {
864             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
865         }
866
867         // Inferred parameters
868         sao->offset_val[c_idx][0] = 0;
869         for (i = 0; i < 4; i++) {
870             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
871             if (sao->type_idx[c_idx] == SAO_EDGE) {
872                 if (i > 1)
873                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
874             } else if (sao->offset_sign[c_idx][i]) {
875                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
876             }
877         }
878     }
879 }
880
881 #undef SET_SAO
882 #undef CTB
883
884 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
885                                 int log2_trafo_size, enum ScanType scan_idx,
886                                 int c_idx)
887 {
888 #define GET_COORD(offset, n)                                    \
889     do {                                                        \
890         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
891         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
892     } while (0)
893     HEVCLocalContext *lc    = &s->HEVClc;
894     int transform_skip_flag = 0;
895
896     int last_significant_coeff_x, last_significant_coeff_y;
897     int last_scan_pos;
898     int n_end;
899     int num_coeff    = 0;
900     int greater1_ctx = 1;
901
902     int num_last_subset;
903     int x_cg_last_sig, y_cg_last_sig;
904
905     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
906
907     ptrdiff_t stride = s->frame->linesize[c_idx];
908     int hshift       = s->sps->hshift[c_idx];
909     int vshift       = s->sps->vshift[c_idx];
910     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
911                                               ((x0 >> hshift) << s->sps->pixel_shift)];
912     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
913     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
914
915     int trafo_size = 1 << log2_trafo_size;
916     int i, qp, shift, add, scale, scale_m;
917     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
918     const uint8_t *scale_matrix;
919     uint8_t dc_scale;
920
921     // Derive QP for dequant
922     if (!lc->cu.cu_transquant_bypass_flag) {
923         static const int qp_c[] = {
924             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
925         };
926
927         static const uint8_t rem6[51 + 2 * 6 + 1] = {
928             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
929             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
930             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
931         };
932
933         static const uint8_t div6[51 + 2 * 6 + 1] = {
934             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
935             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
936             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
937         };
938         int qp_y = lc->qp_y;
939
940         if (c_idx == 0) {
941             qp = qp_y + s->sps->qp_bd_offset;
942         } else {
943             int qp_i, offset;
944
945             if (c_idx == 1)
946                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
947             else
948                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
949
950             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
951             if (qp_i < 30)
952                 qp = qp_i;
953             else if (qp_i > 43)
954                 qp = qp_i - 6;
955             else
956                 qp = qp_c[qp_i - 30];
957
958             qp += s->sps->qp_bd_offset;
959         }
960
961         shift    = s->sps->bit_depth + log2_trafo_size - 5;
962         add      = 1 << (shift - 1);
963         scale    = level_scale[rem6[qp]] << (div6[qp]);
964         scale_m  = 16; // default when no custom scaling lists.
965         dc_scale = 16;
966
967         if (s->sps->scaling_list_enable_flag) {
968             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
969                                     &s->pps->scaling_list : &s->sps->scaling_list;
970             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
971
972             if (log2_trafo_size != 5)
973                 matrix_id = 3 * matrix_id + c_idx;
974
975             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
976             if (log2_trafo_size >= 4)
977                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
978         }
979     }
980
981     if (s->pps->transform_skip_enabled_flag &&
982         !lc->cu.cu_transquant_bypass_flag   &&
983         log2_trafo_size == 2) {
984         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
985     }
986
987     last_significant_coeff_x =
988         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
989     last_significant_coeff_y =
990         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
991
992     if (last_significant_coeff_x > 3) {
993         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
994         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
995                                    (2 + (last_significant_coeff_x & 1)) +
996                                    suffix;
997     }
998
999     if (last_significant_coeff_y > 3) {
1000         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1001         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1002                                    (2 + (last_significant_coeff_y & 1)) +
1003                                    suffix;
1004     }
1005
1006     if (scan_idx == SCAN_VERT)
1007         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1008
1009     x_cg_last_sig = last_significant_coeff_x >> 2;
1010     y_cg_last_sig = last_significant_coeff_y >> 2;
1011
1012     switch (scan_idx) {
1013     case SCAN_DIAG: {
1014         int last_x_c = last_significant_coeff_x & 3;
1015         int last_y_c = last_significant_coeff_y & 3;
1016
1017         scan_x_off = ff_hevc_diag_scan4x4_x;
1018         scan_y_off = ff_hevc_diag_scan4x4_y;
1019         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1020         if (trafo_size == 4) {
1021             scan_x_cg = scan_1x1;
1022             scan_y_cg = scan_1x1;
1023         } else if (trafo_size == 8) {
1024             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1025             scan_x_cg  = diag_scan2x2_x;
1026             scan_y_cg  = diag_scan2x2_y;
1027         } else if (trafo_size == 16) {
1028             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1029             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1030             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1031         } else { // trafo_size == 32
1032             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1033             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1034             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1035         }
1036         break;
1037     }
1038     case SCAN_HORIZ:
1039         scan_x_cg  = horiz_scan2x2_x;
1040         scan_y_cg  = horiz_scan2x2_y;
1041         scan_x_off = horiz_scan4x4_x;
1042         scan_y_off = horiz_scan4x4_y;
1043         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1044         break;
1045     default: //SCAN_VERT
1046         scan_x_cg  = horiz_scan2x2_y;
1047         scan_y_cg  = horiz_scan2x2_x;
1048         scan_x_off = horiz_scan4x4_y;
1049         scan_y_off = horiz_scan4x4_x;
1050         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1051         break;
1052     }
1053     num_coeff++;
1054     num_last_subset = (num_coeff - 1) >> 4;
1055
1056     for (i = num_last_subset; i >= 0; i--) {
1057         int n, m;
1058         int x_cg, y_cg, x_c, y_c;
1059         int implicit_non_zero_coeff = 0;
1060         int64_t trans_coeff_level;
1061         int prev_sig = 0;
1062         int offset   = i << 4;
1063
1064         uint8_t significant_coeff_flag_idx[16];
1065         uint8_t nb_significant_coeff_flag = 0;
1066
1067         x_cg = scan_x_cg[i];
1068         y_cg = scan_y_cg[i];
1069
1070         if (i < num_last_subset && i > 0) {
1071             int ctx_cg = 0;
1072             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1073                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1074             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1075                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1076
1077             significant_coeff_group_flag[x_cg][y_cg] =
1078                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1079             implicit_non_zero_coeff = 1;
1080         } else {
1081             significant_coeff_group_flag[x_cg][y_cg] =
1082                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1083                  (x_cg == 0 && y_cg == 0));
1084         }
1085
1086         last_scan_pos = num_coeff - offset - 1;
1087
1088         if (i == num_last_subset) {
1089             n_end                         = last_scan_pos - 1;
1090             significant_coeff_flag_idx[0] = last_scan_pos;
1091             nb_significant_coeff_flag     = 1;
1092         } else {
1093             n_end = 15;
1094         }
1095
1096         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1097             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1098         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1099             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1100
1101         for (n = n_end; n >= 0; n--) {
1102             GET_COORD(offset, n);
1103
1104             if (significant_coeff_group_flag[x_cg][y_cg] &&
1105                 (n > 0 || implicit_non_zero_coeff == 0)) {
1106                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1107                                                           log2_trafo_size,
1108                                                           scan_idx,
1109                                                           prev_sig) == 1) {
1110                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1111                     nb_significant_coeff_flag++;
1112                     implicit_non_zero_coeff = 0;
1113                 }
1114             } else {
1115                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1116                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1117                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1118                     nb_significant_coeff_flag++;
1119                 }
1120             }
1121         }
1122
1123         n_end = nb_significant_coeff_flag;
1124
1125         if (n_end) {
1126             int first_nz_pos_in_cg = 16;
1127             int last_nz_pos_in_cg = -1;
1128             int c_rice_param = 0;
1129             int first_greater1_coeff_idx = -1;
1130             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1131             uint16_t coeff_sign_flag;
1132             int sum_abs = 0;
1133             int sign_hidden = 0;
1134
1135             // initialize first elem of coeff_bas_level_greater1_flag
1136             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1137
1138             if (!(i == num_last_subset) && greater1_ctx == 0)
1139                 ctx_set++;
1140             greater1_ctx      = 1;
1141             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1142
1143             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1144                 int n_idx = significant_coeff_flag_idx[m];
1145                 int inc   = (ctx_set << 2) + greater1_ctx;
1146                 coeff_abs_level_greater1_flag[n_idx] =
1147                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1148                 if (coeff_abs_level_greater1_flag[n_idx]) {
1149                     greater1_ctx = 0;
1150                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1151                     greater1_ctx++;
1152                 }
1153
1154                 if (coeff_abs_level_greater1_flag[n_idx] &&
1155                     first_greater1_coeff_idx == -1)
1156                     first_greater1_coeff_idx = n_idx;
1157             }
1158             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1159             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1160                                  !lc->cu.cu_transquant_bypass_flag;
1161
1162             if (first_greater1_coeff_idx != -1) {
1163                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1164             }
1165             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1166                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1167             } else {
1168                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1169             }
1170
1171             for (m = 0; m < n_end; m++) {
1172                 n = significant_coeff_flag_idx[m];
1173                 GET_COORD(offset, n);
1174                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1175                 if (trans_coeff_level == ((m < 8) ?
1176                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1177                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1178
1179                     trans_coeff_level += last_coeff_abs_level_remaining;
1180                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1181                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1182                 }
1183                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1184                     sum_abs += trans_coeff_level;
1185                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1186                         trans_coeff_level = -trans_coeff_level;
1187                 }
1188                 if (coeff_sign_flag >> 15)
1189                     trans_coeff_level = -trans_coeff_level;
1190                 coeff_sign_flag <<= 1;
1191                 if (!lc->cu.cu_transquant_bypass_flag) {
1192                     if (s->sps->scaling_list_enable_flag) {
1193                         if (y_c || x_c || log2_trafo_size < 4) {
1194                             int pos;
1195                             switch (log2_trafo_size) {
1196                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1197                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1198                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1199                             default: pos = (y_c        << 2) +  x_c;
1200                             }
1201                             scale_m = scale_matrix[pos];
1202                         } else {
1203                             scale_m = dc_scale;
1204                         }
1205                     }
1206                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1207                     if(trans_coeff_level < 0) {
1208                         if((~trans_coeff_level) & 0xFffffffffff8000)
1209                             trans_coeff_level = -32768;
1210                     } else {
1211                         if (trans_coeff_level & 0xffffffffffff8000)
1212                             trans_coeff_level = 32767;
1213                     }
1214                 }
1215                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1216             }
1217         }
1218     }
1219
1220     if (lc->cu.cu_transquant_bypass_flag) {
1221         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1222     } else {
1223         if (transform_skip_flag)
1224             s->hevcdsp.transform_skip(dst, coeffs, stride);
1225         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1226                  log2_trafo_size == 2)
1227             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1228         else
1229             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1230     }
1231 }
1232
1233 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1234                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1235                               int log2_cb_size, int log2_trafo_size,
1236                               int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr)
1237 {
1238     HEVCLocalContext *lc = &s->HEVClc;
1239
1240     if (lc->cu.pred_mode == MODE_INTRA) {
1241         int trafo_size = 1 << log2_trafo_size;
1242         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1243
1244         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1245         if (log2_trafo_size > 2) {
1246             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1247             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1248             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1249             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1250         } else if (blk_idx == 3) {
1251             trafo_size = trafo_size << s->sps->hshift[1];
1252             ff_hevc_set_neighbour_available(s, xBase, yBase,
1253                                             trafo_size, trafo_size);
1254             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1255             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1256         }
1257     }
1258
1259     if (cbf_luma || cbf_cb || cbf_cr) {
1260         int scan_idx   = SCAN_DIAG;
1261         int scan_idx_c = SCAN_DIAG;
1262
1263         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1264             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1265             if (lc->tu.cu_qp_delta != 0)
1266                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1267                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1268             lc->tu.is_cu_qp_delta_coded = 1;
1269
1270             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
1271                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
1272                 av_log(s->avctx, AV_LOG_ERROR,
1273                        "The cu_qp_delta %d is outside the valid range "
1274                        "[%d, %d].\n",
1275                        lc->tu.cu_qp_delta,
1276                        -(26 + s->sps->qp_bd_offset / 2),
1277                         (25 + s->sps->qp_bd_offset / 2));
1278                 return AVERROR_INVALIDDATA;
1279             }
1280
1281             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1282         }
1283
1284         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1285             if (lc->tu.cur_intra_pred_mode >= 6 &&
1286                 lc->tu.cur_intra_pred_mode <= 14) {
1287                 scan_idx = SCAN_VERT;
1288             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1289                        lc->tu.cur_intra_pred_mode <= 30) {
1290                 scan_idx = SCAN_HORIZ;
1291             }
1292
1293             if (lc->pu.intra_pred_mode_c >=  6 &&
1294                 lc->pu.intra_pred_mode_c <= 14) {
1295                 scan_idx_c = SCAN_VERT;
1296             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1297                        lc->pu.intra_pred_mode_c <= 30) {
1298                 scan_idx_c = SCAN_HORIZ;
1299             }
1300         }
1301
1302         if (cbf_luma)
1303             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1304         if (log2_trafo_size > 2) {
1305             if (cbf_cb)
1306                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1307             if (cbf_cr)
1308                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1309         } else if (blk_idx == 3) {
1310             if (cbf_cb)
1311                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1312             if (cbf_cr)
1313                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1314         }
1315     }
1316     return 0;
1317 }
1318
1319 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1320 {
1321     int cb_size          = 1 << log2_cb_size;
1322     int log2_min_pu_size = s->sps->log2_min_pu_size;
1323
1324     int min_pu_width     = s->sps->min_pu_width;
1325     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1326     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1327     int i, j;
1328
1329     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1330         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1331             s->is_pcm[i + j * min_pu_width] = 2;
1332 }
1333
1334 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1335                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1336                               int log2_cb_size, int log2_trafo_size,
1337                               int trafo_depth, int blk_idx,
1338                               int cbf_cb, int cbf_cr)
1339 {
1340     HEVCLocalContext *lc = &s->HEVClc;
1341     uint8_t split_transform_flag;
1342     int ret;
1343
1344     if (lc->cu.intra_split_flag) {
1345         if (trafo_depth == 1)
1346             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1347     } else {
1348         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1349     }
1350
1351     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1352         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1353         trafo_depth     < lc->cu.max_trafo_depth       &&
1354         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1355         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1356     } else {
1357         int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1358                           lc->cu.pred_mode == MODE_INTER &&
1359                           lc->cu.part_mode != PART_2Nx2N &&
1360                           trafo_depth == 0;
1361
1362         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1363                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1364                                inter_split;
1365     }
1366
1367     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1368         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1369     else if (log2_trafo_size > 2 || trafo_depth == 0)
1370         cbf_cb = 0;
1371     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1372         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1373     else if (log2_trafo_size > 2 || trafo_depth == 0)
1374         cbf_cr = 0;
1375
1376     if (split_transform_flag) {
1377         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1378         const int x1 = x0 + trafo_size_split;
1379         const int y1 = y0 + trafo_size_split;
1380
1381 #define SUBDIVIDE(x, y, idx)                                                    \
1382 do {                                                                            \
1383     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1384                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1385                              cbf_cb, cbf_cr);                                   \
1386     if (ret < 0)                                                                \
1387         return ret;                                                             \
1388 } while (0)
1389
1390         SUBDIVIDE(x0, y0, 0);
1391         SUBDIVIDE(x1, y0, 1);
1392         SUBDIVIDE(x0, y1, 2);
1393         SUBDIVIDE(x1, y1, 3);
1394
1395 #undef SUBDIVIDE
1396     } else {
1397         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1398         int log2_min_tu_size = s->sps->log2_min_tb_size;
1399         int min_tu_width     = s->sps->min_tb_width;
1400         int cbf_luma         = 1;
1401
1402         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1403             cbf_cb || cbf_cr)
1404             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1405
1406         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1407                                  log2_cb_size, log2_trafo_size,
1408                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1409         if (ret < 0)
1410             return ret;
1411         // TODO: store cbf_luma somewhere else
1412         if (cbf_luma) {
1413             int i, j;
1414             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1415                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1416                     int x_tu = (x0 + j) >> log2_min_tu_size;
1417                     int y_tu = (y0 + i) >> log2_min_tu_size;
1418                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1419                 }
1420         }
1421         if (!s->sh.disable_deblocking_filter_flag) {
1422             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1423             if (s->pps->transquant_bypass_enable_flag &&
1424                 lc->cu.cu_transquant_bypass_flag)
1425                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1426         }
1427     }
1428     return 0;
1429 }
1430
1431 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1432 {
1433     //TODO: non-4:2:0 support
1434     HEVCLocalContext *lc = &s->HEVClc;
1435     GetBitContext gb;
1436     int cb_size   = 1 << log2_cb_size;
1437     int stride0   = s->frame->linesize[0];
1438     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1439     int   stride1 = s->frame->linesize[1];
1440     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1441     int   stride2 = s->frame->linesize[2];
1442     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1443
1444     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1445     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1446     int ret;
1447
1448     if (!s->sh.disable_deblocking_filter_flag)
1449         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1450
1451     ret = init_get_bits(&gb, pcm, length);
1452     if (ret < 0)
1453         return ret;
1454
1455     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1456     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1457     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1458     return 0;
1459 }
1460
1461 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1462 {
1463     HEVCLocalContext *lc = &s->HEVClc;
1464     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1465     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1466
1467     if (x)
1468         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1469     if (y)
1470         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1471
1472     switch (x) {
1473     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1474     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1475     case 0: lc->pu.mvd.x = 0;                               break;
1476     }
1477
1478     switch (y) {
1479     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1480     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1481     case 0: lc->pu.mvd.y = 0;                               break;
1482     }
1483 }
1484
1485 /**
1486  * 8.5.3.2.2.1 Luma sample interpolation process
1487  *
1488  * @param s HEVC decoding context
1489  * @param dst target buffer for block data at block position
1490  * @param dststride stride of the dst buffer
1491  * @param ref reference picture buffer at origin (0, 0)
1492  * @param mv motion vector (relative to block position) to get pixel data from
1493  * @param x_off horizontal position of block from origin (0, 0)
1494  * @param y_off vertical position of block from origin (0, 0)
1495  * @param block_w width of block
1496  * @param block_h height of block
1497  */
1498 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1499                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1500                     int block_w, int block_h)
1501 {
1502     HEVCLocalContext *lc = &s->HEVClc;
1503     uint8_t *src         = ref->data[0];
1504     ptrdiff_t srcstride  = ref->linesize[0];
1505     int pic_width        = s->sps->width;
1506     int pic_height       = s->sps->height;
1507
1508     int mx         = mv->x & 3;
1509     int my         = mv->y & 3;
1510     int extra_left = ff_hevc_qpel_extra_before[mx];
1511     int extra_top  = ff_hevc_qpel_extra_before[my];
1512
1513     x_off += mv->x >> 2;
1514     y_off += mv->y >> 2;
1515     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1516
1517     if (x_off < extra_left || y_off < extra_top ||
1518         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1519         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1520         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1521         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1522         int buf_offset = extra_top *
1523                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1524
1525         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1526                                  edge_emu_stride, srcstride,
1527                                  block_w + ff_hevc_qpel_extra[mx],
1528                                  block_h + ff_hevc_qpel_extra[my],
1529                                  x_off - extra_left, y_off - extra_top,
1530                                  pic_width, pic_height);
1531         src = lc->edge_emu_buffer + buf_offset;
1532         srcstride = edge_emu_stride;
1533     }
1534     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1535                                      block_h, lc->mc_buffer);
1536 }
1537
1538 /**
1539  * 8.5.3.2.2.2 Chroma sample interpolation process
1540  *
1541  * @param s HEVC decoding context
1542  * @param dst1 target buffer for block data at block position (U plane)
1543  * @param dst2 target buffer for block data at block position (V plane)
1544  * @param dststride stride of the dst1 and dst2 buffers
1545  * @param ref reference picture buffer at origin (0, 0)
1546  * @param mv motion vector (relative to block position) to get pixel data from
1547  * @param x_off horizontal position of block from origin (0, 0)
1548  * @param y_off vertical position of block from origin (0, 0)
1549  * @param block_w width of block
1550  * @param block_h height of block
1551  */
1552 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1553                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1554                       int x_off, int y_off, int block_w, int block_h)
1555 {
1556     HEVCLocalContext *lc = &s->HEVClc;
1557     uint8_t *src1        = ref->data[1];
1558     uint8_t *src2        = ref->data[2];
1559     ptrdiff_t src1stride = ref->linesize[1];
1560     ptrdiff_t src2stride = ref->linesize[2];
1561     int pic_width        = s->sps->width >> 1;
1562     int pic_height       = s->sps->height >> 1;
1563
1564     int mx = mv->x & 7;
1565     int my = mv->y & 7;
1566
1567     x_off += mv->x >> 3;
1568     y_off += mv->y >> 3;
1569     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1570     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1571
1572     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1573         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1574         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1575         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1576         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1577         int buf_offset1 = EPEL_EXTRA_BEFORE *
1578                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1579         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1580         int buf_offset2 = EPEL_EXTRA_BEFORE *
1581                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1582
1583         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1584                                  edge_emu_stride, src1stride,
1585                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1586                                  x_off - EPEL_EXTRA_BEFORE,
1587                                  y_off - EPEL_EXTRA_BEFORE,
1588                                  pic_width, pic_height);
1589
1590         src1 = lc->edge_emu_buffer + buf_offset1;
1591         src1stride = edge_emu_stride;
1592         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1593                                              block_w, block_h, mx, my, lc->mc_buffer);
1594
1595         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1596                                  edge_emu_stride, src2stride,
1597                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1598                                  x_off - EPEL_EXTRA_BEFORE,
1599                                  y_off - EPEL_EXTRA_BEFORE,
1600                                  pic_width, pic_height);
1601         src2 = lc->edge_emu_buffer + buf_offset2;
1602         src2stride = edge_emu_stride;
1603
1604         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1605                                              block_w, block_h, mx, my,
1606                                              lc->mc_buffer);
1607     } else {
1608         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1609                                              block_w, block_h, mx, my,
1610                                              lc->mc_buffer);
1611         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1612                                              block_w, block_h, mx, my,
1613                                              lc->mc_buffer);
1614     }
1615 }
1616
1617 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1618                                 const Mv *mv, int y0, int height)
1619 {
1620     int y = (mv->y >> 2) + y0 + height + 9;
1621     ff_thread_await_progress(&ref->tf, y, 0);
1622 }
1623
1624 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1625                                 int nPbW, int nPbH,
1626                                 int log2_cb_size, int partIdx)
1627 {
1628 #define POS(c_idx, x, y)                                                              \
1629     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1630                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1631     HEVCLocalContext *lc = &s->HEVClc;
1632     int merge_idx = 0;
1633     struct MvField current_mv = {{{ 0 }}};
1634
1635     int min_pu_width = s->sps->min_pu_width;
1636
1637     MvField *tab_mvf = s->ref->tab_mvf;
1638     RefPicList  *refPicList = s->ref->refPicList;
1639     HEVCFrame *ref0, *ref1;
1640
1641     int tmpstride = MAX_PB_SIZE;
1642
1643     uint8_t *dst0 = POS(0, x0, y0);
1644     uint8_t *dst1 = POS(1, x0, y0);
1645     uint8_t *dst2 = POS(2, x0, y0);
1646     int log2_min_cb_size = s->sps->log2_min_cb_size;
1647     int min_cb_width     = s->sps->min_cb_width;
1648     int x_cb             = x0 >> log2_min_cb_size;
1649     int y_cb             = y0 >> log2_min_cb_size;
1650     int ref_idx[2];
1651     int mvp_flag[2];
1652     int x_pu, y_pu;
1653     int i, j;
1654
1655     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1656
1657     if (!skip_flag)
1658         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1659
1660     if (skip_flag || lc->pu.merge_flag) {
1661         if (s->sh.max_num_merge_cand > 1)
1662             merge_idx = ff_hevc_merge_idx_decode(s);
1663         else
1664             merge_idx = 0;
1665
1666         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1667                                    partIdx, merge_idx, &current_mv);
1668         x_pu = x0 >> s->sps->log2_min_pu_size;
1669         y_pu = y0 >> s->sps->log2_min_pu_size;
1670
1671         for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1672             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1673                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1674     } else {
1675         enum InterPredIdc inter_pred_idc = PRED_L0;
1676         ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1677         if (s->sh.slice_type == B_SLICE)
1678             inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1679
1680         if (inter_pred_idc != PRED_L1) {
1681             if (s->sh.nb_refs[L0]) {
1682                 ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1683                 current_mv.ref_idx[0] = ref_idx[0];
1684             }
1685             current_mv.pred_flag[0] = 1;
1686             hls_mvd_coding(s, x0, y0, 0);
1687             mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1688             ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1689                                      partIdx, merge_idx, &current_mv,
1690                                      mvp_flag[0], 0);
1691             current_mv.mv[0].x += lc->pu.mvd.x;
1692             current_mv.mv[0].y += lc->pu.mvd.y;
1693         }
1694
1695         if (inter_pred_idc != PRED_L0) {
1696             if (s->sh.nb_refs[L1]) {
1697                 ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1698                 current_mv.ref_idx[1] = ref_idx[1];
1699             }
1700
1701             if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1702                 AV_ZERO32(&lc->pu.mvd);
1703             } else {
1704                 hls_mvd_coding(s, x0, y0, 1);
1705             }
1706
1707             current_mv.pred_flag[1] = 1;
1708             mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1709             ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1710                                      partIdx, merge_idx, &current_mv,
1711                                      mvp_flag[1], 1);
1712             current_mv.mv[1].x += lc->pu.mvd.x;
1713             current_mv.mv[1].y += lc->pu.mvd.y;
1714         }
1715
1716         x_pu = x0 >> s->sps->log2_min_pu_size;
1717         y_pu = y0 >> s->sps->log2_min_pu_size;
1718
1719         for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1720             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1721                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1722     }
1723
1724     if (current_mv.pred_flag[0]) {
1725         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1726         if (!ref0)
1727             return;
1728         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1729     }
1730     if (current_mv.pred_flag[1]) {
1731         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1732         if (!ref1)
1733             return;
1734         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1735     }
1736
1737     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1738         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1739         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1740
1741         luma_mc(s, tmp, tmpstride, ref0->frame,
1742                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1743
1744         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1745             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1746             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1747                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1748                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1749                                      dst0, s->frame->linesize[0], tmp,
1750                                      tmpstride, nPbW, nPbH);
1751         } else {
1752             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1753         }
1754         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1755                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1756
1757         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1758             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1759             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1760                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1761                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1762                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1763                                      nPbW / 2, nPbH / 2);
1764             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1765                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1766                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1767                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1768                                      nPbW / 2, nPbH / 2);
1769         } else {
1770             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1771             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1772         }
1773     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1774         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1775         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1776
1777         if (!ref1)
1778             return;
1779
1780         luma_mc(s, tmp, tmpstride, ref1->frame,
1781                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1782
1783         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1784             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1785             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1786                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1787                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1788                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1789                                       nPbW, nPbH);
1790         } else {
1791             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1792         }
1793
1794         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1795                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1796
1797         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1798             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1799             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1800                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1801                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1802                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1803             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1804                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1805                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1806                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1807         } else {
1808             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1809             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1810         }
1811     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1812         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1813         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1814         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1815         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1816         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1817         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1818
1819         if (!ref0 || !ref1)
1820             return;
1821
1822         luma_mc(s, tmp, tmpstride, ref0->frame,
1823                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1824         luma_mc(s, tmp2, tmpstride, ref1->frame,
1825                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1826
1827         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1828             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1829             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1830                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1831                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1832                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1833                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1834                                          dst0, s->frame->linesize[0],
1835                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1836         } else {
1837             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1838                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1839         }
1840
1841         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1842                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1843         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1844                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1845
1846         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1847             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1848             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1849                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1850                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1851                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1852                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1853                                          dst1, s->frame->linesize[1], tmp, tmp3,
1854                                          tmpstride, nPbW / 2, nPbH / 2);
1855             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1856                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1857                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1858                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1859                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1860                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1861                                          tmpstride, nPbW / 2, nPbH / 2);
1862         } else {
1863             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1864             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1865         }
1866     }
1867 }
1868
1869 /**
1870  * 8.4.1
1871  */
1872 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1873                                 int prev_intra_luma_pred_flag)
1874 {
1875     HEVCLocalContext *lc = &s->HEVClc;
1876     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1877     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1878     int min_pu_width     = s->sps->min_pu_width;
1879     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1880     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1881     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1882
1883     int cand_up   = (lc->ctb_up_flag || y0b) ?
1884                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1885     int cand_left = (lc->ctb_left_flag || x0b) ?
1886                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1887
1888     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1889
1890     MvField *tab_mvf = s->ref->tab_mvf;
1891     int intra_pred_mode;
1892     int candidate[3];
1893     int i, j;
1894
1895     // intra_pred_mode prediction does not cross vertical CTB boundaries
1896     if ((y0 - 1) < y_ctb)
1897         cand_up = INTRA_DC;
1898
1899     if (cand_left == cand_up) {
1900         if (cand_left < 2) {
1901             candidate[0] = INTRA_PLANAR;
1902             candidate[1] = INTRA_DC;
1903             candidate[2] = INTRA_ANGULAR_26;
1904         } else {
1905             candidate[0] = cand_left;
1906             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1907             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1908         }
1909     } else {
1910         candidate[0] = cand_left;
1911         candidate[1] = cand_up;
1912         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1913             candidate[2] = INTRA_PLANAR;
1914         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1915             candidate[2] = INTRA_DC;
1916         } else {
1917             candidate[2] = INTRA_ANGULAR_26;
1918         }
1919     }
1920
1921     if (prev_intra_luma_pred_flag) {
1922         intra_pred_mode = candidate[lc->pu.mpm_idx];
1923     } else {
1924         if (candidate[0] > candidate[1])
1925             FFSWAP(uint8_t, candidate[0], candidate[1]);
1926         if (candidate[0] > candidate[2])
1927             FFSWAP(uint8_t, candidate[0], candidate[2]);
1928         if (candidate[1] > candidate[2])
1929             FFSWAP(uint8_t, candidate[1], candidate[2]);
1930
1931         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1932         for (i = 0; i < 3; i++)
1933             if (intra_pred_mode >= candidate[i])
1934                 intra_pred_mode++;
1935     }
1936
1937     /* write the intra prediction units into the mv array */
1938     if (!size_in_pus)
1939         size_in_pus = 1;
1940     for (i = 0; i < size_in_pus; i++) {
1941         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1942                intra_pred_mode, size_in_pus);
1943
1944         for (j = 0; j < size_in_pus; j++) {
1945             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1946             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1947             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1948             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1949             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1950             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1951             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1952             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1953             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1954         }
1955     }
1956
1957     return intra_pred_mode;
1958 }
1959
1960 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1961                                           int log2_cb_size, int ct_depth)
1962 {
1963     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1964     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1965     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1966     int y;
1967
1968     for (y = 0; y < length; y++)
1969         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1970                ct_depth, length);
1971 }
1972
1973 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1974                                   int log2_cb_size)
1975 {
1976     HEVCLocalContext *lc = &s->HEVClc;
1977     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1978     uint8_t prev_intra_luma_pred_flag[4];
1979     int split   = lc->cu.part_mode == PART_NxN;
1980     int pb_size = (1 << log2_cb_size) >> split;
1981     int side    = split + 1;
1982     int chroma_mode;
1983     int i, j;
1984
1985     for (i = 0; i < side; i++)
1986         for (j = 0; j < side; j++)
1987             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1988
1989     for (i = 0; i < side; i++) {
1990         for (j = 0; j < side; j++) {
1991             if (prev_intra_luma_pred_flag[2 * i + j])
1992                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1993             else
1994                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1995
1996             lc->pu.intra_pred_mode[2 * i + j] =
1997                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1998                                      prev_intra_luma_pred_flag[2 * i + j]);
1999         }
2000     }
2001
2002     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2003     if (chroma_mode != 4) {
2004         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2005             lc->pu.intra_pred_mode_c = 34;
2006         else
2007             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2008     } else {
2009         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2010     }
2011 }
2012
2013 static void intra_prediction_unit_default_value(HEVCContext *s,
2014                                                 int x0, int y0,
2015                                                 int log2_cb_size)
2016 {
2017     HEVCLocalContext *lc = &s->HEVClc;
2018     int pb_size          = 1 << log2_cb_size;
2019     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2020     int min_pu_width     = s->sps->min_pu_width;
2021     MvField *tab_mvf     = s->ref->tab_mvf;
2022     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2023     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2024     int j, k;
2025
2026     if (size_in_pus == 0)
2027         size_in_pus = 1;
2028     for (j = 0; j < size_in_pus; j++) {
2029         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2030         for (k = 0; k < size_in_pus; k++)
2031             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2032     }
2033 }
2034
2035 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2036 {
2037     int cb_size          = 1 << log2_cb_size;
2038     HEVCLocalContext *lc = &s->HEVClc;
2039     int log2_min_cb_size = s->sps->log2_min_cb_size;
2040     int length           = cb_size >> log2_min_cb_size;
2041     int min_cb_width     = s->sps->min_cb_width;
2042     int x_cb             = x0 >> log2_min_cb_size;
2043     int y_cb             = y0 >> log2_min_cb_size;
2044     int x, y, ret;
2045
2046     lc->cu.x                = x0;
2047     lc->cu.y                = y0;
2048     lc->cu.pred_mode        = MODE_INTRA;
2049     lc->cu.part_mode        = PART_2Nx2N;
2050     lc->cu.intra_split_flag = 0;
2051
2052     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2053     for (x = 0; x < 4; x++)
2054         lc->pu.intra_pred_mode[x] = 1;
2055     if (s->pps->transquant_bypass_enable_flag) {
2056         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2057         if (lc->cu.cu_transquant_bypass_flag)
2058             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2059     } else
2060         lc->cu.cu_transquant_bypass_flag = 0;
2061
2062     if (s->sh.slice_type != I_SLICE) {
2063         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2064
2065         x = y_cb * min_cb_width + x_cb;
2066         for (y = 0; y < length; y++) {
2067             memset(&s->skip_flag[x], skip_flag, length);
2068             x += min_cb_width;
2069         }
2070         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2071     }
2072
2073     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2074         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2075         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2076
2077         if (!s->sh.disable_deblocking_filter_flag)
2078             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2079     } else {
2080         int pcm_flag = 0;
2081
2082         if (s->sh.slice_type != I_SLICE)
2083             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2084         if (lc->cu.pred_mode != MODE_INTRA ||
2085             log2_cb_size == s->sps->log2_min_cb_size) {
2086             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2087             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2088                                       lc->cu.pred_mode == MODE_INTRA;
2089         }
2090
2091         if (lc->cu.pred_mode == MODE_INTRA) {
2092             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2093                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2094                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2095                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2096             }
2097             if (pcm_flag) {
2098                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2099                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2100                 if (s->sps->pcm.loop_filter_disable_flag)
2101                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2102
2103                 if (ret < 0)
2104                     return ret;
2105             } else {
2106                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2107             }
2108         } else {
2109             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2110             switch (lc->cu.part_mode) {
2111             case PART_2Nx2N:
2112                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2113                 break;
2114             case PART_2NxN:
2115                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2116                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2117                 break;
2118             case PART_Nx2N:
2119                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2120                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2121                 break;
2122             case PART_2NxnU:
2123                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2124                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2125                 break;
2126             case PART_2NxnD:
2127                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2128                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2129                 break;
2130             case PART_nLx2N:
2131                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2132                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2133                 break;
2134             case PART_nRx2N:
2135                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2136                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2137                 break;
2138             case PART_NxN:
2139                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2140                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2141                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2142                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2143                 break;
2144             }
2145         }
2146
2147         if (!pcm_flag) {
2148             int rqt_root_cbf = 1;
2149
2150             if (lc->cu.pred_mode != MODE_INTRA &&
2151                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2152                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2153             }
2154             if (rqt_root_cbf) {
2155                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2156                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2157                                          s->sps->max_transform_hierarchy_depth_inter;
2158                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2159                                          log2_cb_size,
2160                                          log2_cb_size, 0, 0, 0, 0);
2161                 if (ret < 0)
2162                     return ret;
2163             } else {
2164                 if (!s->sh.disable_deblocking_filter_flag)
2165                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2166             }
2167         }
2168     }
2169
2170     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2171         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2172
2173     x = y_cb * min_cb_width + x_cb;
2174     for (y = 0; y < length; y++) {
2175         memset(&s->qp_y_tab[x], lc->qp_y, length);
2176         x += min_cb_width;
2177     }
2178
2179     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2180
2181     return 0;
2182 }
2183
2184 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2185                                int log2_cb_size, int cb_depth)
2186 {
2187     HEVCLocalContext *lc = &s->HEVClc;
2188     const int cb_size    = 1 << log2_cb_size;
2189     int split_cu;
2190
2191     lc->ct.depth = cb_depth;
2192     if (x0 + cb_size <= s->sps->width  &&
2193         y0 + cb_size <= s->sps->height &&
2194         log2_cb_size > s->sps->log2_min_cb_size) {
2195         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2196     } else {
2197         split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
2198     }
2199     if (s->pps->cu_qp_delta_enabled_flag &&
2200         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2201         lc->tu.is_cu_qp_delta_coded = 0;
2202         lc->tu.cu_qp_delta          = 0;
2203     }
2204
2205     if (split_cu) {
2206         const int cb_size_split = cb_size >> 1;
2207         const int x1 = x0 + cb_size_split;
2208         const int y1 = y0 + cb_size_split;
2209
2210         log2_cb_size--;
2211         cb_depth++;
2212
2213 #define SUBDIVIDE(x, y)                                                \
2214 do {                                                                   \
2215     if (x < s->sps->width && y < s->sps->height) {                     \
2216         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2217         if (ret < 0)                                                   \
2218             return ret;                                                \
2219     }                                                                  \
2220 } while (0)
2221
2222         SUBDIVIDE(x0, y0);
2223         SUBDIVIDE(x1, y0);
2224         SUBDIVIDE(x0, y1);
2225         SUBDIVIDE(x1, y1);
2226     } else {
2227         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2228         if (ret < 0)
2229             return ret;
2230     }
2231
2232     return 0;
2233 }
2234
2235 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2236                                  int ctb_addr_ts)
2237 {
2238     HEVCLocalContext *lc  = &s->HEVClc;
2239     int ctb_size          = 1 << s->sps->log2_ctb_size;
2240     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2241     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2242
2243     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2244
2245     if (s->pps->entropy_coding_sync_enabled_flag) {
2246         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2247             lc->first_qp_group = 1;
2248         lc->end_of_tiles_x = s->sps->width;
2249     } else if (s->pps->tiles_enabled_flag) {
2250         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2251             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2252             lc->start_of_tiles_x = x_ctb;
2253             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2254             lc->first_qp_group   = 1;
2255         }
2256     } else {
2257         lc->end_of_tiles_x = s->sps->width;
2258     }
2259
2260     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2261
2262     lc->boundary_flags = 0;
2263     if (s->pps->tiles_enabled_flag) {
2264         if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2265             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2266         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2267             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2268         if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
2269             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2270         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
2271             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2272     } else {
2273         if (!ctb_addr_in_slice > 0)
2274             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2275         if (ctb_addr_in_slice < s->sps->ctb_width)
2276             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2277     }
2278
2279     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2280     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2281     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2282     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2283 }
2284
2285 static int hls_slice_data(HEVCContext *s)
2286 {
2287     int ctb_size    = 1 << s->sps->log2_ctb_size;
2288     int more_data   = 1;
2289     int x_ctb       = 0;
2290     int y_ctb       = 0;
2291     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2292     int ret;
2293
2294     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2295         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2296
2297         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2298         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2299         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2300
2301         ff_hevc_cabac_init(s, ctb_addr_ts);
2302
2303         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2304
2305         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2306         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2307         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2308
2309         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2310         if (ret < 0)
2311             return ret;
2312         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2313
2314         ctb_addr_ts++;
2315         ff_hevc_save_states(s, ctb_addr_ts);
2316         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2317     }
2318
2319     if (x_ctb + ctb_size >= s->sps->width &&
2320         y_ctb + ctb_size >= s->sps->height)
2321         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2322
2323     return ctb_addr_ts;
2324 }
2325
2326 /**
2327  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2328  * 0 if the unit should be skipped, 1 otherwise
2329  */
2330 static int hls_nal_unit(HEVCContext *s)
2331 {
2332     GetBitContext *gb = &s->HEVClc.gb;
2333     int nuh_layer_id;
2334
2335     if (get_bits1(gb) != 0)
2336         return AVERROR_INVALIDDATA;
2337
2338     s->nal_unit_type = get_bits(gb, 6);
2339
2340     nuh_layer_id   = get_bits(gb, 6);
2341     s->temporal_id = get_bits(gb, 3) - 1;
2342     if (s->temporal_id < 0)
2343         return AVERROR_INVALIDDATA;
2344
2345     av_log(s->avctx, AV_LOG_DEBUG,
2346            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2347            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2348
2349     return nuh_layer_id == 0;
2350 }
2351
2352 static void restore_tqb_pixels(HEVCContext *s)
2353 {
2354     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2355     int x, y, c_idx;
2356
2357     for (c_idx = 0; c_idx < 3; c_idx++) {
2358         ptrdiff_t stride = s->frame->linesize[c_idx];
2359         int hshift       = s->sps->hshift[c_idx];
2360         int vshift       = s->sps->vshift[c_idx];
2361         for (y = 0; y < s->sps->min_pu_height; y++) {
2362             for (x = 0; x < s->sps->min_pu_width; x++) {
2363                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2364                     int n;
2365                     int len      = min_pu_size >> hshift;
2366                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2367                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2368                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2369                         memcpy(dst, src, len);
2370                         src += stride;
2371                         dst += stride;
2372                     }
2373                 }
2374             }
2375         }
2376     }
2377 }
2378
2379 static int set_side_data(HEVCContext *s)
2380 {
2381     AVFrame *out = s->ref->frame;
2382
2383     if (s->sei_frame_packing_present &&
2384         s->frame_packing_arrangement_type >= 3 &&
2385         s->frame_packing_arrangement_type <= 5 &&
2386         s->content_interpretation_type > 0 &&
2387         s->content_interpretation_type < 3) {
2388         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2389         if (!stereo)
2390             return AVERROR(ENOMEM);
2391
2392         switch (s->frame_packing_arrangement_type) {
2393         case 3:
2394             if (s->quincunx_subsampling)
2395                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2396             else
2397                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2398             break;
2399         case 4:
2400             stereo->type = AV_STEREO3D_TOPBOTTOM;
2401             break;
2402         case 5:
2403             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2404             break;
2405         }
2406
2407         if (s->content_interpretation_type == 2)
2408             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2409     }
2410
2411     if (s->sei_display_orientation_present &&
2412         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2413         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2414         AVFrameSideData *rotation = av_frame_new_side_data(out,
2415                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2416                                                            sizeof(int32_t) * 9);
2417         if (!rotation)
2418             return AVERROR(ENOMEM);
2419
2420         av_display_rotation_set((int32_t *)rotation->data, angle);
2421         av_display_matrix_flip((int32_t *)rotation->data,
2422                                s->sei_hflip, s->sei_vflip);
2423     }
2424
2425     return 0;
2426 }
2427
2428 static int hevc_frame_start(HEVCContext *s)
2429 {
2430     HEVCLocalContext *lc = &s->HEVClc;
2431     int ret;
2432
2433     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2434     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2435     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2436     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2437
2438     lc->start_of_tiles_x = 0;
2439     s->is_decoded        = 0;
2440     s->first_nal_type    = s->nal_unit_type;
2441
2442     if (s->pps->tiles_enabled_flag)
2443         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2444
2445     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2446                               s->poc);
2447     if (ret < 0)
2448         goto fail;
2449
2450     ret = ff_hevc_frame_rps(s);
2451     if (ret < 0) {
2452         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2453         goto fail;
2454     }
2455
2456     s->ref->frame->key_frame = IS_IRAP(s);
2457
2458     ret = set_side_data(s);
2459     if (ret < 0)
2460         goto fail;
2461
2462     av_frame_unref(s->output_frame);
2463     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2464     if (ret < 0)
2465         goto fail;
2466
2467     ff_thread_finish_setup(s->avctx);
2468
2469     return 0;
2470
2471 fail:
2472     if (s->ref)
2473         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2474     s->ref = NULL;
2475     return ret;
2476 }
2477
2478 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2479 {
2480     HEVCLocalContext *lc = &s->HEVClc;
2481     GetBitContext *gb    = &lc->gb;
2482     int ctb_addr_ts, ret;
2483
2484     ret = init_get_bits8(gb, nal, length);
2485     if (ret < 0)
2486         return ret;
2487
2488     ret = hls_nal_unit(s);
2489     if (ret < 0) {
2490         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2491                s->nal_unit_type);
2492         goto fail;
2493     } else if (!ret)
2494         return 0;
2495
2496     switch (s->nal_unit_type) {
2497     case NAL_VPS:
2498         ret = ff_hevc_decode_nal_vps(s);
2499         if (ret < 0)
2500             goto fail;
2501         break;
2502     case NAL_SPS:
2503         ret = ff_hevc_decode_nal_sps(s);
2504         if (ret < 0)
2505             goto fail;
2506         break;
2507     case NAL_PPS:
2508         ret = ff_hevc_decode_nal_pps(s);
2509         if (ret < 0)
2510             goto fail;
2511         break;
2512     case NAL_SEI_PREFIX:
2513     case NAL_SEI_SUFFIX:
2514         ret = ff_hevc_decode_nal_sei(s);
2515         if (ret < 0)
2516             goto fail;
2517         break;
2518     case NAL_TRAIL_R:
2519     case NAL_TRAIL_N:
2520     case NAL_TSA_N:
2521     case NAL_TSA_R:
2522     case NAL_STSA_N:
2523     case NAL_STSA_R:
2524     case NAL_BLA_W_LP:
2525     case NAL_BLA_W_RADL:
2526     case NAL_BLA_N_LP:
2527     case NAL_IDR_W_RADL:
2528     case NAL_IDR_N_LP:
2529     case NAL_CRA_NUT:
2530     case NAL_RADL_N:
2531     case NAL_RADL_R:
2532     case NAL_RASL_N:
2533     case NAL_RASL_R:
2534         ret = hls_slice_header(s);
2535         if (ret < 0)
2536             return ret;
2537
2538         if (s->max_ra == INT_MAX) {
2539             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2540                 s->max_ra = s->poc;
2541             } else {
2542                 if (IS_IDR(s))
2543                     s->max_ra = INT_MIN;
2544             }
2545         }
2546
2547         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2548             s->poc <= s->max_ra) {
2549             s->is_decoded = 0;
2550             break;
2551         } else {
2552             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2553                 s->max_ra = INT_MIN;
2554         }
2555
2556         if (s->sh.first_slice_in_pic_flag) {
2557             ret = hevc_frame_start(s);
2558             if (ret < 0)
2559                 return ret;
2560         } else if (!s->ref) {
2561             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2562             goto fail;
2563         }
2564
2565         if (s->nal_unit_type != s->first_nal_type) {
2566             av_log(s->avctx, AV_LOG_ERROR,
2567                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2568                    s->first_nal_type, s->nal_unit_type);
2569             return AVERROR_INVALIDDATA;
2570         }
2571
2572         if (!s->sh.dependent_slice_segment_flag &&
2573             s->sh.slice_type != I_SLICE) {
2574             ret = ff_hevc_slice_rpl(s);
2575             if (ret < 0) {
2576                 av_log(s->avctx, AV_LOG_WARNING,
2577                        "Error constructing the reference lists for the current slice.\n");
2578                 goto fail;
2579             }
2580         }
2581
2582         ctb_addr_ts = hls_slice_data(s);
2583         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2584             s->is_decoded = 1;
2585             if ((s->pps->transquant_bypass_enable_flag ||
2586                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2587                 s->sps->sao_enabled)
2588                 restore_tqb_pixels(s);
2589         }
2590
2591         if (ctb_addr_ts < 0) {
2592             ret = ctb_addr_ts;
2593             goto fail;
2594         }
2595         break;
2596     case NAL_EOS_NUT:
2597     case NAL_EOB_NUT:
2598         s->seq_decode = (s->seq_decode + 1) & 0xff;
2599         s->max_ra     = INT_MAX;
2600         break;
2601     case NAL_AUD:
2602     case NAL_FD_NUT:
2603         break;
2604     default:
2605         av_log(s->avctx, AV_LOG_INFO,
2606                "Skipping NAL unit %d\n", s->nal_unit_type);
2607     }
2608
2609     return 0;
2610 fail:
2611     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2612         return ret;
2613     return 0;
2614 }
2615
2616 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2617  * between these functions would be nice. */
2618 static int extract_rbsp(const uint8_t *src, int length,
2619                         HEVCNAL *nal)
2620 {
2621     int i, si, di;
2622     uint8_t *dst;
2623
2624 #define STARTCODE_TEST                                                  \
2625         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2626             if (src[i + 2] != 3) {                                      \
2627                 /* startcode, so we must be past the end */             \
2628                 length = i;                                             \
2629             }                                                           \
2630             break;                                                      \
2631         }
2632 #if HAVE_FAST_UNALIGNED
2633 #define FIND_FIRST_ZERO                                                 \
2634         if (i > 0 && !src[i])                                           \
2635             i--;                                                        \
2636         while (src[i])                                                  \
2637             i++
2638 #if HAVE_FAST_64BIT
2639     for (i = 0; i + 1 < length; i += 9) {
2640         if (!((~AV_RN64A(src + i) &
2641                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2642               0x8000800080008080ULL))
2643             continue;
2644         FIND_FIRST_ZERO;
2645         STARTCODE_TEST;
2646         i -= 7;
2647     }
2648 #else
2649     for (i = 0; i + 1 < length; i += 5) {
2650         if (!((~AV_RN32A(src + i) &
2651                (AV_RN32A(src + i) - 0x01000101U)) &
2652               0x80008080U))
2653             continue;
2654         FIND_FIRST_ZERO;
2655         STARTCODE_TEST;
2656         i -= 3;
2657     }
2658 #endif /* HAVE_FAST_64BIT */
2659 #else
2660     for (i = 0; i + 1 < length; i += 2) {
2661         if (src[i])
2662             continue;
2663         if (i > 0 && src[i - 1] == 0)
2664             i--;
2665         STARTCODE_TEST;
2666     }
2667 #endif /* HAVE_FAST_UNALIGNED */
2668
2669     if (i >= length - 1) { // no escaped 0
2670         nal->data = src;
2671         nal->size = length;
2672         return length;
2673     }
2674
2675     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2676                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2677     if (!nal->rbsp_buffer)
2678         return AVERROR(ENOMEM);
2679
2680     dst = nal->rbsp_buffer;
2681
2682     memcpy(dst, src, i);
2683     si = di = i;
2684     while (si + 2 < length) {
2685         // remove escapes (very rare 1:2^22)
2686         if (src[si + 2] > 3) {
2687             dst[di++] = src[si++];
2688             dst[di++] = src[si++];
2689         } else if (src[si] == 0 && src[si + 1] == 0) {
2690             if (src[si + 2] == 3) { // escape
2691                 dst[di++] = 0;
2692                 dst[di++] = 0;
2693                 si       += 3;
2694
2695                 continue;
2696             } else // next start code
2697                 goto nsc;
2698         }
2699
2700         dst[di++] = src[si++];
2701     }
2702     while (si < length)
2703         dst[di++] = src[si++];
2704
2705 nsc:
2706     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2707
2708     nal->data = dst;
2709     nal->size = di;
2710     return si;
2711 }
2712
2713 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2714 {
2715     int i, consumed, ret = 0;
2716
2717     s->ref = NULL;
2718     s->eos = 0;
2719
2720     /* split the input packet into NAL units, so we know the upper bound on the
2721      * number of slices in the frame */
2722     s->nb_nals = 0;
2723     while (length >= 4) {
2724         HEVCNAL *nal;
2725         int extract_length = 0;
2726
2727         if (s->is_nalff) {
2728             int i;
2729             for (i = 0; i < s->nal_length_size; i++)
2730                 extract_length = (extract_length << 8) | buf[i];
2731             buf    += s->nal_length_size;
2732             length -= s->nal_length_size;
2733
2734             if (extract_length > length) {
2735                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2736                 ret = AVERROR_INVALIDDATA;
2737                 goto fail;
2738             }
2739         } else {
2740             if (buf[2] == 0) {
2741                 length--;
2742                 buf++;
2743                 continue;
2744             }
2745             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2746                 ret = AVERROR_INVALIDDATA;
2747                 goto fail;
2748             }
2749
2750             buf           += 3;
2751             length        -= 3;
2752             extract_length = length;
2753         }
2754
2755         if (s->nals_allocated < s->nb_nals + 1) {
2756             int new_size = s->nals_allocated + 1;
2757             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2758             if (!tmp) {
2759                 ret = AVERROR(ENOMEM);
2760                 goto fail;
2761             }
2762             s->nals = tmp;
2763             memset(s->nals + s->nals_allocated, 0,
2764                    (new_size - s->nals_allocated) * sizeof(*tmp));
2765             s->nals_allocated = new_size;
2766         }
2767         nal = &s->nals[s->nb_nals++];
2768
2769         consumed = extract_rbsp(buf, extract_length, nal);
2770         if (consumed < 0) {
2771             ret = consumed;
2772             goto fail;
2773         }
2774
2775         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2776         if (ret < 0)
2777             goto fail;
2778         hls_nal_unit(s);
2779
2780         if (s->nal_unit_type == NAL_EOB_NUT ||
2781             s->nal_unit_type == NAL_EOS_NUT)
2782             s->eos = 1;
2783
2784         buf    += consumed;
2785         length -= consumed;
2786     }
2787
2788     /* parse the NAL units */
2789     for (i = 0; i < s->nb_nals; i++) {
2790         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2791         if (ret < 0) {
2792             av_log(s->avctx, AV_LOG_WARNING,
2793                    "Error parsing NAL unit #%d.\n", i);
2794             goto fail;
2795         }
2796     }
2797
2798 fail:
2799     if (s->ref)
2800         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2801
2802     return ret;
2803 }
2804
/**
 * Log the 16 bytes of an MD5 digest as lowercase hex, two digits per byte,
 * without any separator or trailing newline.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2811
2812 static int verify_md5(HEVCContext *s, AVFrame *frame)
2813 {
2814     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2815     int pixel_shift;
2816     int i, j;
2817
2818     if (!desc)
2819         return AVERROR(EINVAL);
2820
2821     pixel_shift = desc->comp[0].depth_minus1 > 7;
2822
2823     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2824            s->poc);
2825
2826     /* the checksums are LE, so we have to byteswap for >8bpp formats
2827      * on BE arches */
2828 #if HAVE_BIGENDIAN
2829     if (pixel_shift && !s->checksum_buf) {
2830         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2831                        FFMAX3(frame->linesize[0], frame->linesize[1],
2832                               frame->linesize[2]));
2833         if (!s->checksum_buf)
2834             return AVERROR(ENOMEM);
2835     }
2836 #endif
2837
2838     for (i = 0; frame->data[i]; i++) {
2839         int width  = s->avctx->coded_width;
2840         int height = s->avctx->coded_height;
2841         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2842         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2843         uint8_t md5[16];
2844
2845         av_md5_init(s->md5_ctx);
2846         for (j = 0; j < h; j++) {
2847             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2848 #if HAVE_BIGENDIAN
2849             if (pixel_shift) {
2850                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2851                                     (const uint16_t *) src, w);
2852                 src = s->checksum_buf;
2853             }
2854 #endif
2855             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2856         }
2857         av_md5_final(s->md5_ctx, md5);
2858
2859         if (!memcmp(md5, s->md5[i], 16)) {
2860             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2861             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2862             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2863         } else {
2864             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2865             print_md5(s->avctx, AV_LOG_ERROR, md5);
2866             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2867             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2868             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2869             return AVERROR_INVALIDDATA;
2870         }
2871     }
2872
2873     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2874
2875     return 0;
2876 }
2877
2878 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2879                              AVPacket *avpkt)
2880 {
2881     int ret;
2882     HEVCContext *s = avctx->priv_data;
2883
2884     if (!avpkt->size) {
2885         ret = ff_hevc_output_frame(s, data, 1);
2886         if (ret < 0)
2887             return ret;
2888
2889         *got_output = ret;
2890         return 0;
2891     }
2892
2893     s->ref = NULL;
2894     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2895     if (ret < 0)
2896         return ret;
2897
2898     /* verify the SEI checksum */
2899     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2900         s->is_md5) {
2901         ret = verify_md5(s, s->ref->frame);
2902         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2903             ff_hevc_unref_frame(s, s->ref, ~0);
2904             return ret;
2905         }
2906     }
2907     s->is_md5 = 0;
2908
2909     if (s->is_decoded) {
2910         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2911         s->is_decoded = 0;
2912     }
2913
2914     if (s->output_frame->buf[0]) {
2915         av_frame_move_ref(data, s->output_frame);
2916         *got_output = 1;
2917     }
2918
2919     return avpkt->size;
2920 }
2921
2922 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2923 {
2924     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2925     if (ret < 0)
2926         return ret;
2927
2928     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2929     if (!dst->tab_mvf_buf)
2930         goto fail;
2931     dst->tab_mvf = src->tab_mvf;
2932
2933     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2934     if (!dst->rpl_tab_buf)
2935         goto fail;
2936     dst->rpl_tab = src->rpl_tab;
2937
2938     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2939     if (!dst->rpl_buf)
2940         goto fail;
2941
2942     dst->poc        = src->poc;
2943     dst->ctb_count  = src->ctb_count;
2944     dst->window     = src->window;
2945     dst->flags      = src->flags;
2946     dst->sequence   = src->sequence;
2947
2948     return 0;
2949 fail:
2950     ff_hevc_unref_frame(s, dst, ~0);
2951     return AVERROR(ENOMEM);
2952 }
2953
2954 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2955 {
2956     HEVCContext       *s = avctx->priv_data;
2957     int i;
2958
2959     pic_arrays_free(s);
2960
2961     av_freep(&s->md5_ctx);
2962
2963     av_frame_free(&s->tmp_frame);
2964     av_frame_free(&s->output_frame);
2965
2966     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2967         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2968         av_frame_free(&s->DPB[i].frame);
2969     }
2970
2971     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2972         av_buffer_unref(&s->vps_list[i]);
2973     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
2974         av_buffer_unref(&s->sps_list[i]);
2975     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
2976         av_buffer_unref(&s->pps_list[i]);
2977
2978     for (i = 0; i < s->nals_allocated; i++)
2979         av_freep(&s->nals[i].rbsp_buffer);
2980     av_freep(&s->nals);
2981     s->nals_allocated = 0;
2982
2983     return 0;
2984 }
2985
2986 static av_cold int hevc_init_context(AVCodecContext *avctx)
2987 {
2988     HEVCContext *s = avctx->priv_data;
2989     int i;
2990
2991     s->avctx = avctx;
2992
2993     s->tmp_frame = av_frame_alloc();
2994     if (!s->tmp_frame)
2995         goto fail;
2996
2997     s->output_frame = av_frame_alloc();
2998     if (!s->output_frame)
2999         goto fail;
3000
3001     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3002         s->DPB[i].frame = av_frame_alloc();
3003         if (!s->DPB[i].frame)
3004             goto fail;
3005         s->DPB[i].tf.f = s->DPB[i].frame;
3006     }
3007
3008     s->max_ra = INT_MAX;
3009
3010     s->md5_ctx = av_md5_alloc();
3011     if (!s->md5_ctx)
3012         goto fail;
3013
3014     ff_bswapdsp_init(&s->bdsp);
3015
3016     s->context_initialized = 1;
3017
3018     return 0;
3019
3020 fail:
3021     hevc_decode_free(avctx);
3022     return AVERROR(ENOMEM);
3023 }
3024
3025 static int hevc_update_thread_context(AVCodecContext *dst,
3026                                       const AVCodecContext *src)
3027 {
3028     HEVCContext *s  = dst->priv_data;
3029     HEVCContext *s0 = src->priv_data;
3030     int i, ret;
3031
3032     if (!s->context_initialized) {
3033         ret = hevc_init_context(dst);
3034         if (ret < 0)
3035             return ret;
3036     }
3037
3038     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3039         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3040         if (s0->DPB[i].frame->buf[0]) {
3041             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3042             if (ret < 0)
3043                 return ret;
3044         }
3045     }
3046
3047     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3048         av_buffer_unref(&s->vps_list[i]);
3049         if (s0->vps_list[i]) {
3050             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3051             if (!s->vps_list[i])
3052                 return AVERROR(ENOMEM);
3053         }
3054     }
3055
3056     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3057         av_buffer_unref(&s->sps_list[i]);
3058         if (s0->sps_list[i]) {
3059             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3060             if (!s->sps_list[i])
3061                 return AVERROR(ENOMEM);
3062         }
3063     }
3064
3065     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3066         av_buffer_unref(&s->pps_list[i]);
3067         if (s0->pps_list[i]) {
3068             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3069             if (!s->pps_list[i])
3070                 return AVERROR(ENOMEM);
3071         }
3072     }
3073
3074     if (s->sps != s0->sps)
3075         ret = set_sps(s, s0->sps);
3076
3077     s->seq_decode = s0->seq_decode;
3078     s->seq_output = s0->seq_output;
3079     s->pocTid0    = s0->pocTid0;
3080     s->max_ra     = s0->max_ra;
3081
3082     s->is_nalff        = s0->is_nalff;
3083     s->nal_length_size = s0->nal_length_size;
3084
3085     if (s0->eos) {
3086         s->seq_decode = (s->seq_decode + 1) & 0xff;
3087         s->max_ra = INT_MAX;
3088     }
3089
3090     return 0;
3091 }
3092
3093 static int hevc_decode_extradata(HEVCContext *s)
3094 {
3095     AVCodecContext *avctx = s->avctx;
3096     GetByteContext gb;
3097     int ret;
3098
3099     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3100
3101     if (avctx->extradata_size > 3 &&
3102         (avctx->extradata[0] || avctx->extradata[1] ||
3103          avctx->extradata[2] > 1)) {
3104         /* It seems the extradata is encoded as hvcC format.
3105          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3106          * is finalized. When finalized, configurationVersion will be 1 and we
3107          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3108         int i, j, num_arrays, nal_len_size;
3109
3110         s->is_nalff = 1;
3111
3112         bytestream2_skip(&gb, 21);
3113         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3114         num_arrays   = bytestream2_get_byte(&gb);
3115
3116         /* nal units in the hvcC always have length coded with 2 bytes,
3117          * so put a fake nal_length_size = 2 while parsing them */
3118         s->nal_length_size = 2;
3119
3120         /* Decode nal units from hvcC. */
3121         for (i = 0; i < num_arrays; i++) {
3122             int type = bytestream2_get_byte(&gb) & 0x3f;
3123             int cnt  = bytestream2_get_be16(&gb);
3124
3125             for (j = 0; j < cnt; j++) {
3126                 // +2 for the nal size field
3127                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3128                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3129                     av_log(s->avctx, AV_LOG_ERROR,
3130                            "Invalid NAL unit size in extradata.\n");
3131                     return AVERROR_INVALIDDATA;
3132                 }
3133
3134                 ret = decode_nal_units(s, gb.buffer, nalsize);
3135                 if (ret < 0) {
3136                     av_log(avctx, AV_LOG_ERROR,
3137                            "Decoding nal unit %d %d from hvcC failed\n",
3138                            type, i);
3139                     return ret;
3140                 }
3141                 bytestream2_skip(&gb, nalsize);
3142             }
3143         }
3144
3145         /* Now store right nal length size, that will be used to parse
3146          * all other nals */
3147         s->nal_length_size = nal_len_size;
3148     } else {
3149         s->is_nalff = 0;
3150         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3151         if (ret < 0)
3152             return ret;
3153     }
3154     return 0;
3155 }
3156
3157 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3158 {
3159     HEVCContext *s = avctx->priv_data;
3160     int ret;
3161
3162     ff_init_cabac_states();
3163
3164     avctx->internal->allocate_progress = 1;
3165
3166     ret = hevc_init_context(avctx);
3167     if (ret < 0)
3168         return ret;
3169
3170     if (avctx->extradata_size > 0 && avctx->extradata) {
3171         ret = hevc_decode_extradata(s);
3172         if (ret < 0) {
3173             hevc_decode_free(avctx);
3174             return ret;
3175         }
3176     }
3177
3178     return 0;
3179 }
3180
3181 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3182 {
3183     HEVCContext *s = avctx->priv_data;
3184     int ret;
3185
3186     memset(s, 0, sizeof(*s));
3187
3188     ret = hevc_init_context(avctx);
3189     if (ret < 0)
3190         return ret;
3191
3192     return 0;
3193 }
3194
3195 static void hevc_decode_flush(AVCodecContext *avctx)
3196 {
3197     HEVCContext *s = avctx->priv_data;
3198     ff_hevc_flush_dpb(s);
3199     s->max_ra = INT_MAX;
3200 }
3201
/* Byte offset of a member inside HEVCContext, for the AVOption table. */
#define OFFSET(field) offsetof(HEVCContext, field)
/* Flag set shared by the decoder options: video + decoding parameter. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3204
3205 static const AVProfile profiles[] = {
3206     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3207     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3208     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3209     { FF_PROFILE_UNKNOWN },
3210 };
3211
3212 static const AVOption options[] = {
3213     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3214         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3215     { NULL },
3216 };
3217
3218 static const AVClass hevc_decoder_class = {
3219     .class_name = "HEVC decoder",
3220     .item_name  = av_default_item_name,
3221     .option     = options,
3222     .version    = LIBAVUTIL_VERSION_INT,
3223 };
3224
3225 AVCodec ff_hevc_decoder = {
3226     .name                  = "hevc",
3227     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3228     .type                  = AVMEDIA_TYPE_VIDEO,
3229     .id                    = AV_CODEC_ID_HEVC,
3230     .priv_data_size        = sizeof(HEVCContext),
3231     .priv_class            = &hevc_decoder_class,
3232     .init                  = hevc_decode_init,
3233     .close                 = hevc_decode_free,
3234     .decode                = hevc_decode_frame,
3235     .flush                 = hevc_decode_flush,
3236     .update_thread_context = hevc_update_thread_context,
3237     .init_thread_copy      = hevc_init_thread_copy,
3238     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3239                              CODEC_CAP_FRAME_THREADS,
3240     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3241 };