]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c
hevc: reorder loops
[ffmpeg] / libavcodec / hevc.c
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/md5.h"
31 #include "libavutil/opt.h"
32 #include "libavutil/pixdesc.h"
33 #include "libavutil/stereo3d.h"
34
35 #include "bswapdsp.h"
36 #include "bytestream.h"
37 #include "cabac_functions.h"
38 #include "golomb.h"
39 #include "hevc.h"
40
/*
 * Extra-sample counts for quarter-pel interpolation, one entry per index
 * 0..3 (presumably the fractional sample position -- confirm against the
 * users of these tables).
 * For every index, ff_hevc_qpel_extra[i] equals
 * ff_hevc_qpel_extra_before[i] + ff_hevc_qpel_extra_after[i].
 */
const uint8_t ff_hevc_qpel_extra_before[4] = {
    [0] = 0, [1] = 3, [2] = 3, [3] = 2,
};
const uint8_t ff_hevc_qpel_extra_after[4] = {
    [0] = 0, [1] = 3, [2] = 4, [3] = 4,
};
const uint8_t ff_hevc_qpel_extra[4] = {
    [0] = 0, [1] = 6, [2] = 7, [3] = 6,
};
44
/* Scan of a single position. */
static const uint8_t scan_1x1[1] = { 0 };

/* Raster (row by row) scan of a 2x2 block: x and y of the i-th position. */
static const uint8_t horiz_scan2x2_x[4] = { 0, 1,   0, 1 };
static const uint8_t horiz_scan2x2_y[4] = { 0, 0,   1, 1 };

/* Raster (row by row) scan of a 4x4 block: x and y of the i-th position. */
static const uint8_t horiz_scan4x4_x[16] = {
    0, 1, 2, 3,   0, 1, 2, 3,   0, 1, 2, 3,   0, 1, 2, 3,
};
static const uint8_t horiz_scan4x4_y[16] = {
    0, 0, 0, 0,   1, 1, 1, 1,   2, 2, 2, 2,   3, 3, 3, 3,
};

/*
 * Position -> scan index for an 8x8 block split into four 4x4 quadrants.
 * Each quadrant is numbered in raster order; the quadrants themselves are
 * numbered upper-left (0..15), upper-right (16..31), lower-left (32..47),
 * lower-right (48..63) as the table is written, first index selecting the
 * table row.
 */
static const uint8_t horiz_scan8x8_inv[8][8] = {
    {  0,  1,  2,  3,    16, 17, 18, 19 },
    {  4,  5,  6,  7,    20, 21, 22, 23 },
    {  8,  9, 10, 11,    24, 25, 26, 27 },
    { 12, 13, 14, 15,    28, 29, 30, 31 },

    { 32, 33, 34, 35,    48, 49, 50, 51 },
    { 36, 37, 38, 39,    52, 53, 54, 55 },
    { 40, 41, 42, 43,    56, 57, 58, 59 },
    { 44, 45, 46, 47,    60, 61, 62, 63 },
};
75
/*
 * Diagonal scan tables.
 *
 * The *_x / *_y tables give the coordinates of the i-th scanned position;
 * they are written below with one anti-diagonal per row, each diagonal
 * running from its lowest-x / highest-y end to its highest-x / lowest-y end.
 * The *_inv tables are the reverse mapping: for every scan index i,
 * inv[y[i]][x[i]] == i.
 */

/* 2x2 */
static const uint8_t diag_scan2x2_x[4] = {
    0,
    0, 1,
    1,
};

static const uint8_t diag_scan2x2_y[4] = {
    0,
    1, 0,
    1,
};

static const uint8_t diag_scan2x2_inv[2][2] = {
    { 0, 2 },
    { 1, 3 },
};

/* 4x4 */
const uint8_t ff_hevc_diag_scan4x4_x[16] = {
    0,
    0, 1,
    0, 1, 2,
    0, 1, 2, 3,
    1, 2, 3,
    2, 3,
    3,
};

const uint8_t ff_hevc_diag_scan4x4_y[16] = {
    0,
    1, 0,
    2, 1, 0,
    3, 2, 1, 0,
    3, 2, 1,
    3, 2,
    3,
};

static const uint8_t diag_scan4x4_inv[4][4] = {
    { 0,  2,  5,  9 },
    { 1,  4,  8, 12 },
    { 3,  7, 11, 14 },
    { 6, 10, 13, 15 },
};

/* 8x8 */
const uint8_t ff_hevc_diag_scan8x8_x[64] = {
    0,
    0, 1,
    0, 1, 2,
    0, 1, 2, 3,
    0, 1, 2, 3, 4,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5, 6,
    0, 1, 2, 3, 4, 5, 6, 7,
    1, 2, 3, 4, 5, 6, 7,
    2, 3, 4, 5, 6, 7,
    3, 4, 5, 6, 7,
    4, 5, 6, 7,
    5, 6, 7,
    6, 7,
    7,
};

const uint8_t ff_hevc_diag_scan8x8_y[64] = {
    0,
    1, 0,
    2, 1, 0,
    3, 2, 1, 0,
    4, 3, 2, 1, 0,
    5, 4, 3, 2, 1, 0,
    6, 5, 4, 3, 2, 1, 0,
    7, 6, 5, 4, 3, 2, 1, 0,
    7, 6, 5, 4, 3, 2, 1,
    7, 6, 5, 4, 3, 2,
    7, 6, 5, 4, 3,
    7, 6, 5, 4,
    7, 6, 5,
    7, 6,
    7,
};

static const uint8_t diag_scan8x8_inv[8][8] = {
    {  0,  2,  5,  9, 14, 20, 27, 35 },
    {  1,  4,  8, 13, 19, 26, 34, 42 },
    {  3,  7, 12, 18, 25, 33, 41, 48 },
    {  6, 11, 17, 24, 32, 40, 47, 53 },
    { 10, 16, 23, 31, 39, 46, 52, 57 },
    { 15, 22, 30, 38, 45, 51, 56, 60 },
    { 21, 29, 37, 44, 50, 55, 59, 62 },
    { 28, 36, 43, 49, 54, 58, 61, 63 },
};
154
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
159
160 /**
161  * Section 5.7
162  */
163
164 /* free everything allocated  by pic_arrays_init() */
165 static void pic_arrays_free(HEVCContext *s)
166 {
167     av_freep(&s->sao);
168     av_freep(&s->deblock);
169
170     av_freep(&s->skip_flag);
171     av_freep(&s->tab_ct_depth);
172
173     av_freep(&s->tab_ipm);
174     av_freep(&s->cbf_luma);
175     av_freep(&s->is_pcm);
176
177     av_freep(&s->qp_y_tab);
178     av_freep(&s->tab_slice_address);
179     av_freep(&s->filter_slice_edges);
180
181     av_freep(&s->horizontal_bs);
182     av_freep(&s->vertical_bs);
183
184     av_buffer_pool_uninit(&s->tab_mvf_pool);
185     av_buffer_pool_uninit(&s->rpl_tab_pool);
186 }
187
188 /* allocate arrays that depend on frame dimensions */
189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
190 {
191     int log2_min_cb_size = sps->log2_min_cb_size;
192     int width            = sps->width;
193     int height           = sps->height;
194     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
195                            ((height >> log2_min_cb_size) + 1);
196     int ctb_count        = sps->ctb_width * sps->ctb_height;
197     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
198
199     s->bs_width  = width  >> 3;
200     s->bs_height = height >> 3;
201
202     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
203     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
204     if (!s->sao || !s->deblock)
205         goto fail;
206
207     s->skip_flag    = av_malloc(pic_size_in_ctb);
208     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
209     if (!s->skip_flag || !s->tab_ct_depth)
210         goto fail;
211
212     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
213     s->tab_ipm  = av_mallocz(min_pu_size);
214     s->is_pcm   = av_malloc(min_pu_size);
215     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
216         goto fail;
217
218     s->filter_slice_edges = av_malloc(ctb_count);
219     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
220                                       sizeof(*s->tab_slice_address));
221     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
222                                       sizeof(*s->qp_y_tab));
223     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
224         goto fail;
225
226     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
227     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
228     if (!s->horizontal_bs || !s->vertical_bs)
229         goto fail;
230
231     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
232                                           av_buffer_alloc);
233     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
234                                           av_buffer_allocz);
235     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
236         goto fail;
237
238     return 0;
239
240 fail:
241     pic_arrays_free(s);
242     return AVERROR(ENOMEM);
243 }
244
245 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
246 {
247     int i = 0;
248     int j = 0;
249     uint8_t luma_weight_l0_flag[16];
250     uint8_t chroma_weight_l0_flag[16];
251     uint8_t luma_weight_l1_flag[16];
252     uint8_t chroma_weight_l1_flag[16];
253
254     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
255     if (s->sps->chroma_format_idc != 0) {
256         int delta = get_se_golomb(gb);
257         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
258     }
259
260     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
261         luma_weight_l0_flag[i] = get_bits1(gb);
262         if (!luma_weight_l0_flag[i]) {
263             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
264             s->sh.luma_offset_l0[i] = 0;
265         }
266     }
267     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
268         for (i = 0; i < s->sh.nb_refs[L0]; i++)
269             chroma_weight_l0_flag[i] = get_bits1(gb);
270     } else {
271         for (i = 0; i < s->sh.nb_refs[L0]; i++)
272             chroma_weight_l0_flag[i] = 0;
273     }
274     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
275         if (luma_weight_l0_flag[i]) {
276             int delta_luma_weight_l0 = get_se_golomb(gb);
277             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
278             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
279         }
280         if (chroma_weight_l0_flag[i]) {
281             for (j = 0; j < 2; j++) {
282                 int delta_chroma_weight_l0 = get_se_golomb(gb);
283                 int delta_chroma_offset_l0 = get_se_golomb(gb);
284                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
285                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
286                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
287             }
288         } else {
289             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
290             s->sh.chroma_offset_l0[i][0] = 0;
291             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
292             s->sh.chroma_offset_l0[i][1] = 0;
293         }
294     }
295     if (s->sh.slice_type == B_SLICE) {
296         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
297             luma_weight_l1_flag[i] = get_bits1(gb);
298             if (!luma_weight_l1_flag[i]) {
299                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
300                 s->sh.luma_offset_l1[i] = 0;
301             }
302         }
303         if (s->sps->chroma_format_idc != 0) {
304             for (i = 0; i < s->sh.nb_refs[L1]; i++)
305                 chroma_weight_l1_flag[i] = get_bits1(gb);
306         } else {
307             for (i = 0; i < s->sh.nb_refs[L1]; i++)
308                 chroma_weight_l1_flag[i] = 0;
309         }
310         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
311             if (luma_weight_l1_flag[i]) {
312                 int delta_luma_weight_l1 = get_se_golomb(gb);
313                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
314                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
315             }
316             if (chroma_weight_l1_flag[i]) {
317                 for (j = 0; j < 2; j++) {
318                     int delta_chroma_weight_l1 = get_se_golomb(gb);
319                     int delta_chroma_offset_l1 = get_se_golomb(gb);
320                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
321                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
322                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
323                 }
324             } else {
325                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
326                 s->sh.chroma_offset_l1[i][0] = 0;
327                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
328                 s->sh.chroma_offset_l1[i][1] = 0;
329             }
330         }
331     }
332 }
333
334 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
335 {
336     const HEVCSPS *sps = s->sps;
337     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
338     int prev_delta_msb = 0;
339     unsigned int nb_sps = 0, nb_sh;
340     int i;
341
342     rps->nb_refs = 0;
343     if (!sps->long_term_ref_pics_present_flag)
344         return 0;
345
346     if (sps->num_long_term_ref_pics_sps > 0)
347         nb_sps = get_ue_golomb_long(gb);
348     nb_sh = get_ue_golomb_long(gb);
349
350     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
351         return AVERROR_INVALIDDATA;
352
353     rps->nb_refs = nb_sh + nb_sps;
354
355     for (i = 0; i < rps->nb_refs; i++) {
356         uint8_t delta_poc_msb_present;
357
358         if (i < nb_sps) {
359             uint8_t lt_idx_sps = 0;
360
361             if (sps->num_long_term_ref_pics_sps > 1)
362                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
363
364             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
365             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
366         } else {
367             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
368             rps->used[i] = get_bits1(gb);
369         }
370
371         delta_poc_msb_present = get_bits1(gb);
372         if (delta_poc_msb_present) {
373             int delta = get_ue_golomb_long(gb);
374
375             if (i && i != nb_sps)
376                 delta += prev_delta_msb;
377
378             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
379             prev_delta_msb = delta;
380         }
381     }
382
383     return 0;
384 }
385
386 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
387 {
388     int ret;
389     unsigned int num = 0, den = 0;
390
391     pic_arrays_free(s);
392     ret = pic_arrays_init(s, sps);
393     if (ret < 0)
394         goto fail;
395
396     s->avctx->coded_width         = sps->width;
397     s->avctx->coded_height        = sps->height;
398     s->avctx->width               = sps->output_width;
399     s->avctx->height              = sps->output_height;
400     s->avctx->pix_fmt             = sps->pix_fmt;
401     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
402
403     ff_set_sar(s->avctx, sps->vui.sar);
404
405     if (sps->vui.video_signal_type_present_flag)
406         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
407                                                                : AVCOL_RANGE_MPEG;
408     else
409         s->avctx->color_range = AVCOL_RANGE_MPEG;
410
411     if (sps->vui.colour_description_present_flag) {
412         s->avctx->color_primaries = sps->vui.colour_primaries;
413         s->avctx->color_trc       = sps->vui.transfer_characteristic;
414         s->avctx->colorspace      = sps->vui.matrix_coeffs;
415     } else {
416         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
417         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
418         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
419     }
420
421     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
422     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
423     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
424
425     if (sps->sao_enabled) {
426         av_frame_unref(s->tmp_frame);
427         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
428         if (ret < 0)
429             goto fail;
430         s->frame = s->tmp_frame;
431     }
432
433     s->sps = sps;
434     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
435
436     if (s->vps->vps_timing_info_present_flag) {
437         num = s->vps->vps_num_units_in_tick;
438         den = s->vps->vps_time_scale;
439     } else if (sps->vui.vui_timing_info_present_flag) {
440         num = sps->vui.vui_num_units_in_tick;
441         den = sps->vui.vui_time_scale;
442     }
443
444     if (num != 0 && den != 0)
445         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
446                   num, den, 1 << 30);
447
448     return 0;
449
450 fail:
451     pic_arrays_free(s);
452     s->sps = NULL;
453     return ret;
454 }
455
456 static int hls_slice_header(HEVCContext *s)
457 {
458     GetBitContext *gb = &s->HEVClc.gb;
459     SliceHeader *sh   = &s->sh;
460     int i, ret;
461
462     // Coded parameters
463     sh->first_slice_in_pic_flag = get_bits1(gb);
464     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
465         s->seq_decode = (s->seq_decode + 1) & 0xff;
466         s->max_ra     = INT_MAX;
467         if (IS_IDR(s))
468             ff_hevc_clear_refs(s);
469     }
470     if (IS_IRAP(s))
471         sh->no_output_of_prior_pics_flag = get_bits1(gb);
472
473     sh->pps_id = get_ue_golomb_long(gb);
474     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
475         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
476         return AVERROR_INVALIDDATA;
477     }
478     if (!sh->first_slice_in_pic_flag &&
479         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
480         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
481         return AVERROR_INVALIDDATA;
482     }
483     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
484
485     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
486         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
487
488         ff_hevc_clear_refs(s);
489         ret = set_sps(s, s->sps);
490         if (ret < 0)
491             return ret;
492
493         s->seq_decode = (s->seq_decode + 1) & 0xff;
494         s->max_ra     = INT_MAX;
495     }
496
497     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
498     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
499
500     sh->dependent_slice_segment_flag = 0;
501     if (!sh->first_slice_in_pic_flag) {
502         int slice_address_length;
503
504         if (s->pps->dependent_slice_segments_enabled_flag)
505             sh->dependent_slice_segment_flag = get_bits1(gb);
506
507         slice_address_length = av_ceil_log2(s->sps->ctb_width *
508                                             s->sps->ctb_height);
509         sh->slice_segment_addr = get_bits(gb, slice_address_length);
510         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
511             av_log(s->avctx, AV_LOG_ERROR,
512                    "Invalid slice segment address: %u.\n",
513                    sh->slice_segment_addr);
514             return AVERROR_INVALIDDATA;
515         }
516
517         if (!sh->dependent_slice_segment_flag) {
518             sh->slice_addr = sh->slice_segment_addr;
519             s->slice_idx++;
520         }
521     } else {
522         sh->slice_segment_addr = sh->slice_addr = 0;
523         s->slice_idx           = 0;
524         s->slice_initialized   = 0;
525     }
526
527     if (!sh->dependent_slice_segment_flag) {
528         s->slice_initialized = 0;
529
530         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
531             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
532
533         sh->slice_type = get_ue_golomb_long(gb);
534         if (!(sh->slice_type == I_SLICE ||
535               sh->slice_type == P_SLICE ||
536               sh->slice_type == B_SLICE)) {
537             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
538                    sh->slice_type);
539             return AVERROR_INVALIDDATA;
540         }
541         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
542             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
543             return AVERROR_INVALIDDATA;
544         }
545
546         // when flag is not present, picture is inferred to be output
547         sh->pic_output_flag = 1;
548         if (s->pps->output_flag_present_flag)
549             sh->pic_output_flag = get_bits1(gb);
550
551         if (s->sps->separate_colour_plane_flag)
552             sh->colour_plane_id = get_bits(gb, 2);
553
554         if (!IS_IDR(s)) {
555             int short_term_ref_pic_set_sps_flag, poc;
556
557             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
558             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
559             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
560                 av_log(s->avctx, AV_LOG_WARNING,
561                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
562                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
563                     return AVERROR_INVALIDDATA;
564                 poc = s->poc;
565             }
566             s->poc = poc;
567
568             short_term_ref_pic_set_sps_flag = get_bits1(gb);
569             if (!short_term_ref_pic_set_sps_flag) {
570                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
571                 if (ret < 0)
572                     return ret;
573
574                 sh->short_term_rps = &sh->slice_rps;
575             } else {
576                 int numbits, rps_idx;
577
578                 if (!s->sps->nb_st_rps) {
579                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
580                     return AVERROR_INVALIDDATA;
581                 }
582
583                 numbits = av_ceil_log2(s->sps->nb_st_rps);
584                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
585                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
586             }
587
588             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
589             if (ret < 0) {
590                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
591                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
592                     return AVERROR_INVALIDDATA;
593             }
594
595             if (s->sps->sps_temporal_mvp_enabled_flag)
596                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
597             else
598                 sh->slice_temporal_mvp_enabled_flag = 0;
599         } else {
600             s->sh.short_term_rps = NULL;
601             s->poc               = 0;
602         }
603
604         /* 8.3.1 */
605         if (s->temporal_id == 0 &&
606             s->nal_unit_type != NAL_TRAIL_N &&
607             s->nal_unit_type != NAL_TSA_N   &&
608             s->nal_unit_type != NAL_STSA_N  &&
609             s->nal_unit_type != NAL_RADL_N  &&
610             s->nal_unit_type != NAL_RADL_R  &&
611             s->nal_unit_type != NAL_RASL_N  &&
612             s->nal_unit_type != NAL_RASL_R)
613             s->pocTid0 = s->poc;
614
615         if (s->sps->sao_enabled) {
616             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
617             sh->slice_sample_adaptive_offset_flag[1] =
618             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
619         } else {
620             sh->slice_sample_adaptive_offset_flag[0] = 0;
621             sh->slice_sample_adaptive_offset_flag[1] = 0;
622             sh->slice_sample_adaptive_offset_flag[2] = 0;
623         }
624
625         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
626         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
627             int nb_refs;
628
629             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
630             if (sh->slice_type == B_SLICE)
631                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
632
633             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
634                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
635                 if (sh->slice_type == B_SLICE)
636                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
637             }
638             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
639                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
640                        sh->nb_refs[L0], sh->nb_refs[L1]);
641                 return AVERROR_INVALIDDATA;
642             }
643
644             sh->rpl_modification_flag[0] = 0;
645             sh->rpl_modification_flag[1] = 0;
646             nb_refs = ff_hevc_frame_nb_refs(s);
647             if (!nb_refs) {
648                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
649                 return AVERROR_INVALIDDATA;
650             }
651
652             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
653                 sh->rpl_modification_flag[0] = get_bits1(gb);
654                 if (sh->rpl_modification_flag[0]) {
655                     for (i = 0; i < sh->nb_refs[L0]; i++)
656                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
657                 }
658
659                 if (sh->slice_type == B_SLICE) {
660                     sh->rpl_modification_flag[1] = get_bits1(gb);
661                     if (sh->rpl_modification_flag[1] == 1)
662                         for (i = 0; i < sh->nb_refs[L1]; i++)
663                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
664                 }
665             }
666
667             if (sh->slice_type == B_SLICE)
668                 sh->mvd_l1_zero_flag = get_bits1(gb);
669
670             if (s->pps->cabac_init_present_flag)
671                 sh->cabac_init_flag = get_bits1(gb);
672             else
673                 sh->cabac_init_flag = 0;
674
675             sh->collocated_ref_idx = 0;
676             if (sh->slice_temporal_mvp_enabled_flag) {
677                 sh->collocated_list = L0;
678                 if (sh->slice_type == B_SLICE)
679                     sh->collocated_list = !get_bits1(gb);
680
681                 if (sh->nb_refs[sh->collocated_list] > 1) {
682                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
683                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
684                         av_log(s->avctx, AV_LOG_ERROR,
685                                "Invalid collocated_ref_idx: %d.\n",
686                                sh->collocated_ref_idx);
687                         return AVERROR_INVALIDDATA;
688                     }
689                 }
690             }
691
692             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
693                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
694                 pred_weight_table(s, gb);
695             }
696
697             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
698             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
699                 av_log(s->avctx, AV_LOG_ERROR,
700                        "Invalid number of merging MVP candidates: %d.\n",
701                        sh->max_num_merge_cand);
702                 return AVERROR_INVALIDDATA;
703             }
704         }
705
706         sh->slice_qp_delta = get_se_golomb(gb);
707
708         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
709             sh->slice_cb_qp_offset = get_se_golomb(gb);
710             sh->slice_cr_qp_offset = get_se_golomb(gb);
711         } else {
712             sh->slice_cb_qp_offset = 0;
713             sh->slice_cr_qp_offset = 0;
714         }
715
716         if (s->pps->deblocking_filter_control_present_flag) {
717             int deblocking_filter_override_flag = 0;
718
719             if (s->pps->deblocking_filter_override_enabled_flag)
720                 deblocking_filter_override_flag = get_bits1(gb);
721
722             if (deblocking_filter_override_flag) {
723                 sh->disable_deblocking_filter_flag = get_bits1(gb);
724                 if (!sh->disable_deblocking_filter_flag) {
725                     sh->beta_offset = get_se_golomb(gb) * 2;
726                     sh->tc_offset   = get_se_golomb(gb) * 2;
727                 }
728             } else {
729                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
730                 sh->beta_offset                    = s->pps->beta_offset;
731                 sh->tc_offset                      = s->pps->tc_offset;
732             }
733         } else {
734             sh->disable_deblocking_filter_flag = 0;
735             sh->beta_offset                    = 0;
736             sh->tc_offset                      = 0;
737         }
738
739         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
740             (sh->slice_sample_adaptive_offset_flag[0] ||
741              sh->slice_sample_adaptive_offset_flag[1] ||
742              !sh->disable_deblocking_filter_flag)) {
743             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
744         } else {
745             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
746         }
747     } else if (!s->slice_initialized) {
748         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
749         return AVERROR_INVALIDDATA;
750     }
751
752     sh->num_entry_point_offsets = 0;
753     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
754         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
755         if (sh->num_entry_point_offsets > 0) {
756             int offset_len = get_ue_golomb_long(gb) + 1;
757
758             for (i = 0; i < sh->num_entry_point_offsets; i++)
759                 skip_bits(gb, offset_len);
760         }
761     }
762
763     if (s->pps->slice_header_extension_present_flag) {
764         unsigned int length = get_ue_golomb_long(gb);
765         for (i = 0; i < length; i++)
766             skip_bits(gb, 8);  // slice_header_extension_data_byte
767     }
768
769     // Inferred parameters
770     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
771     if (sh->slice_qp > 51 ||
772         sh->slice_qp < -s->sps->qp_bd_offset) {
773         av_log(s->avctx, AV_LOG_ERROR,
774                "The slice_qp %d is outside the valid range "
775                "[%d, 51].\n",
776                sh->slice_qp,
777                -s->sps->qp_bd_offset);
778         return AVERROR_INVALIDDATA;
779     }
780
781     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
782
783     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
784         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
785         return AVERROR_INVALIDDATA;
786     }
787
788     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
789
790     if (!s->pps->cu_qp_delta_enabled_flag)
791         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
792                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
793
794     s->slice_initialized = 1;
795
796     return 0;
797 }
798
/*
 * Element (x, y) of a table holding one entry per CTB, stored row by row
 * with s->sps->ctb_width entries per row. Expects `s` to be in scope.
 */
#define CTB(tab, x, y) ((tab)[(x) + (y) * s->sps->ctb_width])

/*
 * Set sao->elem, honouring the SAO merge flags: copy the field from the
 * CTB to the left when sao_merge_left_flag is set, otherwise from the CTB
 * above when sao_merge_up_flag is set, otherwise assign `value`.
 * `value` is only evaluated when neither flag is set.
 * Expects `s`, `sao`, `rx`, `ry`, `sao_merge_left_flag` and
 * `sao_merge_up_flag` to be in scope.
 */
#define SET_SAO(elem, value)                            \
do {                                                    \
    if (sao_merge_left_flag)                            \
        sao->elem = CTB(s->sao, rx-1, ry).elem;         \
    else if (sao_merge_up_flag)                         \
        sao->elem = CTB(s->sao, rx, ry-1).elem;         \
    else                                                \
        sao->elem = value;                              \
} while (0)
812
813 static void hls_sao_param(HEVCContext *s, int rx, int ry)
814 {
815     HEVCLocalContext *lc    = &s->HEVClc;
816     int sao_merge_left_flag = 0;
817     int sao_merge_up_flag   = 0;
818     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
819     SAOParams *sao          = &CTB(s->sao, rx, ry);
820     int c_idx, i;
821
822     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
823         s->sh.slice_sample_adaptive_offset_flag[1]) {
824         if (rx > 0) {
825             if (lc->ctb_left_flag)
826                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
827         }
828         if (ry > 0 && !sao_merge_left_flag) {
829             if (lc->ctb_up_flag)
830                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
831         }
832     }
833
834     for (c_idx = 0; c_idx < 3; c_idx++) {
835         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
836             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
837             continue;
838         }
839
840         if (c_idx == 2) {
841             sao->type_idx[2] = sao->type_idx[1];
842             sao->eo_class[2] = sao->eo_class[1];
843         } else {
844             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
845         }
846
847         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
848             continue;
849
850         for (i = 0; i < 4; i++)
851             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
852
853         if (sao->type_idx[c_idx] == SAO_BAND) {
854             for (i = 0; i < 4; i++) {
855                 if (sao->offset_abs[c_idx][i]) {
856                     SET_SAO(offset_sign[c_idx][i],
857                             ff_hevc_sao_offset_sign_decode(s));
858                 } else {
859                     sao->offset_sign[c_idx][i] = 0;
860                 }
861             }
862             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
863         } else if (c_idx != 2) {
864             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
865         }
866
867         // Inferred parameters
868         sao->offset_val[c_idx][0] = 0;
869         for (i = 0; i < 4; i++) {
870             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
871             if (sao->type_idx[c_idx] == SAO_EDGE) {
872                 if (i > 1)
873                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
874             } else if (sao->offset_sign[c_idx][i]) {
875                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
876             }
877         }
878     }
879 }
880
881 #undef SET_SAO
882 #undef CTB
883
884 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
885                                 int log2_trafo_size, enum ScanType scan_idx,
886                                 int c_idx)
887 {
888 #define GET_COORD(offset, n)                                    \
889     do {                                                        \
890         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
891         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
892     } while (0)
893     HEVCLocalContext *lc    = &s->HEVClc;
894     int transform_skip_flag = 0;
895
896     int last_significant_coeff_x, last_significant_coeff_y;
897     int last_scan_pos;
898     int n_end;
899     int num_coeff    = 0;
900     int greater1_ctx = 1;
901
902     int num_last_subset;
903     int x_cg_last_sig, y_cg_last_sig;
904
905     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
906
907     ptrdiff_t stride = s->frame->linesize[c_idx];
908     int hshift       = s->sps->hshift[c_idx];
909     int vshift       = s->sps->vshift[c_idx];
910     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
911                                               ((x0 >> hshift) << s->sps->pixel_shift)];
912     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
913     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
914
915     int trafo_size = 1 << log2_trafo_size;
916     int i, qp, shift, add, scale, scale_m;
917     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
918     const uint8_t *scale_matrix;
919     uint8_t dc_scale;
920
921     // Derive QP for dequant
922     if (!lc->cu.cu_transquant_bypass_flag) {
923         static const int qp_c[] = {
924             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
925         };
926
927         static const uint8_t rem6[51 + 2 * 6 + 1] = {
928             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
929             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
930             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
931         };
932
933         static const uint8_t div6[51 + 2 * 6 + 1] = {
934             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
935             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
936             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
937         };
938         int qp_y = lc->qp_y;
939
940         if (c_idx == 0) {
941             qp = qp_y + s->sps->qp_bd_offset;
942         } else {
943             int qp_i, offset;
944
945             if (c_idx == 1)
946                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
947             else
948                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
949
950             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
951             if (qp_i < 30)
952                 qp = qp_i;
953             else if (qp_i > 43)
954                 qp = qp_i - 6;
955             else
956                 qp = qp_c[qp_i - 30];
957
958             qp += s->sps->qp_bd_offset;
959         }
960
961         shift    = s->sps->bit_depth + log2_trafo_size - 5;
962         add      = 1 << (shift - 1);
963         scale    = level_scale[rem6[qp]] << (div6[qp]);
964         scale_m  = 16; // default when no custom scaling lists.
965         dc_scale = 16;
966
967         if (s->sps->scaling_list_enable_flag) {
968             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
969                                     &s->pps->scaling_list : &s->sps->scaling_list;
970             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
971
972             if (log2_trafo_size != 5)
973                 matrix_id = 3 * matrix_id + c_idx;
974
975             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
976             if (log2_trafo_size >= 4)
977                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
978         }
979     }
980
981     if (s->pps->transform_skip_enabled_flag &&
982         !lc->cu.cu_transquant_bypass_flag   &&
983         log2_trafo_size == 2) {
984         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
985     }
986
987     last_significant_coeff_x =
988         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
989     last_significant_coeff_y =
990         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
991
992     if (last_significant_coeff_x > 3) {
993         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
994         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
995                                    (2 + (last_significant_coeff_x & 1)) +
996                                    suffix;
997     }
998
999     if (last_significant_coeff_y > 3) {
1000         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1001         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1002                                    (2 + (last_significant_coeff_y & 1)) +
1003                                    suffix;
1004     }
1005
1006     if (scan_idx == SCAN_VERT)
1007         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1008
1009     x_cg_last_sig = last_significant_coeff_x >> 2;
1010     y_cg_last_sig = last_significant_coeff_y >> 2;
1011
1012     switch (scan_idx) {
1013     case SCAN_DIAG: {
1014         int last_x_c = last_significant_coeff_x & 3;
1015         int last_y_c = last_significant_coeff_y & 3;
1016
1017         scan_x_off = ff_hevc_diag_scan4x4_x;
1018         scan_y_off = ff_hevc_diag_scan4x4_y;
1019         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1020         if (trafo_size == 4) {
1021             scan_x_cg = scan_1x1;
1022             scan_y_cg = scan_1x1;
1023         } else if (trafo_size == 8) {
1024             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1025             scan_x_cg  = diag_scan2x2_x;
1026             scan_y_cg  = diag_scan2x2_y;
1027         } else if (trafo_size == 16) {
1028             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1029             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1030             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1031         } else { // trafo_size == 32
1032             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1033             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1034             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1035         }
1036         break;
1037     }
1038     case SCAN_HORIZ:
1039         scan_x_cg  = horiz_scan2x2_x;
1040         scan_y_cg  = horiz_scan2x2_y;
1041         scan_x_off = horiz_scan4x4_x;
1042         scan_y_off = horiz_scan4x4_y;
1043         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1044         break;
1045     default: //SCAN_VERT
1046         scan_x_cg  = horiz_scan2x2_y;
1047         scan_y_cg  = horiz_scan2x2_x;
1048         scan_x_off = horiz_scan4x4_y;
1049         scan_y_off = horiz_scan4x4_x;
1050         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1051         break;
1052     }
1053     num_coeff++;
1054     num_last_subset = (num_coeff - 1) >> 4;
1055
1056     for (i = num_last_subset; i >= 0; i--) {
1057         int n, m;
1058         int x_cg, y_cg, x_c, y_c;
1059         int implicit_non_zero_coeff = 0;
1060         int64_t trans_coeff_level;
1061         int prev_sig = 0;
1062         int offset   = i << 4;
1063
1064         uint8_t significant_coeff_flag_idx[16];
1065         uint8_t nb_significant_coeff_flag = 0;
1066
1067         x_cg = scan_x_cg[i];
1068         y_cg = scan_y_cg[i];
1069
1070         if (i < num_last_subset && i > 0) {
1071             int ctx_cg = 0;
1072             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1073                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1074             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1075                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1076
1077             significant_coeff_group_flag[x_cg][y_cg] =
1078                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1079             implicit_non_zero_coeff = 1;
1080         } else {
1081             significant_coeff_group_flag[x_cg][y_cg] =
1082                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1083                  (x_cg == 0 && y_cg == 0));
1084         }
1085
1086         last_scan_pos = num_coeff - offset - 1;
1087
1088         if (i == num_last_subset) {
1089             n_end                         = last_scan_pos - 1;
1090             significant_coeff_flag_idx[0] = last_scan_pos;
1091             nb_significant_coeff_flag     = 1;
1092         } else {
1093             n_end = 15;
1094         }
1095
1096         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1097             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1098         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1099             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1100
1101         for (n = n_end; n >= 0; n--) {
1102             GET_COORD(offset, n);
1103
1104             if (significant_coeff_group_flag[x_cg][y_cg] &&
1105                 (n > 0 || implicit_non_zero_coeff == 0)) {
1106                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1107                                                           log2_trafo_size,
1108                                                           scan_idx,
1109                                                           prev_sig) == 1) {
1110                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1111                     nb_significant_coeff_flag++;
1112                     implicit_non_zero_coeff = 0;
1113                 }
1114             } else {
1115                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1116                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1117                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1118                     nb_significant_coeff_flag++;
1119                 }
1120             }
1121         }
1122
1123         n_end = nb_significant_coeff_flag;
1124
1125         if (n_end) {
1126             int first_nz_pos_in_cg = 16;
1127             int last_nz_pos_in_cg = -1;
1128             int c_rice_param = 0;
1129             int first_greater1_coeff_idx = -1;
1130             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1131             uint16_t coeff_sign_flag;
1132             int sum_abs = 0;
1133             int sign_hidden = 0;
1134
1135             // initialize first elem of coeff_bas_level_greater1_flag
1136             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1137
1138             if (!(i == num_last_subset) && greater1_ctx == 0)
1139                 ctx_set++;
1140             greater1_ctx      = 1;
1141             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1142
1143             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1144                 int n_idx = significant_coeff_flag_idx[m];
1145                 int inc   = (ctx_set << 2) + greater1_ctx;
1146                 coeff_abs_level_greater1_flag[n_idx] =
1147                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1148                 if (coeff_abs_level_greater1_flag[n_idx]) {
1149                     greater1_ctx = 0;
1150                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1151                     greater1_ctx++;
1152                 }
1153
1154                 if (coeff_abs_level_greater1_flag[n_idx] &&
1155                     first_greater1_coeff_idx == -1)
1156                     first_greater1_coeff_idx = n_idx;
1157             }
1158             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1159             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1160                                  !lc->cu.cu_transquant_bypass_flag;
1161
1162             if (first_greater1_coeff_idx != -1) {
1163                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1164             }
1165             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1166                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1167             } else {
1168                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1169             }
1170
1171             for (m = 0; m < n_end; m++) {
1172                 n = significant_coeff_flag_idx[m];
1173                 GET_COORD(offset, n);
1174                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1175                 if (trans_coeff_level == ((m < 8) ?
1176                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1177                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1178
1179                     trans_coeff_level += last_coeff_abs_level_remaining;
1180                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1181                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1182                 }
1183                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1184                     sum_abs += trans_coeff_level;
1185                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1186                         trans_coeff_level = -trans_coeff_level;
1187                 }
1188                 if (coeff_sign_flag >> 15)
1189                     trans_coeff_level = -trans_coeff_level;
1190                 coeff_sign_flag <<= 1;
1191                 if (!lc->cu.cu_transquant_bypass_flag) {
1192                     if (s->sps->scaling_list_enable_flag) {
1193                         if (y_c || x_c || log2_trafo_size < 4) {
1194                             int pos;
1195                             switch (log2_trafo_size) {
1196                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1197                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1198                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1199                             default: pos = (y_c        << 2) +  x_c;
1200                             }
1201                             scale_m = scale_matrix[pos];
1202                         } else {
1203                             scale_m = dc_scale;
1204                         }
1205                     }
1206                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1207                     if(trans_coeff_level < 0) {
1208                         if((~trans_coeff_level) & 0xFffffffffff8000)
1209                             trans_coeff_level = -32768;
1210                     } else {
1211                         if (trans_coeff_level & 0xffffffffffff8000)
1212                             trans_coeff_level = 32767;
1213                     }
1214                 }
1215                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1216             }
1217         }
1218     }
1219
1220     if (lc->cu.cu_transquant_bypass_flag) {
1221         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1222     } else {
1223         if (transform_skip_flag)
1224             s->hevcdsp.transform_skip(dst, coeffs, stride);
1225         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1226                  log2_trafo_size == 2)
1227             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1228         else
1229             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1230     }
1231 }
1232
1233 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1234                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1235                               int log2_cb_size, int log2_trafo_size,
1236                               int trafo_depth, int blk_idx,
1237                               int cbf_luma, int cbf_cb, int cbf_cr)
1238 {
1239     HEVCLocalContext *lc = &s->HEVClc;
1240
1241     if (lc->cu.pred_mode == MODE_INTRA) {
1242         int trafo_size = 1 << log2_trafo_size;
1243         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1244
1245         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1246         if (log2_trafo_size > 2) {
1247             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1248             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1249             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1250             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1251         } else if (blk_idx == 3) {
1252             trafo_size = trafo_size << s->sps->hshift[1];
1253             ff_hevc_set_neighbour_available(s, xBase, yBase,
1254                                             trafo_size, trafo_size);
1255             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1256             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1257         }
1258     }
1259
1260     if (cbf_luma || cbf_cb || cbf_cr) {
1261         int scan_idx   = SCAN_DIAG;
1262         int scan_idx_c = SCAN_DIAG;
1263
1264         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1265             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1266             if (lc->tu.cu_qp_delta != 0)
1267                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1268                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1269             lc->tu.is_cu_qp_delta_coded = 1;
1270
1271             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
1272                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
1273                 av_log(s->avctx, AV_LOG_ERROR,
1274                        "The cu_qp_delta %d is outside the valid range "
1275                        "[%d, %d].\n",
1276                        lc->tu.cu_qp_delta,
1277                        -(26 + s->sps->qp_bd_offset / 2),
1278                         (25 + s->sps->qp_bd_offset / 2));
1279                 return AVERROR_INVALIDDATA;
1280             }
1281
1282             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1283         }
1284
1285         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1286             if (lc->tu.cur_intra_pred_mode >= 6 &&
1287                 lc->tu.cur_intra_pred_mode <= 14) {
1288                 scan_idx = SCAN_VERT;
1289             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1290                        lc->tu.cur_intra_pred_mode <= 30) {
1291                 scan_idx = SCAN_HORIZ;
1292             }
1293
1294             if (lc->pu.intra_pred_mode_c >=  6 &&
1295                 lc->pu.intra_pred_mode_c <= 14) {
1296                 scan_idx_c = SCAN_VERT;
1297             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1298                        lc->pu.intra_pred_mode_c <= 30) {
1299                 scan_idx_c = SCAN_HORIZ;
1300             }
1301         }
1302
1303         if (cbf_luma)
1304             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1305         if (log2_trafo_size > 2) {
1306             if (cbf_cb)
1307                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1308             if (cbf_cr)
1309                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1310         } else if (blk_idx == 3) {
1311             if (cbf_cb)
1312                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1313             if (cbf_cr)
1314                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1315         }
1316     }
1317     return 0;
1318 }
1319
1320 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1321 {
1322     int cb_size          = 1 << log2_cb_size;
1323     int log2_min_pu_size = s->sps->log2_min_pu_size;
1324
1325     int min_pu_width     = s->sps->min_pu_width;
1326     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1327     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1328     int i, j;
1329
1330     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1331         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1332             s->is_pcm[i + j * min_pu_width] = 2;
1333 }
1334
1335 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1336                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1337                               int log2_cb_size, int log2_trafo_size,
1338                               int trafo_depth, int blk_idx,
1339                               int cbf_cb, int cbf_cr)
1340 {
1341     HEVCLocalContext *lc = &s->HEVClc;
1342     uint8_t split_transform_flag;
1343     int ret;
1344
1345     if (lc->cu.intra_split_flag) {
1346         if (trafo_depth == 1)
1347             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1348     } else {
1349         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1350     }
1351
1352     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1353         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1354         trafo_depth     < lc->cu.max_trafo_depth       &&
1355         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1356         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1357     } else {
1358         int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1359                           lc->cu.pred_mode == MODE_INTER &&
1360                           lc->cu.part_mode != PART_2Nx2N &&
1361                           trafo_depth == 0;
1362
1363         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1364                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1365                                inter_split;
1366     }
1367
1368     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1369         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1370     else if (log2_trafo_size > 2 || trafo_depth == 0)
1371         cbf_cb = 0;
1372     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1373         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1374     else if (log2_trafo_size > 2 || trafo_depth == 0)
1375         cbf_cr = 0;
1376
1377     if (split_transform_flag) {
1378         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1379         const int x1 = x0 + trafo_size_split;
1380         const int y1 = y0 + trafo_size_split;
1381
1382 #define SUBDIVIDE(x, y, idx)                                                    \
1383 do {                                                                            \
1384     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1385                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1386                              cbf_cb, cbf_cr);                                   \
1387     if (ret < 0)                                                                \
1388         return ret;                                                             \
1389 } while (0)
1390
1391         SUBDIVIDE(x0, y0, 0);
1392         SUBDIVIDE(x1, y0, 1);
1393         SUBDIVIDE(x0, y1, 2);
1394         SUBDIVIDE(x1, y1, 3);
1395
1396 #undef SUBDIVIDE
1397     } else {
1398         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1399         int log2_min_tu_size = s->sps->log2_min_tb_size;
1400         int min_tu_width     = s->sps->min_tb_width;
1401         int cbf_luma         = 1;
1402
1403         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1404             cbf_cb || cbf_cr)
1405             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1406
1407         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1408                                  log2_cb_size, log2_trafo_size, trafo_depth,
1409                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1410         if (ret < 0)
1411             return ret;
1412         // TODO: store cbf_luma somewhere else
1413         if (cbf_luma) {
1414             int i, j;
1415             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1416                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1417                     int x_tu = (x0 + j) >> log2_min_tu_size;
1418                     int y_tu = (y0 + i) >> log2_min_tu_size;
1419                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1420                 }
1421         }
1422         if (!s->sh.disable_deblocking_filter_flag) {
1423             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1424             if (s->pps->transquant_bypass_enable_flag &&
1425                 lc->cu.cu_transquant_bypass_flag)
1426                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1427         }
1428     }
1429     return 0;
1430 }
1431
1432 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1433 {
1434     //TODO: non-4:2:0 support
1435     HEVCLocalContext *lc = &s->HEVClc;
1436     GetBitContext gb;
1437     int cb_size   = 1 << log2_cb_size;
1438     int stride0   = s->frame->linesize[0];
1439     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1440     int   stride1 = s->frame->linesize[1];
1441     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1442     int   stride2 = s->frame->linesize[2];
1443     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1444
1445     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1446     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1447     int ret;
1448
1449     if (!s->sh.disable_deblocking_filter_flag)
1450         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1451
1452     ret = init_get_bits(&gb, pcm, length);
1453     if (ret < 0)
1454         return ret;
1455
1456     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1457     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1458     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1459     return 0;
1460 }
1461
1462 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1463 {
1464     HEVCLocalContext *lc = &s->HEVClc;
1465     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1466     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1467
1468     if (x)
1469         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1470     if (y)
1471         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1472
1473     switch (x) {
1474     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1475     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1476     case 0: lc->pu.mvd.x = 0;                               break;
1477     }
1478
1479     switch (y) {
1480     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1481     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1482     case 0: lc->pu.mvd.y = 0;                               break;
1483     }
1484 }
1485
1486 /**
1487  * 8.5.3.2.2.1 Luma sample interpolation process
1488  *
1489  * @param s HEVC decoding context
1490  * @param dst target buffer for block data at block position
1491  * @param dststride stride of the dst buffer
1492  * @param ref reference picture buffer at origin (0, 0)
1493  * @param mv motion vector (relative to block position) to get pixel data from
1494  * @param x_off horizontal position of block from origin (0, 0)
1495  * @param y_off vertical position of block from origin (0, 0)
1496  * @param block_w width of block
1497  * @param block_h height of block
1498  */
1499 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1500                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1501                     int block_w, int block_h)
1502 {
1503     HEVCLocalContext *lc = &s->HEVClc;
1504     uint8_t *src         = ref->data[0];
1505     ptrdiff_t srcstride  = ref->linesize[0];
1506     int pic_width        = s->sps->width;
1507     int pic_height       = s->sps->height;
1508
1509     int mx         = mv->x & 3;
1510     int my         = mv->y & 3;
1511     int extra_left = ff_hevc_qpel_extra_before[mx];
1512     int extra_top  = ff_hevc_qpel_extra_before[my];
1513
1514     x_off += mv->x >> 2;
1515     y_off += mv->y >> 2;
1516     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1517
1518     if (x_off < extra_left || y_off < extra_top ||
1519         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1520         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1521         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1522         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1523         int buf_offset = extra_top *
1524                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1525
1526         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1527                                  edge_emu_stride, srcstride,
1528                                  block_w + ff_hevc_qpel_extra[mx],
1529                                  block_h + ff_hevc_qpel_extra[my],
1530                                  x_off - extra_left, y_off - extra_top,
1531                                  pic_width, pic_height);
1532         src = lc->edge_emu_buffer + buf_offset;
1533         srcstride = edge_emu_stride;
1534     }
1535     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1536                                      block_h, lc->mc_buffer);
1537 }
1538
1539 /**
1540  * 8.5.3.2.2.2 Chroma sample interpolation process
1541  *
1542  * @param s HEVC decoding context
1543  * @param dst1 target buffer for block data at block position (U plane)
1544  * @param dst2 target buffer for block data at block position (V plane)
1545  * @param dststride stride of the dst1 and dst2 buffers
1546  * @param ref reference picture buffer at origin (0, 0)
1547  * @param mv motion vector (relative to block position) to get pixel data from
1548  * @param x_off horizontal position of block from origin (0, 0)
1549  * @param y_off vertical position of block from origin (0, 0)
1550  * @param block_w width of block
1551  * @param block_h height of block
1552  */
1553 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1554                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1555                       int x_off, int y_off, int block_w, int block_h)
1556 {
1557     HEVCLocalContext *lc = &s->HEVClc;
1558     uint8_t *src1        = ref->data[1];
1559     uint8_t *src2        = ref->data[2];
1560     ptrdiff_t src1stride = ref->linesize[1];
1561     ptrdiff_t src2stride = ref->linesize[2];
1562     int pic_width        = s->sps->width >> 1;
1563     int pic_height       = s->sps->height >> 1;
1564
1565     int mx = mv->x & 7;
1566     int my = mv->y & 7;
1567
1568     x_off += mv->x >> 3;
1569     y_off += mv->y >> 3;
1570     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1571     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1572
1573     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1574         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1575         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1576         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1577         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1578         int buf_offset1 = EPEL_EXTRA_BEFORE *
1579                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1580         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1581         int buf_offset2 = EPEL_EXTRA_BEFORE *
1582                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1583
1584         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1585                                  edge_emu_stride, src1stride,
1586                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1587                                  x_off - EPEL_EXTRA_BEFORE,
1588                                  y_off - EPEL_EXTRA_BEFORE,
1589                                  pic_width, pic_height);
1590
1591         src1 = lc->edge_emu_buffer + buf_offset1;
1592         src1stride = edge_emu_stride;
1593         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1594                                              block_w, block_h, mx, my, lc->mc_buffer);
1595
1596         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1597                                  edge_emu_stride, src2stride,
1598                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1599                                  x_off - EPEL_EXTRA_BEFORE,
1600                                  y_off - EPEL_EXTRA_BEFORE,
1601                                  pic_width, pic_height);
1602         src2 = lc->edge_emu_buffer + buf_offset2;
1603         src2stride = edge_emu_stride;
1604
1605         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1606                                              block_w, block_h, mx, my,
1607                                              lc->mc_buffer);
1608     } else {
1609         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1610                                              block_w, block_h, mx, my,
1611                                              lc->mc_buffer);
1612         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1613                                              block_w, block_h, mx, my,
1614                                              lc->mc_buffer);
1615     }
1616 }
1617
1618 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1619                                 const Mv *mv, int y0, int height)
1620 {
1621     int y = (mv->y >> 2) + y0 + height + 9;
1622     ff_thread_await_progress(&ref->tf, y, 0);
1623 }
1624
1625 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1626                                 int nPbW, int nPbH,
1627                                 int log2_cb_size, int partIdx)
1628 {
1629 #define POS(c_idx, x, y)                                                              \
1630     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1631                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1632     HEVCLocalContext *lc = &s->HEVClc;
1633     int merge_idx = 0;
1634     struct MvField current_mv = {{{ 0 }}};
1635
1636     int min_pu_width = s->sps->min_pu_width;
1637
1638     MvField *tab_mvf = s->ref->tab_mvf;
1639     RefPicList  *refPicList = s->ref->refPicList;
1640     HEVCFrame *ref0, *ref1;
1641
1642     int tmpstride = MAX_PB_SIZE;
1643
1644     uint8_t *dst0 = POS(0, x0, y0);
1645     uint8_t *dst1 = POS(1, x0, y0);
1646     uint8_t *dst2 = POS(2, x0, y0);
1647     int log2_min_cb_size = s->sps->log2_min_cb_size;
1648     int min_cb_width     = s->sps->min_cb_width;
1649     int x_cb             = x0 >> log2_min_cb_size;
1650     int y_cb             = y0 >> log2_min_cb_size;
1651     int ref_idx[2];
1652     int mvp_flag[2];
1653     int x_pu, y_pu;
1654     int i, j;
1655
1656     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1657         if (s->sh.max_num_merge_cand > 1)
1658             merge_idx = ff_hevc_merge_idx_decode(s);
1659         else
1660             merge_idx = 0;
1661
1662         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1663                                    1 << log2_cb_size,
1664                                    1 << log2_cb_size,
1665                                    log2_cb_size, partIdx,
1666                                    merge_idx, &current_mv);
1667         x_pu = x0 >> s->sps->log2_min_pu_size;
1668         y_pu = y0 >> s->sps->log2_min_pu_size;
1669
1670         for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1671             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1672                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1673     } else { /* MODE_INTER */
1674         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1675         if (lc->pu.merge_flag) {
1676             if (s->sh.max_num_merge_cand > 1)
1677                 merge_idx = ff_hevc_merge_idx_decode(s);
1678             else
1679                 merge_idx = 0;
1680
1681             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1682                                        partIdx, merge_idx, &current_mv);
1683             x_pu = x0 >> s->sps->log2_min_pu_size;
1684             y_pu = y0 >> s->sps->log2_min_pu_size;
1685
1686             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1687                 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1688                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1689         } else {
1690             enum InterPredIdc inter_pred_idc = PRED_L0;
1691             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1692             if (s->sh.slice_type == B_SLICE)
1693                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1694
1695             if (inter_pred_idc != PRED_L1) {
1696                 if (s->sh.nb_refs[L0]) {
1697                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1698                     current_mv.ref_idx[0] = ref_idx[0];
1699                 }
1700                 current_mv.pred_flag[0] = 1;
1701                 hls_mvd_coding(s, x0, y0, 0);
1702                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1703                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1704                                          partIdx, merge_idx, &current_mv,
1705                                          mvp_flag[0], 0);
1706                 current_mv.mv[0].x += lc->pu.mvd.x;
1707                 current_mv.mv[0].y += lc->pu.mvd.y;
1708             }
1709
1710             if (inter_pred_idc != PRED_L0) {
1711                 if (s->sh.nb_refs[L1]) {
1712                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1713                     current_mv.ref_idx[1] = ref_idx[1];
1714                 }
1715
1716                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1717                     lc->pu.mvd.x = 0;
1718                     lc->pu.mvd.y = 0;
1719                 } else {
1720                     hls_mvd_coding(s, x0, y0, 1);
1721                 }
1722
1723                 current_mv.pred_flag[1] = 1;
1724                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1725                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1726                                          partIdx, merge_idx, &current_mv,
1727                                          mvp_flag[1], 1);
1728                 current_mv.mv[1].x += lc->pu.mvd.x;
1729                 current_mv.mv[1].y += lc->pu.mvd.y;
1730             }
1731
1732             x_pu = x0 >> s->sps->log2_min_pu_size;
1733             y_pu = y0 >> s->sps->log2_min_pu_size;
1734
1735             for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1736                 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1737                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1738         }
1739     }
1740
1741     if (current_mv.pred_flag[0]) {
1742         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1743         if (!ref0)
1744             return;
1745         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1746     }
1747     if (current_mv.pred_flag[1]) {
1748         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1749         if (!ref1)
1750             return;
1751         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1752     }
1753
1754     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1755         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1756         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1757
1758         luma_mc(s, tmp, tmpstride, ref0->frame,
1759                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1760
1761         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1762             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1763             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1764                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1765                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1766                                      dst0, s->frame->linesize[0], tmp,
1767                                      tmpstride, nPbW, nPbH);
1768         } else {
1769             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1770         }
1771         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1772                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1773
1774         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1775             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1776             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1777                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1778                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1779                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1780                                      nPbW / 2, nPbH / 2);
1781             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1782                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1783                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1784                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1785                                      nPbW / 2, nPbH / 2);
1786         } else {
1787             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1788             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1789         }
1790     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1791         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1792         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1793
1794         if (!ref1)
1795             return;
1796
1797         luma_mc(s, tmp, tmpstride, ref1->frame,
1798                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1799
1800         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1801             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1802             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1803                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1804                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1805                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1806                                       nPbW, nPbH);
1807         } else {
1808             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1809         }
1810
1811         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1812                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1813
1814         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1815             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1816             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1817                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1818                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1819                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1820             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1821                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1822                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1823                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1824         } else {
1825             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1826             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1827         }
1828     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1829         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1830         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1831         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1832         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1833         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1834         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1835
1836         if (!ref0 || !ref1)
1837             return;
1838
1839         luma_mc(s, tmp, tmpstride, ref0->frame,
1840                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1841         luma_mc(s, tmp2, tmpstride, ref1->frame,
1842                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1843
1844         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1845             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1846             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1847                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1848                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1849                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1850                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1851                                          dst0, s->frame->linesize[0],
1852                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1853         } else {
1854             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1855                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1856         }
1857
1858         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1859                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1860         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1861                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1862
1863         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1864             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1865             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1866                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1867                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1868                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1869                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1870                                          dst1, s->frame->linesize[1], tmp, tmp3,
1871                                          tmpstride, nPbW / 2, nPbH / 2);
1872             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1873                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1874                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1875                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1876                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1877                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1878                                          tmpstride, nPbW / 2, nPbH / 2);
1879         } else {
1880             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1881             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1882         }
1883     }
1884 }
1885
1886 /**
1887  * 8.4.1
1888  */
1889 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1890                                 int prev_intra_luma_pred_flag)
1891 {
1892     HEVCLocalContext *lc = &s->HEVClc;
1893     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1894     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1895     int min_pu_width     = s->sps->min_pu_width;
1896     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1897     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1898     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1899
1900     int cand_up   = (lc->ctb_up_flag || y0b) ?
1901                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1902     int cand_left = (lc->ctb_left_flag || x0b) ?
1903                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1904
1905     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1906
1907     MvField *tab_mvf = s->ref->tab_mvf;
1908     int intra_pred_mode;
1909     int candidate[3];
1910     int i, j;
1911
1912     // intra_pred_mode prediction does not cross vertical CTB boundaries
1913     if ((y0 - 1) < y_ctb)
1914         cand_up = INTRA_DC;
1915
1916     if (cand_left == cand_up) {
1917         if (cand_left < 2) {
1918             candidate[0] = INTRA_PLANAR;
1919             candidate[1] = INTRA_DC;
1920             candidate[2] = INTRA_ANGULAR_26;
1921         } else {
1922             candidate[0] = cand_left;
1923             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1924             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1925         }
1926     } else {
1927         candidate[0] = cand_left;
1928         candidate[1] = cand_up;
1929         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1930             candidate[2] = INTRA_PLANAR;
1931         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1932             candidate[2] = INTRA_DC;
1933         } else {
1934             candidate[2] = INTRA_ANGULAR_26;
1935         }
1936     }
1937
1938     if (prev_intra_luma_pred_flag) {
1939         intra_pred_mode = candidate[lc->pu.mpm_idx];
1940     } else {
1941         if (candidate[0] > candidate[1])
1942             FFSWAP(uint8_t, candidate[0], candidate[1]);
1943         if (candidate[0] > candidate[2])
1944             FFSWAP(uint8_t, candidate[0], candidate[2]);
1945         if (candidate[1] > candidate[2])
1946             FFSWAP(uint8_t, candidate[1], candidate[2]);
1947
1948         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1949         for (i = 0; i < 3; i++)
1950             if (intra_pred_mode >= candidate[i])
1951                 intra_pred_mode++;
1952     }
1953
1954     /* write the intra prediction units into the mv array */
1955     if (!size_in_pus)
1956         size_in_pus = 1;
1957     for (i = 0; i < size_in_pus; i++) {
1958         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1959                intra_pred_mode, size_in_pus);
1960
1961         for (j = 0; j < size_in_pus; j++) {
1962             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1963             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1964             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1965             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1966             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1967             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1968             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1969             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1970             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1971         }
1972     }
1973
1974     return intra_pred_mode;
1975 }
1976
1977 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1978                                           int log2_cb_size, int ct_depth)
1979 {
1980     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1981     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1982     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1983     int y;
1984
1985     for (y = 0; y < length; y++)
1986         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1987                ct_depth, length);
1988 }
1989
1990 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1991                                   int log2_cb_size)
1992 {
1993     HEVCLocalContext *lc = &s->HEVClc;
1994     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1995     uint8_t prev_intra_luma_pred_flag[4];
1996     int split   = lc->cu.part_mode == PART_NxN;
1997     int pb_size = (1 << log2_cb_size) >> split;
1998     int side    = split + 1;
1999     int chroma_mode;
2000     int i, j;
2001
2002     for (i = 0; i < side; i++)
2003         for (j = 0; j < side; j++)
2004             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2005
2006     for (i = 0; i < side; i++) {
2007         for (j = 0; j < side; j++) {
2008             if (prev_intra_luma_pred_flag[2 * i + j])
2009                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2010             else
2011                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2012
2013             lc->pu.intra_pred_mode[2 * i + j] =
2014                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2015                                      prev_intra_luma_pred_flag[2 * i + j]);
2016         }
2017     }
2018
2019     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2020     if (chroma_mode != 4) {
2021         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2022             lc->pu.intra_pred_mode_c = 34;
2023         else
2024             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2025     } else {
2026         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2027     }
2028 }
2029
2030 static void intra_prediction_unit_default_value(HEVCContext *s,
2031                                                 int x0, int y0,
2032                                                 int log2_cb_size)
2033 {
2034     HEVCLocalContext *lc = &s->HEVClc;
2035     int pb_size          = 1 << log2_cb_size;
2036     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2037     int min_pu_width     = s->sps->min_pu_width;
2038     MvField *tab_mvf     = s->ref->tab_mvf;
2039     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2040     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2041     int j, k;
2042
2043     if (size_in_pus == 0)
2044         size_in_pus = 1;
2045     for (j = 0; j < size_in_pus; j++) {
2046         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2047         for (k = 0; k < size_in_pus; k++)
2048             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2049     }
2050 }
2051
2052 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2053 {
2054     int cb_size          = 1 << log2_cb_size;
2055     HEVCLocalContext *lc = &s->HEVClc;
2056     int log2_min_cb_size = s->sps->log2_min_cb_size;
2057     int length           = cb_size >> log2_min_cb_size;
2058     int min_cb_width     = s->sps->min_cb_width;
2059     int x_cb             = x0 >> log2_min_cb_size;
2060     int y_cb             = y0 >> log2_min_cb_size;
2061     int x, y, ret;
2062
2063     lc->cu.x                = x0;
2064     lc->cu.y                = y0;
2065     lc->cu.rqt_root_cbf     = 1;
2066     lc->cu.pred_mode        = MODE_INTRA;
2067     lc->cu.part_mode        = PART_2Nx2N;
2068     lc->cu.intra_split_flag = 0;
2069     lc->cu.pcm_flag         = 0;
2070
2071     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2072     for (x = 0; x < 4; x++)
2073         lc->pu.intra_pred_mode[x] = 1;
2074     if (s->pps->transquant_bypass_enable_flag) {
2075         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2076         if (lc->cu.cu_transquant_bypass_flag)
2077             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2078     } else
2079         lc->cu.cu_transquant_bypass_flag = 0;
2080
2081     if (s->sh.slice_type != I_SLICE) {
2082         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2083
2084         lc->cu.pred_mode = MODE_SKIP;
2085         x = y_cb * min_cb_width + x_cb;
2086         for (y = 0; y < length; y++) {
2087             memset(&s->skip_flag[x], skip_flag, length);
2088             x += min_cb_width;
2089         }
2090         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2091     }
2092
2093     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2094         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2095         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2096
2097         if (!s->sh.disable_deblocking_filter_flag)
2098             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2099     } else {
2100         if (s->sh.slice_type != I_SLICE)
2101             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2102         if (lc->cu.pred_mode != MODE_INTRA ||
2103             log2_cb_size == s->sps->log2_min_cb_size) {
2104             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2105             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2106                                       lc->cu.pred_mode == MODE_INTRA;
2107         }
2108
2109         if (lc->cu.pred_mode == MODE_INTRA) {
2110             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2111                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2112                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2113                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2114             }
2115             if (lc->cu.pcm_flag) {
2116                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2117                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2118                 if (s->sps->pcm.loop_filter_disable_flag)
2119                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2120
2121                 if (ret < 0)
2122                     return ret;
2123             } else {
2124                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2125             }
2126         } else {
2127             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2128             switch (lc->cu.part_mode) {
2129             case PART_2Nx2N:
2130                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2131                 break;
2132             case PART_2NxN:
2133                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2134                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2135                 break;
2136             case PART_Nx2N:
2137                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2138                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2139                 break;
2140             case PART_2NxnU:
2141                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2142                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2143                 break;
2144             case PART_2NxnD:
2145                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2146                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2147                 break;
2148             case PART_nLx2N:
2149                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2150                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2151                 break;
2152             case PART_nRx2N:
2153                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2154                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2155                 break;
2156             case PART_NxN:
2157                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2158                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2159                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2160                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2161                 break;
2162             }
2163         }
2164
2165         if (!lc->cu.pcm_flag) {
2166             if (lc->cu.pred_mode != MODE_INTRA &&
2167                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2168                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2169             }
2170             if (lc->cu.rqt_root_cbf) {
2171                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2172                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2173                                          s->sps->max_transform_hierarchy_depth_inter;
2174                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2175                                          log2_cb_size,
2176                                          log2_cb_size, 0, 0, 0, 0);
2177                 if (ret < 0)
2178                     return ret;
2179             } else {
2180                 if (!s->sh.disable_deblocking_filter_flag)
2181                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2182             }
2183         }
2184     }
2185
2186     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2187         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2188
2189     x = y_cb * min_cb_width + x_cb;
2190     for (y = 0; y < length; y++) {
2191         memset(&s->qp_y_tab[x], lc->qp_y, length);
2192         x += min_cb_width;
2193     }
2194
2195     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2196
2197     return 0;
2198 }
2199
2200 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2201                                int log2_cb_size, int cb_depth)
2202 {
2203     HEVCLocalContext *lc = &s->HEVClc;
2204     const int cb_size    = 1 << log2_cb_size;
2205     int split_cu;
2206
2207     lc->ct.depth = cb_depth;
2208     if (x0 + cb_size <= s->sps->width  &&
2209         y0 + cb_size <= s->sps->height &&
2210         log2_cb_size > s->sps->log2_min_cb_size) {
2211         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2212     } else {
2213         split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
2214     }
2215     if (s->pps->cu_qp_delta_enabled_flag &&
2216         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2217         lc->tu.is_cu_qp_delta_coded = 0;
2218         lc->tu.cu_qp_delta          = 0;
2219     }
2220
2221     if (split_cu) {
2222         const int cb_size_split = cb_size >> 1;
2223         const int x1 = x0 + cb_size_split;
2224         const int y1 = y0 + cb_size_split;
2225
2226         log2_cb_size--;
2227         cb_depth++;
2228
2229 #define SUBDIVIDE(x, y)                                                \
2230 do {                                                                   \
2231     if (x < s->sps->width && y < s->sps->height) {                     \
2232         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2233         if (ret < 0)                                                   \
2234             return ret;                                                \
2235     }                                                                  \
2236 } while (0)
2237
2238         SUBDIVIDE(x0, y0);
2239         SUBDIVIDE(x1, y0);
2240         SUBDIVIDE(x0, y1);
2241         SUBDIVIDE(x1, y1);
2242     } else {
2243         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2244         if (ret < 0)
2245             return ret;
2246     }
2247
2248     return 0;
2249 }
2250
2251 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2252                                  int ctb_addr_ts)
2253 {
2254     HEVCLocalContext *lc  = &s->HEVClc;
2255     int ctb_size          = 1 << s->sps->log2_ctb_size;
2256     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2257     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2258
2259     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2260
2261     if (s->pps->entropy_coding_sync_enabled_flag) {
2262         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2263             lc->first_qp_group = 1;
2264         lc->end_of_tiles_x = s->sps->width;
2265     } else if (s->pps->tiles_enabled_flag) {
2266         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2267             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2268             lc->start_of_tiles_x = x_ctb;
2269             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2270             lc->first_qp_group   = 1;
2271         }
2272     } else {
2273         lc->end_of_tiles_x = s->sps->width;
2274     }
2275
2276     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2277
2278     lc->boundary_flags = 0;
2279     if (s->pps->tiles_enabled_flag) {
2280         if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2281             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2282         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2283             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2284         if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
2285             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2286         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
2287             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2288     } else {
2289         if (!ctb_addr_in_slice > 0)
2290             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2291         if (ctb_addr_in_slice < s->sps->ctb_width)
2292             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2293     }
2294
2295     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2296     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2297     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2298     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2299 }
2300
2301 static int hls_slice_data(HEVCContext *s)
2302 {
2303     int ctb_size    = 1 << s->sps->log2_ctb_size;
2304     int more_data   = 1;
2305     int x_ctb       = 0;
2306     int y_ctb       = 0;
2307     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2308     int ret;
2309
2310     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2311         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2312
2313         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2314         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2315         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2316
2317         ff_hevc_cabac_init(s, ctb_addr_ts);
2318
2319         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2320
2321         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2322         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2323         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2324
2325         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2326         if (ret < 0)
2327             return ret;
2328         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2329
2330         ctb_addr_ts++;
2331         ff_hevc_save_states(s, ctb_addr_ts);
2332         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2333     }
2334
2335     if (x_ctb + ctb_size >= s->sps->width &&
2336         y_ctb + ctb_size >= s->sps->height)
2337         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2338
2339     return ctb_addr_ts;
2340 }
2341
2342 /**
2343  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2344  * 0 if the unit should be skipped, 1 otherwise
2345  */
2346 static int hls_nal_unit(HEVCContext *s)
2347 {
2348     GetBitContext *gb = &s->HEVClc.gb;
2349     int nuh_layer_id;
2350
2351     if (get_bits1(gb) != 0)
2352         return AVERROR_INVALIDDATA;
2353
2354     s->nal_unit_type = get_bits(gb, 6);
2355
2356     nuh_layer_id   = get_bits(gb, 6);
2357     s->temporal_id = get_bits(gb, 3) - 1;
2358     if (s->temporal_id < 0)
2359         return AVERROR_INVALIDDATA;
2360
2361     av_log(s->avctx, AV_LOG_DEBUG,
2362            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2363            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2364
2365     return nuh_layer_id == 0;
2366 }
2367
2368 static void restore_tqb_pixels(HEVCContext *s)
2369 {
2370     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2371     int x, y, c_idx;
2372
2373     for (c_idx = 0; c_idx < 3; c_idx++) {
2374         ptrdiff_t stride = s->frame->linesize[c_idx];
2375         int hshift       = s->sps->hshift[c_idx];
2376         int vshift       = s->sps->vshift[c_idx];
2377         for (y = 0; y < s->sps->min_pu_height; y++) {
2378             for (x = 0; x < s->sps->min_pu_width; x++) {
2379                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2380                     int n;
2381                     int len      = min_pu_size >> hshift;
2382                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2383                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2384                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2385                         memcpy(dst, src, len);
2386                         src += stride;
2387                         dst += stride;
2388                     }
2389                 }
2390             }
2391         }
2392     }
2393 }
2394
2395 static int set_side_data(HEVCContext *s)
2396 {
2397     AVFrame *out = s->ref->frame;
2398
2399     if (s->sei_frame_packing_present &&
2400         s->frame_packing_arrangement_type >= 3 &&
2401         s->frame_packing_arrangement_type <= 5 &&
2402         s->content_interpretation_type > 0 &&
2403         s->content_interpretation_type < 3) {
2404         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2405         if (!stereo)
2406             return AVERROR(ENOMEM);
2407
2408         switch (s->frame_packing_arrangement_type) {
2409         case 3:
2410             if (s->quincunx_subsampling)
2411                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2412             else
2413                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2414             break;
2415         case 4:
2416             stereo->type = AV_STEREO3D_TOPBOTTOM;
2417             break;
2418         case 5:
2419             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2420             break;
2421         }
2422
2423         if (s->content_interpretation_type == 2)
2424             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2425     }
2426
2427     if (s->sei_display_orientation_present &&
2428         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2429         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2430         AVFrameSideData *rotation = av_frame_new_side_data(out,
2431                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2432                                                            sizeof(int32_t) * 9);
2433         if (!rotation)
2434             return AVERROR(ENOMEM);
2435
2436         av_display_rotation_set((int32_t *)rotation->data, angle);
2437         av_display_matrix_flip((int32_t *)rotation->data,
2438                                s->sei_vflip, s->sei_hflip);
2439     }
2440
2441     return 0;
2442 }
2443
2444 static int hevc_frame_start(HEVCContext *s)
2445 {
2446     HEVCLocalContext *lc = &s->HEVClc;
2447     int ret;
2448
2449     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2450     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2451     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2452     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2453
2454     lc->start_of_tiles_x = 0;
2455     s->is_decoded        = 0;
2456     s->first_nal_type    = s->nal_unit_type;
2457
2458     if (s->pps->tiles_enabled_flag)
2459         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2460
2461     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2462                               s->poc);
2463     if (ret < 0)
2464         goto fail;
2465
2466     ret = ff_hevc_frame_rps(s);
2467     if (ret < 0) {
2468         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2469         goto fail;
2470     }
2471
2472     s->ref->frame->key_frame = IS_IRAP(s);
2473
2474     ret = set_side_data(s);
2475     if (ret < 0)
2476         goto fail;
2477
2478     av_frame_unref(s->output_frame);
2479     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2480     if (ret < 0)
2481         goto fail;
2482
2483     ff_thread_finish_setup(s->avctx);
2484
2485     return 0;
2486
2487 fail:
2488     if (s->ref)
2489         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2490     s->ref = NULL;
2491     return ret;
2492 }
2493
2494 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2495 {
2496     HEVCLocalContext *lc = &s->HEVClc;
2497     GetBitContext *gb    = &lc->gb;
2498     int ctb_addr_ts, ret;
2499
2500     ret = init_get_bits8(gb, nal, length);
2501     if (ret < 0)
2502         return ret;
2503
2504     ret = hls_nal_unit(s);
2505     if (ret < 0) {
2506         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2507                s->nal_unit_type);
2508         goto fail;
2509     } else if (!ret)
2510         return 0;
2511
2512     switch (s->nal_unit_type) {
2513     case NAL_VPS:
2514         ret = ff_hevc_decode_nal_vps(s);
2515         if (ret < 0)
2516             goto fail;
2517         break;
2518     case NAL_SPS:
2519         ret = ff_hevc_decode_nal_sps(s);
2520         if (ret < 0)
2521             goto fail;
2522         break;
2523     case NAL_PPS:
2524         ret = ff_hevc_decode_nal_pps(s);
2525         if (ret < 0)
2526             goto fail;
2527         break;
2528     case NAL_SEI_PREFIX:
2529     case NAL_SEI_SUFFIX:
2530         ret = ff_hevc_decode_nal_sei(s);
2531         if (ret < 0)
2532             goto fail;
2533         break;
2534     case NAL_TRAIL_R:
2535     case NAL_TRAIL_N:
2536     case NAL_TSA_N:
2537     case NAL_TSA_R:
2538     case NAL_STSA_N:
2539     case NAL_STSA_R:
2540     case NAL_BLA_W_LP:
2541     case NAL_BLA_W_RADL:
2542     case NAL_BLA_N_LP:
2543     case NAL_IDR_W_RADL:
2544     case NAL_IDR_N_LP:
2545     case NAL_CRA_NUT:
2546     case NAL_RADL_N:
2547     case NAL_RADL_R:
2548     case NAL_RASL_N:
2549     case NAL_RASL_R:
2550         ret = hls_slice_header(s);
2551         if (ret < 0)
2552             return ret;
2553
2554         if (s->max_ra == INT_MAX) {
2555             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2556                 s->max_ra = s->poc;
2557             } else {
2558                 if (IS_IDR(s))
2559                     s->max_ra = INT_MIN;
2560             }
2561         }
2562
2563         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2564             s->poc <= s->max_ra) {
2565             s->is_decoded = 0;
2566             break;
2567         } else {
2568             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2569                 s->max_ra = INT_MIN;
2570         }
2571
2572         if (s->sh.first_slice_in_pic_flag) {
2573             ret = hevc_frame_start(s);
2574             if (ret < 0)
2575                 return ret;
2576         } else if (!s->ref) {
2577             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2578             goto fail;
2579         }
2580
2581         if (s->nal_unit_type != s->first_nal_type) {
2582             av_log(s->avctx, AV_LOG_ERROR,
2583                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2584                    s->first_nal_type, s->nal_unit_type);
2585             return AVERROR_INVALIDDATA;
2586         }
2587
2588         if (!s->sh.dependent_slice_segment_flag &&
2589             s->sh.slice_type != I_SLICE) {
2590             ret = ff_hevc_slice_rpl(s);
2591             if (ret < 0) {
2592                 av_log(s->avctx, AV_LOG_WARNING,
2593                        "Error constructing the reference lists for the current slice.\n");
2594                 goto fail;
2595             }
2596         }
2597
2598         ctb_addr_ts = hls_slice_data(s);
2599         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2600             s->is_decoded = 1;
2601             if ((s->pps->transquant_bypass_enable_flag ||
2602                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2603                 s->sps->sao_enabled)
2604                 restore_tqb_pixels(s);
2605         }
2606
2607         if (ctb_addr_ts < 0) {
2608             ret = ctb_addr_ts;
2609             goto fail;
2610         }
2611         break;
2612     case NAL_EOS_NUT:
2613     case NAL_EOB_NUT:
2614         s->seq_decode = (s->seq_decode + 1) & 0xff;
2615         s->max_ra     = INT_MAX;
2616         break;
2617     case NAL_AUD:
2618     case NAL_FD_NUT:
2619         break;
2620     default:
2621         av_log(s->avctx, AV_LOG_INFO,
2622                "Skipping NAL unit %d\n", s->nal_unit_type);
2623     }
2624
2625     return 0;
2626 fail:
2627     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2628         return ret;
2629     return 0;
2630 }
2631
2632 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2633  * between these functions would be nice. */
2634 static int extract_rbsp(const uint8_t *src, int length,
2635                         HEVCNAL *nal)
2636 {
2637     int i, si, di;
2638     uint8_t *dst;
2639
2640 #define STARTCODE_TEST                                                  \
2641         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2642             if (src[i + 2] != 3) {                                      \
2643                 /* startcode, so we must be past the end */             \
2644                 length = i;                                             \
2645             }                                                           \
2646             break;                                                      \
2647         }
2648 #if HAVE_FAST_UNALIGNED
2649 #define FIND_FIRST_ZERO                                                 \
2650         if (i > 0 && !src[i])                                           \
2651             i--;                                                        \
2652         while (src[i])                                                  \
2653             i++
2654 #if HAVE_FAST_64BIT
2655     for (i = 0; i + 1 < length; i += 9) {
2656         if (!((~AV_RN64A(src + i) &
2657                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2658               0x8000800080008080ULL))
2659             continue;
2660         FIND_FIRST_ZERO;
2661         STARTCODE_TEST;
2662         i -= 7;
2663     }
2664 #else
2665     for (i = 0; i + 1 < length; i += 5) {
2666         if (!((~AV_RN32A(src + i) &
2667                (AV_RN32A(src + i) - 0x01000101U)) &
2668               0x80008080U))
2669             continue;
2670         FIND_FIRST_ZERO;
2671         STARTCODE_TEST;
2672         i -= 3;
2673     }
2674 #endif /* HAVE_FAST_64BIT */
2675 #else
2676     for (i = 0; i + 1 < length; i += 2) {
2677         if (src[i])
2678             continue;
2679         if (i > 0 && src[i - 1] == 0)
2680             i--;
2681         STARTCODE_TEST;
2682     }
2683 #endif /* HAVE_FAST_UNALIGNED */
2684
2685     if (i >= length - 1) { // no escaped 0
2686         nal->data = src;
2687         nal->size = length;
2688         return length;
2689     }
2690
2691     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2692                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2693     if (!nal->rbsp_buffer)
2694         return AVERROR(ENOMEM);
2695
2696     dst = nal->rbsp_buffer;
2697
2698     memcpy(dst, src, i);
2699     si = di = i;
2700     while (si + 2 < length) {
2701         // remove escapes (very rare 1:2^22)
2702         if (src[si + 2] > 3) {
2703             dst[di++] = src[si++];
2704             dst[di++] = src[si++];
2705         } else if (src[si] == 0 && src[si + 1] == 0) {
2706             if (src[si + 2] == 3) { // escape
2707                 dst[di++] = 0;
2708                 dst[di++] = 0;
2709                 si       += 3;
2710
2711                 continue;
2712             } else // next start code
2713                 goto nsc;
2714         }
2715
2716         dst[di++] = src[si++];
2717     }
2718     while (si < length)
2719         dst[di++] = src[si++];
2720
2721 nsc:
2722     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2723
2724     nal->data = dst;
2725     nal->size = di;
2726     return si;
2727 }
2728
2729 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2730 {
2731     int i, consumed, ret = 0;
2732
2733     s->ref = NULL;
2734     s->eos = 0;
2735
2736     /* split the input packet into NAL units, so we know the upper bound on the
2737      * number of slices in the frame */
2738     s->nb_nals = 0;
2739     while (length >= 4) {
2740         HEVCNAL *nal;
2741         int extract_length = 0;
2742
2743         if (s->is_nalff) {
2744             int i;
2745             for (i = 0; i < s->nal_length_size; i++)
2746                 extract_length = (extract_length << 8) | buf[i];
2747             buf    += s->nal_length_size;
2748             length -= s->nal_length_size;
2749
2750             if (extract_length > length) {
2751                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2752                 ret = AVERROR_INVALIDDATA;
2753                 goto fail;
2754             }
2755         } else {
2756             if (buf[2] == 0) {
2757                 length--;
2758                 buf++;
2759                 continue;
2760             }
2761             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2762                 ret = AVERROR_INVALIDDATA;
2763                 goto fail;
2764             }
2765
2766             buf           += 3;
2767             length        -= 3;
2768             extract_length = length;
2769         }
2770
2771         if (s->nals_allocated < s->nb_nals + 1) {
2772             int new_size = s->nals_allocated + 1;
2773             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2774             if (!tmp) {
2775                 ret = AVERROR(ENOMEM);
2776                 goto fail;
2777             }
2778             s->nals = tmp;
2779             memset(s->nals + s->nals_allocated, 0,
2780                    (new_size - s->nals_allocated) * sizeof(*tmp));
2781             s->nals_allocated = new_size;
2782         }
2783         nal = &s->nals[s->nb_nals++];
2784
2785         consumed = extract_rbsp(buf, extract_length, nal);
2786         if (consumed < 0) {
2787             ret = consumed;
2788             goto fail;
2789         }
2790
2791         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2792         if (ret < 0)
2793             goto fail;
2794         hls_nal_unit(s);
2795
2796         if (s->nal_unit_type == NAL_EOB_NUT ||
2797             s->nal_unit_type == NAL_EOS_NUT)
2798             s->eos = 1;
2799
2800         buf    += consumed;
2801         length -= consumed;
2802     }
2803
2804     /* parse the NAL units */
2805     for (i = 0; i < s->nb_nals; i++) {
2806         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2807         if (ret < 0) {
2808             av_log(s->avctx, AV_LOG_WARNING,
2809                    "Error parsing NAL unit #%d.\n", i);
2810             goto fail;
2811         }
2812     }
2813
2814 fail:
2815     if (s->ref)
2816         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2817
2818     return ret;
2819 }
2820
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits, without a
 * trailing newline.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte      = md5;
    const uint8_t *const end = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2827
2828 static int verify_md5(HEVCContext *s, AVFrame *frame)
2829 {
2830     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2831     int pixel_shift;
2832     int i, j;
2833
2834     if (!desc)
2835         return AVERROR(EINVAL);
2836
2837     pixel_shift = desc->comp[0].depth_minus1 > 7;
2838
2839     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2840            s->poc);
2841
2842     /* the checksums are LE, so we have to byteswap for >8bpp formats
2843      * on BE arches */
2844 #if HAVE_BIGENDIAN
2845     if (pixel_shift && !s->checksum_buf) {
2846         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2847                        FFMAX3(frame->linesize[0], frame->linesize[1],
2848                               frame->linesize[2]));
2849         if (!s->checksum_buf)
2850             return AVERROR(ENOMEM);
2851     }
2852 #endif
2853
2854     for (i = 0; frame->data[i]; i++) {
2855         int width  = s->avctx->coded_width;
2856         int height = s->avctx->coded_height;
2857         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2858         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2859         uint8_t md5[16];
2860
2861         av_md5_init(s->md5_ctx);
2862         for (j = 0; j < h; j++) {
2863             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2864 #if HAVE_BIGENDIAN
2865             if (pixel_shift) {
2866                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2867                                     (const uint16_t *) src, w);
2868                 src = s->checksum_buf;
2869             }
2870 #endif
2871             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2872         }
2873         av_md5_final(s->md5_ctx, md5);
2874
2875         if (!memcmp(md5, s->md5[i], 16)) {
2876             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2877             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2878             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2879         } else {
2880             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2881             print_md5(s->avctx, AV_LOG_ERROR, md5);
2882             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2883             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2884             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2885             return AVERROR_INVALIDDATA;
2886         }
2887     }
2888
2889     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2890
2891     return 0;
2892 }
2893
2894 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2895                              AVPacket *avpkt)
2896 {
2897     int ret;
2898     HEVCContext *s = avctx->priv_data;
2899
2900     if (!avpkt->size) {
2901         ret = ff_hevc_output_frame(s, data, 1);
2902         if (ret < 0)
2903             return ret;
2904
2905         *got_output = ret;
2906         return 0;
2907     }
2908
2909     s->ref = NULL;
2910     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2911     if (ret < 0)
2912         return ret;
2913
2914     /* verify the SEI checksum */
2915     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2916         s->is_md5) {
2917         ret = verify_md5(s, s->ref->frame);
2918         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2919             ff_hevc_unref_frame(s, s->ref, ~0);
2920             return ret;
2921         }
2922     }
2923     s->is_md5 = 0;
2924
2925     if (s->is_decoded) {
2926         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2927         s->is_decoded = 0;
2928     }
2929
2930     if (s->output_frame->buf[0]) {
2931         av_frame_move_ref(data, s->output_frame);
2932         *got_output = 1;
2933     }
2934
2935     return avpkt->size;
2936 }
2937
2938 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2939 {
2940     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2941     if (ret < 0)
2942         return ret;
2943
2944     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2945     if (!dst->tab_mvf_buf)
2946         goto fail;
2947     dst->tab_mvf = src->tab_mvf;
2948
2949     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2950     if (!dst->rpl_tab_buf)
2951         goto fail;
2952     dst->rpl_tab = src->rpl_tab;
2953
2954     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2955     if (!dst->rpl_buf)
2956         goto fail;
2957
2958     dst->poc        = src->poc;
2959     dst->ctb_count  = src->ctb_count;
2960     dst->window     = src->window;
2961     dst->flags      = src->flags;
2962     dst->sequence   = src->sequence;
2963
2964     return 0;
2965 fail:
2966     ff_hevc_unref_frame(s, dst, ~0);
2967     return AVERROR(ENOMEM);
2968 }
2969
2970 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2971 {
2972     HEVCContext       *s = avctx->priv_data;
2973     int i;
2974
2975     pic_arrays_free(s);
2976
2977     av_freep(&s->md5_ctx);
2978
2979     av_frame_free(&s->tmp_frame);
2980     av_frame_free(&s->output_frame);
2981
2982     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2983         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2984         av_frame_free(&s->DPB[i].frame);
2985     }
2986
2987     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2988         av_buffer_unref(&s->vps_list[i]);
2989     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
2990         av_buffer_unref(&s->sps_list[i]);
2991     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
2992         av_buffer_unref(&s->pps_list[i]);
2993
2994     for (i = 0; i < s->nals_allocated; i++)
2995         av_freep(&s->nals[i].rbsp_buffer);
2996     av_freep(&s->nals);
2997     s->nals_allocated = 0;
2998
2999     return 0;
3000 }
3001
3002 static av_cold int hevc_init_context(AVCodecContext *avctx)
3003 {
3004     HEVCContext *s = avctx->priv_data;
3005     int i;
3006
3007     s->avctx = avctx;
3008
3009     s->tmp_frame = av_frame_alloc();
3010     if (!s->tmp_frame)
3011         goto fail;
3012
3013     s->output_frame = av_frame_alloc();
3014     if (!s->output_frame)
3015         goto fail;
3016
3017     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3018         s->DPB[i].frame = av_frame_alloc();
3019         if (!s->DPB[i].frame)
3020             goto fail;
3021         s->DPB[i].tf.f = s->DPB[i].frame;
3022     }
3023
3024     s->max_ra = INT_MAX;
3025
3026     s->md5_ctx = av_md5_alloc();
3027     if (!s->md5_ctx)
3028         goto fail;
3029
3030     ff_bswapdsp_init(&s->bdsp);
3031
3032     s->context_initialized = 1;
3033
3034     return 0;
3035
3036 fail:
3037     hevc_decode_free(avctx);
3038     return AVERROR(ENOMEM);
3039 }
3040
3041 static int hevc_update_thread_context(AVCodecContext *dst,
3042                                       const AVCodecContext *src)
3043 {
3044     HEVCContext *s  = dst->priv_data;
3045     HEVCContext *s0 = src->priv_data;
3046     int i, ret;
3047
3048     if (!s->context_initialized) {
3049         ret = hevc_init_context(dst);
3050         if (ret < 0)
3051             return ret;
3052     }
3053
3054     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3055         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3056         if (s0->DPB[i].frame->buf[0]) {
3057             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3058             if (ret < 0)
3059                 return ret;
3060         }
3061     }
3062
3063     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3064         av_buffer_unref(&s->vps_list[i]);
3065         if (s0->vps_list[i]) {
3066             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3067             if (!s->vps_list[i])
3068                 return AVERROR(ENOMEM);
3069         }
3070     }
3071
3072     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3073         av_buffer_unref(&s->sps_list[i]);
3074         if (s0->sps_list[i]) {
3075             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3076             if (!s->sps_list[i])
3077                 return AVERROR(ENOMEM);
3078         }
3079     }
3080
3081     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3082         av_buffer_unref(&s->pps_list[i]);
3083         if (s0->pps_list[i]) {
3084             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3085             if (!s->pps_list[i])
3086                 return AVERROR(ENOMEM);
3087         }
3088     }
3089
3090     if (s->sps != s0->sps)
3091         ret = set_sps(s, s0->sps);
3092
3093     s->seq_decode = s0->seq_decode;
3094     s->seq_output = s0->seq_output;
3095     s->pocTid0    = s0->pocTid0;
3096     s->max_ra     = s0->max_ra;
3097
3098     s->is_nalff        = s0->is_nalff;
3099     s->nal_length_size = s0->nal_length_size;
3100
3101     if (s0->eos) {
3102         s->seq_decode = (s->seq_decode + 1) & 0xff;
3103         s->max_ra = INT_MAX;
3104     }
3105
3106     return 0;
3107 }
3108
3109 static int hevc_decode_extradata(HEVCContext *s)
3110 {
3111     AVCodecContext *avctx = s->avctx;
3112     GetByteContext gb;
3113     int ret;
3114
3115     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3116
3117     if (avctx->extradata_size > 3 &&
3118         (avctx->extradata[0] || avctx->extradata[1] ||
3119          avctx->extradata[2] > 1)) {
3120         /* It seems the extradata is encoded as hvcC format.
3121          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3122          * is finalized. When finalized, configurationVersion will be 1 and we
3123          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3124         int i, j, num_arrays, nal_len_size;
3125
3126         s->is_nalff = 1;
3127
3128         bytestream2_skip(&gb, 21);
3129         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3130         num_arrays   = bytestream2_get_byte(&gb);
3131
3132         /* nal units in the hvcC always have length coded with 2 bytes,
3133          * so put a fake nal_length_size = 2 while parsing them */
3134         s->nal_length_size = 2;
3135
3136         /* Decode nal units from hvcC. */
3137         for (i = 0; i < num_arrays; i++) {
3138             int type = bytestream2_get_byte(&gb) & 0x3f;
3139             int cnt  = bytestream2_get_be16(&gb);
3140
3141             for (j = 0; j < cnt; j++) {
3142                 // +2 for the nal size field
3143                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3144                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3145                     av_log(s->avctx, AV_LOG_ERROR,
3146                            "Invalid NAL unit size in extradata.\n");
3147                     return AVERROR_INVALIDDATA;
3148                 }
3149
3150                 ret = decode_nal_units(s, gb.buffer, nalsize);
3151                 if (ret < 0) {
3152                     av_log(avctx, AV_LOG_ERROR,
3153                            "Decoding nal unit %d %d from hvcC failed\n",
3154                            type, i);
3155                     return ret;
3156                 }
3157                 bytestream2_skip(&gb, nalsize);
3158             }
3159         }
3160
3161         /* Now store right nal length size, that will be used to parse
3162          * all other nals */
3163         s->nal_length_size = nal_len_size;
3164     } else {
3165         s->is_nalff = 0;
3166         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3167         if (ret < 0)
3168             return ret;
3169     }
3170     return 0;
3171 }
3172
3173 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3174 {
3175     HEVCContext *s = avctx->priv_data;
3176     int ret;
3177
3178     ff_init_cabac_states();
3179
3180     avctx->internal->allocate_progress = 1;
3181
3182     ret = hevc_init_context(avctx);
3183     if (ret < 0)
3184         return ret;
3185
3186     if (avctx->extradata_size > 0 && avctx->extradata) {
3187         ret = hevc_decode_extradata(s);
3188         if (ret < 0) {
3189             hevc_decode_free(avctx);
3190             return ret;
3191         }
3192     }
3193
3194     return 0;
3195 }
3196
3197 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3198 {
3199     HEVCContext *s = avctx->priv_data;
3200     int ret;
3201
3202     memset(s, 0, sizeof(*s));
3203
3204     ret = hevc_init_context(avctx);
3205     if (ret < 0)
3206         return ret;
3207
3208     return 0;
3209 }
3210
3211 static void hevc_decode_flush(AVCodecContext *avctx)
3212 {
3213     HEVCContext *s = avctx->priv_data;
3214     ff_hevc_flush_dpb(s);
3215     s->max_ra = INT_MAX;
3216 }
3217
/* Byte offset of field x inside HEVCContext, used by the option table. */
#define OFFSET(x) offsetof(HEVCContext, x)
/* Option flags shared by the decoder's AVOptions: video, decoding. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3220
3221 static const AVProfile profiles[] = {
3222     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3223     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3224     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3225     { FF_PROFILE_UNKNOWN },
3226 };
3227
3228 static const AVOption options[] = {
3229     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3230         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3231     { NULL },
3232 };
3233
3234 static const AVClass hevc_decoder_class = {
3235     .class_name = "HEVC decoder",
3236     .item_name  = av_default_item_name,
3237     .option     = options,
3238     .version    = LIBAVUTIL_VERSION_INT,
3239 };
3240
3241 AVCodec ff_hevc_decoder = {
3242     .name                  = "hevc",
3243     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3244     .type                  = AVMEDIA_TYPE_VIDEO,
3245     .id                    = AV_CODEC_ID_HEVC,
3246     .priv_data_size        = sizeof(HEVCContext),
3247     .priv_class            = &hevc_decoder_class,
3248     .init                  = hevc_decode_init,
3249     .close                 = hevc_decode_free,
3250     .decode                = hevc_decode_frame,
3251     .flush                 = hevc_decode_flush,
3252     .update_thread_context = hevc_update_thread_context,
3253     .init_thread_copy      = hevc_init_thread_copy,
3254     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3255                              CODEC_CAP_FRAME_THREADS,
3256     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3257 };