]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c
avutil: add alias names for gray 8/16 colour spaces
[ffmpeg] / libavcodec / hevc.c
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/md5.h"
31 #include "libavutil/opt.h"
32 #include "libavutil/pixdesc.h"
33 #include "libavutil/stereo3d.h"
34
35 #include "bswapdsp.h"
36 #include "bytestream.h"
37 #include "cabac_functions.h"
38 #include "golomb.h"
39 #include "hevc.h"
40
/*
 * Quarter-pel helper tables, each with four entries.
 * For every index i, ff_hevc_qpel_extra[i] equals
 * ff_hevc_qpel_extra_before[i] + ff_hevc_qpel_extra_after[i].
 * NOTE(review): presumably indexed by the fractional MV position and giving
 * the number of extra samples needed around a block -- confirm at the users.
 */
const uint8_t ff_hevc_qpel_extra_before[4] = {
    0, 3, 3, 2,
};
const uint8_t ff_hevc_qpel_extra_after[4] = {
    0, 3, 4, 4,
};
const uint8_t ff_hevc_qpel_extra[4] = {
    0, 6, 7, 6,
};
44
/* Trivial scan of a single position. */
static const uint8_t scan_1x1[1] = {
    0,
};

/*
 * Horizontal (row by row) scans: entry i holds the x resp. y coordinate of
 * the i-th scanned position, i.e. x == i % size and y == i / size.
 */
static const uint8_t horiz_scan2x2_x[4] = {
    0, 1,
    0, 1,
};

static const uint8_t horiz_scan2x2_y[4] = {
    0, 0,
    1, 1,
};

static const uint8_t horiz_scan4x4_x[16] = {
    0, 1, 2, 3,   0, 1, 2, 3,
    0, 1, 2, 3,   0, 1, 2, 3,
};

static const uint8_t horiz_scan4x4_y[16] = {
    0, 0, 0, 0,   1, 1, 1, 1,
    2, 2, 2, 2,   3, 3, 3, 3,
};

/*
 * Inverse horizontal scan of an 8x8 block, indexed as [y][x].
 * The block is split into four 4x4 quadrants numbered in raster order
 * (16 positions each); inside a quadrant positions are numbered row by row.
 */
static const uint8_t horiz_scan8x8_inv[8][8] = {
    /* upper-left quadrant | upper-right quadrant */
    {  0,  1,  2,  3,   16, 17, 18, 19 },
    {  4,  5,  6,  7,   20, 21, 22, 23 },
    {  8,  9, 10, 11,   24, 25, 26, 27 },
    { 12, 13, 14, 15,   28, 29, 30, 31 },
    /* lower-left quadrant | lower-right quadrant */
    { 32, 33, 34, 35,   48, 49, 50, 51 },
    { 36, 37, 38, 39,   52, 53, 54, 55 },
    { 40, 41, 42, 43,   56, 57, 58, 59 },
    { 44, 45, 46, 47,   60, 61, 62, 63 },
};
75
/*
 * Diagonal scans. Entry i of the _x/_y tables is the coordinate of the i-th
 * scanned position; the rows below group the entries by anti-diagonal
 * (all positions sharing the same x + y). The _inv tables are indexed [y][x]
 * and give the scan position of a coordinate.
 */
static const uint8_t diag_scan2x2_x[4] = {
    0,
    0, 1,
    1,
};

static const uint8_t diag_scan2x2_y[4] = {
    0,
    1, 0,
    1,
};

static const uint8_t diag_scan2x2_inv[2][2] = {
    { 0, 2 },
    { 1, 3 },
};

const uint8_t ff_hevc_diag_scan4x4_x[16] = {
    0,
    0, 1,
    0, 1, 2,
    0, 1, 2, 3,
    1, 2, 3,
    2, 3,
    3,
};

const uint8_t ff_hevc_diag_scan4x4_y[16] = {
    0,
    1, 0,
    2, 1, 0,
    3, 2, 1, 0,
    3, 2, 1,
    3, 2,
    3,
};

static const uint8_t diag_scan4x4_inv[4][4] = {
    { 0,  2,  5,  9 },
    { 1,  4,  8, 12 },
    { 3,  7, 11, 14 },
    { 6, 10, 13, 15 },
};
105
/*
 * Diagonal scan of an 8x8 block. Entry i of the _x/_y tables is the
 * coordinate of the i-th scanned position; each row below is one
 * anti-diagonal (constant x + y), walked with x increasing and y decreasing.
 */
const uint8_t ff_hevc_diag_scan8x8_x[64] = {
    0,
    0, 1,
    0, 1, 2,
    0, 1, 2, 3,
    0, 1, 2, 3, 4,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5, 6,
    0, 1, 2, 3, 4, 5, 6, 7,
    1, 2, 3, 4, 5, 6, 7,
    2, 3, 4, 5, 6, 7,
    3, 4, 5, 6, 7,
    4, 5, 6, 7,
    5, 6, 7,
    6, 7,
    7,
};

const uint8_t ff_hevc_diag_scan8x8_y[64] = {
    0,
    1, 0,
    2, 1, 0,
    3, 2, 1, 0,
    4, 3, 2, 1, 0,
    5, 4, 3, 2, 1, 0,
    6, 5, 4, 3, 2, 1, 0,
    7, 6, 5, 4, 3, 2, 1, 0,
    7, 6, 5, 4, 3, 2, 1,
    7, 6, 5, 4, 3, 2,
    7, 6, 5, 4, 3,
    7, 6, 5, 4,
    7, 6, 5,
    7, 6,
    7,
};

/* Inverse of the 8x8 diagonal scan: [y][x] -> scan position. */
static const uint8_t diag_scan8x8_inv[8][8] = {
    {  0,  2,  5,  9, 14, 20, 27, 35 },
    {  1,  4,  8, 13, 19, 26, 34, 42 },
    {  3,  7, 12, 18, 25, 33, 41, 48 },
    {  6, 11, 17, 24, 32, 40, 47, 53 },
    { 10, 16, 23, 31, 39, 46, 52, 57 },
    { 15, 22, 30, 38, 45, 51, 56, 60 },
    { 21, 29, 37, 44, 50, 55, 59, 62 },
    { 28, 36, 43, 49, 54, 58, 61, 63 },
};
154
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
159
160 /**
161  * Section 5.7
162  */
163
164 /* free everything allocated  by pic_arrays_init() */
165 static void pic_arrays_free(HEVCContext *s)
166 {
167     av_freep(&s->sao);
168     av_freep(&s->deblock);
169
170     av_freep(&s->skip_flag);
171     av_freep(&s->tab_ct_depth);
172
173     av_freep(&s->tab_ipm);
174     av_freep(&s->cbf_luma);
175     av_freep(&s->is_pcm);
176
177     av_freep(&s->qp_y_tab);
178     av_freep(&s->tab_slice_address);
179     av_freep(&s->filter_slice_edges);
180
181     av_freep(&s->horizontal_bs);
182     av_freep(&s->vertical_bs);
183
184     av_buffer_pool_uninit(&s->tab_mvf_pool);
185     av_buffer_pool_uninit(&s->rpl_tab_pool);
186 }
187
188 /* allocate arrays that depend on frame dimensions */
189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
190 {
191     int log2_min_cb_size = sps->log2_min_cb_size;
192     int width            = sps->width;
193     int height           = sps->height;
194     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
195                            ((height >> log2_min_cb_size) + 1);
196     int ctb_count        = sps->ctb_width * sps->ctb_height;
197     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
198
199     s->bs_width  = width  >> 3;
200     s->bs_height = height >> 3;
201
202     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
203     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
204     if (!s->sao || !s->deblock)
205         goto fail;
206
207     s->skip_flag    = av_malloc(pic_size_in_ctb);
208     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
209     if (!s->skip_flag || !s->tab_ct_depth)
210         goto fail;
211
212     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
213     s->tab_ipm  = av_mallocz(min_pu_size);
214     s->is_pcm   = av_malloc(min_pu_size);
215     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
216         goto fail;
217
218     s->filter_slice_edges = av_malloc(ctb_count);
219     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
220                                       sizeof(*s->tab_slice_address));
221     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
222                                       sizeof(*s->qp_y_tab));
223     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
224         goto fail;
225
226     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
227     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
228     if (!s->horizontal_bs || !s->vertical_bs)
229         goto fail;
230
231     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
232                                           av_buffer_alloc);
233     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
234                                           av_buffer_allocz);
235     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
236         goto fail;
237
238     return 0;
239
240 fail:
241     pic_arrays_free(s);
242     return AVERROR(ENOMEM);
243 }
244
245 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
246 {
247     int i = 0;
248     int j = 0;
249     uint8_t luma_weight_l0_flag[16];
250     uint8_t chroma_weight_l0_flag[16];
251     uint8_t luma_weight_l1_flag[16];
252     uint8_t chroma_weight_l1_flag[16];
253
254     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
255     if (s->sps->chroma_format_idc != 0) {
256         int delta = get_se_golomb(gb);
257         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
258     }
259
260     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
261         luma_weight_l0_flag[i] = get_bits1(gb);
262         if (!luma_weight_l0_flag[i]) {
263             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
264             s->sh.luma_offset_l0[i] = 0;
265         }
266     }
267     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
268         for (i = 0; i < s->sh.nb_refs[L0]; i++)
269             chroma_weight_l0_flag[i] = get_bits1(gb);
270     } else {
271         for (i = 0; i < s->sh.nb_refs[L0]; i++)
272             chroma_weight_l0_flag[i] = 0;
273     }
274     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
275         if (luma_weight_l0_flag[i]) {
276             int delta_luma_weight_l0 = get_se_golomb(gb);
277             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
278             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
279         }
280         if (chroma_weight_l0_flag[i]) {
281             for (j = 0; j < 2; j++) {
282                 int delta_chroma_weight_l0 = get_se_golomb(gb);
283                 int delta_chroma_offset_l0 = get_se_golomb(gb);
284                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
285                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
286                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
287             }
288         } else {
289             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
290             s->sh.chroma_offset_l0[i][0] = 0;
291             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
292             s->sh.chroma_offset_l0[i][1] = 0;
293         }
294     }
295     if (s->sh.slice_type == B_SLICE) {
296         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
297             luma_weight_l1_flag[i] = get_bits1(gb);
298             if (!luma_weight_l1_flag[i]) {
299                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
300                 s->sh.luma_offset_l1[i] = 0;
301             }
302         }
303         if (s->sps->chroma_format_idc != 0) {
304             for (i = 0; i < s->sh.nb_refs[L1]; i++)
305                 chroma_weight_l1_flag[i] = get_bits1(gb);
306         } else {
307             for (i = 0; i < s->sh.nb_refs[L1]; i++)
308                 chroma_weight_l1_flag[i] = 0;
309         }
310         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
311             if (luma_weight_l1_flag[i]) {
312                 int delta_luma_weight_l1 = get_se_golomb(gb);
313                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
314                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
315             }
316             if (chroma_weight_l1_flag[i]) {
317                 for (j = 0; j < 2; j++) {
318                     int delta_chroma_weight_l1 = get_se_golomb(gb);
319                     int delta_chroma_offset_l1 = get_se_golomb(gb);
320                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
321                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
322                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
323                 }
324             } else {
325                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
326                 s->sh.chroma_offset_l1[i][0] = 0;
327                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
328                 s->sh.chroma_offset_l1[i][1] = 0;
329             }
330         }
331     }
332 }
333
334 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
335 {
336     const HEVCSPS *sps = s->sps;
337     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
338     int prev_delta_msb = 0;
339     unsigned int nb_sps = 0, nb_sh;
340     int i;
341
342     rps->nb_refs = 0;
343     if (!sps->long_term_ref_pics_present_flag)
344         return 0;
345
346     if (sps->num_long_term_ref_pics_sps > 0)
347         nb_sps = get_ue_golomb_long(gb);
348     nb_sh = get_ue_golomb_long(gb);
349
350     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
351         return AVERROR_INVALIDDATA;
352
353     rps->nb_refs = nb_sh + nb_sps;
354
355     for (i = 0; i < rps->nb_refs; i++) {
356         uint8_t delta_poc_msb_present;
357
358         if (i < nb_sps) {
359             uint8_t lt_idx_sps = 0;
360
361             if (sps->num_long_term_ref_pics_sps > 1)
362                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
363
364             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
365             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
366         } else {
367             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
368             rps->used[i] = get_bits1(gb);
369         }
370
371         delta_poc_msb_present = get_bits1(gb);
372         if (delta_poc_msb_present) {
373             int delta = get_ue_golomb_long(gb);
374
375             if (i && i != nb_sps)
376                 delta += prev_delta_msb;
377
378             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
379             prev_delta_msb = delta;
380         }
381     }
382
383     return 0;
384 }
385
386 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
387 {
388     int ret;
389     unsigned int num = 0, den = 0;
390
391     pic_arrays_free(s);
392     ret = pic_arrays_init(s, sps);
393     if (ret < 0)
394         goto fail;
395
396     s->avctx->coded_width         = sps->width;
397     s->avctx->coded_height        = sps->height;
398     s->avctx->width               = sps->output_width;
399     s->avctx->height              = sps->output_height;
400     s->avctx->pix_fmt             = sps->pix_fmt;
401     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
402
403     ff_set_sar(s->avctx, sps->vui.sar);
404
405     if (sps->vui.video_signal_type_present_flag)
406         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
407                                                                : AVCOL_RANGE_MPEG;
408     else
409         s->avctx->color_range = AVCOL_RANGE_MPEG;
410
411     if (sps->vui.colour_description_present_flag) {
412         s->avctx->color_primaries = sps->vui.colour_primaries;
413         s->avctx->color_trc       = sps->vui.transfer_characteristic;
414         s->avctx->colorspace      = sps->vui.matrix_coeffs;
415     } else {
416         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
417         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
418         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
419     }
420
421     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
422     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
423     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
424
425     if (sps->sao_enabled) {
426         av_frame_unref(s->tmp_frame);
427         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
428         if (ret < 0)
429             goto fail;
430         s->frame = s->tmp_frame;
431     }
432
433     s->sps = sps;
434     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
435
436     if (s->vps->vps_timing_info_present_flag) {
437         num = s->vps->vps_num_units_in_tick;
438         den = s->vps->vps_time_scale;
439     } else if (sps->vui.vui_timing_info_present_flag) {
440         num = sps->vui.vui_num_units_in_tick;
441         den = sps->vui.vui_time_scale;
442     }
443
444     if (num != 0 && den != 0)
445         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
446                   num, den, 1 << 30);
447
448     return 0;
449
450 fail:
451     pic_arrays_free(s);
452     s->sps = NULL;
453     return ret;
454 }
455
456 static int hls_slice_header(HEVCContext *s)
457 {
458     GetBitContext *gb = &s->HEVClc.gb;
459     SliceHeader *sh   = &s->sh;
460     int i, ret;
461
462     // Coded parameters
463     sh->first_slice_in_pic_flag = get_bits1(gb);
464     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
465         s->seq_decode = (s->seq_decode + 1) & 0xff;
466         s->max_ra     = INT_MAX;
467         if (IS_IDR(s))
468             ff_hevc_clear_refs(s);
469     }
470     if (IS_IRAP(s))
471         sh->no_output_of_prior_pics_flag = get_bits1(gb);
472
473     sh->pps_id = get_ue_golomb_long(gb);
474     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
475         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
476         return AVERROR_INVALIDDATA;
477     }
478     if (!sh->first_slice_in_pic_flag &&
479         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
480         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
481         return AVERROR_INVALIDDATA;
482     }
483     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
484
485     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
486         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
487
488         ff_hevc_clear_refs(s);
489         ret = set_sps(s, s->sps);
490         if (ret < 0)
491             return ret;
492
493         s->seq_decode = (s->seq_decode + 1) & 0xff;
494         s->max_ra     = INT_MAX;
495     }
496
497     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
498     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
499
500     sh->dependent_slice_segment_flag = 0;
501     if (!sh->first_slice_in_pic_flag) {
502         int slice_address_length;
503
504         if (s->pps->dependent_slice_segments_enabled_flag)
505             sh->dependent_slice_segment_flag = get_bits1(gb);
506
507         slice_address_length = av_ceil_log2(s->sps->ctb_width *
508                                             s->sps->ctb_height);
509         sh->slice_segment_addr = get_bits(gb, slice_address_length);
510         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
511             av_log(s->avctx, AV_LOG_ERROR,
512                    "Invalid slice segment address: %u.\n",
513                    sh->slice_segment_addr);
514             return AVERROR_INVALIDDATA;
515         }
516
517         if (!sh->dependent_slice_segment_flag) {
518             sh->slice_addr = sh->slice_segment_addr;
519             s->slice_idx++;
520         }
521     } else {
522         sh->slice_segment_addr = sh->slice_addr = 0;
523         s->slice_idx           = 0;
524         s->slice_initialized   = 0;
525     }
526
527     if (!sh->dependent_slice_segment_flag) {
528         s->slice_initialized = 0;
529
530         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
531             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
532
533         sh->slice_type = get_ue_golomb_long(gb);
534         if (!(sh->slice_type == I_SLICE ||
535               sh->slice_type == P_SLICE ||
536               sh->slice_type == B_SLICE)) {
537             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
538                    sh->slice_type);
539             return AVERROR_INVALIDDATA;
540         }
541         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
542             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
543             return AVERROR_INVALIDDATA;
544         }
545
546         // when flag is not present, picture is inferred to be output
547         sh->pic_output_flag = 1;
548         if (s->pps->output_flag_present_flag)
549             sh->pic_output_flag = get_bits1(gb);
550
551         if (s->sps->separate_colour_plane_flag)
552             sh->colour_plane_id = get_bits(gb, 2);
553
554         if (!IS_IDR(s)) {
555             int short_term_ref_pic_set_sps_flag, poc;
556
557             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
558             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
559             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
560                 av_log(s->avctx, AV_LOG_WARNING,
561                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
562                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
563                     return AVERROR_INVALIDDATA;
564                 poc = s->poc;
565             }
566             s->poc = poc;
567
568             short_term_ref_pic_set_sps_flag = get_bits1(gb);
569             if (!short_term_ref_pic_set_sps_flag) {
570                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
571                 if (ret < 0)
572                     return ret;
573
574                 sh->short_term_rps = &sh->slice_rps;
575             } else {
576                 int numbits, rps_idx;
577
578                 if (!s->sps->nb_st_rps) {
579                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
580                     return AVERROR_INVALIDDATA;
581                 }
582
583                 numbits = av_ceil_log2(s->sps->nb_st_rps);
584                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
585                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
586             }
587
588             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
589             if (ret < 0) {
590                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
591                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
592                     return AVERROR_INVALIDDATA;
593             }
594
595             if (s->sps->sps_temporal_mvp_enabled_flag)
596                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
597             else
598                 sh->slice_temporal_mvp_enabled_flag = 0;
599         } else {
600             s->sh.short_term_rps = NULL;
601             s->poc               = 0;
602         }
603
604         /* 8.3.1 */
605         if (s->temporal_id == 0 &&
606             s->nal_unit_type != NAL_TRAIL_N &&
607             s->nal_unit_type != NAL_TSA_N   &&
608             s->nal_unit_type != NAL_STSA_N  &&
609             s->nal_unit_type != NAL_RADL_N  &&
610             s->nal_unit_type != NAL_RADL_R  &&
611             s->nal_unit_type != NAL_RASL_N  &&
612             s->nal_unit_type != NAL_RASL_R)
613             s->pocTid0 = s->poc;
614
615         if (s->sps->sao_enabled) {
616             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
617             sh->slice_sample_adaptive_offset_flag[1] =
618             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
619         } else {
620             sh->slice_sample_adaptive_offset_flag[0] = 0;
621             sh->slice_sample_adaptive_offset_flag[1] = 0;
622             sh->slice_sample_adaptive_offset_flag[2] = 0;
623         }
624
625         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
626         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
627             int nb_refs;
628
629             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
630             if (sh->slice_type == B_SLICE)
631                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
632
633             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
634                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
635                 if (sh->slice_type == B_SLICE)
636                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
637             }
638             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
639                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
640                        sh->nb_refs[L0], sh->nb_refs[L1]);
641                 return AVERROR_INVALIDDATA;
642             }
643
644             sh->rpl_modification_flag[0] = 0;
645             sh->rpl_modification_flag[1] = 0;
646             nb_refs = ff_hevc_frame_nb_refs(s);
647             if (!nb_refs) {
648                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
649                 return AVERROR_INVALIDDATA;
650             }
651
652             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
653                 sh->rpl_modification_flag[0] = get_bits1(gb);
654                 if (sh->rpl_modification_flag[0]) {
655                     for (i = 0; i < sh->nb_refs[L0]; i++)
656                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
657                 }
658
659                 if (sh->slice_type == B_SLICE) {
660                     sh->rpl_modification_flag[1] = get_bits1(gb);
661                     if (sh->rpl_modification_flag[1] == 1)
662                         for (i = 0; i < sh->nb_refs[L1]; i++)
663                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
664                 }
665             }
666
667             if (sh->slice_type == B_SLICE)
668                 sh->mvd_l1_zero_flag = get_bits1(gb);
669
670             if (s->pps->cabac_init_present_flag)
671                 sh->cabac_init_flag = get_bits1(gb);
672             else
673                 sh->cabac_init_flag = 0;
674
675             sh->collocated_ref_idx = 0;
676             if (sh->slice_temporal_mvp_enabled_flag) {
677                 sh->collocated_list = L0;
678                 if (sh->slice_type == B_SLICE)
679                     sh->collocated_list = !get_bits1(gb);
680
681                 if (sh->nb_refs[sh->collocated_list] > 1) {
682                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
683                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
684                         av_log(s->avctx, AV_LOG_ERROR,
685                                "Invalid collocated_ref_idx: %d.\n",
686                                sh->collocated_ref_idx);
687                         return AVERROR_INVALIDDATA;
688                     }
689                 }
690             }
691
692             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
693                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
694                 pred_weight_table(s, gb);
695             }
696
697             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
698             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
699                 av_log(s->avctx, AV_LOG_ERROR,
700                        "Invalid number of merging MVP candidates: %d.\n",
701                        sh->max_num_merge_cand);
702                 return AVERROR_INVALIDDATA;
703             }
704         }
705
706         sh->slice_qp_delta = get_se_golomb(gb);
707
708         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
709             sh->slice_cb_qp_offset = get_se_golomb(gb);
710             sh->slice_cr_qp_offset = get_se_golomb(gb);
711         } else {
712             sh->slice_cb_qp_offset = 0;
713             sh->slice_cr_qp_offset = 0;
714         }
715
716         if (s->pps->deblocking_filter_control_present_flag) {
717             int deblocking_filter_override_flag = 0;
718
719             if (s->pps->deblocking_filter_override_enabled_flag)
720                 deblocking_filter_override_flag = get_bits1(gb);
721
722             if (deblocking_filter_override_flag) {
723                 sh->disable_deblocking_filter_flag = get_bits1(gb);
724                 if (!sh->disable_deblocking_filter_flag) {
725                     sh->beta_offset = get_se_golomb(gb) * 2;
726                     sh->tc_offset   = get_se_golomb(gb) * 2;
727                 }
728             } else {
729                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
730                 sh->beta_offset                    = s->pps->beta_offset;
731                 sh->tc_offset                      = s->pps->tc_offset;
732             }
733         } else {
734             sh->disable_deblocking_filter_flag = 0;
735             sh->beta_offset                    = 0;
736             sh->tc_offset                      = 0;
737         }
738
739         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
740             (sh->slice_sample_adaptive_offset_flag[0] ||
741              sh->slice_sample_adaptive_offset_flag[1] ||
742              !sh->disable_deblocking_filter_flag)) {
743             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
744         } else {
745             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
746         }
747     } else if (!s->slice_initialized) {
748         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
749         return AVERROR_INVALIDDATA;
750     }
751
752     sh->num_entry_point_offsets = 0;
753     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
754         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
755         if (sh->num_entry_point_offsets > 0) {
756             int offset_len = get_ue_golomb_long(gb) + 1;
757
758             for (i = 0; i < sh->num_entry_point_offsets; i++)
759                 skip_bits(gb, offset_len);
760         }
761     }
762
763     if (s->pps->slice_header_extension_present_flag) {
764         unsigned int length = get_ue_golomb_long(gb);
765         for (i = 0; i < length; i++)
766             skip_bits(gb, 8);  // slice_header_extension_data_byte
767     }
768
769     // Inferred parameters
770     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
771     if (sh->slice_qp > 51 ||
772         sh->slice_qp < -s->sps->qp_bd_offset) {
773         av_log(s->avctx, AV_LOG_ERROR,
774                "The slice_qp %d is outside the valid range "
775                "[%d, 51].\n",
776                sh->slice_qp,
777                -s->sps->qp_bd_offset);
778         return AVERROR_INVALIDDATA;
779     }
780
781     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
782
783     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
784         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
785         return AVERROR_INVALIDDATA;
786     }
787
788     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
789
790     if (!s->pps->cu_qp_delta_enabled_flag)
791         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
792                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
793
794     s->slice_initialized = 1;
795
796     return 0;
797 }
798
/*
 * Access the entry of a per-CTB table at CTB coordinates (x, y).
 * Expects a variable `s` in scope with s->sps->ctb_width giving the row
 * stride of the table.
 */
#define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])

/*
 * Set one field of the current CTB's SAO parameters.
 * With a merge flag set the field is copied from the left (checked first) or
 * upper neighbouring CTB and `value` is not evaluated; otherwise `value` is
 * evaluated exactly once and stored.
 * Expects `s`, `sao`, `rx`, `ry`, `sao_merge_left_flag` and
 * `sao_merge_up_flag` in scope at the point of use.
 */
#define SET_SAO(elem, value)                            \
do {                                                    \
    if (sao_merge_left_flag)                            \
        sao->elem = CTB(s->sao, rx-1, ry).elem;         \
    else if (sao_merge_up_flag)                         \
        sao->elem = CTB(s->sao, rx, ry-1).elem;         \
    else                                                \
        sao->elem = value;                              \
} while (0)
812
813 static void hls_sao_param(HEVCContext *s, int rx, int ry)
814 {
815     HEVCLocalContext *lc    = &s->HEVClc;
816     int sao_merge_left_flag = 0;
817     int sao_merge_up_flag   = 0;
818     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
819     SAOParams *sao          = &CTB(s->sao, rx, ry);
820     int c_idx, i;
821
822     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
823         s->sh.slice_sample_adaptive_offset_flag[1]) {
824         if (rx > 0) {
825             if (lc->ctb_left_flag)
826                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
827         }
828         if (ry > 0 && !sao_merge_left_flag) {
829             if (lc->ctb_up_flag)
830                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
831         }
832     }
833
834     for (c_idx = 0; c_idx < 3; c_idx++) {
835         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
836             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
837             continue;
838         }
839
840         if (c_idx == 2) {
841             sao->type_idx[2] = sao->type_idx[1];
842             sao->eo_class[2] = sao->eo_class[1];
843         } else {
844             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
845         }
846
847         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
848             continue;
849
850         for (i = 0; i < 4; i++)
851             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
852
853         if (sao->type_idx[c_idx] == SAO_BAND) {
854             for (i = 0; i < 4; i++) {
855                 if (sao->offset_abs[c_idx][i]) {
856                     SET_SAO(offset_sign[c_idx][i],
857                             ff_hevc_sao_offset_sign_decode(s));
858                 } else {
859                     sao->offset_sign[c_idx][i] = 0;
860                 }
861             }
862             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
863         } else if (c_idx != 2) {
864             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
865         }
866
867         // Inferred parameters
868         sao->offset_val[c_idx][0] = 0;
869         for (i = 0; i < 4; i++) {
870             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
871             if (sao->type_idx[c_idx] == SAO_EDGE) {
872                 if (i > 1)
873                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
874             } else if (sao->offset_sign[c_idx][i]) {
875                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
876             }
877         }
878     }
879 }
880
881 #undef SET_SAO
882 #undef CTB
883
884 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
885                                 int log2_trafo_size, enum ScanType scan_idx,
886                                 int c_idx)
887 {
888 #define GET_COORD(offset, n)                                    \
889     do {                                                        \
890         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
891         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
892     } while (0)
893     HEVCLocalContext *lc    = &s->HEVClc;
894     int transform_skip_flag = 0;
895
896     int last_significant_coeff_x, last_significant_coeff_y;
897     int last_scan_pos;
898     int n_end;
899     int num_coeff    = 0;
900     int greater1_ctx = 1;
901
902     int num_last_subset;
903     int x_cg_last_sig, y_cg_last_sig;
904
905     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
906
907     ptrdiff_t stride = s->frame->linesize[c_idx];
908     int hshift       = s->sps->hshift[c_idx];
909     int vshift       = s->sps->vshift[c_idx];
910     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
911                                               ((x0 >> hshift) << s->sps->pixel_shift)];
912     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
913     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
914
915     int trafo_size = 1 << log2_trafo_size;
916     int i, qp, shift, add, scale, scale_m;
917     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
918     const uint8_t *scale_matrix;
919     uint8_t dc_scale;
920
921     // Derive QP for dequant
922     if (!lc->cu.cu_transquant_bypass_flag) {
923         static const int qp_c[] = {
924             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
925         };
926
927         static const uint8_t rem6[51 + 2 * 6 + 1] = {
928             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
929             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
930             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
931         };
932
933         static const uint8_t div6[51 + 2 * 6 + 1] = {
934             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
935             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
936             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
937         };
938         int qp_y = lc->qp_y;
939
940         if (c_idx == 0) {
941             qp = qp_y + s->sps->qp_bd_offset;
942         } else {
943             int qp_i, offset;
944
945             if (c_idx == 1)
946                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
947             else
948                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
949
950             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
951             if (qp_i < 30)
952                 qp = qp_i;
953             else if (qp_i > 43)
954                 qp = qp_i - 6;
955             else
956                 qp = qp_c[qp_i - 30];
957
958             qp += s->sps->qp_bd_offset;
959         }
960
961         shift    = s->sps->bit_depth + log2_trafo_size - 5;
962         add      = 1 << (shift - 1);
963         scale    = level_scale[rem6[qp]] << (div6[qp]);
964         scale_m  = 16; // default when no custom scaling lists.
965         dc_scale = 16;
966
967         if (s->sps->scaling_list_enable_flag) {
968             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
969                                     &s->pps->scaling_list : &s->sps->scaling_list;
970             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
971
972             if (log2_trafo_size != 5)
973                 matrix_id = 3 * matrix_id + c_idx;
974
975             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
976             if (log2_trafo_size >= 4)
977                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
978         }
979     }
980
981     if (s->pps->transform_skip_enabled_flag &&
982         !lc->cu.cu_transquant_bypass_flag   &&
983         log2_trafo_size == 2) {
984         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
985     }
986
987     last_significant_coeff_x =
988         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
989     last_significant_coeff_y =
990         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
991
992     if (last_significant_coeff_x > 3) {
993         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
994         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
995                                    (2 + (last_significant_coeff_x & 1)) +
996                                    suffix;
997     }
998
999     if (last_significant_coeff_y > 3) {
1000         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1001         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1002                                    (2 + (last_significant_coeff_y & 1)) +
1003                                    suffix;
1004     }
1005
1006     if (scan_idx == SCAN_VERT)
1007         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1008
1009     x_cg_last_sig = last_significant_coeff_x >> 2;
1010     y_cg_last_sig = last_significant_coeff_y >> 2;
1011
1012     switch (scan_idx) {
1013     case SCAN_DIAG: {
1014         int last_x_c = last_significant_coeff_x & 3;
1015         int last_y_c = last_significant_coeff_y & 3;
1016
1017         scan_x_off = ff_hevc_diag_scan4x4_x;
1018         scan_y_off = ff_hevc_diag_scan4x4_y;
1019         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1020         if (trafo_size == 4) {
1021             scan_x_cg = scan_1x1;
1022             scan_y_cg = scan_1x1;
1023         } else if (trafo_size == 8) {
1024             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1025             scan_x_cg  = diag_scan2x2_x;
1026             scan_y_cg  = diag_scan2x2_y;
1027         } else if (trafo_size == 16) {
1028             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1029             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1030             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1031         } else { // trafo_size == 32
1032             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1033             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1034             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1035         }
1036         break;
1037     }
1038     case SCAN_HORIZ:
1039         scan_x_cg  = horiz_scan2x2_x;
1040         scan_y_cg  = horiz_scan2x2_y;
1041         scan_x_off = horiz_scan4x4_x;
1042         scan_y_off = horiz_scan4x4_y;
1043         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1044         break;
1045     default: //SCAN_VERT
1046         scan_x_cg  = horiz_scan2x2_y;
1047         scan_y_cg  = horiz_scan2x2_x;
1048         scan_x_off = horiz_scan4x4_y;
1049         scan_y_off = horiz_scan4x4_x;
1050         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1051         break;
1052     }
1053     num_coeff++;
1054     num_last_subset = (num_coeff - 1) >> 4;
1055
1056     for (i = num_last_subset; i >= 0; i--) {
1057         int n, m;
1058         int x_cg, y_cg, x_c, y_c;
1059         int implicit_non_zero_coeff = 0;
1060         int64_t trans_coeff_level;
1061         int prev_sig = 0;
1062         int offset   = i << 4;
1063
1064         uint8_t significant_coeff_flag_idx[16];
1065         uint8_t nb_significant_coeff_flag = 0;
1066
1067         x_cg = scan_x_cg[i];
1068         y_cg = scan_y_cg[i];
1069
1070         if (i < num_last_subset && i > 0) {
1071             int ctx_cg = 0;
1072             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1073                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1074             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1075                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1076
1077             significant_coeff_group_flag[x_cg][y_cg] =
1078                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1079             implicit_non_zero_coeff = 1;
1080         } else {
1081             significant_coeff_group_flag[x_cg][y_cg] =
1082                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1083                  (x_cg == 0 && y_cg == 0));
1084         }
1085
1086         last_scan_pos = num_coeff - offset - 1;
1087
1088         if (i == num_last_subset) {
1089             n_end                         = last_scan_pos - 1;
1090             significant_coeff_flag_idx[0] = last_scan_pos;
1091             nb_significant_coeff_flag     = 1;
1092         } else {
1093             n_end = 15;
1094         }
1095
1096         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1097             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1098         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1099             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1100
1101         for (n = n_end; n >= 0; n--) {
1102             GET_COORD(offset, n);
1103
1104             if (significant_coeff_group_flag[x_cg][y_cg] &&
1105                 (n > 0 || implicit_non_zero_coeff == 0)) {
1106                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1107                                                           log2_trafo_size,
1108                                                           scan_idx,
1109                                                           prev_sig) == 1) {
1110                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1111                     nb_significant_coeff_flag++;
1112                     implicit_non_zero_coeff = 0;
1113                 }
1114             } else {
1115                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1116                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1117                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1118                     nb_significant_coeff_flag++;
1119                 }
1120             }
1121         }
1122
1123         n_end = nb_significant_coeff_flag;
1124
1125         if (n_end) {
1126             int first_nz_pos_in_cg = 16;
1127             int last_nz_pos_in_cg = -1;
1128             int c_rice_param = 0;
1129             int first_greater1_coeff_idx = -1;
1130             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1131             uint16_t coeff_sign_flag;
1132             int sum_abs = 0;
1133             int sign_hidden = 0;
1134
1135             // initialize first elem of coeff_bas_level_greater1_flag
1136             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1137
1138             if (!(i == num_last_subset) && greater1_ctx == 0)
1139                 ctx_set++;
1140             greater1_ctx      = 1;
1141             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1142
1143             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1144                 int n_idx = significant_coeff_flag_idx[m];
1145                 int inc   = (ctx_set << 2) + greater1_ctx;
1146                 coeff_abs_level_greater1_flag[n_idx] =
1147                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1148                 if (coeff_abs_level_greater1_flag[n_idx]) {
1149                     greater1_ctx = 0;
1150                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1151                     greater1_ctx++;
1152                 }
1153
1154                 if (coeff_abs_level_greater1_flag[n_idx] &&
1155                     first_greater1_coeff_idx == -1)
1156                     first_greater1_coeff_idx = n_idx;
1157             }
1158             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1159             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1160                                  !lc->cu.cu_transquant_bypass_flag;
1161
1162             if (first_greater1_coeff_idx != -1) {
1163                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1164             }
1165             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1166                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1167             } else {
1168                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1169             }
1170
1171             for (m = 0; m < n_end; m++) {
1172                 n = significant_coeff_flag_idx[m];
1173                 GET_COORD(offset, n);
1174                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1175                 if (trans_coeff_level == ((m < 8) ?
1176                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1177                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1178
1179                     trans_coeff_level += last_coeff_abs_level_remaining;
1180                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1181                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1182                 }
1183                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1184                     sum_abs += trans_coeff_level;
1185                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1186                         trans_coeff_level = -trans_coeff_level;
1187                 }
1188                 if (coeff_sign_flag >> 15)
1189                     trans_coeff_level = -trans_coeff_level;
1190                 coeff_sign_flag <<= 1;
1191                 if (!lc->cu.cu_transquant_bypass_flag) {
1192                     if (s->sps->scaling_list_enable_flag) {
1193                         if (y_c || x_c || log2_trafo_size < 4) {
1194                             int pos;
1195                             switch (log2_trafo_size) {
1196                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1197                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1198                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1199                             default: pos = (y_c        << 2) +  x_c;
1200                             }
1201                             scale_m = scale_matrix[pos];
1202                         } else {
1203                             scale_m = dc_scale;
1204                         }
1205                     }
1206                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1207                     if(trans_coeff_level < 0) {
1208                         if((~trans_coeff_level) & 0xFffffffffff8000)
1209                             trans_coeff_level = -32768;
1210                     } else {
1211                         if (trans_coeff_level & 0xffffffffffff8000)
1212                             trans_coeff_level = 32767;
1213                     }
1214                 }
1215                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1216             }
1217         }
1218     }
1219
1220     if (lc->cu.cu_transquant_bypass_flag) {
1221         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1222     } else {
1223         if (transform_skip_flag)
1224             s->hevcdsp.transform_skip(dst, coeffs, stride);
1225         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1226                  log2_trafo_size == 2)
1227             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1228         else
1229             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1230     }
1231 }
1232
1233 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1234                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1235                               int log2_cb_size, int log2_trafo_size,
1236                               int trafo_depth, int blk_idx,
1237                               int cbf_luma, int cbf_cb, int cbf_cr)
1238 {
1239     HEVCLocalContext *lc = &s->HEVClc;
1240
1241     if (lc->cu.pred_mode == MODE_INTRA) {
1242         int trafo_size = 1 << log2_trafo_size;
1243         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1244
1245         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1246         if (log2_trafo_size > 2) {
1247             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1248             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1249             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1250             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1251         } else if (blk_idx == 3) {
1252             trafo_size = trafo_size << s->sps->hshift[1];
1253             ff_hevc_set_neighbour_available(s, xBase, yBase,
1254                                             trafo_size, trafo_size);
1255             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1256             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1257         }
1258     }
1259
1260     if (cbf_luma || cbf_cb || cbf_cr) {
1261         int scan_idx   = SCAN_DIAG;
1262         int scan_idx_c = SCAN_DIAG;
1263
1264         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1265             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1266             if (lc->tu.cu_qp_delta != 0)
1267                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1268                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1269             lc->tu.is_cu_qp_delta_coded = 1;
1270
1271             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
1272                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
1273                 av_log(s->avctx, AV_LOG_ERROR,
1274                        "The cu_qp_delta %d is outside the valid range "
1275                        "[%d, %d].\n",
1276                        lc->tu.cu_qp_delta,
1277                        -(26 + s->sps->qp_bd_offset / 2),
1278                         (25 + s->sps->qp_bd_offset / 2));
1279                 return AVERROR_INVALIDDATA;
1280             }
1281
1282             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1283         }
1284
1285         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1286             if (lc->tu.cur_intra_pred_mode >= 6 &&
1287                 lc->tu.cur_intra_pred_mode <= 14) {
1288                 scan_idx = SCAN_VERT;
1289             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1290                        lc->tu.cur_intra_pred_mode <= 30) {
1291                 scan_idx = SCAN_HORIZ;
1292             }
1293
1294             if (lc->pu.intra_pred_mode_c >=  6 &&
1295                 lc->pu.intra_pred_mode_c <= 14) {
1296                 scan_idx_c = SCAN_VERT;
1297             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1298                        lc->pu.intra_pred_mode_c <= 30) {
1299                 scan_idx_c = SCAN_HORIZ;
1300             }
1301         }
1302
1303         if (cbf_luma)
1304             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1305         if (log2_trafo_size > 2) {
1306             if (cbf_cb)
1307                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1308             if (cbf_cr)
1309                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1310         } else if (blk_idx == 3) {
1311             if (cbf_cb)
1312                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1313             if (cbf_cr)
1314                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1315         }
1316     }
1317     return 0;
1318 }
1319
1320 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1321 {
1322     int cb_size          = 1 << log2_cb_size;
1323     int log2_min_pu_size = s->sps->log2_min_pu_size;
1324
1325     int min_pu_width     = s->sps->min_pu_width;
1326     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1327     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1328     int i, j;
1329
1330     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1331         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1332             s->is_pcm[i + j * min_pu_width] = 2;
1333 }
1334
1335 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1336                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1337                               int log2_cb_size, int log2_trafo_size,
1338                               int trafo_depth, int blk_idx,
1339                               int cbf_cb, int cbf_cr)
1340 {
1341     HEVCLocalContext *lc = &s->HEVClc;
1342     uint8_t split_transform_flag;
1343     int ret;
1344
1345     if (lc->cu.intra_split_flag) {
1346         if (trafo_depth == 1)
1347             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1348     } else {
1349         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1350     }
1351
1352     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1353         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1354         trafo_depth     < lc->cu.max_trafo_depth       &&
1355         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1356         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1357     } else {
1358         int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1359                           lc->cu.pred_mode == MODE_INTER &&
1360                           lc->cu.part_mode != PART_2Nx2N &&
1361                           trafo_depth == 0;
1362
1363         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1364                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1365                                inter_split;
1366     }
1367
1368     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1369         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1370     else if (log2_trafo_size > 2 || trafo_depth == 0)
1371         cbf_cb = 0;
1372     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1373         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1374     else if (log2_trafo_size > 2 || trafo_depth == 0)
1375         cbf_cr = 0;
1376
1377     if (split_transform_flag) {
1378         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1379         const int x1 = x0 + trafo_size_split;
1380         const int y1 = y0 + trafo_size_split;
1381
1382 #define SUBDIVIDE(x, y, idx)                                                    \
1383 do {                                                                            \
1384     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1385                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1386                              cbf_cb, cbf_cr);                                   \
1387     if (ret < 0)                                                                \
1388         return ret;                                                             \
1389 } while (0)
1390
1391         SUBDIVIDE(x0, y0, 0);
1392         SUBDIVIDE(x1, y0, 1);
1393         SUBDIVIDE(x0, y1, 2);
1394         SUBDIVIDE(x1, y1, 3);
1395
1396 #undef SUBDIVIDE
1397     } else {
1398         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1399         int log2_min_tu_size = s->sps->log2_min_tb_size;
1400         int min_tu_width     = s->sps->min_tb_width;
1401         int cbf_luma         = 1;
1402
1403         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1404             cbf_cb || cbf_cr)
1405             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1406
1407         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1408                                  log2_cb_size, log2_trafo_size, trafo_depth,
1409                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1410         if (ret < 0)
1411             return ret;
1412         // TODO: store cbf_luma somewhere else
1413         if (cbf_luma) {
1414             int i, j;
1415             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1416                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1417                     int x_tu = (x0 + j) >> log2_min_tu_size;
1418                     int y_tu = (y0 + i) >> log2_min_tu_size;
1419                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1420                 }
1421         }
1422         if (!s->sh.disable_deblocking_filter_flag) {
1423             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1424                                                   lc->slice_or_tiles_up_boundary,
1425                                                   lc->slice_or_tiles_left_boundary);
1426             if (s->pps->transquant_bypass_enable_flag &&
1427                 lc->cu.cu_transquant_bypass_flag)
1428                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1429         }
1430     }
1431     return 0;
1432 }
1433
1434 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1435 {
1436     //TODO: non-4:2:0 support
1437     HEVCLocalContext *lc = &s->HEVClc;
1438     GetBitContext gb;
1439     int cb_size   = 1 << log2_cb_size;
1440     int stride0   = s->frame->linesize[0];
1441     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1442     int   stride1 = s->frame->linesize[1];
1443     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1444     int   stride2 = s->frame->linesize[2];
1445     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1446
1447     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1448     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1449     int ret;
1450
1451     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1452                                           lc->slice_or_tiles_up_boundary,
1453                                           lc->slice_or_tiles_left_boundary);
1454
1455     ret = init_get_bits(&gb, pcm, length);
1456     if (ret < 0)
1457         return ret;
1458
1459     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1460     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1461     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1462     return 0;
1463 }
1464
1465 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1466 {
1467     HEVCLocalContext *lc = &s->HEVClc;
1468     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1469     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1470
1471     if (x)
1472         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1473     if (y)
1474         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1475
1476     switch (x) {
1477     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1478     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1479     case 0: lc->pu.mvd.x = 0;                               break;
1480     }
1481
1482     switch (y) {
1483     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1484     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1485     case 0: lc->pu.mvd.y = 0;                               break;
1486     }
1487 }
1488
1489 /**
1490  * 8.5.3.2.2.1 Luma sample interpolation process
1491  *
1492  * @param s HEVC decoding context
1493  * @param dst target buffer for block data at block position
1494  * @param dststride stride of the dst buffer
1495  * @param ref reference picture buffer at origin (0, 0)
1496  * @param mv motion vector (relative to block position) to get pixel data from
1497  * @param x_off horizontal position of block from origin (0, 0)
1498  * @param y_off vertical position of block from origin (0, 0)
1499  * @param block_w width of block
1500  * @param block_h height of block
1501  */
1502 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1503                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1504                     int block_w, int block_h)
1505 {
1506     HEVCLocalContext *lc = &s->HEVClc;
1507     uint8_t *src         = ref->data[0];
1508     ptrdiff_t srcstride  = ref->linesize[0];
1509     int pic_width        = s->sps->width;
1510     int pic_height       = s->sps->height;
1511
1512     int mx         = mv->x & 3;
1513     int my         = mv->y & 3;
1514     int extra_left = ff_hevc_qpel_extra_before[mx];
1515     int extra_top  = ff_hevc_qpel_extra_before[my];
1516
1517     x_off += mv->x >> 2;
1518     y_off += mv->y >> 2;
1519     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1520
1521     if (x_off < extra_left || y_off < extra_top ||
1522         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1523         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1524         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1525         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1526         int buf_offset = extra_top *
1527                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1528
1529         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1530                                  edge_emu_stride, srcstride,
1531                                  block_w + ff_hevc_qpel_extra[mx],
1532                                  block_h + ff_hevc_qpel_extra[my],
1533                                  x_off - extra_left, y_off - extra_top,
1534                                  pic_width, pic_height);
1535         src = lc->edge_emu_buffer + buf_offset;
1536         srcstride = edge_emu_stride;
1537     }
1538     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1539                                      block_h, lc->mc_buffer);
1540 }
1541
1542 /**
1543  * 8.5.3.2.2.2 Chroma sample interpolation process
1544  *
1545  * @param s HEVC decoding context
1546  * @param dst1 target buffer for block data at block position (U plane)
1547  * @param dst2 target buffer for block data at block position (V plane)
1548  * @param dststride stride of the dst1 and dst2 buffers
1549  * @param ref reference picture buffer at origin (0, 0)
1550  * @param mv motion vector (relative to block position) to get pixel data from
1551  * @param x_off horizontal position of block from origin (0, 0)
1552  * @param y_off vertical position of block from origin (0, 0)
1553  * @param block_w width of block
1554  * @param block_h height of block
1555  */
1556 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1557                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1558                       int x_off, int y_off, int block_w, int block_h)
1559 {
1560     HEVCLocalContext *lc = &s->HEVClc;
1561     uint8_t *src1        = ref->data[1];
1562     uint8_t *src2        = ref->data[2];
1563     ptrdiff_t src1stride = ref->linesize[1];
1564     ptrdiff_t src2stride = ref->linesize[2];
1565     int pic_width        = s->sps->width >> 1;
1566     int pic_height       = s->sps->height >> 1;
1567
1568     int mx = mv->x & 7;
1569     int my = mv->y & 7;
1570
1571     x_off += mv->x >> 3;
1572     y_off += mv->y >> 3;
1573     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1574     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1575
1576     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1577         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1578         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1579         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1580         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1581         int buf_offset1 = EPEL_EXTRA_BEFORE *
1582                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1583         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1584         int buf_offset2 = EPEL_EXTRA_BEFORE *
1585                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1586
1587         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1588                                  edge_emu_stride, src1stride,
1589                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1590                                  x_off - EPEL_EXTRA_BEFORE,
1591                                  y_off - EPEL_EXTRA_BEFORE,
1592                                  pic_width, pic_height);
1593
1594         src1 = lc->edge_emu_buffer + buf_offset1;
1595         src1stride = edge_emu_stride;
1596         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1597                                              block_w, block_h, mx, my, lc->mc_buffer);
1598
1599         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1600                                  edge_emu_stride, src2stride,
1601                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1602                                  x_off - EPEL_EXTRA_BEFORE,
1603                                  y_off - EPEL_EXTRA_BEFORE,
1604                                  pic_width, pic_height);
1605         src2 = lc->edge_emu_buffer + buf_offset2;
1606         src2stride = edge_emu_stride;
1607
1608         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1609                                              block_w, block_h, mx, my,
1610                                              lc->mc_buffer);
1611     } else {
1612         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1613                                              block_w, block_h, mx, my,
1614                                              lc->mc_buffer);
1615         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1616                                              block_w, block_h, mx, my,
1617                                              lc->mc_buffer);
1618     }
1619 }
1620
1621 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1622                                 const Mv *mv, int y0, int height)
1623 {
1624     int y = (mv->y >> 2) + y0 + height + 9;
1625     ff_thread_await_progress(&ref->tf, y, 0);
1626 }
1627
1628 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1629                                 int nPbW, int nPbH,
1630                                 int log2_cb_size, int partIdx)
1631 {
1632 #define POS(c_idx, x, y)                                                              \
1633     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1634                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1635     HEVCLocalContext *lc = &s->HEVClc;
1636     int merge_idx = 0;
1637     struct MvField current_mv = {{{ 0 }}};
1638
1639     int min_pu_width = s->sps->min_pu_width;
1640
1641     MvField *tab_mvf = s->ref->tab_mvf;
1642     RefPicList  *refPicList = s->ref->refPicList;
1643     HEVCFrame *ref0, *ref1;
1644
1645     int tmpstride = MAX_PB_SIZE;
1646
1647     uint8_t *dst0 = POS(0, x0, y0);
1648     uint8_t *dst1 = POS(1, x0, y0);
1649     uint8_t *dst2 = POS(2, x0, y0);
1650     int log2_min_cb_size = s->sps->log2_min_cb_size;
1651     int min_cb_width     = s->sps->min_cb_width;
1652     int x_cb             = x0 >> log2_min_cb_size;
1653     int y_cb             = y0 >> log2_min_cb_size;
1654     int ref_idx[2];
1655     int mvp_flag[2];
1656     int x_pu, y_pu;
1657     int i, j;
1658
1659     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1660         if (s->sh.max_num_merge_cand > 1)
1661             merge_idx = ff_hevc_merge_idx_decode(s);
1662         else
1663             merge_idx = 0;
1664
1665         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1666                                    1 << log2_cb_size,
1667                                    1 << log2_cb_size,
1668                                    log2_cb_size, partIdx,
1669                                    merge_idx, &current_mv);
1670         x_pu = x0 >> s->sps->log2_min_pu_size;
1671         y_pu = y0 >> s->sps->log2_min_pu_size;
1672
1673         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1674             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1675                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1676     } else { /* MODE_INTER */
1677         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1678         if (lc->pu.merge_flag) {
1679             if (s->sh.max_num_merge_cand > 1)
1680                 merge_idx = ff_hevc_merge_idx_decode(s);
1681             else
1682                 merge_idx = 0;
1683
1684             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1685                                        partIdx, merge_idx, &current_mv);
1686             x_pu = x0 >> s->sps->log2_min_pu_size;
1687             y_pu = y0 >> s->sps->log2_min_pu_size;
1688
1689             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1690                 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1691                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1692         } else {
1693             enum InterPredIdc inter_pred_idc = PRED_L0;
1694             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1695             if (s->sh.slice_type == B_SLICE)
1696                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1697
1698             if (inter_pred_idc != PRED_L1) {
1699                 if (s->sh.nb_refs[L0]) {
1700                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1701                     current_mv.ref_idx[0] = ref_idx[0];
1702                 }
1703                 current_mv.pred_flag[0] = 1;
1704                 hls_mvd_coding(s, x0, y0, 0);
1705                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1706                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1707                                          partIdx, merge_idx, &current_mv,
1708                                          mvp_flag[0], 0);
1709                 current_mv.mv[0].x += lc->pu.mvd.x;
1710                 current_mv.mv[0].y += lc->pu.mvd.y;
1711             }
1712
1713             if (inter_pred_idc != PRED_L0) {
1714                 if (s->sh.nb_refs[L1]) {
1715                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1716                     current_mv.ref_idx[1] = ref_idx[1];
1717                 }
1718
1719                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1720                     lc->pu.mvd.x = 0;
1721                     lc->pu.mvd.y = 0;
1722                 } else {
1723                     hls_mvd_coding(s, x0, y0, 1);
1724                 }
1725
1726                 current_mv.pred_flag[1] = 1;
1727                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1728                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1729                                          partIdx, merge_idx, &current_mv,
1730                                          mvp_flag[1], 1);
1731                 current_mv.mv[1].x += lc->pu.mvd.x;
1732                 current_mv.mv[1].y += lc->pu.mvd.y;
1733             }
1734
1735             x_pu = x0 >> s->sps->log2_min_pu_size;
1736             y_pu = y0 >> s->sps->log2_min_pu_size;
1737
1738             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1739                 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1740                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1741         }
1742     }
1743
1744     if (current_mv.pred_flag[0]) {
1745         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1746         if (!ref0)
1747             return;
1748         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1749     }
1750     if (current_mv.pred_flag[1]) {
1751         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1752         if (!ref1)
1753             return;
1754         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1755     }
1756
1757     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1758         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1759         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1760
1761         luma_mc(s, tmp, tmpstride, ref0->frame,
1762                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1763
1764         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1765             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1766             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1767                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1768                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1769                                      dst0, s->frame->linesize[0], tmp,
1770                                      tmpstride, nPbW, nPbH);
1771         } else {
1772             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1773         }
1774         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1775                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1776
1777         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1778             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1779             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1780                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1781                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1782                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1783                                      nPbW / 2, nPbH / 2);
1784             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1785                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1786                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1787                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1788                                      nPbW / 2, nPbH / 2);
1789         } else {
1790             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1791             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1792         }
1793     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1794         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1795         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1796
1797         if (!ref1)
1798             return;
1799
1800         luma_mc(s, tmp, tmpstride, ref1->frame,
1801                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1802
1803         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1804             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1805             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1806                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1807                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1808                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1809                                       nPbW, nPbH);
1810         } else {
1811             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1812         }
1813
1814         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1815                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1816
1817         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1818             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1819             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1820                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1821                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1822                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1823             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1824                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1825                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1826                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1827         } else {
1828             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1829             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1830         }
1831     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1832         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1833         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1834         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1835         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1836         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1837         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1838
1839         if (!ref0 || !ref1)
1840             return;
1841
1842         luma_mc(s, tmp, tmpstride, ref0->frame,
1843                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1844         luma_mc(s, tmp2, tmpstride, ref1->frame,
1845                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1846
1847         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1848             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1849             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1850                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1851                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1852                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1853                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1854                                          dst0, s->frame->linesize[0],
1855                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1856         } else {
1857             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1858                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1859         }
1860
1861         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1862                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1863         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1864                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1865
1866         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1867             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1868             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1869                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1870                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1871                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1872                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1873                                          dst1, s->frame->linesize[1], tmp, tmp3,
1874                                          tmpstride, nPbW / 2, nPbH / 2);
1875             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1876                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1877                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1878                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1879                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1880                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1881                                          tmpstride, nPbW / 2, nPbH / 2);
1882         } else {
1883             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1884             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1885         }
1886     }
1887 }
1888
1889 /**
1890  * 8.4.1
1891  */
1892 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1893                                 int prev_intra_luma_pred_flag)
1894 {
1895     HEVCLocalContext *lc = &s->HEVClc;
1896     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1897     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1898     int min_pu_width     = s->sps->min_pu_width;
1899     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1900     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1901     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1902
1903     int cand_up   = (lc->ctb_up_flag || y0b) ?
1904                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1905     int cand_left = (lc->ctb_left_flag || x0b) ?
1906                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1907
1908     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1909
1910     MvField *tab_mvf = s->ref->tab_mvf;
1911     int intra_pred_mode;
1912     int candidate[3];
1913     int i, j;
1914
1915     // intra_pred_mode prediction does not cross vertical CTB boundaries
1916     if ((y0 - 1) < y_ctb)
1917         cand_up = INTRA_DC;
1918
1919     if (cand_left == cand_up) {
1920         if (cand_left < 2) {
1921             candidate[0] = INTRA_PLANAR;
1922             candidate[1] = INTRA_DC;
1923             candidate[2] = INTRA_ANGULAR_26;
1924         } else {
1925             candidate[0] = cand_left;
1926             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1927             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1928         }
1929     } else {
1930         candidate[0] = cand_left;
1931         candidate[1] = cand_up;
1932         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1933             candidate[2] = INTRA_PLANAR;
1934         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1935             candidate[2] = INTRA_DC;
1936         } else {
1937             candidate[2] = INTRA_ANGULAR_26;
1938         }
1939     }
1940
1941     if (prev_intra_luma_pred_flag) {
1942         intra_pred_mode = candidate[lc->pu.mpm_idx];
1943     } else {
1944         if (candidate[0] > candidate[1])
1945             FFSWAP(uint8_t, candidate[0], candidate[1]);
1946         if (candidate[0] > candidate[2])
1947             FFSWAP(uint8_t, candidate[0], candidate[2]);
1948         if (candidate[1] > candidate[2])
1949             FFSWAP(uint8_t, candidate[1], candidate[2]);
1950
1951         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1952         for (i = 0; i < 3; i++)
1953             if (intra_pred_mode >= candidate[i])
1954                 intra_pred_mode++;
1955     }
1956
1957     /* write the intra prediction units into the mv array */
1958     if (!size_in_pus)
1959         size_in_pus = 1;
1960     for (i = 0; i < size_in_pus; i++) {
1961         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1962                intra_pred_mode, size_in_pus);
1963
1964         for (j = 0; j < size_in_pus; j++) {
1965             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1966             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1967             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1968             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1969             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1970             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1971             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1972             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1973             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1974         }
1975     }
1976
1977     return intra_pred_mode;
1978 }
1979
1980 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1981                                           int log2_cb_size, int ct_depth)
1982 {
1983     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1984     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1985     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1986     int y;
1987
1988     for (y = 0; y < length; y++)
1989         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1990                ct_depth, length);
1991 }
1992
1993 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1994                                   int log2_cb_size)
1995 {
1996     HEVCLocalContext *lc = &s->HEVClc;
1997     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1998     uint8_t prev_intra_luma_pred_flag[4];
1999     int split   = lc->cu.part_mode == PART_NxN;
2000     int pb_size = (1 << log2_cb_size) >> split;
2001     int side    = split + 1;
2002     int chroma_mode;
2003     int i, j;
2004
2005     for (i = 0; i < side; i++)
2006         for (j = 0; j < side; j++)
2007             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2008
2009     for (i = 0; i < side; i++) {
2010         for (j = 0; j < side; j++) {
2011             if (prev_intra_luma_pred_flag[2 * i + j])
2012                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2013             else
2014                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2015
2016             lc->pu.intra_pred_mode[2 * i + j] =
2017                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2018                                      prev_intra_luma_pred_flag[2 * i + j]);
2019         }
2020     }
2021
2022     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2023     if (chroma_mode != 4) {
2024         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2025             lc->pu.intra_pred_mode_c = 34;
2026         else
2027             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2028     } else {
2029         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2030     }
2031 }
2032
2033 static void intra_prediction_unit_default_value(HEVCContext *s,
2034                                                 int x0, int y0,
2035                                                 int log2_cb_size)
2036 {
2037     HEVCLocalContext *lc = &s->HEVClc;
2038     int pb_size          = 1 << log2_cb_size;
2039     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2040     int min_pu_width     = s->sps->min_pu_width;
2041     MvField *tab_mvf     = s->ref->tab_mvf;
2042     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2043     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2044     int j, k;
2045
2046     if (size_in_pus == 0)
2047         size_in_pus = 1;
2048     for (j = 0; j < size_in_pus; j++) {
2049         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2050         for (k = 0; k < size_in_pus; k++)
2051             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2052     }
2053 }
2054
2055 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2056 {
2057     int cb_size          = 1 << log2_cb_size;
2058     HEVCLocalContext *lc = &s->HEVClc;
2059     int log2_min_cb_size = s->sps->log2_min_cb_size;
2060     int length           = cb_size >> log2_min_cb_size;
2061     int min_cb_width     = s->sps->min_cb_width;
2062     int x_cb             = x0 >> log2_min_cb_size;
2063     int y_cb             = y0 >> log2_min_cb_size;
2064     int x, y, ret;
2065
2066     lc->cu.x                = x0;
2067     lc->cu.y                = y0;
2068     lc->cu.rqt_root_cbf     = 1;
2069     lc->cu.pred_mode        = MODE_INTRA;
2070     lc->cu.part_mode        = PART_2Nx2N;
2071     lc->cu.intra_split_flag = 0;
2072     lc->cu.pcm_flag         = 0;
2073
2074     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2075     for (x = 0; x < 4; x++)
2076         lc->pu.intra_pred_mode[x] = 1;
2077     if (s->pps->transquant_bypass_enable_flag) {
2078         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2079         if (lc->cu.cu_transquant_bypass_flag)
2080             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2081     } else
2082         lc->cu.cu_transquant_bypass_flag = 0;
2083
2084     if (s->sh.slice_type != I_SLICE) {
2085         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2086
2087         lc->cu.pred_mode = MODE_SKIP;
2088         x = y_cb * min_cb_width + x_cb;
2089         for (y = 0; y < length; y++) {
2090             memset(&s->skip_flag[x], skip_flag, length);
2091             x += min_cb_width;
2092         }
2093         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2094     }
2095
2096     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2097         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2098         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2099
2100         if (!s->sh.disable_deblocking_filter_flag)
2101             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2102                                                   lc->slice_or_tiles_up_boundary,
2103                                                   lc->slice_or_tiles_left_boundary);
2104     } else {
2105         if (s->sh.slice_type != I_SLICE)
2106             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2107         if (lc->cu.pred_mode != MODE_INTRA ||
2108             log2_cb_size == s->sps->log2_min_cb_size) {
2109             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2110             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2111                                       lc->cu.pred_mode == MODE_INTRA;
2112         }
2113
2114         if (lc->cu.pred_mode == MODE_INTRA) {
2115             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2116                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2117                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2118                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2119             }
2120             if (lc->cu.pcm_flag) {
2121                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2122                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2123                 if (s->sps->pcm.loop_filter_disable_flag)
2124                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2125
2126                 if (ret < 0)
2127                     return ret;
2128             } else {
2129                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2130             }
2131         } else {
2132             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2133             switch (lc->cu.part_mode) {
2134             case PART_2Nx2N:
2135                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2136                 break;
2137             case PART_2NxN:
2138                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2139                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2140                 break;
2141             case PART_Nx2N:
2142                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2143                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2144                 break;
2145             case PART_2NxnU:
2146                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2147                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2148                 break;
2149             case PART_2NxnD:
2150                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2151                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2152                 break;
2153             case PART_nLx2N:
2154                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2155                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2156                 break;
2157             case PART_nRx2N:
2158                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2159                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2160                 break;
2161             case PART_NxN:
2162                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2163                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2164                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2165                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2166                 break;
2167             }
2168         }
2169
2170         if (!lc->cu.pcm_flag) {
2171             if (lc->cu.pred_mode != MODE_INTRA &&
2172                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2173                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2174             }
2175             if (lc->cu.rqt_root_cbf) {
2176                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2177                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2178                                          s->sps->max_transform_hierarchy_depth_inter;
2179                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2180                                          log2_cb_size,
2181                                          log2_cb_size, 0, 0, 0, 0);
2182                 if (ret < 0)
2183                     return ret;
2184             } else {
2185                 if (!s->sh.disable_deblocking_filter_flag)
2186                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2187                                                           lc->slice_or_tiles_up_boundary,
2188                                                           lc->slice_or_tiles_left_boundary);
2189             }
2190         }
2191     }
2192
2193     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2194         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2195
2196     x = y_cb * min_cb_width + x_cb;
2197     for (y = 0; y < length; y++) {
2198         memset(&s->qp_y_tab[x], lc->qp_y, length);
2199         x += min_cb_width;
2200     }
2201
2202     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2203
2204     return 0;
2205 }
2206
2207 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2208                                int log2_cb_size, int cb_depth)
2209 {
2210     HEVCLocalContext *lc = &s->HEVClc;
2211     const int cb_size    = 1 << log2_cb_size;
2212     int split_cu;
2213
2214     lc->ct.depth = cb_depth;
2215     if (x0 + cb_size <= s->sps->width  &&
2216         y0 + cb_size <= s->sps->height &&
2217         log2_cb_size > s->sps->log2_min_cb_size) {
2218         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2219     } else {
2220         split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
2221     }
2222     if (s->pps->cu_qp_delta_enabled_flag &&
2223         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2224         lc->tu.is_cu_qp_delta_coded = 0;
2225         lc->tu.cu_qp_delta          = 0;
2226     }
2227
2228     if (split_cu) {
2229         const int cb_size_split = cb_size >> 1;
2230         const int x1 = x0 + cb_size_split;
2231         const int y1 = y0 + cb_size_split;
2232
2233         log2_cb_size--;
2234         cb_depth++;
2235
2236 #define SUBDIVIDE(x, y)                                                \
2237 do {                                                                   \
2238     if (x < s->sps->width && y < s->sps->height) {                     \
2239         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2240         if (ret < 0)                                                   \
2241             return ret;                                                \
2242     }                                                                  \
2243 } while (0)
2244
2245         SUBDIVIDE(x0, y0);
2246         SUBDIVIDE(x1, y0);
2247         SUBDIVIDE(x0, y1);
2248         SUBDIVIDE(x1, y1);
2249     } else {
2250         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2251         if (ret < 0)
2252             return ret;
2253     }
2254
2255     return 0;
2256 }
2257
2258 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2259                                  int ctb_addr_ts)
2260 {
2261     HEVCLocalContext *lc  = &s->HEVClc;
2262     int ctb_size          = 1 << s->sps->log2_ctb_size;
2263     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2264     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2265
2266     int tile_left_boundary, tile_up_boundary;
2267     int slice_left_boundary, slice_up_boundary;
2268
2269     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2270
2271     if (s->pps->entropy_coding_sync_enabled_flag) {
2272         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2273             lc->first_qp_group = 1;
2274         lc->end_of_tiles_x = s->sps->width;
2275     } else if (s->pps->tiles_enabled_flag) {
2276         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2277             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2278             lc->start_of_tiles_x = x_ctb;
2279             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2280             lc->first_qp_group   = 1;
2281         }
2282     } else {
2283         lc->end_of_tiles_x = s->sps->width;
2284     }
2285
2286     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2287
2288     if (s->pps->tiles_enabled_flag) {
2289         tile_left_boundary  = x_ctb > 0 &&
2290                               s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2291         slice_left_boundary = x_ctb > 0 &&
2292                               s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2293         tile_up_boundary  = y_ctb > 0 &&
2294                             s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2295         slice_up_boundary = y_ctb > 0 &&
2296                             s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2297     } else {
2298         tile_left_boundary  =
2299         tile_up_boundary    = 1;
2300         slice_left_boundary = ctb_addr_in_slice > 0;
2301         slice_up_boundary   = ctb_addr_in_slice >= s->sps->ctb_width;
2302     }
2303     lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2304     lc->slice_or_tiles_up_boundary   = (!slice_up_boundary + (!tile_up_boundary << 1));
2305     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2306     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2307     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2308     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2309 }
2310
2311 static int hls_slice_data(HEVCContext *s)
2312 {
2313     int ctb_size    = 1 << s->sps->log2_ctb_size;
2314     int more_data   = 1;
2315     int x_ctb       = 0;
2316     int y_ctb       = 0;
2317     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2318     int ret;
2319
2320     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2321         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2322
2323         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2324         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2325         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2326
2327         ff_hevc_cabac_init(s, ctb_addr_ts);
2328
2329         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2330
2331         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2332         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2333         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2334
2335         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2336         if (ret < 0)
2337             return ret;
2338         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2339
2340         ctb_addr_ts++;
2341         ff_hevc_save_states(s, ctb_addr_ts);
2342         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2343     }
2344
2345     if (x_ctb + ctb_size >= s->sps->width &&
2346         y_ctb + ctb_size >= s->sps->height)
2347         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2348
2349     return ctb_addr_ts;
2350 }
2351
2352 /**
2353  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2354  * 0 if the unit should be skipped, 1 otherwise
2355  */
2356 static int hls_nal_unit(HEVCContext *s)
2357 {
2358     GetBitContext *gb = &s->HEVClc.gb;
2359     int nuh_layer_id;
2360
2361     if (get_bits1(gb) != 0)
2362         return AVERROR_INVALIDDATA;
2363
2364     s->nal_unit_type = get_bits(gb, 6);
2365
2366     nuh_layer_id   = get_bits(gb, 6);
2367     s->temporal_id = get_bits(gb, 3) - 1;
2368     if (s->temporal_id < 0)
2369         return AVERROR_INVALIDDATA;
2370
2371     av_log(s->avctx, AV_LOG_DEBUG,
2372            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2373            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2374
2375     return nuh_layer_id == 0;
2376 }
2377
2378 static void restore_tqb_pixels(HEVCContext *s)
2379 {
2380     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2381     int x, y, c_idx;
2382
2383     for (c_idx = 0; c_idx < 3; c_idx++) {
2384         ptrdiff_t stride = s->frame->linesize[c_idx];
2385         int hshift       = s->sps->hshift[c_idx];
2386         int vshift       = s->sps->vshift[c_idx];
2387         for (y = 0; y < s->sps->min_pu_height; y++) {
2388             for (x = 0; x < s->sps->min_pu_width; x++) {
2389                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2390                     int n;
2391                     int len      = min_pu_size >> hshift;
2392                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2393                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2394                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2395                         memcpy(dst, src, len);
2396                         src += stride;
2397                         dst += stride;
2398                     }
2399                 }
2400             }
2401         }
2402     }
2403 }
2404
2405 static int set_side_data(HEVCContext *s)
2406 {
2407     AVFrame *out = s->ref->frame;
2408
2409     if (s->sei_frame_packing_present &&
2410         s->frame_packing_arrangement_type >= 3 &&
2411         s->frame_packing_arrangement_type <= 5 &&
2412         s->content_interpretation_type > 0 &&
2413         s->content_interpretation_type < 3) {
2414         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2415         if (!stereo)
2416             return AVERROR(ENOMEM);
2417
2418         switch (s->frame_packing_arrangement_type) {
2419         case 3:
2420             if (s->quincunx_subsampling)
2421                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2422             else
2423                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2424             break;
2425         case 4:
2426             stereo->type = AV_STEREO3D_TOPBOTTOM;
2427             break;
2428         case 5:
2429             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2430             break;
2431         }
2432
2433         if (s->content_interpretation_type == 2)
2434             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2435     }
2436
2437     if (s->sei_display_orientation_present &&
2438         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2439         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2440         AVFrameSideData *rotation = av_frame_new_side_data(out,
2441                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2442                                                            sizeof(int32_t) * 9);
2443         if (!rotation)
2444             return AVERROR(ENOMEM);
2445
2446         av_display_rotation_set((int32_t *)rotation->data, angle);
2447         av_display_matrix_flip((int32_t *)rotation->data,
2448                                s->sei_vflip, s->sei_hflip);
2449     }
2450
2451     return 0;
2452 }
2453
2454 static int hevc_frame_start(HEVCContext *s)
2455 {
2456     HEVCLocalContext *lc = &s->HEVClc;
2457     int ret;
2458
2459     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2460     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2461     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2462     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2463
2464     lc->start_of_tiles_x = 0;
2465     s->is_decoded        = 0;
2466     s->first_nal_type    = s->nal_unit_type;
2467
2468     if (s->pps->tiles_enabled_flag)
2469         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2470
2471     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2472                               s->poc);
2473     if (ret < 0)
2474         goto fail;
2475
2476     ret = ff_hevc_frame_rps(s);
2477     if (ret < 0) {
2478         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2479         goto fail;
2480     }
2481
2482     s->ref->frame->key_frame = IS_IRAP(s);
2483
2484     ret = set_side_data(s);
2485     if (ret < 0)
2486         goto fail;
2487
2488     av_frame_unref(s->output_frame);
2489     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2490     if (ret < 0)
2491         goto fail;
2492
2493     ff_thread_finish_setup(s->avctx);
2494
2495     return 0;
2496
2497 fail:
2498     if (s->ref)
2499         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2500     s->ref = NULL;
2501     return ret;
2502 }
2503
2504 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2505 {
2506     HEVCLocalContext *lc = &s->HEVClc;
2507     GetBitContext *gb    = &lc->gb;
2508     int ctb_addr_ts, ret;
2509
2510     ret = init_get_bits8(gb, nal, length);
2511     if (ret < 0)
2512         return ret;
2513
2514     ret = hls_nal_unit(s);
2515     if (ret < 0) {
2516         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2517                s->nal_unit_type);
2518         goto fail;
2519     } else if (!ret)
2520         return 0;
2521
2522     switch (s->nal_unit_type) {
2523     case NAL_VPS:
2524         ret = ff_hevc_decode_nal_vps(s);
2525         if (ret < 0)
2526             goto fail;
2527         break;
2528     case NAL_SPS:
2529         ret = ff_hevc_decode_nal_sps(s);
2530         if (ret < 0)
2531             goto fail;
2532         break;
2533     case NAL_PPS:
2534         ret = ff_hevc_decode_nal_pps(s);
2535         if (ret < 0)
2536             goto fail;
2537         break;
2538     case NAL_SEI_PREFIX:
2539     case NAL_SEI_SUFFIX:
2540         ret = ff_hevc_decode_nal_sei(s);
2541         if (ret < 0)
2542             goto fail;
2543         break;
2544     case NAL_TRAIL_R:
2545     case NAL_TRAIL_N:
2546     case NAL_TSA_N:
2547     case NAL_TSA_R:
2548     case NAL_STSA_N:
2549     case NAL_STSA_R:
2550     case NAL_BLA_W_LP:
2551     case NAL_BLA_W_RADL:
2552     case NAL_BLA_N_LP:
2553     case NAL_IDR_W_RADL:
2554     case NAL_IDR_N_LP:
2555     case NAL_CRA_NUT:
2556     case NAL_RADL_N:
2557     case NAL_RADL_R:
2558     case NAL_RASL_N:
2559     case NAL_RASL_R:
2560         ret = hls_slice_header(s);
2561         if (ret < 0)
2562             return ret;
2563
2564         if (s->max_ra == INT_MAX) {
2565             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2566                 s->max_ra = s->poc;
2567             } else {
2568                 if (IS_IDR(s))
2569                     s->max_ra = INT_MIN;
2570             }
2571         }
2572
2573         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2574             s->poc <= s->max_ra) {
2575             s->is_decoded = 0;
2576             break;
2577         } else {
2578             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2579                 s->max_ra = INT_MIN;
2580         }
2581
2582         if (s->sh.first_slice_in_pic_flag) {
2583             ret = hevc_frame_start(s);
2584             if (ret < 0)
2585                 return ret;
2586         } else if (!s->ref) {
2587             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2588             goto fail;
2589         }
2590
2591         if (s->nal_unit_type != s->first_nal_type) {
2592             av_log(s->avctx, AV_LOG_ERROR,
2593                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2594                    s->first_nal_type, s->nal_unit_type);
2595             return AVERROR_INVALIDDATA;
2596         }
2597
2598         if (!s->sh.dependent_slice_segment_flag &&
2599             s->sh.slice_type != I_SLICE) {
2600             ret = ff_hevc_slice_rpl(s);
2601             if (ret < 0) {
2602                 av_log(s->avctx, AV_LOG_WARNING,
2603                        "Error constructing the reference lists for the current slice.\n");
2604                 goto fail;
2605             }
2606         }
2607
2608         ctb_addr_ts = hls_slice_data(s);
2609         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2610             s->is_decoded = 1;
2611             if ((s->pps->transquant_bypass_enable_flag ||
2612                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2613                 s->sps->sao_enabled)
2614                 restore_tqb_pixels(s);
2615         }
2616
2617         if (ctb_addr_ts < 0) {
2618             ret = ctb_addr_ts;
2619             goto fail;
2620         }
2621         break;
2622     case NAL_EOS_NUT:
2623     case NAL_EOB_NUT:
2624         s->seq_decode = (s->seq_decode + 1) & 0xff;
2625         s->max_ra     = INT_MAX;
2626         break;
2627     case NAL_AUD:
2628     case NAL_FD_NUT:
2629         break;
2630     default:
2631         av_log(s->avctx, AV_LOG_INFO,
2632                "Skipping NAL unit %d\n", s->nal_unit_type);
2633     }
2634
2635     return 0;
2636 fail:
2637     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2638         return ret;
2639     return 0;
2640 }
2641
2642 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2643  * between these functions would be nice. */
2644 static int extract_rbsp(const uint8_t *src, int length,
2645                         HEVCNAL *nal)
2646 {
2647     int i, si, di;
2648     uint8_t *dst;
2649
2650 #define STARTCODE_TEST                                                  \
2651         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2652             if (src[i + 2] != 3) {                                      \
2653                 /* startcode, so we must be past the end */             \
2654                 length = i;                                             \
2655             }                                                           \
2656             break;                                                      \
2657         }
2658 #if HAVE_FAST_UNALIGNED
2659 #define FIND_FIRST_ZERO                                                 \
2660         if (i > 0 && !src[i])                                           \
2661             i--;                                                        \
2662         while (src[i])                                                  \
2663             i++
2664 #if HAVE_FAST_64BIT
2665     for (i = 0; i + 1 < length; i += 9) {
2666         if (!((~AV_RN64A(src + i) &
2667                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2668               0x8000800080008080ULL))
2669             continue;
2670         FIND_FIRST_ZERO;
2671         STARTCODE_TEST;
2672         i -= 7;
2673     }
2674 #else
2675     for (i = 0; i + 1 < length; i += 5) {
2676         if (!((~AV_RN32A(src + i) &
2677                (AV_RN32A(src + i) - 0x01000101U)) &
2678               0x80008080U))
2679             continue;
2680         FIND_FIRST_ZERO;
2681         STARTCODE_TEST;
2682         i -= 3;
2683     }
2684 #endif /* HAVE_FAST_64BIT */
2685 #else
2686     for (i = 0; i + 1 < length; i += 2) {
2687         if (src[i])
2688             continue;
2689         if (i > 0 && src[i - 1] == 0)
2690             i--;
2691         STARTCODE_TEST;
2692     }
2693 #endif /* HAVE_FAST_UNALIGNED */
2694
2695     if (i >= length - 1) { // no escaped 0
2696         nal->data = src;
2697         nal->size = length;
2698         return length;
2699     }
2700
2701     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2702                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2703     if (!nal->rbsp_buffer)
2704         return AVERROR(ENOMEM);
2705
2706     dst = nal->rbsp_buffer;
2707
2708     memcpy(dst, src, i);
2709     si = di = i;
2710     while (si + 2 < length) {
2711         // remove escapes (very rare 1:2^22)
2712         if (src[si + 2] > 3) {
2713             dst[di++] = src[si++];
2714             dst[di++] = src[si++];
2715         } else if (src[si] == 0 && src[si + 1] == 0) {
2716             if (src[si + 2] == 3) { // escape
2717                 dst[di++] = 0;
2718                 dst[di++] = 0;
2719                 si       += 3;
2720
2721                 continue;
2722             } else // next start code
2723                 goto nsc;
2724         }
2725
2726         dst[di++] = src[si++];
2727     }
2728     while (si < length)
2729         dst[di++] = src[si++];
2730
2731 nsc:
2732     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2733
2734     nal->data = dst;
2735     nal->size = di;
2736     return si;
2737 }
2738
2739 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2740 {
2741     int i, consumed, ret = 0;
2742
2743     s->ref = NULL;
2744     s->eos = 0;
2745
2746     /* split the input packet into NAL units, so we know the upper bound on the
2747      * number of slices in the frame */
2748     s->nb_nals = 0;
2749     while (length >= 4) {
2750         HEVCNAL *nal;
2751         int extract_length = 0;
2752
2753         if (s->is_nalff) {
2754             int i;
2755             for (i = 0; i < s->nal_length_size; i++)
2756                 extract_length = (extract_length << 8) | buf[i];
2757             buf    += s->nal_length_size;
2758             length -= s->nal_length_size;
2759
2760             if (extract_length > length) {
2761                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2762                 ret = AVERROR_INVALIDDATA;
2763                 goto fail;
2764             }
2765         } else {
2766             if (buf[2] == 0) {
2767                 length--;
2768                 buf++;
2769                 continue;
2770             }
2771             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2772                 ret = AVERROR_INVALIDDATA;
2773                 goto fail;
2774             }
2775
2776             buf           += 3;
2777             length        -= 3;
2778             extract_length = length;
2779         }
2780
2781         if (s->nals_allocated < s->nb_nals + 1) {
2782             int new_size = s->nals_allocated + 1;
2783             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2784             if (!tmp) {
2785                 ret = AVERROR(ENOMEM);
2786                 goto fail;
2787             }
2788             s->nals = tmp;
2789             memset(s->nals + s->nals_allocated, 0,
2790                    (new_size - s->nals_allocated) * sizeof(*tmp));
2791             s->nals_allocated = new_size;
2792         }
2793         nal = &s->nals[s->nb_nals++];
2794
2795         consumed = extract_rbsp(buf, extract_length, nal);
2796         if (consumed < 0) {
2797             ret = consumed;
2798             goto fail;
2799         }
2800
2801         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2802         if (ret < 0)
2803             goto fail;
2804         hls_nal_unit(s);
2805
2806         if (s->nal_unit_type == NAL_EOB_NUT ||
2807             s->nal_unit_type == NAL_EOS_NUT)
2808             s->eos = 1;
2809
2810         buf    += consumed;
2811         length -= consumed;
2812     }
2813
2814     /* parse the NAL units */
2815     for (i = 0; i < s->nb_nals; i++) {
2816         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2817         if (ret < 0) {
2818             av_log(s->avctx, AV_LOG_WARNING,
2819                    "Error parsing NAL unit #%d.\n", i);
2820             goto fail;
2821         }
2822     }
2823
2824 fail:
2825     if (s->ref)
2826         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2827
2828     return ret;
2829 }
2830
/**
 * Log a 16-byte MD5 digest as 32 hexadecimal digits, without a trailing
 * separator or newline.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end) {
        av_log(log_ctx, level, "%02"PRIx8, *byte);
        byte++;
    }
}
2837
2838 static int verify_md5(HEVCContext *s, AVFrame *frame)
2839 {
2840     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2841     int pixel_shift;
2842     int i, j;
2843
2844     if (!desc)
2845         return AVERROR(EINVAL);
2846
2847     pixel_shift = desc->comp[0].depth_minus1 > 7;
2848
2849     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2850            s->poc);
2851
2852     /* the checksums are LE, so we have to byteswap for >8bpp formats
2853      * on BE arches */
2854 #if HAVE_BIGENDIAN
2855     if (pixel_shift && !s->checksum_buf) {
2856         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2857                        FFMAX3(frame->linesize[0], frame->linesize[1],
2858                               frame->linesize[2]));
2859         if (!s->checksum_buf)
2860             return AVERROR(ENOMEM);
2861     }
2862 #endif
2863
2864     for (i = 0; frame->data[i]; i++) {
2865         int width  = s->avctx->coded_width;
2866         int height = s->avctx->coded_height;
2867         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2868         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2869         uint8_t md5[16];
2870
2871         av_md5_init(s->md5_ctx);
2872         for (j = 0; j < h; j++) {
2873             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2874 #if HAVE_BIGENDIAN
2875             if (pixel_shift) {
2876                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2877                                     (const uint16_t *) src, w);
2878                 src = s->checksum_buf;
2879             }
2880 #endif
2881             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2882         }
2883         av_md5_final(s->md5_ctx, md5);
2884
2885         if (!memcmp(md5, s->md5[i], 16)) {
2886             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2887             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2888             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2889         } else {
2890             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2891             print_md5(s->avctx, AV_LOG_ERROR, md5);
2892             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2893             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2894             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2895             return AVERROR_INVALIDDATA;
2896         }
2897     }
2898
2899     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2900
2901     return 0;
2902 }
2903
2904 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2905                              AVPacket *avpkt)
2906 {
2907     int ret;
2908     HEVCContext *s = avctx->priv_data;
2909
2910     if (!avpkt->size) {
2911         ret = ff_hevc_output_frame(s, data, 1);
2912         if (ret < 0)
2913             return ret;
2914
2915         *got_output = ret;
2916         return 0;
2917     }
2918
2919     s->ref = NULL;
2920     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2921     if (ret < 0)
2922         return ret;
2923
2924     /* verify the SEI checksum */
2925     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2926         s->is_md5) {
2927         ret = verify_md5(s, s->ref->frame);
2928         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2929             ff_hevc_unref_frame(s, s->ref, ~0);
2930             return ret;
2931         }
2932     }
2933     s->is_md5 = 0;
2934
2935     if (s->is_decoded) {
2936         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2937         s->is_decoded = 0;
2938     }
2939
2940     if (s->output_frame->buf[0]) {
2941         av_frame_move_ref(data, s->output_frame);
2942         *got_output = 1;
2943     }
2944
2945     return avpkt->size;
2946 }
2947
2948 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2949 {
2950     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2951     if (ret < 0)
2952         return ret;
2953
2954     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2955     if (!dst->tab_mvf_buf)
2956         goto fail;
2957     dst->tab_mvf = src->tab_mvf;
2958
2959     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2960     if (!dst->rpl_tab_buf)
2961         goto fail;
2962     dst->rpl_tab = src->rpl_tab;
2963
2964     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2965     if (!dst->rpl_buf)
2966         goto fail;
2967
2968     dst->poc        = src->poc;
2969     dst->ctb_count  = src->ctb_count;
2970     dst->window     = src->window;
2971     dst->flags      = src->flags;
2972     dst->sequence   = src->sequence;
2973
2974     return 0;
2975 fail:
2976     ff_hevc_unref_frame(s, dst, ~0);
2977     return AVERROR(ENOMEM);
2978 }
2979
2980 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2981 {
2982     HEVCContext       *s = avctx->priv_data;
2983     int i;
2984
2985     pic_arrays_free(s);
2986
2987     av_freep(&s->md5_ctx);
2988
2989     av_frame_free(&s->tmp_frame);
2990     av_frame_free(&s->output_frame);
2991
2992     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2993         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2994         av_frame_free(&s->DPB[i].frame);
2995     }
2996
2997     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2998         av_buffer_unref(&s->vps_list[i]);
2999     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3000         av_buffer_unref(&s->sps_list[i]);
3001     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3002         av_buffer_unref(&s->pps_list[i]);
3003
3004     for (i = 0; i < s->nals_allocated; i++)
3005         av_freep(&s->nals[i].rbsp_buffer);
3006     av_freep(&s->nals);
3007     s->nals_allocated = 0;
3008
3009     return 0;
3010 }
3011
3012 static av_cold int hevc_init_context(AVCodecContext *avctx)
3013 {
3014     HEVCContext *s = avctx->priv_data;
3015     int i;
3016
3017     s->avctx = avctx;
3018
3019     s->tmp_frame = av_frame_alloc();
3020     if (!s->tmp_frame)
3021         goto fail;
3022
3023     s->output_frame = av_frame_alloc();
3024     if (!s->output_frame)
3025         goto fail;
3026
3027     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3028         s->DPB[i].frame = av_frame_alloc();
3029         if (!s->DPB[i].frame)
3030             goto fail;
3031         s->DPB[i].tf.f = s->DPB[i].frame;
3032     }
3033
3034     s->max_ra = INT_MAX;
3035
3036     s->md5_ctx = av_md5_alloc();
3037     if (!s->md5_ctx)
3038         goto fail;
3039
3040     ff_bswapdsp_init(&s->bdsp);
3041
3042     s->context_initialized = 1;
3043
3044     return 0;
3045
3046 fail:
3047     hevc_decode_free(avctx);
3048     return AVERROR(ENOMEM);
3049 }
3050
3051 static int hevc_update_thread_context(AVCodecContext *dst,
3052                                       const AVCodecContext *src)
3053 {
3054     HEVCContext *s  = dst->priv_data;
3055     HEVCContext *s0 = src->priv_data;
3056     int i, ret;
3057
3058     if (!s->context_initialized) {
3059         ret = hevc_init_context(dst);
3060         if (ret < 0)
3061             return ret;
3062     }
3063
3064     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3065         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3066         if (s0->DPB[i].frame->buf[0]) {
3067             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3068             if (ret < 0)
3069                 return ret;
3070         }
3071     }
3072
3073     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3074         av_buffer_unref(&s->vps_list[i]);
3075         if (s0->vps_list[i]) {
3076             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3077             if (!s->vps_list[i])
3078                 return AVERROR(ENOMEM);
3079         }
3080     }
3081
3082     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3083         av_buffer_unref(&s->sps_list[i]);
3084         if (s0->sps_list[i]) {
3085             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3086             if (!s->sps_list[i])
3087                 return AVERROR(ENOMEM);
3088         }
3089     }
3090
3091     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3092         av_buffer_unref(&s->pps_list[i]);
3093         if (s0->pps_list[i]) {
3094             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3095             if (!s->pps_list[i])
3096                 return AVERROR(ENOMEM);
3097         }
3098     }
3099
3100     if (s->sps != s0->sps)
3101         ret = set_sps(s, s0->sps);
3102
3103     s->seq_decode = s0->seq_decode;
3104     s->seq_output = s0->seq_output;
3105     s->pocTid0    = s0->pocTid0;
3106     s->max_ra     = s0->max_ra;
3107
3108     s->is_nalff        = s0->is_nalff;
3109     s->nal_length_size = s0->nal_length_size;
3110
3111     if (s0->eos) {
3112         s->seq_decode = (s->seq_decode + 1) & 0xff;
3113         s->max_ra = INT_MAX;
3114     }
3115
3116     return 0;
3117 }
3118
3119 static int hevc_decode_extradata(HEVCContext *s)
3120 {
3121     AVCodecContext *avctx = s->avctx;
3122     GetByteContext gb;
3123     int ret;
3124
3125     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3126
3127     if (avctx->extradata_size > 3 &&
3128         (avctx->extradata[0] || avctx->extradata[1] ||
3129          avctx->extradata[2] > 1)) {
3130         /* It seems the extradata is encoded as hvcC format.
3131          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3132          * is finalized. When finalized, configurationVersion will be 1 and we
3133          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3134         int i, j, num_arrays, nal_len_size;
3135
3136         s->is_nalff = 1;
3137
3138         bytestream2_skip(&gb, 21);
3139         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3140         num_arrays   = bytestream2_get_byte(&gb);
3141
3142         /* nal units in the hvcC always have length coded with 2 bytes,
3143          * so put a fake nal_length_size = 2 while parsing them */
3144         s->nal_length_size = 2;
3145
3146         /* Decode nal units from hvcC. */
3147         for (i = 0; i < num_arrays; i++) {
3148             int type = bytestream2_get_byte(&gb) & 0x3f;
3149             int cnt  = bytestream2_get_be16(&gb);
3150
3151             for (j = 0; j < cnt; j++) {
3152                 // +2 for the nal size field
3153                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3154                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3155                     av_log(s->avctx, AV_LOG_ERROR,
3156                            "Invalid NAL unit size in extradata.\n");
3157                     return AVERROR_INVALIDDATA;
3158                 }
3159
3160                 ret = decode_nal_units(s, gb.buffer, nalsize);
3161                 if (ret < 0) {
3162                     av_log(avctx, AV_LOG_ERROR,
3163                            "Decoding nal unit %d %d from hvcC failed\n",
3164                            type, i);
3165                     return ret;
3166                 }
3167                 bytestream2_skip(&gb, nalsize);
3168             }
3169         }
3170
3171         /* Now store right nal length size, that will be used to parse
3172          * all other nals */
3173         s->nal_length_size = nal_len_size;
3174     } else {
3175         s->is_nalff = 0;
3176         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3177         if (ret < 0)
3178             return ret;
3179     }
3180     return 0;
3181 }
3182
3183 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3184 {
3185     HEVCContext *s = avctx->priv_data;
3186     int ret;
3187
3188     ff_init_cabac_states();
3189
3190     avctx->internal->allocate_progress = 1;
3191
3192     ret = hevc_init_context(avctx);
3193     if (ret < 0)
3194         return ret;
3195
3196     if (avctx->extradata_size > 0 && avctx->extradata) {
3197         ret = hevc_decode_extradata(s);
3198         if (ret < 0) {
3199             hevc_decode_free(avctx);
3200             return ret;
3201         }
3202     }
3203
3204     return 0;
3205 }
3206
3207 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3208 {
3209     HEVCContext *s = avctx->priv_data;
3210     int ret;
3211
3212     memset(s, 0, sizeof(*s));
3213
3214     ret = hevc_init_context(avctx);
3215     if (ret < 0)
3216         return ret;
3217
3218     return 0;
3219 }
3220
3221 static void hevc_decode_flush(AVCodecContext *avctx)
3222 {
3223     HEVCContext *s = avctx->priv_data;
3224     ff_hevc_flush_dpb(s);
3225     s->max_ra = INT_MAX;
3226 }
3227
/* Byte offset of a field inside HEVCContext, for the AVOption table. */
#define OFFSET(field) offsetof(HEVCContext, field)
/* Option flags shared by the entries of the AVOption table. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3230
3231 static const AVProfile profiles[] = {
3232     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3233     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3234     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3235     { FF_PROFILE_UNKNOWN },
3236 };
3237
3238 static const AVOption options[] = {
3239     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3240         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3241     { NULL },
3242 };
3243
3244 static const AVClass hevc_decoder_class = {
3245     .class_name = "HEVC decoder",
3246     .item_name  = av_default_item_name,
3247     .option     = options,
3248     .version    = LIBAVUTIL_VERSION_INT,
3249 };
3250
3251 AVCodec ff_hevc_decoder = {
3252     .name                  = "hevc",
3253     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3254     .type                  = AVMEDIA_TYPE_VIDEO,
3255     .id                    = AV_CODEC_ID_HEVC,
3256     .priv_data_size        = sizeof(HEVCContext),
3257     .priv_class            = &hevc_decoder_class,
3258     .init                  = hevc_decode_init,
3259     .close                 = hevc_decode_free,
3260     .decode                = hevc_decode_frame,
3261     .flush                 = hevc_decode_flush,
3262     .update_thread_context = hevc_update_thread_context,
3263     .init_thread_copy      = hevc_init_thread_copy,
3264     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3265                              CODEC_CAP_FRAME_THREADS,
3266     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3267 };