]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c
hevc: Bound check slice_qp
[ffmpeg] / libavcodec / hevc.c
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/internal.h"
29 #include "libavutil/md5.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/pixdesc.h"
32 #include "libavutil/stereo3d.h"
33
34 #include "bytestream.h"
35 #include "cabac_functions.h"
36 #include "dsputil.h"
37 #include "golomb.h"
38 #include "hevc.h"
39
/*
 * Quarter-pel helper tables, each with four entries.
 * For every index i, ff_hevc_qpel_extra[i] equals
 * ff_hevc_qpel_extra_before[i] + ff_hevc_qpel_extra_after[i].
 * NOTE(review): presumably indexed by the fractional part of a luma motion
 * vector and giving the extra samples needed before/after the block for
 * interpolation -- confirm against the users of these tables.
 */
const uint8_t ff_hevc_qpel_extra_before[4] = {
    0, 3, 3, 2,
};
const uint8_t ff_hevc_qpel_extra_after[4] = {
    0, 3, 4, 4,
};
const uint8_t ff_hevc_qpel_extra[4] = {
    0, 6, 7, 6,
};
43
/* Trivial scan for a single position. */
static const uint8_t scan_1x1[1] = { 0 };

/*
 * Horizontal (row by row) scans: entry k of the _x / _y tables holds the
 * column / row visited at scan position k.
 */
static const uint8_t horiz_scan2x2_x[4] = { 0, 1,   0, 1 };
static const uint8_t horiz_scan2x2_y[4] = { 0, 0,   1, 1 };

static const uint8_t horiz_scan4x4_x[16] = {
    0, 1, 2, 3,   0, 1, 2, 3,
    0, 1, 2, 3,   0, 1, 2, 3,
};

static const uint8_t horiz_scan4x4_y[16] = {
    0, 0, 0, 0,   1, 1, 1, 1,
    2, 2, 2, 2,   3, 3, 3, 3,
};

/*
 * Inverse horizontal scan for an 8x8 block, indexed [y][x].
 * The block is split into four 4x4 quadrants that are numbered in raster
 * order (16 positions each); inside a quadrant positions are numbered row
 * by row.
 */
static const uint8_t horiz_scan8x8_inv[8][8] = {
    /* upper-left quadrant | upper-right quadrant */
    {  0,  1,  2,  3,   16, 17, 18, 19 },
    {  4,  5,  6,  7,   20, 21, 22, 23 },
    {  8,  9, 10, 11,   24, 25, 26, 27 },
    { 12, 13, 14, 15,   28, 29, 30, 31 },
    /* lower-left quadrant | lower-right quadrant */
    { 32, 33, 34, 35,   48, 49, 50, 51 },
    { 36, 37, 38, 39,   52, 53, 54, 55 },
    { 40, 41, 42, 43,   56, 57, 58, 59 },
    { 44, 45, 46, 47,   60, 61, 62, 63 },
};
74
/*
 * Diagonal scans: entry k of the _x / _y tables holds the column / row
 * visited at scan position k.  Each anti-diagonal is walked with x
 * increasing and y decreasing; the initializers below use one source line
 * per anti-diagonal.  The matching _inv tables are indexed [y][x] and give
 * the scan position of that coordinate.
 */
static const uint8_t diag_scan2x2_x[4] = {
    0,
    0, 1,
    1,
};

static const uint8_t diag_scan2x2_y[4] = {
    0,
    1, 0,
    1,
};

static const uint8_t diag_scan2x2_inv[2][2] = {
    { 0, 2 },
    { 1, 3 },
};

const uint8_t ff_hevc_diag_scan4x4_x[16] = {
    0,
    0, 1,
    0, 1, 2,
    0, 1, 2, 3,
    1, 2, 3,
    2, 3,
    3,
};

const uint8_t ff_hevc_diag_scan4x4_y[16] = {
    0,
    1, 0,
    2, 1, 0,
    3, 2, 1, 0,
    3, 2, 1,
    3, 2,
    3,
};

static const uint8_t diag_scan4x4_inv[4][4] = {
    { 0,  2,  5,  9 },
    { 1,  4,  8, 12 },
    { 3,  7, 11, 14 },
    { 6, 10, 13, 15 },
};
104
/*
 * Diagonal scan of an 8x8 block: entry k of the _x / _y tables holds the
 * column / row visited at scan position k.  Each anti-diagonal is walked
 * with x increasing and y decreasing; the initializers below use one source
 * line per anti-diagonal.
 */
const uint8_t ff_hevc_diag_scan8x8_x[64] = {
    0,
    0, 1,
    0, 1, 2,
    0, 1, 2, 3,
    0, 1, 2, 3, 4,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5, 6,
    0, 1, 2, 3, 4, 5, 6, 7,
    1, 2, 3, 4, 5, 6, 7,
    2, 3, 4, 5, 6, 7,
    3, 4, 5, 6, 7,
    4, 5, 6, 7,
    5, 6, 7,
    6, 7,
    7,
};

const uint8_t ff_hevc_diag_scan8x8_y[64] = {
    0,
    1, 0,
    2, 1, 0,
    3, 2, 1, 0,
    4, 3, 2, 1, 0,
    5, 4, 3, 2, 1, 0,
    6, 5, 4, 3, 2, 1, 0,
    7, 6, 5, 4, 3, 2, 1, 0,
    7, 6, 5, 4, 3, 2, 1,
    7, 6, 5, 4, 3, 2,
    7, 6, 5, 4, 3,
    7, 6, 5, 4,
    7, 6, 5,
    7, 6,
    7,
};

/* Inverse of the 8x8 diagonal scan: [y][x] -> scan position. */
static const uint8_t diag_scan8x8_inv[8][8] = {
    {  0,  2,  5,  9, 14, 20, 27, 35 },
    {  1,  4,  8, 13, 19, 26, 34, 42 },
    {  3,  7, 12, 18, 25, 33, 41, 48 },
    {  6, 11, 17, 24, 32, 40, 47, 53 },
    { 10, 16, 23, 31, 39, 46, 52, 57 },
    { 15, 22, 30, 38, 45, 51, 56, 60 },
    { 21, 29, 37, 44, 50, 55, 59, 62 },
    { 28, 36, 43, 49, 54, 58, 61, 63 },
};
153
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
158
159 /**
160  * Section 5.7
161  */
162
163 /* free everything allocated  by pic_arrays_init() */
164 static void pic_arrays_free(HEVCContext *s)
165 {
166     av_freep(&s->sao);
167     av_freep(&s->deblock);
168     av_freep(&s->split_cu_flag);
169
170     av_freep(&s->skip_flag);
171     av_freep(&s->tab_ct_depth);
172
173     av_freep(&s->tab_ipm);
174     av_freep(&s->cbf_luma);
175     av_freep(&s->is_pcm);
176
177     av_freep(&s->qp_y_tab);
178     av_freep(&s->tab_slice_address);
179     av_freep(&s->filter_slice_edges);
180
181     av_freep(&s->horizontal_bs);
182     av_freep(&s->vertical_bs);
183
184     av_buffer_pool_uninit(&s->tab_mvf_pool);
185     av_buffer_pool_uninit(&s->rpl_tab_pool);
186 }
187
188 /* allocate arrays that depend on frame dimensions */
189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
190 {
191     int log2_min_cb_size = sps->log2_min_cb_size;
192     int width            = sps->width;
193     int height           = sps->height;
194     int pic_size         = width * height;
195     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
196                            ((height >> log2_min_cb_size) + 1);
197     int ctb_count        = sps->ctb_width * sps->ctb_height;
198     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
199
200     s->bs_width  = width  >> 3;
201     s->bs_height = height >> 3;
202
203     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
204     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
205     s->split_cu_flag = av_malloc(pic_size);
206     if (!s->sao || !s->deblock || !s->split_cu_flag)
207         goto fail;
208
209     s->skip_flag    = av_malloc(pic_size_in_ctb);
210     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
211     if (!s->skip_flag || !s->tab_ct_depth)
212         goto fail;
213
214     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
215     s->tab_ipm  = av_malloc(min_pu_size);
216     s->is_pcm   = av_malloc(min_pu_size);
217     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
218         goto fail;
219
220     s->filter_slice_edges = av_malloc(ctb_count);
221     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
222                                       sizeof(*s->tab_slice_address));
223     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
224                                       sizeof(*s->qp_y_tab));
225     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
226         goto fail;
227
228     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
229     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
230     if (!s->horizontal_bs || !s->vertical_bs)
231         goto fail;
232
233     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
234                                           av_buffer_alloc);
235     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
236                                           av_buffer_allocz);
237     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
238         goto fail;
239
240     return 0;
241
242 fail:
243     pic_arrays_free(s);
244     return AVERROR(ENOMEM);
245 }
246
247 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
248 {
249     int i = 0;
250     int j = 0;
251     uint8_t luma_weight_l0_flag[16];
252     uint8_t chroma_weight_l0_flag[16];
253     uint8_t luma_weight_l1_flag[16];
254     uint8_t chroma_weight_l1_flag[16];
255
256     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
257     if (s->sps->chroma_format_idc != 0) {
258         int delta = get_se_golomb(gb);
259         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
260     }
261
262     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
263         luma_weight_l0_flag[i] = get_bits1(gb);
264         if (!luma_weight_l0_flag[i]) {
265             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
266             s->sh.luma_offset_l0[i] = 0;
267         }
268     }
269     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
270         for (i = 0; i < s->sh.nb_refs[L0]; i++)
271             chroma_weight_l0_flag[i] = get_bits1(gb);
272     } else {
273         for (i = 0; i < s->sh.nb_refs[L0]; i++)
274             chroma_weight_l0_flag[i] = 0;
275     }
276     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
277         if (luma_weight_l0_flag[i]) {
278             int delta_luma_weight_l0 = get_se_golomb(gb);
279             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
280             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
281         }
282         if (chroma_weight_l0_flag[i]) {
283             for (j = 0; j < 2; j++) {
284                 int delta_chroma_weight_l0 = get_se_golomb(gb);
285                 int delta_chroma_offset_l0 = get_se_golomb(gb);
286                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
287                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
288                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
289             }
290         } else {
291             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
292             s->sh.chroma_offset_l0[i][0] = 0;
293             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
294             s->sh.chroma_offset_l0[i][1] = 0;
295         }
296     }
297     if (s->sh.slice_type == B_SLICE) {
298         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
299             luma_weight_l1_flag[i] = get_bits1(gb);
300             if (!luma_weight_l1_flag[i]) {
301                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
302                 s->sh.luma_offset_l1[i] = 0;
303             }
304         }
305         if (s->sps->chroma_format_idc != 0) {
306             for (i = 0; i < s->sh.nb_refs[L1]; i++)
307                 chroma_weight_l1_flag[i] = get_bits1(gb);
308         } else {
309             for (i = 0; i < s->sh.nb_refs[L1]; i++)
310                 chroma_weight_l1_flag[i] = 0;
311         }
312         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
313             if (luma_weight_l1_flag[i]) {
314                 int delta_luma_weight_l1 = get_se_golomb(gb);
315                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
316                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
317             }
318             if (chroma_weight_l1_flag[i]) {
319                 for (j = 0; j < 2; j++) {
320                     int delta_chroma_weight_l1 = get_se_golomb(gb);
321                     int delta_chroma_offset_l1 = get_se_golomb(gb);
322                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
323                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
324                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
325                 }
326             } else {
327                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
328                 s->sh.chroma_offset_l1[i][0] = 0;
329                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
330                 s->sh.chroma_offset_l1[i][1] = 0;
331             }
332         }
333     }
334 }
335
336 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
337 {
338     const HEVCSPS *sps = s->sps;
339     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
340     int prev_delta_msb = 0;
341     unsigned int nb_sps = 0, nb_sh;
342     int i;
343
344     rps->nb_refs = 0;
345     if (!sps->long_term_ref_pics_present_flag)
346         return 0;
347
348     if (sps->num_long_term_ref_pics_sps > 0)
349         nb_sps = get_ue_golomb_long(gb);
350     nb_sh = get_ue_golomb_long(gb);
351
352     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
353         return AVERROR_INVALIDDATA;
354
355     rps->nb_refs = nb_sh + nb_sps;
356
357     for (i = 0; i < rps->nb_refs; i++) {
358         uint8_t delta_poc_msb_present;
359
360         if (i < nb_sps) {
361             uint8_t lt_idx_sps = 0;
362
363             if (sps->num_long_term_ref_pics_sps > 1)
364                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
365
366             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
367             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
368         } else {
369             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
370             rps->used[i] = get_bits1(gb);
371         }
372
373         delta_poc_msb_present = get_bits1(gb);
374         if (delta_poc_msb_present) {
375             int delta = get_ue_golomb_long(gb);
376
377             if (i && i != nb_sps)
378                 delta += prev_delta_msb;
379
380             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
381             prev_delta_msb = delta;
382         }
383     }
384
385     return 0;
386 }
387
388 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
389 {
390     int ret;
391     int num = 0, den = 0;
392
393     pic_arrays_free(s);
394     ret = pic_arrays_init(s, sps);
395     if (ret < 0)
396         goto fail;
397
398     s->avctx->coded_width         = sps->width;
399     s->avctx->coded_height        = sps->height;
400     s->avctx->width               = sps->output_width;
401     s->avctx->height              = sps->output_height;
402     s->avctx->pix_fmt             = sps->pix_fmt;
403     s->avctx->sample_aspect_ratio = sps->vui.sar;
404     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
405
406     if (sps->vui.video_signal_type_present_flag)
407         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
408                                                                : AVCOL_RANGE_MPEG;
409     else
410         s->avctx->color_range = AVCOL_RANGE_MPEG;
411
412     if (sps->vui.colour_description_present_flag) {
413         s->avctx->color_primaries = sps->vui.colour_primaries;
414         s->avctx->color_trc       = sps->vui.transfer_characteristic;
415         s->avctx->colorspace      = sps->vui.matrix_coeffs;
416     } else {
417         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
418         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
419         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
420     }
421
422     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
423     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
424     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
425
426     if (sps->sao_enabled) {
427         av_frame_unref(s->tmp_frame);
428         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
429         if (ret < 0)
430             goto fail;
431         s->frame = s->tmp_frame;
432     }
433
434     s->sps = sps;
435     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
436
437     if (s->vps->vps_timing_info_present_flag) {
438         num = s->vps->vps_num_units_in_tick;
439         den = s->vps->vps_time_scale;
440     } else if (sps->vui.vui_timing_info_present_flag) {
441         num = sps->vui.vui_num_units_in_tick;
442         den = sps->vui.vui_time_scale;
443     }
444
445     if (num != 0 && den != 0)
446         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
447                   num, den, 1 << 30);
448
449     return 0;
450
451 fail:
452     pic_arrays_free(s);
453     s->sps = NULL;
454     return ret;
455 }
456
457 static int hls_slice_header(HEVCContext *s)
458 {
459     GetBitContext *gb = &s->HEVClc.gb;
460     SliceHeader *sh   = &s->sh;
461     int i, ret;
462
463     // Coded parameters
464     sh->first_slice_in_pic_flag = get_bits1(gb);
465     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
466         s->seq_decode = (s->seq_decode + 1) & 0xff;
467         s->max_ra     = INT_MAX;
468         if (IS_IDR(s))
469             ff_hevc_clear_refs(s);
470     }
471     if (s->nal_unit_type >= 16 && s->nal_unit_type <= 23)
472         sh->no_output_of_prior_pics_flag = get_bits1(gb);
473
474     sh->pps_id = get_ue_golomb_long(gb);
475     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
476         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
477         return AVERROR_INVALIDDATA;
478     }
479     if (!sh->first_slice_in_pic_flag &&
480         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
481         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
482         return AVERROR_INVALIDDATA;
483     }
484     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
485
486     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
487         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
488
489         ff_hevc_clear_refs(s);
490         ret = set_sps(s, s->sps);
491         if (ret < 0)
492             return ret;
493
494         s->seq_decode = (s->seq_decode + 1) & 0xff;
495         s->max_ra     = INT_MAX;
496     }
497
498     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
499     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
500
501     sh->dependent_slice_segment_flag = 0;
502     if (!sh->first_slice_in_pic_flag) {
503         int slice_address_length;
504
505         if (s->pps->dependent_slice_segments_enabled_flag)
506             sh->dependent_slice_segment_flag = get_bits1(gb);
507
508         slice_address_length = av_ceil_log2(s->sps->ctb_width *
509                                             s->sps->ctb_height);
510         sh->slice_segment_addr = get_bits(gb, slice_address_length);
511         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
512             av_log(s->avctx, AV_LOG_ERROR,
513                    "Invalid slice segment address: %u.\n",
514                    sh->slice_segment_addr);
515             return AVERROR_INVALIDDATA;
516         }
517
518         if (!sh->dependent_slice_segment_flag) {
519             sh->slice_addr = sh->slice_segment_addr;
520             s->slice_idx++;
521         }
522     } else {
523         sh->slice_segment_addr = sh->slice_addr = 0;
524         s->slice_idx           = 0;
525         s->slice_initialized   = 0;
526     }
527
528     if (!sh->dependent_slice_segment_flag) {
529         s->slice_initialized = 0;
530
531         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
532             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
533
534         sh->slice_type = get_ue_golomb_long(gb);
535         if (!(sh->slice_type == I_SLICE ||
536               sh->slice_type == P_SLICE ||
537               sh->slice_type == B_SLICE)) {
538             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
539                    sh->slice_type);
540             return AVERROR_INVALIDDATA;
541         }
542         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
543             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
544             return AVERROR_INVALIDDATA;
545         }
546
547         if (s->pps->output_flag_present_flag)
548             sh->pic_output_flag = get_bits1(gb);
549
550         if (s->sps->separate_colour_plane_flag)
551             sh->colour_plane_id = get_bits(gb, 2);
552
553         if (!IS_IDR(s)) {
554             int short_term_ref_pic_set_sps_flag, poc;
555
556             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
557             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
558             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
559                 av_log(s->avctx, AV_LOG_WARNING,
560                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
561                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
562                     return AVERROR_INVALIDDATA;
563                 poc = s->poc;
564             }
565             s->poc = poc;
566
567             short_term_ref_pic_set_sps_flag = get_bits1(gb);
568             if (!short_term_ref_pic_set_sps_flag) {
569                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
570                 if (ret < 0)
571                     return ret;
572
573                 sh->short_term_rps = &sh->slice_rps;
574             } else {
575                 int numbits, rps_idx;
576
577                 if (!s->sps->nb_st_rps) {
578                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
579                     return AVERROR_INVALIDDATA;
580                 }
581
582                 numbits = av_ceil_log2(s->sps->nb_st_rps);
583                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
584                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
585             }
586
587             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
588             if (ret < 0) {
589                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
590                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
591                     return AVERROR_INVALIDDATA;
592             }
593
594             if (s->sps->sps_temporal_mvp_enabled_flag)
595                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
596             else
597                 sh->slice_temporal_mvp_enabled_flag = 0;
598         } else {
599             s->sh.short_term_rps = NULL;
600             s->poc               = 0;
601         }
602
603         /* 8.3.1 */
604         if (s->temporal_id == 0 &&
605             s->nal_unit_type != NAL_TRAIL_N &&
606             s->nal_unit_type != NAL_TSA_N   &&
607             s->nal_unit_type != NAL_STSA_N  &&
608             s->nal_unit_type != NAL_RADL_N  &&
609             s->nal_unit_type != NAL_RADL_R  &&
610             s->nal_unit_type != NAL_RASL_N  &&
611             s->nal_unit_type != NAL_RASL_R)
612             s->pocTid0 = s->poc;
613
614         if (s->sps->sao_enabled) {
615             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
616             sh->slice_sample_adaptive_offset_flag[1] =
617             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
618         } else {
619             sh->slice_sample_adaptive_offset_flag[0] = 0;
620             sh->slice_sample_adaptive_offset_flag[1] = 0;
621             sh->slice_sample_adaptive_offset_flag[2] = 0;
622         }
623
624         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
625         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
626             int nb_refs;
627
628             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
629             if (sh->slice_type == B_SLICE)
630                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
631
632             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
633                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
634                 if (sh->slice_type == B_SLICE)
635                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
636             }
637             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
638                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
639                        sh->nb_refs[L0], sh->nb_refs[L1]);
640                 return AVERROR_INVALIDDATA;
641             }
642
643             sh->rpl_modification_flag[0] = 0;
644             sh->rpl_modification_flag[1] = 0;
645             nb_refs = ff_hevc_frame_nb_refs(s);
646             if (!nb_refs) {
647                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
648                 return AVERROR_INVALIDDATA;
649             }
650
651             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
652                 sh->rpl_modification_flag[0] = get_bits1(gb);
653                 if (sh->rpl_modification_flag[0]) {
654                     for (i = 0; i < sh->nb_refs[L0]; i++)
655                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
656                 }
657
658                 if (sh->slice_type == B_SLICE) {
659                     sh->rpl_modification_flag[1] = get_bits1(gb);
660                     if (sh->rpl_modification_flag[1] == 1)
661                         for (i = 0; i < sh->nb_refs[L1]; i++)
662                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
663                 }
664             }
665
666             if (sh->slice_type == B_SLICE)
667                 sh->mvd_l1_zero_flag = get_bits1(gb);
668
669             if (s->pps->cabac_init_present_flag)
670                 sh->cabac_init_flag = get_bits1(gb);
671             else
672                 sh->cabac_init_flag = 0;
673
674             sh->collocated_ref_idx = 0;
675             if (sh->slice_temporal_mvp_enabled_flag) {
676                 sh->collocated_list = L0;
677                 if (sh->slice_type == B_SLICE)
678                     sh->collocated_list = !get_bits1(gb);
679
680                 if (sh->nb_refs[sh->collocated_list] > 1) {
681                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
682                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
683                         av_log(s->avctx, AV_LOG_ERROR,
684                                "Invalid collocated_ref_idx: %d.\n",
685                                sh->collocated_ref_idx);
686                         return AVERROR_INVALIDDATA;
687                     }
688                 }
689             }
690
691             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
692                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
693                 pred_weight_table(s, gb);
694             }
695
696             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
697             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
698                 av_log(s->avctx, AV_LOG_ERROR,
699                        "Invalid number of merging MVP candidates: %d.\n",
700                        sh->max_num_merge_cand);
701                 return AVERROR_INVALIDDATA;
702             }
703         }
704
705         sh->slice_qp_delta = get_se_golomb(gb);
706
707         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
708             sh->slice_cb_qp_offset = get_se_golomb(gb);
709             sh->slice_cr_qp_offset = get_se_golomb(gb);
710         } else {
711             sh->slice_cb_qp_offset = 0;
712             sh->slice_cr_qp_offset = 0;
713         }
714
715         if (s->pps->deblocking_filter_control_present_flag) {
716             int deblocking_filter_override_flag = 0;
717
718             if (s->pps->deblocking_filter_override_enabled_flag)
719                 deblocking_filter_override_flag = get_bits1(gb);
720
721             if (deblocking_filter_override_flag) {
722                 sh->disable_deblocking_filter_flag = get_bits1(gb);
723                 if (!sh->disable_deblocking_filter_flag) {
724                     sh->beta_offset = get_se_golomb(gb) * 2;
725                     sh->tc_offset   = get_se_golomb(gb) * 2;
726                 }
727             } else {
728                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
729                 sh->beta_offset                    = s->pps->beta_offset;
730                 sh->tc_offset                      = s->pps->tc_offset;
731             }
732         } else {
733             sh->disable_deblocking_filter_flag = 0;
734             sh->beta_offset                    = 0;
735             sh->tc_offset                      = 0;
736         }
737
738         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
739             (sh->slice_sample_adaptive_offset_flag[0] ||
740              sh->slice_sample_adaptive_offset_flag[1] ||
741              !sh->disable_deblocking_filter_flag)) {
742             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
743         } else {
744             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
745         }
746     } else if (!s->slice_initialized) {
747         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
748         return AVERROR_INVALIDDATA;
749     }
750
751     sh->num_entry_point_offsets = 0;
752     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
753         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
754         if (sh->num_entry_point_offsets > 0) {
755             int offset_len = get_ue_golomb_long(gb) + 1;
756
757             for (i = 0; i < sh->num_entry_point_offsets; i++)
758                 skip_bits(gb, offset_len);
759         }
760     }
761
762     if (s->pps->slice_header_extension_present_flag) {
763         unsigned int length = get_ue_golomb_long(gb);
764         for (i = 0; i < length; i++)
765             skip_bits(gb, 8);  // slice_header_extension_data_byte
766     }
767
768     // Inferred parameters
769     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
770     if (sh->slice_qp > 51 ||
771         sh->slice_qp < -s->sps->qp_bd_offset) {
772         av_log(s->avctx, AV_LOG_ERROR,
773                "The slice_qp %d is outside the valid range "
774                "[%d, 51].\n",
775                sh->slice_qp,
776                -s->sps->qp_bd_offset);
777         return AVERROR_INVALIDDATA;
778     }
779
780     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
781
782     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
783
784     if (!s->pps->cu_qp_delta_enabled_flag)
785         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
786                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
787
788     s->slice_initialized = 1;
789
790     return 0;
791 }
792
/*
 * Access the entry of a per-CTB table at CTB coordinates (x, y).
 * Relies on a variable `s` in the expansion scope whose sps->ctb_width is
 * the table stride.
 */
#define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])

/*
 * Assign one SAO parameter of the current CTB.
 *
 * When merging with the left or upper neighbour, the parameter is copied
 * from that neighbour (left takes precedence) and `value` is NOT evaluated,
 * so a decode call passed as `value` consumes no bitstream data.  Otherwise
 * `value` is evaluated exactly once and stored.
 *
 * Relies on `s`, `sao`, `rx`, `ry`, `sao_merge_left_flag` and
 * `sao_merge_up_flag` being in scope at the expansion site.
 */
#define SET_SAO(elem, value)                            \
do {                                                    \
    if (sao_merge_left_flag)                            \
        sao->elem = CTB(s->sao, rx-1, ry).elem;         \
    else if (sao_merge_up_flag)                         \
        sao->elem = CTB(s->sao, rx, ry-1).elem;         \
    else                                                \
        sao->elem = value;                              \
} while (0)
806
807 static void hls_sao_param(HEVCContext *s, int rx, int ry)
808 {
809     HEVCLocalContext *lc    = &s->HEVClc;
810     int sao_merge_left_flag = 0;
811     int sao_merge_up_flag   = 0;
812     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
813     SAOParams *sao          = &CTB(s->sao, rx, ry);
814     int c_idx, i;
815
816     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
817         s->sh.slice_sample_adaptive_offset_flag[1]) {
818         if (rx > 0) {
819             if (lc->ctb_left_flag)
820                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
821         }
822         if (ry > 0 && !sao_merge_left_flag) {
823             if (lc->ctb_up_flag)
824                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
825         }
826     }
827
828     for (c_idx = 0; c_idx < 3; c_idx++) {
829         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
830             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
831             continue;
832         }
833
834         if (c_idx == 2) {
835             sao->type_idx[2] = sao->type_idx[1];
836             sao->eo_class[2] = sao->eo_class[1];
837         } else {
838             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
839         }
840
841         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
842             continue;
843
844         for (i = 0; i < 4; i++)
845             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
846
847         if (sao->type_idx[c_idx] == SAO_BAND) {
848             for (i = 0; i < 4; i++) {
849                 if (sao->offset_abs[c_idx][i]) {
850                     SET_SAO(offset_sign[c_idx][i],
851                             ff_hevc_sao_offset_sign_decode(s));
852                 } else {
853                     sao->offset_sign[c_idx][i] = 0;
854                 }
855             }
856             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
857         } else if (c_idx != 2) {
858             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
859         }
860
861         // Inferred parameters
862         sao->offset_val[c_idx][0] = 0;
863         for (i = 0; i < 4; i++) {
864             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
865             if (sao->type_idx[c_idx] == SAO_EDGE) {
866                 if (i > 1)
867                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
868             } else if (sao->offset_sign[c_idx][i]) {
869                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
870             }
871         }
872     }
873 }
874
875 #undef SET_SAO
876 #undef CTB
877
878 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
879                                 int log2_trafo_size, enum ScanType scan_idx,
880                                 int c_idx)
881 {
882 #define GET_COORD(offset, n)                                    \
883     do {                                                        \
884         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
885         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
886     } while (0)
887     HEVCLocalContext *lc    = &s->HEVClc;
888     int transform_skip_flag = 0;
889
890     int last_significant_coeff_x, last_significant_coeff_y;
891     int last_scan_pos;
892     int n_end;
893     int num_coeff    = 0;
894     int greater1_ctx = 1;
895
896     int num_last_subset;
897     int x_cg_last_sig, y_cg_last_sig;
898
899     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
900
901     ptrdiff_t stride = s->frame->linesize[c_idx];
902     int hshift       = s->sps->hshift[c_idx];
903     int vshift       = s->sps->vshift[c_idx];
904     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
905                                               ((x0 >> hshift) << s->sps->pixel_shift)];
906     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
907     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
908
909     int trafo_size = 1 << log2_trafo_size;
910     int i, qp, shift, add, scale, scale_m;
911     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
912     const uint8_t *scale_matrix;
913     uint8_t dc_scale;
914
915     // Derive QP for dequant
916     if (!lc->cu.cu_transquant_bypass_flag) {
917         static const int qp_c[] = {
918             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
919         };
920
921         static const uint8_t rem6[51 + 2 * 6 + 1] = {
922             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
923             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
924             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
925         };
926
927         static const uint8_t div6[51 + 2 * 6 + 1] = {
928             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
929             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
930             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
931         };
932         int qp_y = lc->qp_y;
933
934         if (c_idx == 0) {
935             qp = qp_y + s->sps->qp_bd_offset;
936         } else {
937             int qp_i, offset;
938
939             if (c_idx == 1)
940                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
941             else
942                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
943
944             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
945             if (qp_i < 30)
946                 qp = qp_i;
947             else if (qp_i > 43)
948                 qp = qp_i - 6;
949             else
950                 qp = qp_c[qp_i - 30];
951
952             qp += s->sps->qp_bd_offset;
953         }
954
955         shift    = s->sps->bit_depth + log2_trafo_size - 5;
956         add      = 1 << (shift - 1);
957         scale    = level_scale[rem6[qp]] << (div6[qp]);
958         scale_m  = 16; // default when no custom scaling lists.
959         dc_scale = 16;
960
961         if (s->sps->scaling_list_enable_flag) {
962             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
963                                     &s->pps->scaling_list : &s->sps->scaling_list;
964             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
965
966             if (log2_trafo_size != 5)
967                 matrix_id = 3 * matrix_id + c_idx;
968
969             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
970             if (log2_trafo_size >= 4)
971                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
972         }
973     }
974
975     if (s->pps->transform_skip_enabled_flag &&
976         !lc->cu.cu_transquant_bypass_flag   &&
977         log2_trafo_size == 2) {
978         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
979     }
980
981     last_significant_coeff_x =
982         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
983     last_significant_coeff_y =
984         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
985
986     if (last_significant_coeff_x > 3) {
987         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
988         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
989                                    (2 + (last_significant_coeff_x & 1)) +
990                                    suffix;
991     }
992
993     if (last_significant_coeff_y > 3) {
994         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
995         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
996                                    (2 + (last_significant_coeff_y & 1)) +
997                                    suffix;
998     }
999
1000     if (scan_idx == SCAN_VERT)
1001         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1002
1003     x_cg_last_sig = last_significant_coeff_x >> 2;
1004     y_cg_last_sig = last_significant_coeff_y >> 2;
1005
1006     switch (scan_idx) {
1007     case SCAN_DIAG: {
1008         int last_x_c = last_significant_coeff_x & 3;
1009         int last_y_c = last_significant_coeff_y & 3;
1010
1011         scan_x_off = ff_hevc_diag_scan4x4_x;
1012         scan_y_off = ff_hevc_diag_scan4x4_y;
1013         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1014         if (trafo_size == 4) {
1015             scan_x_cg = scan_1x1;
1016             scan_y_cg = scan_1x1;
1017         } else if (trafo_size == 8) {
1018             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1019             scan_x_cg  = diag_scan2x2_x;
1020             scan_y_cg  = diag_scan2x2_y;
1021         } else if (trafo_size == 16) {
1022             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1023             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1024             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1025         } else { // trafo_size == 32
1026             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1027             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1028             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1029         }
1030         break;
1031     }
1032     case SCAN_HORIZ:
1033         scan_x_cg  = horiz_scan2x2_x;
1034         scan_y_cg  = horiz_scan2x2_y;
1035         scan_x_off = horiz_scan4x4_x;
1036         scan_y_off = horiz_scan4x4_y;
1037         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1038         break;
1039     default: //SCAN_VERT
1040         scan_x_cg  = horiz_scan2x2_y;
1041         scan_y_cg  = horiz_scan2x2_x;
1042         scan_x_off = horiz_scan4x4_y;
1043         scan_y_off = horiz_scan4x4_x;
1044         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1045         break;
1046     }
1047     num_coeff++;
1048     num_last_subset = (num_coeff - 1) >> 4;
1049
1050     for (i = num_last_subset; i >= 0; i--) {
1051         int n, m;
1052         int x_cg, y_cg, x_c, y_c;
1053         int implicit_non_zero_coeff = 0;
1054         int64_t trans_coeff_level;
1055         int prev_sig = 0;
1056         int offset   = i << 4;
1057
1058         uint8_t significant_coeff_flag_idx[16];
1059         uint8_t nb_significant_coeff_flag = 0;
1060
1061         x_cg = scan_x_cg[i];
1062         y_cg = scan_y_cg[i];
1063
1064         if (i < num_last_subset && i > 0) {
1065             int ctx_cg = 0;
1066             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1067                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1068             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1069                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1070
1071             significant_coeff_group_flag[x_cg][y_cg] =
1072                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1073             implicit_non_zero_coeff = 1;
1074         } else {
1075             significant_coeff_group_flag[x_cg][y_cg] =
1076                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1077                  (x_cg == 0 && y_cg == 0));
1078         }
1079
1080         last_scan_pos = num_coeff - offset - 1;
1081
1082         if (i == num_last_subset) {
1083             n_end                         = last_scan_pos - 1;
1084             significant_coeff_flag_idx[0] = last_scan_pos;
1085             nb_significant_coeff_flag     = 1;
1086         } else {
1087             n_end = 15;
1088         }
1089
1090         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1091             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1092         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1093             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1094
1095         for (n = n_end; n >= 0; n--) {
1096             GET_COORD(offset, n);
1097
1098             if (significant_coeff_group_flag[x_cg][y_cg] &&
1099                 (n > 0 || implicit_non_zero_coeff == 0)) {
1100                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1101                                                           log2_trafo_size,
1102                                                           scan_idx,
1103                                                           prev_sig) == 1) {
1104                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1105                     nb_significant_coeff_flag++;
1106                     implicit_non_zero_coeff = 0;
1107                 }
1108             } else {
1109                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1110                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1111                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1112                     nb_significant_coeff_flag++;
1113                 }
1114             }
1115         }
1116
1117         n_end = nb_significant_coeff_flag;
1118
1119         if (n_end) {
1120             int first_nz_pos_in_cg = 16;
1121             int last_nz_pos_in_cg = -1;
1122             int c_rice_param = 0;
1123             int first_greater1_coeff_idx = -1;
1124             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1125             uint16_t coeff_sign_flag;
1126             int sum_abs = 0;
1127             int sign_hidden = 0;
1128
1129             // initialize first elem of coeff_bas_level_greater1_flag
1130             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1131
1132             if (!(i == num_last_subset) && greater1_ctx == 0)
1133                 ctx_set++;
1134             greater1_ctx      = 1;
1135             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1136
1137             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1138                 int n_idx = significant_coeff_flag_idx[m];
1139                 int inc   = (ctx_set << 2) + greater1_ctx;
1140                 coeff_abs_level_greater1_flag[n_idx] =
1141                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1142                 if (coeff_abs_level_greater1_flag[n_idx]) {
1143                     greater1_ctx = 0;
1144                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1145                     greater1_ctx++;
1146                 }
1147
1148                 if (coeff_abs_level_greater1_flag[n_idx] &&
1149                     first_greater1_coeff_idx == -1)
1150                     first_greater1_coeff_idx = n_idx;
1151             }
1152             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1153             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1154                                  !lc->cu.cu_transquant_bypass_flag;
1155
1156             if (first_greater1_coeff_idx != -1) {
1157                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1158             }
1159             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1160                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1161             } else {
1162                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1163             }
1164
1165             for (m = 0; m < n_end; m++) {
1166                 n = significant_coeff_flag_idx[m];
1167                 GET_COORD(offset, n);
1168                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1169                 if (trans_coeff_level == ((m < 8) ?
1170                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1171                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1172
1173                     trans_coeff_level += last_coeff_abs_level_remaining;
1174                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1175                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1176                 }
1177                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1178                     sum_abs += trans_coeff_level;
1179                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1180                         trans_coeff_level = -trans_coeff_level;
1181                 }
1182                 if (coeff_sign_flag >> 15)
1183                     trans_coeff_level = -trans_coeff_level;
1184                 coeff_sign_flag <<= 1;
1185                 if (!lc->cu.cu_transquant_bypass_flag) {
1186                     if (s->sps->scaling_list_enable_flag) {
1187                         if (y_c || x_c || log2_trafo_size < 4) {
1188                             int pos;
1189                             switch (log2_trafo_size) {
1190                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1191                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1192                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1193                             default: pos = (y_c        << 2) +  x_c;
1194                             }
1195                             scale_m = scale_matrix[pos];
1196                         } else {
1197                             scale_m = dc_scale;
1198                         }
1199                     }
1200                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1201                     if(trans_coeff_level < 0) {
1202                         if((~trans_coeff_level) & 0xFffffffffff8000)
1203                             trans_coeff_level = -32768;
1204                     } else {
1205                         if (trans_coeff_level & 0xffffffffffff8000)
1206                             trans_coeff_level = 32767;
1207                     }
1208                 }
1209                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1210             }
1211         }
1212     }
1213
1214     if (lc->cu.cu_transquant_bypass_flag) {
1215         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1216     } else {
1217         if (transform_skip_flag)
1218             s->hevcdsp.transform_skip(dst, coeffs, stride);
1219         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1220                  log2_trafo_size == 2)
1221             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1222         else
1223             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1224     }
1225 }
1226
1227 static void hls_transform_unit(HEVCContext *s, int x0, int y0,
1228                                int xBase, int yBase, int cb_xBase, int cb_yBase,
1229                                int log2_cb_size, int log2_trafo_size,
1230                                int trafo_depth, int blk_idx)
1231 {
1232     HEVCLocalContext *lc = &s->HEVClc;
1233
1234     if (lc->cu.pred_mode == MODE_INTRA) {
1235         int trafo_size = 1 << log2_trafo_size;
1236         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1237
1238         s->hpc.intra_pred(s, x0, y0, log2_trafo_size, 0);
1239         if (log2_trafo_size > 2) {
1240             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1241             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1242             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 1);
1243             s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 2);
1244         } else if (blk_idx == 3) {
1245             trafo_size = trafo_size << s->sps->hshift[1];
1246             ff_hevc_set_neighbour_available(s, xBase, yBase,
1247                                             trafo_size, trafo_size);
1248             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 1);
1249             s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 2);
1250         }
1251     }
1252
1253     if (lc->tt.cbf_luma ||
1254         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1255         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1256         int scan_idx   = SCAN_DIAG;
1257         int scan_idx_c = SCAN_DIAG;
1258
1259         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1260             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1261             if (lc->tu.cu_qp_delta != 0)
1262                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1263                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1264             lc->tu.is_cu_qp_delta_coded = 1;
1265             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1266         }
1267
1268         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1269             if (lc->tu.cur_intra_pred_mode >= 6 &&
1270                 lc->tu.cur_intra_pred_mode <= 14) {
1271                 scan_idx = SCAN_VERT;
1272             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1273                        lc->tu.cur_intra_pred_mode <= 30) {
1274                 scan_idx = SCAN_HORIZ;
1275             }
1276
1277             if (lc->pu.intra_pred_mode_c >=  6 &&
1278                 lc->pu.intra_pred_mode_c <= 14) {
1279                 scan_idx_c = SCAN_VERT;
1280             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1281                        lc->pu.intra_pred_mode_c <= 30) {
1282                 scan_idx_c = SCAN_HORIZ;
1283             }
1284         }
1285
1286         if (lc->tt.cbf_luma)
1287             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1288         if (log2_trafo_size > 2) {
1289             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0))
1290                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1291             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0))
1292                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1293         } else if (blk_idx == 3) {
1294             if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase))
1295                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1296             if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase))
1297                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1298         }
1299     }
1300 }
1301
1302 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1303 {
1304     int cb_size          = 1 << log2_cb_size;
1305     int log2_min_pu_size = s->sps->log2_min_pu_size;
1306
1307     int min_pu_width     = s->sps->min_pu_width;
1308     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1309     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1310     int i, j;
1311
1312     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1313         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1314             s->is_pcm[i + j * min_pu_width] = 2;
1315 }
1316
1317 static void hls_transform_tree(HEVCContext *s, int x0, int y0,
1318                                int xBase, int yBase, int cb_xBase, int cb_yBase,
1319                                int log2_cb_size, int log2_trafo_size,
1320                                int trafo_depth, int blk_idx)
1321 {
1322     HEVCLocalContext *lc = &s->HEVClc;
1323     uint8_t split_transform_flag;
1324
1325     if (trafo_depth > 0 && log2_trafo_size == 2) {
1326         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1327             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1328         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1329             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1330     } else {
1331         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1332         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1333     }
1334
1335     if (lc->cu.intra_split_flag) {
1336         if (trafo_depth == 1)
1337             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1338     } else {
1339         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1340     }
1341
1342     lc->tt.cbf_luma = 1;
1343
1344     lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1345                               lc->cu.pred_mode == MODE_INTER &&
1346                               lc->cu.part_mode != PART_2Nx2N &&
1347                               trafo_depth == 0;
1348
1349     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1350         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1351         trafo_depth     < lc->cu.max_trafo_depth       &&
1352         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1353         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1354     } else {
1355         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1356                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1357                                lc->tt.inter_split_flag;
1358     }
1359
1360     if (log2_trafo_size > 2) {
1361         if (trafo_depth == 0 ||
1362             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1363             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1364                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1365         }
1366
1367         if (trafo_depth == 0 ||
1368             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1369             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1370                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1371         }
1372     }
1373
1374     if (split_transform_flag) {
1375         int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1376         int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1377
1378         hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1379                            log2_trafo_size - 1, trafo_depth + 1, 0);
1380         hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1381                            log2_trafo_size - 1, trafo_depth + 1, 1);
1382         hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1383                            log2_trafo_size - 1, trafo_depth + 1, 2);
1384         hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
1385                            log2_trafo_size - 1, trafo_depth + 1, 3);
1386     } else {
1387         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1388         int log2_min_tu_size = s->sps->log2_min_tb_size;
1389         int min_tu_width     = s->sps->min_tb_width;
1390
1391         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1392             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1393             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) {
1394             lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1395         }
1396
1397         hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1398                            log2_cb_size, log2_trafo_size, trafo_depth, blk_idx);
1399
1400         // TODO: store cbf_luma somewhere else
1401         if (lc->tt.cbf_luma) {
1402             int i, j;
1403             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1404                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1405                     int x_tu = (x0 + j) >> log2_min_tu_size;
1406                     int y_tu = (y0 + i) >> log2_min_tu_size;
1407                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1408                 }
1409         }
1410         if (!s->sh.disable_deblocking_filter_flag) {
1411             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1412                                                   lc->slice_or_tiles_up_boundary,
1413                                                   lc->slice_or_tiles_left_boundary);
1414             if (s->pps->transquant_bypass_enable_flag &&
1415                 lc->cu.cu_transquant_bypass_flag)
1416                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1417         }
1418     }
1419 }
1420
1421 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1422 {
1423     //TODO: non-4:2:0 support
1424     HEVCLocalContext *lc = &s->HEVClc;
1425     GetBitContext gb;
1426     int cb_size   = 1 << log2_cb_size;
1427     int stride0   = s->frame->linesize[0];
1428     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1429     int   stride1 = s->frame->linesize[1];
1430     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1431     int   stride2 = s->frame->linesize[2];
1432     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1433
1434     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1435     const uint8_t *pcm = skip_bytes(&s->HEVClc.cc, (length + 7) >> 3);
1436     int ret;
1437
1438     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1439                                           lc->slice_or_tiles_up_boundary,
1440                                           lc->slice_or_tiles_left_boundary);
1441
1442     ret = init_get_bits(&gb, pcm, length);
1443     if (ret < 0)
1444         return ret;
1445
1446     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1447     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1448     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1449     return 0;
1450 }
1451
1452 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1453 {
1454     HEVCLocalContext *lc = &s->HEVClc;
1455     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1456     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1457
1458     if (x)
1459         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1460     if (y)
1461         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1462
1463     switch (x) {
1464     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1465     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1466     case 0: lc->pu.mvd.x = 0;                               break;
1467     }
1468
1469     switch (y) {
1470     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1471     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1472     case 0: lc->pu.mvd.y = 0;                               break;
1473     }
1474 }
1475
1476 /**
1477  * 8.5.3.2.2.1 Luma sample interpolation process
1478  *
1479  * @param s HEVC decoding context
1480  * @param dst target buffer for block data at block position
1481  * @param dststride stride of the dst buffer
1482  * @param ref reference picture buffer at origin (0, 0)
1483  * @param mv motion vector (relative to block position) to get pixel data from
1484  * @param x_off horizontal position of block from origin (0, 0)
1485  * @param y_off vertical position of block from origin (0, 0)
1486  * @param block_w width of block
1487  * @param block_h height of block
1488  */
1489 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1490                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1491                     int block_w, int block_h)
1492 {
1493     HEVCLocalContext *lc = &s->HEVClc;
1494     uint8_t *src         = ref->data[0];
1495     ptrdiff_t srcstride  = ref->linesize[0];
1496     int pic_width        = s->sps->width;
1497     int pic_height       = s->sps->height;
1498
1499     int mx         = mv->x & 3;
1500     int my         = mv->y & 3;
1501     int extra_left = ff_hevc_qpel_extra_before[mx];
1502     int extra_top  = ff_hevc_qpel_extra_before[my];
1503
1504     x_off += mv->x >> 2;
1505     y_off += mv->y >> 2;
1506     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1507
1508     if (x_off < extra_left || y_off < extra_top ||
1509         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1510         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1511         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1512         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1513         int buf_offset = extra_top *
1514                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1515
1516         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1517                                  edge_emu_stride, srcstride,
1518                                  block_w + ff_hevc_qpel_extra[mx],
1519                                  block_h + ff_hevc_qpel_extra[my],
1520                                  x_off - extra_left, y_off - extra_top,
1521                                  pic_width, pic_height);
1522         src = lc->edge_emu_buffer + buf_offset;
1523         srcstride = edge_emu_stride;
1524     }
1525     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1526                                      block_h, lc->mc_buffer);
1527 }
1528
1529 /**
1530  * 8.5.3.2.2.2 Chroma sample interpolation process
1531  *
1532  * @param s HEVC decoding context
1533  * @param dst1 target buffer for block data at block position (U plane)
1534  * @param dst2 target buffer for block data at block position (V plane)
1535  * @param dststride stride of the dst1 and dst2 buffers
1536  * @param ref reference picture buffer at origin (0, 0)
1537  * @param mv motion vector (relative to block position) to get pixel data from
1538  * @param x_off horizontal position of block from origin (0, 0)
1539  * @param y_off vertical position of block from origin (0, 0)
1540  * @param block_w width of block
1541  * @param block_h height of block
1542  */
1543 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1544                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1545                       int x_off, int y_off, int block_w, int block_h)
1546 {
1547     HEVCLocalContext *lc = &s->HEVClc;
1548     uint8_t *src1        = ref->data[1];
1549     uint8_t *src2        = ref->data[2];
1550     ptrdiff_t src1stride = ref->linesize[1];
1551     ptrdiff_t src2stride = ref->linesize[2];
1552     int pic_width        = s->sps->width >> 1;
1553     int pic_height       = s->sps->height >> 1;
1554
1555     int mx = mv->x & 7;
1556     int my = mv->y & 7;
1557
1558     x_off += mv->x >> 3;
1559     y_off += mv->y >> 3;
1560     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1561     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1562
1563     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1564         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1565         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1566         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1567         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1568         int buf_offset1 = EPEL_EXTRA_BEFORE *
1569                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1570         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1571         int buf_offset2 = EPEL_EXTRA_BEFORE *
1572                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1573
1574         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1575                                  edge_emu_stride, src1stride,
1576                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1577                                  x_off - EPEL_EXTRA_BEFORE,
1578                                  y_off - EPEL_EXTRA_BEFORE,
1579                                  pic_width, pic_height);
1580
1581         src1 = lc->edge_emu_buffer + buf_offset1;
1582         src1stride = edge_emu_stride;
1583         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1584                                              block_w, block_h, mx, my, lc->mc_buffer);
1585
1586         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1587                                  edge_emu_stride, src2stride,
1588                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1589                                  x_off - EPEL_EXTRA_BEFORE,
1590                                  y_off - EPEL_EXTRA_BEFORE,
1591                                  pic_width, pic_height);
1592         src2 = lc->edge_emu_buffer + buf_offset2;
1593         src2stride = edge_emu_stride;
1594
1595         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1596                                              block_w, block_h, mx, my,
1597                                              lc->mc_buffer);
1598     } else {
1599         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1600                                              block_w, block_h, mx, my,
1601                                              lc->mc_buffer);
1602         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1603                                              block_w, block_h, mx, my,
1604                                              lc->mc_buffer);
1605     }
1606 }
1607
1608 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1609                                 const Mv *mv, int y0, int height)
1610 {
1611     int y = (mv->y >> 2) + y0 + height + 9;
1612     ff_thread_await_progress(&ref->tf, y, 0);
1613 }
1614
1615 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1616                                 int nPbW, int nPbH,
1617                                 int log2_cb_size, int partIdx)
1618 {
1619 #define POS(c_idx, x, y)                                                              \
1620     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1621                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1622     HEVCLocalContext *lc = &s->HEVClc;
1623     int merge_idx = 0;
1624     struct MvField current_mv = {{{ 0 }}};
1625
1626     int min_pu_width = s->sps->min_pu_width;
1627
1628     MvField *tab_mvf = s->ref->tab_mvf;
1629     RefPicList  *refPicList = s->ref->refPicList;
1630     HEVCFrame *ref0, *ref1;
1631
1632     int tmpstride = MAX_PB_SIZE;
1633
1634     uint8_t *dst0 = POS(0, x0, y0);
1635     uint8_t *dst1 = POS(1, x0, y0);
1636     uint8_t *dst2 = POS(2, x0, y0);
1637     int log2_min_cb_size = s->sps->log2_min_cb_size;
1638     int min_cb_width     = s->sps->min_cb_width;
1639     int x_cb             = x0 >> log2_min_cb_size;
1640     int y_cb             = y0 >> log2_min_cb_size;
1641     int ref_idx[2];
1642     int mvp_flag[2];
1643     int x_pu, y_pu;
1644     int i, j;
1645
1646     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1647         if (s->sh.max_num_merge_cand > 1)
1648             merge_idx = ff_hevc_merge_idx_decode(s);
1649         else
1650             merge_idx = 0;
1651
1652         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1653                                    1 << log2_cb_size,
1654                                    1 << log2_cb_size,
1655                                    log2_cb_size, partIdx,
1656                                    merge_idx, &current_mv);
1657         x_pu = x0 >> s->sps->log2_min_pu_size;
1658         y_pu = y0 >> s->sps->log2_min_pu_size;
1659
1660         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1661             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1662                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1663     } else { /* MODE_INTER */
1664         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1665         if (lc->pu.merge_flag) {
1666             if (s->sh.max_num_merge_cand > 1)
1667                 merge_idx = ff_hevc_merge_idx_decode(s);
1668             else
1669                 merge_idx = 0;
1670
1671             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1672                                        partIdx, merge_idx, &current_mv);
1673             x_pu = x0 >> s->sps->log2_min_pu_size;
1674             y_pu = y0 >> s->sps->log2_min_pu_size;
1675
1676             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1677                 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1678                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1679         } else {
1680             enum InterPredIdc inter_pred_idc = PRED_L0;
1681             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1682             if (s->sh.slice_type == B_SLICE)
1683                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1684
1685             if (inter_pred_idc != PRED_L1) {
1686                 if (s->sh.nb_refs[L0]) {
1687                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1688                     current_mv.ref_idx[0] = ref_idx[0];
1689                 }
1690                 current_mv.pred_flag[0] = 1;
1691                 hls_mvd_coding(s, x0, y0, 0);
1692                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1693                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1694                                          partIdx, merge_idx, &current_mv,
1695                                          mvp_flag[0], 0);
1696                 current_mv.mv[0].x += lc->pu.mvd.x;
1697                 current_mv.mv[0].y += lc->pu.mvd.y;
1698             }
1699
1700             if (inter_pred_idc != PRED_L0) {
1701                 if (s->sh.nb_refs[L1]) {
1702                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1703                     current_mv.ref_idx[1] = ref_idx[1];
1704                 }
1705
1706                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1707                     lc->pu.mvd.x = 0;
1708                     lc->pu.mvd.y = 0;
1709                 } else {
1710                     hls_mvd_coding(s, x0, y0, 1);
1711                 }
1712
1713                 current_mv.pred_flag[1] = 1;
1714                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1715                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1716                                          partIdx, merge_idx, &current_mv,
1717                                          mvp_flag[1], 1);
1718                 current_mv.mv[1].x += lc->pu.mvd.x;
1719                 current_mv.mv[1].y += lc->pu.mvd.y;
1720             }
1721
1722             x_pu = x0 >> s->sps->log2_min_pu_size;
1723             y_pu = y0 >> s->sps->log2_min_pu_size;
1724
1725             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1726                 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1727                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1728         }
1729     }
1730
1731     if (current_mv.pred_flag[0]) {
1732         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1733         if (!ref0)
1734             return;
1735         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1736     }
1737     if (current_mv.pred_flag[1]) {
1738         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1739         if (!ref1)
1740             return;
1741         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1742     }
1743
1744     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1745         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1746         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1747
1748         luma_mc(s, tmp, tmpstride, ref0->frame,
1749                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1750
1751         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1752             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1753             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1754                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1755                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1756                                      dst0, s->frame->linesize[0], tmp,
1757                                      tmpstride, nPbW, nPbH);
1758         } else {
1759             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1760         }
1761         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1762                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1763
1764         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1765             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1766             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1767                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1768                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1769                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1770                                      nPbW / 2, nPbH / 2);
1771             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1772                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1773                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1774                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1775                                      nPbW / 2, nPbH / 2);
1776         } else {
1777             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1778             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1779         }
1780     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1781         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1782         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1783
1784         if (!ref1)
1785             return;
1786
1787         luma_mc(s, tmp, tmpstride, ref1->frame,
1788                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1789
1790         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1791             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1792             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1793                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1794                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1795                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1796                                       nPbW, nPbH);
1797         } else {
1798             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1799         }
1800
1801         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1802                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1803
1804         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1805             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1806             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1807                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1808                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1809                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1810             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1811                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1812                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1813                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1814         } else {
1815             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1816             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1817         }
1818     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1819         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1820         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1821         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1822         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1823         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1824         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1825
1826         if (!ref0 || !ref1)
1827             return;
1828
1829         luma_mc(s, tmp, tmpstride, ref0->frame,
1830                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1831         luma_mc(s, tmp2, tmpstride, ref1->frame,
1832                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1833
1834         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1835             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1836             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1837                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1838                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1839                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1840                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1841                                          dst0, s->frame->linesize[0],
1842                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1843         } else {
1844             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1845                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1846         }
1847
1848         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1849                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1850         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1851                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1852
1853         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1854             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1855             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1856                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1857                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1858                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1859                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1860                                          dst1, s->frame->linesize[1], tmp, tmp3,
1861                                          tmpstride, nPbW / 2, nPbH / 2);
1862             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1863                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1864                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1865                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1866                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1867                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1868                                          tmpstride, nPbW / 2, nPbH / 2);
1869         } else {
1870             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1871             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1872         }
1873     }
1874 }
1875
1876 /**
1877  * 8.4.1
1878  */
1879 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1880                                 int prev_intra_luma_pred_flag)
1881 {
1882     HEVCLocalContext *lc = &s->HEVClc;
1883     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1884     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1885     int min_pu_width     = s->sps->min_pu_width;
1886     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1887     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1888     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1889
1890     int cand_up   = (lc->ctb_up_flag || y0b) ?
1891                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1892     int cand_left = (lc->ctb_left_flag || x0b) ?
1893                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1894
1895     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1896
1897     MvField *tab_mvf = s->ref->tab_mvf;
1898     int intra_pred_mode;
1899     int candidate[3];
1900     int i, j;
1901
1902     // intra_pred_mode prediction does not cross vertical CTB boundaries
1903     if ((y0 - 1) < y_ctb)
1904         cand_up = INTRA_DC;
1905
1906     if (cand_left == cand_up) {
1907         if (cand_left < 2) {
1908             candidate[0] = INTRA_PLANAR;
1909             candidate[1] = INTRA_DC;
1910             candidate[2] = INTRA_ANGULAR_26;
1911         } else {
1912             candidate[0] = cand_left;
1913             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1914             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1915         }
1916     } else {
1917         candidate[0] = cand_left;
1918         candidate[1] = cand_up;
1919         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1920             candidate[2] = INTRA_PLANAR;
1921         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1922             candidate[2] = INTRA_DC;
1923         } else {
1924             candidate[2] = INTRA_ANGULAR_26;
1925         }
1926     }
1927
1928     if (prev_intra_luma_pred_flag) {
1929         intra_pred_mode = candidate[lc->pu.mpm_idx];
1930     } else {
1931         if (candidate[0] > candidate[1])
1932             FFSWAP(uint8_t, candidate[0], candidate[1]);
1933         if (candidate[0] > candidate[2])
1934             FFSWAP(uint8_t, candidate[0], candidate[2]);
1935         if (candidate[1] > candidate[2])
1936             FFSWAP(uint8_t, candidate[1], candidate[2]);
1937
1938         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1939         for (i = 0; i < 3; i++)
1940             if (intra_pred_mode >= candidate[i])
1941                 intra_pred_mode++;
1942     }
1943
1944     /* write the intra prediction units into the mv array */
1945     if (!size_in_pus)
1946         size_in_pus = 1;
1947     for (i = 0; i < size_in_pus; i++) {
1948         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1949                intra_pred_mode, size_in_pus);
1950
1951         for (j = 0; j < size_in_pus; j++) {
1952             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1953             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1954             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1955             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1956             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1957             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1958             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1959             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1960             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1961         }
1962     }
1963
1964     return intra_pred_mode;
1965 }
1966
1967 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1968                                           int log2_cb_size, int ct_depth)
1969 {
1970     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1971     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1972     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1973     int y;
1974
1975     for (y = 0; y < length; y++)
1976         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1977                ct_depth, length);
1978 }
1979
1980 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1981                                   int log2_cb_size)
1982 {
1983     HEVCLocalContext *lc = &s->HEVClc;
1984     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1985     uint8_t prev_intra_luma_pred_flag[4];
1986     int split   = lc->cu.part_mode == PART_NxN;
1987     int pb_size = (1 << log2_cb_size) >> split;
1988     int side    = split + 1;
1989     int chroma_mode;
1990     int i, j;
1991
1992     for (i = 0; i < side; i++)
1993         for (j = 0; j < side; j++)
1994             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1995
1996     for (i = 0; i < side; i++) {
1997         for (j = 0; j < side; j++) {
1998             if (prev_intra_luma_pred_flag[2 * i + j])
1999                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2000             else
2001                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2002
2003             lc->pu.intra_pred_mode[2 * i + j] =
2004                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2005                                      prev_intra_luma_pred_flag[2 * i + j]);
2006         }
2007     }
2008
2009     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2010     if (chroma_mode != 4) {
2011         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2012             lc->pu.intra_pred_mode_c = 34;
2013         else
2014             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2015     } else {
2016         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2017     }
2018 }
2019
2020 static void intra_prediction_unit_default_value(HEVCContext *s,
2021                                                 int x0, int y0,
2022                                                 int log2_cb_size)
2023 {
2024     HEVCLocalContext *lc = &s->HEVClc;
2025     int pb_size          = 1 << log2_cb_size;
2026     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2027     int min_pu_width     = s->sps->min_pu_width;
2028     MvField *tab_mvf     = s->ref->tab_mvf;
2029     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2030     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2031     int j, k;
2032
2033     if (size_in_pus == 0)
2034         size_in_pus = 1;
2035     for (j = 0; j < size_in_pus; j++) {
2036         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2037         for (k = 0; k < size_in_pus; k++)
2038             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2039     }
2040 }
2041
2042 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2043 {
2044     int cb_size          = 1 << log2_cb_size;
2045     HEVCLocalContext *lc = &s->HEVClc;
2046     int log2_min_cb_size = s->sps->log2_min_cb_size;
2047     int length           = cb_size >> log2_min_cb_size;
2048     int min_cb_width     = s->sps->min_cb_width;
2049     int x_cb             = x0 >> log2_min_cb_size;
2050     int y_cb             = y0 >> log2_min_cb_size;
2051     int x, y;
2052
2053     lc->cu.x                = x0;
2054     lc->cu.y                = y0;
2055     lc->cu.rqt_root_cbf     = 1;
2056     lc->cu.pred_mode        = MODE_INTRA;
2057     lc->cu.part_mode        = PART_2Nx2N;
2058     lc->cu.intra_split_flag = 0;
2059     lc->cu.pcm_flag         = 0;
2060
2061     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2062     for (x = 0; x < 4; x++)
2063         lc->pu.intra_pred_mode[x] = 1;
2064     if (s->pps->transquant_bypass_enable_flag) {
2065         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2066         if (lc->cu.cu_transquant_bypass_flag)
2067             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2068     } else
2069         lc->cu.cu_transquant_bypass_flag = 0;
2070
2071     if (s->sh.slice_type != I_SLICE) {
2072         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2073
2074         lc->cu.pred_mode = MODE_SKIP;
2075         x = y_cb * min_cb_width + x_cb;
2076         for (y = 0; y < length; y++) {
2077             memset(&s->skip_flag[x], skip_flag, length);
2078             x += min_cb_width;
2079         }
2080         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2081     }
2082
2083     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2084         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2085         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2086
2087         if (!s->sh.disable_deblocking_filter_flag)
2088             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2089                                                   lc->slice_or_tiles_up_boundary,
2090                                                   lc->slice_or_tiles_left_boundary);
2091     } else {
2092         if (s->sh.slice_type != I_SLICE)
2093             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2094         if (lc->cu.pred_mode != MODE_INTRA ||
2095             log2_cb_size == s->sps->log2_min_cb_size) {
2096             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2097             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2098                                       lc->cu.pred_mode == MODE_INTRA;
2099         }
2100
2101         if (lc->cu.pred_mode == MODE_INTRA) {
2102             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2103                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2104                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2105                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2106             }
2107             if (lc->cu.pcm_flag) {
2108                 int ret;
2109                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2110                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2111                 if (s->sps->pcm.loop_filter_disable_flag)
2112                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2113
2114                 if (ret < 0)
2115                     return ret;
2116             } else {
2117                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2118             }
2119         } else {
2120             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2121             switch (lc->cu.part_mode) {
2122             case PART_2Nx2N:
2123                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2124                 break;
2125             case PART_2NxN:
2126                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2127                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2128                 break;
2129             case PART_Nx2N:
2130                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2131                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2132                 break;
2133             case PART_2NxnU:
2134                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2135                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2136                 break;
2137             case PART_2NxnD:
2138                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2139                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2140                 break;
2141             case PART_nLx2N:
2142                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2143                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2144                 break;
2145             case PART_nRx2N:
2146                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2147                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2148                 break;
2149             case PART_NxN:
2150                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2151                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2152                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2153                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2154                 break;
2155             }
2156         }
2157
2158         if (!lc->cu.pcm_flag) {
2159             if (lc->cu.pred_mode != MODE_INTRA &&
2160                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2161                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2162             }
2163             if (lc->cu.rqt_root_cbf) {
2164                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2165                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2166                                          s->sps->max_transform_hierarchy_depth_inter;
2167                 hls_transform_tree(s, x0, y0, x0, y0, x0, y0, log2_cb_size,
2168                                    log2_cb_size, 0, 0);
2169             } else {
2170                 if (!s->sh.disable_deblocking_filter_flag)
2171                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2172                                                           lc->slice_or_tiles_up_boundary,
2173                                                           lc->slice_or_tiles_left_boundary);
2174             }
2175         }
2176     }
2177
2178     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2179         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2180
2181     x = y_cb * min_cb_width + x_cb;
2182     for (y = 0; y < length; y++) {
2183         memset(&s->qp_y_tab[x], lc->qp_y, length);
2184         x += min_cb_width;
2185     }
2186
2187     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2188
2189     return 0;
2190 }
2191
2192 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2193                                int log2_cb_size, int cb_depth)
2194 {
2195     HEVCLocalContext *lc = &s->HEVClc;
2196     const int cb_size    = 1 << log2_cb_size;
2197
2198     lc->ct.depth = cb_depth;
2199     if (x0 + cb_size <= s->sps->width  &&
2200         y0 + cb_size <= s->sps->height &&
2201         log2_cb_size > s->sps->log2_min_cb_size) {
2202         SAMPLE(s->split_cu_flag, x0, y0) =
2203             ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2204     } else {
2205         SAMPLE(s->split_cu_flag, x0, y0) =
2206             (log2_cb_size > s->sps->log2_min_cb_size);
2207     }
2208     if (s->pps->cu_qp_delta_enabled_flag &&
2209         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2210         lc->tu.is_cu_qp_delta_coded = 0;
2211         lc->tu.cu_qp_delta          = 0;
2212     }
2213
2214     if (SAMPLE(s->split_cu_flag, x0, y0)) {
2215         const int cb_size_split = cb_size >> 1;
2216         const int x1 = x0 + cb_size_split;
2217         const int y1 = y0 + cb_size_split;
2218
2219         log2_cb_size--;
2220         cb_depth++;
2221
2222 #define SUBDIVIDE(x, y)                                                \
2223 do {                                                                   \
2224     if (x < s->sps->width && y < s->sps->height) {                     \
2225         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2226         if (ret < 0)                                                   \
2227             return ret;                                                \
2228     }                                                                  \
2229 } while (0)
2230
2231         SUBDIVIDE(x0, y0);
2232         SUBDIVIDE(x1, y0);
2233         SUBDIVIDE(x0, y1);
2234         SUBDIVIDE(x1, y1);
2235     } else {
2236         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2237         if (ret < 0)
2238             return ret;
2239     }
2240
2241     return 0;
2242 }
2243
2244 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2245                                  int ctb_addr_ts)
2246 {
2247     HEVCLocalContext *lc  = &s->HEVClc;
2248     int ctb_size          = 1 << s->sps->log2_ctb_size;
2249     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2250     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2251
2252     int tile_left_boundary, tile_up_boundary;
2253     int slice_left_boundary, slice_up_boundary;
2254
2255     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2256
2257     if (s->pps->entropy_coding_sync_enabled_flag) {
2258         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2259             lc->first_qp_group = 1;
2260         lc->end_of_tiles_x = s->sps->width;
2261     } else if (s->pps->tiles_enabled_flag) {
2262         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2263             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2264             lc->start_of_tiles_x = x_ctb;
2265             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2266             lc->first_qp_group   = 1;
2267         }
2268     } else {
2269         lc->end_of_tiles_x = s->sps->width;
2270     }
2271
2272     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2273
2274     if (s->pps->tiles_enabled_flag) {
2275         tile_left_boundary  = x_ctb > 0 &&
2276                               s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2277         slice_left_boundary = x_ctb > 0 &&
2278                               s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2279         tile_up_boundary  = y_ctb > 0 &&
2280                             s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2281         slice_up_boundary = y_ctb > 0 &&
2282                             s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2283     } else {
2284         tile_left_boundary  =
2285         tile_up_boundary    = 1;
2286         slice_left_boundary = ctb_addr_in_slice > 0;
2287         slice_up_boundary   = ctb_addr_in_slice >= s->sps->ctb_width;
2288     }
2289     lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2290     lc->slice_or_tiles_up_boundary   = (!slice_up_boundary + (!tile_up_boundary << 1));
2291     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2292     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2293     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2294     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2295 }
2296
2297 static int hls_slice_data(HEVCContext *s)
2298 {
2299     int ctb_size    = 1 << s->sps->log2_ctb_size;
2300     int more_data   = 1;
2301     int x_ctb       = 0;
2302     int y_ctb       = 0;
2303     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2304     int ret;
2305
2306     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2307         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2308
2309         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2310         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2311         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2312
2313         ff_hevc_cabac_init(s, ctb_addr_ts);
2314
2315         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2316
2317         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2318         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2319         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2320
2321         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2322         if (ret < 0)
2323             return ret;
2324         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2325
2326         ctb_addr_ts++;
2327         ff_hevc_save_states(s, ctb_addr_ts);
2328         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2329     }
2330
2331     if (x_ctb + ctb_size >= s->sps->width &&
2332         y_ctb + ctb_size >= s->sps->height)
2333         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2334
2335     return ctb_addr_ts;
2336 }
2337
2338 /**
2339  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2340  * 0 if the unit should be skipped, 1 otherwise
2341  */
2342 static int hls_nal_unit(HEVCContext *s)
2343 {
2344     GetBitContext *gb = &s->HEVClc.gb;
2345     int nuh_layer_id;
2346
2347     if (get_bits1(gb) != 0)
2348         return AVERROR_INVALIDDATA;
2349
2350     s->nal_unit_type = get_bits(gb, 6);
2351
2352     nuh_layer_id   = get_bits(gb, 6);
2353     s->temporal_id = get_bits(gb, 3) - 1;
2354     if (s->temporal_id < 0)
2355         return AVERROR_INVALIDDATA;
2356
2357     av_log(s->avctx, AV_LOG_DEBUG,
2358            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2359            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2360
2361     return nuh_layer_id == 0;
2362 }
2363
2364 static void restore_tqb_pixels(HEVCContext *s)
2365 {
2366     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2367     int x, y, c_idx;
2368
2369     for (c_idx = 0; c_idx < 3; c_idx++) {
2370         ptrdiff_t stride = s->frame->linesize[c_idx];
2371         int hshift       = s->sps->hshift[c_idx];
2372         int vshift       = s->sps->vshift[c_idx];
2373         for (y = 0; y < s->sps->min_pu_height; y++) {
2374             for (x = 0; x < s->sps->min_pu_width; x++) {
2375                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2376                     int n;
2377                     int len      = min_pu_size >> hshift;
2378                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2379                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2380                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2381                         memcpy(dst, src, len);
2382                         src += stride;
2383                         dst += stride;
2384                     }
2385                 }
2386             }
2387         }
2388     }
2389 }
2390
2391 static int set_side_data(HEVCContext *s)
2392 {
2393     AVFrame *out = s->ref->frame;
2394
2395     if (s->sei_frame_packing_present &&
2396         s->frame_packing_arrangement_type >= 3 &&
2397         s->frame_packing_arrangement_type <= 5 &&
2398         s->content_interpretation_type > 0 &&
2399         s->content_interpretation_type < 3) {
2400         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2401         if (!stereo)
2402             return AVERROR(ENOMEM);
2403
2404         switch (s->frame_packing_arrangement_type) {
2405         case 3:
2406             if (s->quincunx_subsampling)
2407                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2408             else
2409                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2410             break;
2411         case 4:
2412             stereo->type = AV_STEREO3D_TOPBOTTOM;
2413             break;
2414         case 5:
2415             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2416             break;
2417         }
2418
2419         if (s->content_interpretation_type == 2)
2420             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2421     }
2422
2423     return 0;
2424 }
2425
2426 static int hevc_frame_start(HEVCContext *s)
2427 {
2428     HEVCLocalContext *lc = &s->HEVClc;
2429     int ret;
2430
2431     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2432     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2433     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2434     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2435
2436     lc->start_of_tiles_x = 0;
2437     s->is_decoded        = 0;
2438
2439     if (s->pps->tiles_enabled_flag)
2440         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2441
2442     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2443                               s->poc);
2444     if (ret < 0)
2445         goto fail;
2446
2447     ret = ff_hevc_frame_rps(s);
2448     if (ret < 0) {
2449         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2450         goto fail;
2451     }
2452
2453     ret = set_side_data(s);
2454     if (ret < 0)
2455         goto fail;
2456
2457     av_frame_unref(s->output_frame);
2458     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2459     if (ret < 0)
2460         goto fail;
2461
2462     ff_thread_finish_setup(s->avctx);
2463
2464     return 0;
2465
2466 fail:
2467     if (s->ref)
2468         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2469     s->ref = NULL;
2470     return ret;
2471 }
2472
2473 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2474 {
2475     HEVCLocalContext *lc = &s->HEVClc;
2476     GetBitContext *gb    = &lc->gb;
2477     int ctb_addr_ts, ret;
2478
2479     ret = init_get_bits8(gb, nal, length);
2480     if (ret < 0)
2481         return ret;
2482
2483     ret = hls_nal_unit(s);
2484     if (ret < 0) {
2485         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2486                s->nal_unit_type);
2487         if (s->avctx->err_recognition & AV_EF_EXPLODE)
2488             return ret;
2489         return 0;
2490     } else if (!ret)
2491         return 0;
2492
2493     switch (s->nal_unit_type) {
2494     case NAL_VPS:
2495         ret = ff_hevc_decode_nal_vps(s);
2496         if (ret < 0)
2497             return ret;
2498         break;
2499     case NAL_SPS:
2500         ret = ff_hevc_decode_nal_sps(s);
2501         if (ret < 0)
2502             return ret;
2503         break;
2504     case NAL_PPS:
2505         ret = ff_hevc_decode_nal_pps(s);
2506         if (ret < 0)
2507             return ret;
2508         break;
2509     case NAL_SEI_PREFIX:
2510     case NAL_SEI_SUFFIX:
2511         ret = ff_hevc_decode_nal_sei(s);
2512         if (ret < 0)
2513             return ret;
2514         break;
2515     case NAL_TRAIL_R:
2516     case NAL_TRAIL_N:
2517     case NAL_TSA_N:
2518     case NAL_TSA_R:
2519     case NAL_STSA_N:
2520     case NAL_STSA_R:
2521     case NAL_BLA_W_LP:
2522     case NAL_BLA_W_RADL:
2523     case NAL_BLA_N_LP:
2524     case NAL_IDR_W_RADL:
2525     case NAL_IDR_N_LP:
2526     case NAL_CRA_NUT:
2527     case NAL_RADL_N:
2528     case NAL_RADL_R:
2529     case NAL_RASL_N:
2530     case NAL_RASL_R:
2531         ret = hls_slice_header(s);
2532         if (ret < 0)
2533             return ret;
2534
2535         if (s->max_ra == INT_MAX) {
2536             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2537                 s->max_ra = s->poc;
2538             } else {
2539                 if (IS_IDR(s))
2540                     s->max_ra = INT_MIN;
2541             }
2542         }
2543
2544         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2545             s->poc <= s->max_ra) {
2546             s->is_decoded = 0;
2547             break;
2548         } else {
2549             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2550                 s->max_ra = INT_MIN;
2551         }
2552
2553         if (s->sh.first_slice_in_pic_flag) {
2554             ret = hevc_frame_start(s);
2555             if (ret < 0)
2556                 return ret;
2557         } else if (!s->ref) {
2558             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2559             return AVERROR_INVALIDDATA;
2560         }
2561
2562         if (!s->sh.dependent_slice_segment_flag &&
2563             s->sh.slice_type != I_SLICE) {
2564             ret = ff_hevc_slice_rpl(s);
2565             if (ret < 0) {
2566                 av_log(s->avctx, AV_LOG_WARNING,
2567                        "Error constructing the reference lists for the current slice.\n");
2568                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
2569                     return ret;
2570             }
2571         }
2572
2573         ctb_addr_ts = hls_slice_data(s);
2574         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2575             s->is_decoded = 1;
2576             if ((s->pps->transquant_bypass_enable_flag ||
2577                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2578                 s->sps->sao_enabled)
2579                 restore_tqb_pixels(s);
2580         }
2581
2582         if (ctb_addr_ts < 0)
2583             return ctb_addr_ts;
2584         break;
2585     case NAL_EOS_NUT:
2586     case NAL_EOB_NUT:
2587         s->seq_decode = (s->seq_decode + 1) & 0xff;
2588         s->max_ra     = INT_MAX;
2589         break;
2590     case NAL_AUD:
2591     case NAL_FD_NUT:
2592         break;
2593     default:
2594         av_log(s->avctx, AV_LOG_INFO,
2595                "Skipping NAL unit %d\n", s->nal_unit_type);
2596     }
2597
2598     return 0;
2599 }
2600
2601 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2602  * between these functions would be nice. */
2603 static int extract_rbsp(const uint8_t *src, int length,
2604                         HEVCNAL *nal)
2605 {
2606     int i, si, di;
2607     uint8_t *dst;
2608
2609 #define STARTCODE_TEST                                                  \
2610         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2611             if (src[i + 2] != 3) {                                      \
2612                 /* startcode, so we must be past the end */             \
2613                 length = i;                                             \
2614             }                                                           \
2615             break;                                                      \
2616         }
2617 #if HAVE_FAST_UNALIGNED
2618 #define FIND_FIRST_ZERO                                                 \
2619         if (i > 0 && !src[i])                                           \
2620             i--;                                                        \
2621         while (src[i])                                                  \
2622             i++
2623 #if HAVE_FAST_64BIT
2624     for (i = 0; i + 1 < length; i += 9) {
2625         if (!((~AV_RN64A(src + i) &
2626                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2627               0x8000800080008080ULL))
2628             continue;
2629         FIND_FIRST_ZERO;
2630         STARTCODE_TEST;
2631         i -= 7;
2632     }
2633 #else
2634     for (i = 0; i + 1 < length; i += 5) {
2635         if (!((~AV_RN32A(src + i) &
2636                (AV_RN32A(src + i) - 0x01000101U)) &
2637               0x80008080U))
2638             continue;
2639         FIND_FIRST_ZERO;
2640         STARTCODE_TEST;
2641         i -= 3;
2642     }
2643 #endif /* HAVE_FAST_64BIT */
2644 #else
2645     for (i = 0; i + 1 < length; i += 2) {
2646         if (src[i])
2647             continue;
2648         if (i > 0 && src[i - 1] == 0)
2649             i--;
2650         STARTCODE_TEST;
2651     }
2652 #endif /* HAVE_FAST_UNALIGNED */
2653
2654     if (i >= length - 1) { // no escaped 0
2655         nal->data = src;
2656         nal->size = length;
2657         return length;
2658     }
2659
2660     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2661                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2662     if (!nal->rbsp_buffer)
2663         return AVERROR(ENOMEM);
2664
2665     dst = nal->rbsp_buffer;
2666
2667     memcpy(dst, src, i);
2668     si = di = i;
2669     while (si + 2 < length) {
2670         // remove escapes (very rare 1:2^22)
2671         if (src[si + 2] > 3) {
2672             dst[di++] = src[si++];
2673             dst[di++] = src[si++];
2674         } else if (src[si] == 0 && src[si + 1] == 0) {
2675             if (src[si + 2] == 3) { // escape
2676                 dst[di++] = 0;
2677                 dst[di++] = 0;
2678                 si       += 3;
2679
2680                 continue;
2681             } else // next start code
2682                 goto nsc;
2683         }
2684
2685         dst[di++] = src[si++];
2686     }
2687     while (si < length)
2688         dst[di++] = src[si++];
2689
2690 nsc:
2691     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2692
2693     nal->data = dst;
2694     nal->size = di;
2695     return si;
2696 }
2697
2698 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2699 {
2700     int i, consumed, ret = 0;
2701
2702     s->ref = NULL;
2703     s->eos = 0;
2704
2705     /* split the input packet into NAL units, so we know the upper bound on the
2706      * number of slices in the frame */
2707     s->nb_nals = 0;
2708     while (length >= 4) {
2709         HEVCNAL *nal;
2710         int extract_length = 0;
2711
2712         if (s->is_nalff) {
2713             int i;
2714             for (i = 0; i < s->nal_length_size; i++)
2715                 extract_length = (extract_length << 8) | buf[i];
2716             buf    += s->nal_length_size;
2717             length -= s->nal_length_size;
2718
2719             if (extract_length > length) {
2720                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2721                 ret = AVERROR_INVALIDDATA;
2722                 goto fail;
2723             }
2724         } else {
2725             if (buf[2] == 0) {
2726                 length--;
2727                 buf++;
2728                 continue;
2729             }
2730             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2731                 ret = AVERROR_INVALIDDATA;
2732                 goto fail;
2733             }
2734
2735             buf           += 3;
2736             length        -= 3;
2737             extract_length = length;
2738         }
2739
2740         if (s->nals_allocated < s->nb_nals + 1) {
2741             int new_size = s->nals_allocated + 1;
2742             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2743             if (!tmp) {
2744                 ret = AVERROR(ENOMEM);
2745                 goto fail;
2746             }
2747             s->nals = tmp;
2748             memset(s->nals + s->nals_allocated, 0,
2749                    (new_size - s->nals_allocated) * sizeof(*tmp));
2750             s->nals_allocated = new_size;
2751         }
2752         nal = &s->nals[s->nb_nals++];
2753
2754         consumed = extract_rbsp(buf, extract_length, nal);
2755         if (consumed < 0) {
2756             ret = consumed;
2757             goto fail;
2758         }
2759
2760         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2761         if (ret < 0)
2762             goto fail;
2763         hls_nal_unit(s);
2764
2765         if (s->nal_unit_type == NAL_EOB_NUT ||
2766             s->nal_unit_type == NAL_EOS_NUT)
2767             s->eos = 1;
2768
2769         buf    += consumed;
2770         length -= consumed;
2771     }
2772
2773     /* parse the NAL units */
2774     for (i = 0; i < s->nb_nals; i++) {
2775         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2776         if (ret < 0) {
2777             av_log(s->avctx, AV_LOG_WARNING,
2778                    "Error parsing NAL unit #%d.\n", i);
2779             if (s->avctx->err_recognition & AV_EF_EXPLODE)
2780                 goto fail;
2781         }
2782     }
2783
2784 fail:
2785     if (s->ref)
2786         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2787
2788     return ret;
2789 }
2790
/* Log the 16 bytes of an MD5 digest as two lowercase hex digits each. */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2797
2798 static int verify_md5(HEVCContext *s, AVFrame *frame)
2799 {
2800     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2801     int pixel_shift;
2802     int i, j;
2803
2804     if (!desc)
2805         return AVERROR(EINVAL);
2806
2807     pixel_shift = desc->comp[0].depth_minus1 > 7;
2808
2809     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2810            s->poc);
2811
2812     /* the checksums are LE, so we have to byteswap for >8bpp formats
2813      * on BE arches */
2814 #if HAVE_BIGENDIAN
2815     if (pixel_shift && !s->checksum_buf) {
2816         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2817                        FFMAX3(frame->linesize[0], frame->linesize[1],
2818                               frame->linesize[2]));
2819         if (!s->checksum_buf)
2820             return AVERROR(ENOMEM);
2821     }
2822 #endif
2823
2824     for (i = 0; frame->data[i]; i++) {
2825         int width  = s->avctx->coded_width;
2826         int height = s->avctx->coded_height;
2827         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2828         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2829         uint8_t md5[16];
2830
2831         av_md5_init(s->md5_ctx);
2832         for (j = 0; j < h; j++) {
2833             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2834 #if HAVE_BIGENDIAN
2835             if (pixel_shift) {
2836                 s->dsp.bswap16_buf((uint16_t*)s->checksum_buf,
2837                                    (const uint16_t*)src, w);
2838                 src = s->checksum_buf;
2839             }
2840 #endif
2841             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2842         }
2843         av_md5_final(s->md5_ctx, md5);
2844
2845         if (!memcmp(md5, s->md5[i], 16)) {
2846             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2847             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2848             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2849         } else {
2850             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2851             print_md5(s->avctx, AV_LOG_ERROR, md5);
2852             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2853             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2854             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2855             return AVERROR_INVALIDDATA;
2856         }
2857     }
2858
2859     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2860
2861     return 0;
2862 }
2863
2864 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2865                              AVPacket *avpkt)
2866 {
2867     int ret;
2868     HEVCContext *s = avctx->priv_data;
2869
2870     if (!avpkt->size) {
2871         ret = ff_hevc_output_frame(s, data, 1);
2872         if (ret < 0)
2873             return ret;
2874
2875         *got_output = ret;
2876         return 0;
2877     }
2878
2879     s->ref = NULL;
2880     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2881     if (ret < 0)
2882         return ret;
2883
2884     /* verify the SEI checksum */
2885     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2886         s->is_md5) {
2887         ret = verify_md5(s, s->ref->frame);
2888         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2889             ff_hevc_unref_frame(s, s->ref, ~0);
2890             return ret;
2891         }
2892     }
2893     s->is_md5 = 0;
2894
2895     if (s->is_decoded) {
2896         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2897         s->is_decoded = 0;
2898     }
2899
2900     if (s->output_frame->buf[0]) {
2901         av_frame_move_ref(data, s->output_frame);
2902         *got_output = 1;
2903     }
2904
2905     return avpkt->size;
2906 }
2907
2908 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2909 {
2910     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2911     if (ret < 0)
2912         return ret;
2913
2914     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2915     if (!dst->tab_mvf_buf)
2916         goto fail;
2917     dst->tab_mvf = src->tab_mvf;
2918
2919     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2920     if (!dst->rpl_tab_buf)
2921         goto fail;
2922     dst->rpl_tab = src->rpl_tab;
2923
2924     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2925     if (!dst->rpl_buf)
2926         goto fail;
2927
2928     dst->poc        = src->poc;
2929     dst->ctb_count  = src->ctb_count;
2930     dst->window     = src->window;
2931     dst->flags      = src->flags;
2932     dst->sequence   = src->sequence;
2933
2934     return 0;
2935 fail:
2936     ff_hevc_unref_frame(s, dst, ~0);
2937     return AVERROR(ENOMEM);
2938 }
2939
2940 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2941 {
2942     HEVCContext       *s = avctx->priv_data;
2943     int i;
2944
2945     pic_arrays_free(s);
2946
2947     av_freep(&s->md5_ctx);
2948
2949     av_frame_free(&s->tmp_frame);
2950     av_frame_free(&s->output_frame);
2951
2952     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2953         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2954         av_frame_free(&s->DPB[i].frame);
2955     }
2956
2957     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2958         av_buffer_unref(&s->vps_list[i]);
2959     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
2960         av_buffer_unref(&s->sps_list[i]);
2961     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
2962         av_buffer_unref(&s->pps_list[i]);
2963
2964     for (i = 0; i < s->nals_allocated; i++)
2965         av_freep(&s->nals[i].rbsp_buffer);
2966     av_freep(&s->nals);
2967     s->nals_allocated = 0;
2968
2969     return 0;
2970 }
2971
2972 static av_cold int hevc_init_context(AVCodecContext *avctx)
2973 {
2974     HEVCContext *s = avctx->priv_data;
2975     int i;
2976
2977     s->avctx = avctx;
2978
2979     s->tmp_frame = av_frame_alloc();
2980     if (!s->tmp_frame)
2981         goto fail;
2982
2983     s->output_frame = av_frame_alloc();
2984     if (!s->output_frame)
2985         goto fail;
2986
2987     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2988         s->DPB[i].frame = av_frame_alloc();
2989         if (!s->DPB[i].frame)
2990             goto fail;
2991         s->DPB[i].tf.f = s->DPB[i].frame;
2992     }
2993
2994     s->max_ra = INT_MAX;
2995
2996     s->md5_ctx = av_md5_alloc();
2997     if (!s->md5_ctx)
2998         goto fail;
2999
3000     ff_dsputil_init(&s->dsp, avctx);
3001
3002     s->context_initialized = 1;
3003
3004     return 0;
3005
3006 fail:
3007     hevc_decode_free(avctx);
3008     return AVERROR(ENOMEM);
3009 }
3010
3011 static int hevc_update_thread_context(AVCodecContext *dst,
3012                                       const AVCodecContext *src)
3013 {
3014     HEVCContext *s  = dst->priv_data;
3015     HEVCContext *s0 = src->priv_data;
3016     int i, ret;
3017
3018     if (!s->context_initialized) {
3019         ret = hevc_init_context(dst);
3020         if (ret < 0)
3021             return ret;
3022     }
3023
3024     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3025         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3026         if (s0->DPB[i].frame->buf[0]) {
3027             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3028             if (ret < 0)
3029                 return ret;
3030         }
3031     }
3032
3033     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3034         av_buffer_unref(&s->vps_list[i]);
3035         if (s0->vps_list[i]) {
3036             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3037             if (!s->vps_list[i])
3038                 return AVERROR(ENOMEM);
3039         }
3040     }
3041
3042     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3043         av_buffer_unref(&s->sps_list[i]);
3044         if (s0->sps_list[i]) {
3045             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3046             if (!s->sps_list[i])
3047                 return AVERROR(ENOMEM);
3048         }
3049     }
3050
3051     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3052         av_buffer_unref(&s->pps_list[i]);
3053         if (s0->pps_list[i]) {
3054             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3055             if (!s->pps_list[i])
3056                 return AVERROR(ENOMEM);
3057         }
3058     }
3059
3060     if (s->sps != s0->sps)
3061         ret = set_sps(s, s0->sps);
3062
3063     s->seq_decode = s0->seq_decode;
3064     s->seq_output = s0->seq_output;
3065     s->pocTid0    = s0->pocTid0;
3066     s->max_ra     = s0->max_ra;
3067
3068     s->is_nalff        = s0->is_nalff;
3069     s->nal_length_size = s0->nal_length_size;
3070
3071     if (s0->eos) {
3072         s->seq_decode = (s->seq_decode + 1) & 0xff;
3073         s->max_ra = INT_MAX;
3074     }
3075
3076     return 0;
3077 }
3078
3079 static int hevc_decode_extradata(HEVCContext *s)
3080 {
3081     AVCodecContext *avctx = s->avctx;
3082     GetByteContext gb;
3083     int ret;
3084
3085     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3086
3087     if (avctx->extradata_size > 3 &&
3088         (avctx->extradata[0] || avctx->extradata[1] ||
3089          avctx->extradata[2] > 1)) {
3090         /* It seems the extradata is encoded as hvcC format.
3091          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3092          * is finalized. When finalized, configurationVersion will be 1 and we
3093          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3094         int i, j, num_arrays, nal_len_size;
3095
3096         s->is_nalff = 1;
3097
3098         bytestream2_skip(&gb, 21);
3099         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3100         num_arrays   = bytestream2_get_byte(&gb);
3101
3102         /* nal units in the hvcC always have length coded with 2 bytes,
3103          * so put a fake nal_length_size = 2 while parsing them */
3104         s->nal_length_size = 2;
3105
3106         /* Decode nal units from hvcC. */
3107         for (i = 0; i < num_arrays; i++) {
3108             int type = bytestream2_get_byte(&gb) & 0x3f;
3109             int cnt  = bytestream2_get_be16(&gb);
3110
3111             for (j = 0; j < cnt; j++) {
3112                 // +2 for the nal size field
3113                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3114                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3115                     av_log(s->avctx, AV_LOG_ERROR,
3116                            "Invalid NAL unit size in extradata.\n");
3117                     return AVERROR_INVALIDDATA;
3118                 }
3119
3120                 ret = decode_nal_units(s, gb.buffer, nalsize);
3121                 if (ret < 0) {
3122                     av_log(avctx, AV_LOG_ERROR,
3123                            "Decoding nal unit %d %d from hvcC failed\n",
3124                            type, i);
3125                     return ret;
3126                 }
3127                 bytestream2_skip(&gb, nalsize);
3128             }
3129         }
3130
3131         /* Now store right nal length size, that will be used to parse
3132          * all other nals */
3133         s->nal_length_size = nal_len_size;
3134     } else {
3135         s->is_nalff = 0;
3136         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3137         if (ret < 0)
3138             return ret;
3139     }
3140     return 0;
3141 }
3142
3143 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3144 {
3145     HEVCContext *s = avctx->priv_data;
3146     int ret;
3147
3148     ff_init_cabac_states();
3149
3150     avctx->internal->allocate_progress = 1;
3151
3152     ret = hevc_init_context(avctx);
3153     if (ret < 0)
3154         return ret;
3155
3156     if (avctx->extradata_size > 0 && avctx->extradata) {
3157         ret = hevc_decode_extradata(s);
3158         if (ret < 0) {
3159             hevc_decode_free(avctx);
3160             return ret;
3161         }
3162     }
3163
3164     return 0;
3165 }
3166
3167 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3168 {
3169     HEVCContext *s = avctx->priv_data;
3170     int ret;
3171
3172     memset(s, 0, sizeof(*s));
3173
3174     ret = hevc_init_context(avctx);
3175     if (ret < 0)
3176         return ret;
3177
3178     return 0;
3179 }
3180
3181 static void hevc_decode_flush(AVCodecContext *avctx)
3182 {
3183     HEVCContext *s = avctx->priv_data;
3184     ff_hevc_flush_dpb(s);
3185     s->max_ra = INT_MAX;
3186 }
3187
/* Byte offset of an option's backing field inside HEVCContext. */
#define OFFSET(field) offsetof(HEVCContext, field)
/* Flags shared by the options below: video decoding parameter. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3190
3191 static const AVProfile profiles[] = {
3192     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3193     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3194     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3195     { FF_PROFILE_UNKNOWN },
3196 };
3197
3198 static const AVOption options[] = {
3199     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3200         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3201     { NULL },
3202 };
3203
3204 static const AVClass hevc_decoder_class = {
3205     .class_name = "HEVC decoder",
3206     .item_name  = av_default_item_name,
3207     .option     = options,
3208     .version    = LIBAVUTIL_VERSION_INT,
3209 };
3210
3211 AVCodec ff_hevc_decoder = {
3212     .name                  = "hevc",
3213     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3214     .type                  = AVMEDIA_TYPE_VIDEO,
3215     .id                    = AV_CODEC_ID_HEVC,
3216     .priv_data_size        = sizeof(HEVCContext),
3217     .priv_class            = &hevc_decoder_class,
3218     .init                  = hevc_decode_init,
3219     .close                 = hevc_decode_free,
3220     .decode                = hevc_decode_frame,
3221     .flush                 = hevc_decode_flush,
3222     .update_thread_context = hevc_update_thread_context,
3223     .init_thread_copy      = hevc_init_thread_copy,
3224     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3225                              CODEC_CAP_FRAME_THREADS,
3226     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3227 };