]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c
hevc: eliminate unnecessary cbf_c{b,r} arrays
[ffmpeg] / libavcodec / hevc.c
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/md5.h"
31 #include "libavutil/opt.h"
32 #include "libavutil/pixdesc.h"
33 #include "libavutil/stereo3d.h"
34
35 #include "bswapdsp.h"
36 #include "bytestream.h"
37 #include "cabac_functions.h"
38 #include "golomb.h"
39 #include "hevc.h"
40
/*
 * Per-index sample margins for quarter-pel interpolation.
 * NOTE(review): presumably indexed by the fractional (quarter-sample) part
 * of a motion vector, giving how many extra source rows/columns the filter
 * needs before and after the block -- confirm against the users of these
 * tables.
 * For every index, ff_hevc_qpel_extra[i] equals
 * ff_hevc_qpel_extra_before[i] + ff_hevc_qpel_extra_after[i].
 */
const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
44
/* Trivial scan for a single position: the only coordinate is 0. */
static const uint8_t scan_1x1[1] = { 0 };

/*
 * Horizontal (row-by-row) scan of a 2x2 grid: entry k of the _x/_y tables
 * holds the column/row visited at scan position k.
 */
static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };

static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };

/* Column visited at each position of a row-by-row scan of a 4x4 grid. */
static const uint8_t horiz_scan4x4_x[16] = {
    0, 1, 2, 3,
    0, 1, 2, 3,
    0, 1, 2, 3,
    0, 1, 2, 3,
};

/* Row visited at each position of a row-by-row scan of a 4x4 grid. */
static const uint8_t horiz_scan4x4_y[16] = {
    0, 0, 0, 0,
    1, 1, 1, 1,
    2, 2, 2, 2,
    3, 3, 3, 3,
};

/*
 * Inverse horizontal scan for an 8x8 grid: maps a coordinate pair to its
 * scan position. The 8x8 grid is split into four 4x4 quadrants of 16
 * consecutive positions each (0-15, 16-31, 32-47, 48-63); inside a quadrant
 * the positions advance row by row.
 * NOTE(review): presumably indexed [y][x] like the diagonal inverse tables
 * below -- confirm at the use sites.
 */
static const uint8_t horiz_scan8x8_inv[8][8] = {
    {  0,  1,  2,  3, 16, 17, 18, 19, },
    {  4,  5,  6,  7, 20, 21, 22, 23, },
    {  8,  9, 10, 11, 24, 25, 26, 27, },
    { 12, 13, 14, 15, 28, 29, 30, 31, },
    { 32, 33, 34, 35, 48, 49, 50, 51, },
    { 36, 37, 38, 39, 52, 53, 54, 55, },
    { 40, 41, 42, 43, 56, 57, 58, 59, },
    { 44, 45, 46, 47, 60, 61, 62, 63, },
};
75
/*
 * Diagonal scan tables.
 *
 * For each grid size, the _x/_y tables give the column/row visited at scan
 * position k; each anti-diagonal is walked from its bottom-left end
 * (largest y) to its top-right end (largest x). The matching _inv table is
 * the reverse mapping, indexed [y][x], and yields the scan position of a
 * coordinate (e.g. position 1 is x = 0, y = 1 and inv[1][0] == 1).
 */
static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };

static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };

/* Scan position of coordinate (x, y) in the 2x2 diagonal scan, as [y][x]. */
static const uint8_t diag_scan2x2_inv[2][2] = {
    { 0, 2, },
    { 1, 3, },
};

/* Column visited at each position of the 4x4 diagonal scan. */
const uint8_t ff_hevc_diag_scan4x4_x[16] = {
    0, 0, 1, 0,
    1, 2, 0, 1,
    2, 3, 1, 2,
    3, 2, 3, 3,
};

/* Row visited at each position of the 4x4 diagonal scan. */
const uint8_t ff_hevc_diag_scan4x4_y[16] = {
    0, 1, 0, 2,
    1, 0, 3, 2,
    1, 0, 3, 2,
    1, 3, 2, 3,
};

/* Scan position of coordinate (x, y) in the 4x4 diagonal scan, as [y][x]. */
static const uint8_t diag_scan4x4_inv[4][4] = {
    { 0,  2,  5,  9, },
    { 1,  4,  8, 12, },
    { 3,  7, 11, 14, },
    { 6, 10, 13, 15, },
};

/* Column visited at each position of the 8x8 diagonal scan. */
const uint8_t ff_hevc_diag_scan8x8_x[64] = {
    0, 0, 1, 0,
    1, 2, 0, 1,
    2, 3, 0, 1,
    2, 3, 4, 0,
    1, 2, 3, 4,
    5, 0, 1, 2,
    3, 4, 5, 6,
    0, 1, 2, 3,
    4, 5, 6, 7,
    1, 2, 3, 4,
    5, 6, 7, 2,
    3, 4, 5, 6,
    7, 3, 4, 5,
    6, 7, 4, 5,
    6, 7, 5, 6,
    7, 6, 7, 7,
};

/* Row visited at each position of the 8x8 diagonal scan. */
const uint8_t ff_hevc_diag_scan8x8_y[64] = {
    0, 1, 0, 2,
    1, 0, 3, 2,
    1, 0, 4, 3,
    2, 1, 0, 5,
    4, 3, 2, 1,
    0, 6, 5, 4,
    3, 2, 1, 0,
    7, 6, 5, 4,
    3, 2, 1, 0,
    7, 6, 5, 4,
    3, 2, 1, 7,
    6, 5, 4, 3,
    2, 7, 6, 5,
    4, 3, 7, 6,
    5, 4, 7, 6,
    5, 7, 6, 7,
};

/* Scan position of coordinate (x, y) in the 8x8 diagonal scan, as [y][x]. */
static const uint8_t diag_scan8x8_inv[8][8] = {
    {  0,  2,  5,  9, 14, 20, 27, 35, },
    {  1,  4,  8, 13, 19, 26, 34, 42, },
    {  3,  7, 12, 18, 25, 33, 41, 48, },
    {  6, 11, 17, 24, 32, 40, 47, 53, },
    { 10, 16, 23, 31, 39, 46, 52, 57, },
    { 15, 22, 30, 38, 45, 51, 56, 60, },
    { 21, 29, 37, 44, 50, 55, 59, 62, },
    { 28, 36, 43, 49, 54, 58, 61, 63, },
};
154
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
159
160 /**
161  * Section 5.7
162  */
163
164 /* free everything allocated  by pic_arrays_init() */
165 static void pic_arrays_free(HEVCContext *s)
166 {
167     av_freep(&s->sao);
168     av_freep(&s->deblock);
169
170     av_freep(&s->skip_flag);
171     av_freep(&s->tab_ct_depth);
172
173     av_freep(&s->tab_ipm);
174     av_freep(&s->cbf_luma);
175     av_freep(&s->is_pcm);
176
177     av_freep(&s->qp_y_tab);
178     av_freep(&s->tab_slice_address);
179     av_freep(&s->filter_slice_edges);
180
181     av_freep(&s->horizontal_bs);
182     av_freep(&s->vertical_bs);
183
184     av_buffer_pool_uninit(&s->tab_mvf_pool);
185     av_buffer_pool_uninit(&s->rpl_tab_pool);
186 }
187
188 /* allocate arrays that depend on frame dimensions */
189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
190 {
191     int log2_min_cb_size = sps->log2_min_cb_size;
192     int width            = sps->width;
193     int height           = sps->height;
194     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
195                            ((height >> log2_min_cb_size) + 1);
196     int ctb_count        = sps->ctb_width * sps->ctb_height;
197     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
198
199     s->bs_width  = width  >> 3;
200     s->bs_height = height >> 3;
201
202     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
203     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
204     if (!s->sao || !s->deblock)
205         goto fail;
206
207     s->skip_flag    = av_malloc(pic_size_in_ctb);
208     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
209     if (!s->skip_flag || !s->tab_ct_depth)
210         goto fail;
211
212     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
213     s->tab_ipm  = av_mallocz(min_pu_size);
214     s->is_pcm   = av_malloc(min_pu_size);
215     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
216         goto fail;
217
218     s->filter_slice_edges = av_malloc(ctb_count);
219     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
220                                       sizeof(*s->tab_slice_address));
221     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
222                                       sizeof(*s->qp_y_tab));
223     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
224         goto fail;
225
226     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
227     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
228     if (!s->horizontal_bs || !s->vertical_bs)
229         goto fail;
230
231     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
232                                           av_buffer_alloc);
233     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
234                                           av_buffer_allocz);
235     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
236         goto fail;
237
238     return 0;
239
240 fail:
241     pic_arrays_free(s);
242     return AVERROR(ENOMEM);
243 }
244
245 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
246 {
247     int i = 0;
248     int j = 0;
249     uint8_t luma_weight_l0_flag[16];
250     uint8_t chroma_weight_l0_flag[16];
251     uint8_t luma_weight_l1_flag[16];
252     uint8_t chroma_weight_l1_flag[16];
253
254     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
255     if (s->sps->chroma_format_idc != 0) {
256         int delta = get_se_golomb(gb);
257         s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
258     }
259
260     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
261         luma_weight_l0_flag[i] = get_bits1(gb);
262         if (!luma_weight_l0_flag[i]) {
263             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
264             s->sh.luma_offset_l0[i] = 0;
265         }
266     }
267     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
268         for (i = 0; i < s->sh.nb_refs[L0]; i++)
269             chroma_weight_l0_flag[i] = get_bits1(gb);
270     } else {
271         for (i = 0; i < s->sh.nb_refs[L0]; i++)
272             chroma_weight_l0_flag[i] = 0;
273     }
274     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
275         if (luma_weight_l0_flag[i]) {
276             int delta_luma_weight_l0 = get_se_golomb(gb);
277             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
278             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
279         }
280         if (chroma_weight_l0_flag[i]) {
281             for (j = 0; j < 2; j++) {
282                 int delta_chroma_weight_l0 = get_se_golomb(gb);
283                 int delta_chroma_offset_l0 = get_se_golomb(gb);
284                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
285                 s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
286                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
287             }
288         } else {
289             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
290             s->sh.chroma_offset_l0[i][0] = 0;
291             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
292             s->sh.chroma_offset_l0[i][1] = 0;
293         }
294     }
295     if (s->sh.slice_type == B_SLICE) {
296         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
297             luma_weight_l1_flag[i] = get_bits1(gb);
298             if (!luma_weight_l1_flag[i]) {
299                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
300                 s->sh.luma_offset_l1[i] = 0;
301             }
302         }
303         if (s->sps->chroma_format_idc != 0) {
304             for (i = 0; i < s->sh.nb_refs[L1]; i++)
305                 chroma_weight_l1_flag[i] = get_bits1(gb);
306         } else {
307             for (i = 0; i < s->sh.nb_refs[L1]; i++)
308                 chroma_weight_l1_flag[i] = 0;
309         }
310         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
311             if (luma_weight_l1_flag[i]) {
312                 int delta_luma_weight_l1 = get_se_golomb(gb);
313                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
314                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
315             }
316             if (chroma_weight_l1_flag[i]) {
317                 for (j = 0; j < 2; j++) {
318                     int delta_chroma_weight_l1 = get_se_golomb(gb);
319                     int delta_chroma_offset_l1 = get_se_golomb(gb);
320                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
321                     s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
322                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
323                 }
324             } else {
325                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
326                 s->sh.chroma_offset_l1[i][0] = 0;
327                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
328                 s->sh.chroma_offset_l1[i][1] = 0;
329             }
330         }
331     }
332 }
333
334 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
335 {
336     const HEVCSPS *sps = s->sps;
337     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
338     int prev_delta_msb = 0;
339     unsigned int nb_sps = 0, nb_sh;
340     int i;
341
342     rps->nb_refs = 0;
343     if (!sps->long_term_ref_pics_present_flag)
344         return 0;
345
346     if (sps->num_long_term_ref_pics_sps > 0)
347         nb_sps = get_ue_golomb_long(gb);
348     nb_sh = get_ue_golomb_long(gb);
349
350     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
351         return AVERROR_INVALIDDATA;
352
353     rps->nb_refs = nb_sh + nb_sps;
354
355     for (i = 0; i < rps->nb_refs; i++) {
356         uint8_t delta_poc_msb_present;
357
358         if (i < nb_sps) {
359             uint8_t lt_idx_sps = 0;
360
361             if (sps->num_long_term_ref_pics_sps > 1)
362                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
363
364             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
365             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
366         } else {
367             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
368             rps->used[i] = get_bits1(gb);
369         }
370
371         delta_poc_msb_present = get_bits1(gb);
372         if (delta_poc_msb_present) {
373             int delta = get_ue_golomb_long(gb);
374
375             if (i && i != nb_sps)
376                 delta += prev_delta_msb;
377
378             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
379             prev_delta_msb = delta;
380         }
381     }
382
383     return 0;
384 }
385
386 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
387 {
388     int ret;
389     unsigned int num = 0, den = 0;
390
391     pic_arrays_free(s);
392     ret = pic_arrays_init(s, sps);
393     if (ret < 0)
394         goto fail;
395
396     s->avctx->coded_width         = sps->width;
397     s->avctx->coded_height        = sps->height;
398     s->avctx->width               = sps->output_width;
399     s->avctx->height              = sps->output_height;
400     s->avctx->pix_fmt             = sps->pix_fmt;
401     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
402
403     ff_set_sar(s->avctx, sps->vui.sar);
404
405     if (sps->vui.video_signal_type_present_flag)
406         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
407                                                                : AVCOL_RANGE_MPEG;
408     else
409         s->avctx->color_range = AVCOL_RANGE_MPEG;
410
411     if (sps->vui.colour_description_present_flag) {
412         s->avctx->color_primaries = sps->vui.colour_primaries;
413         s->avctx->color_trc       = sps->vui.transfer_characteristic;
414         s->avctx->colorspace      = sps->vui.matrix_coeffs;
415     } else {
416         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
417         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
418         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
419     }
420
421     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
422     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
423     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
424
425     if (sps->sao_enabled) {
426         av_frame_unref(s->tmp_frame);
427         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
428         if (ret < 0)
429             goto fail;
430         s->frame = s->tmp_frame;
431     }
432
433     s->sps = sps;
434     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
435
436     if (s->vps->vps_timing_info_present_flag) {
437         num = s->vps->vps_num_units_in_tick;
438         den = s->vps->vps_time_scale;
439     } else if (sps->vui.vui_timing_info_present_flag) {
440         num = sps->vui.vui_num_units_in_tick;
441         den = sps->vui.vui_time_scale;
442     }
443
444     if (num != 0 && den != 0)
445         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
446                   num, den, 1 << 30);
447
448     return 0;
449
450 fail:
451     pic_arrays_free(s);
452     s->sps = NULL;
453     return ret;
454 }
455
456 static int hls_slice_header(HEVCContext *s)
457 {
458     GetBitContext *gb = &s->HEVClc.gb;
459     SliceHeader *sh   = &s->sh;
460     int i, ret;
461
462     // Coded parameters
463     sh->first_slice_in_pic_flag = get_bits1(gb);
464     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
465         s->seq_decode = (s->seq_decode + 1) & 0xff;
466         s->max_ra     = INT_MAX;
467         if (IS_IDR(s))
468             ff_hevc_clear_refs(s);
469     }
470     if (IS_IRAP(s))
471         sh->no_output_of_prior_pics_flag = get_bits1(gb);
472
473     sh->pps_id = get_ue_golomb_long(gb);
474     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
475         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
476         return AVERROR_INVALIDDATA;
477     }
478     if (!sh->first_slice_in_pic_flag &&
479         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
480         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
481         return AVERROR_INVALIDDATA;
482     }
483     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
484
485     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
486         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
487
488         ff_hevc_clear_refs(s);
489         ret = set_sps(s, s->sps);
490         if (ret < 0)
491             return ret;
492
493         s->seq_decode = (s->seq_decode + 1) & 0xff;
494         s->max_ra     = INT_MAX;
495     }
496
497     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
498     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
499
500     sh->dependent_slice_segment_flag = 0;
501     if (!sh->first_slice_in_pic_flag) {
502         int slice_address_length;
503
504         if (s->pps->dependent_slice_segments_enabled_flag)
505             sh->dependent_slice_segment_flag = get_bits1(gb);
506
507         slice_address_length = av_ceil_log2(s->sps->ctb_width *
508                                             s->sps->ctb_height);
509         sh->slice_segment_addr = get_bits(gb, slice_address_length);
510         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
511             av_log(s->avctx, AV_LOG_ERROR,
512                    "Invalid slice segment address: %u.\n",
513                    sh->slice_segment_addr);
514             return AVERROR_INVALIDDATA;
515         }
516
517         if (!sh->dependent_slice_segment_flag) {
518             sh->slice_addr = sh->slice_segment_addr;
519             s->slice_idx++;
520         }
521     } else {
522         sh->slice_segment_addr = sh->slice_addr = 0;
523         s->slice_idx           = 0;
524         s->slice_initialized   = 0;
525     }
526
527     if (!sh->dependent_slice_segment_flag) {
528         s->slice_initialized = 0;
529
530         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
531             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
532
533         sh->slice_type = get_ue_golomb_long(gb);
534         if (!(sh->slice_type == I_SLICE ||
535               sh->slice_type == P_SLICE ||
536               sh->slice_type == B_SLICE)) {
537             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
538                    sh->slice_type);
539             return AVERROR_INVALIDDATA;
540         }
541         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
542             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
543             return AVERROR_INVALIDDATA;
544         }
545
546         // when flag is not present, picture is inferred to be output
547         sh->pic_output_flag = 1;
548         if (s->pps->output_flag_present_flag)
549             sh->pic_output_flag = get_bits1(gb);
550
551         if (s->sps->separate_colour_plane_flag)
552             sh->colour_plane_id = get_bits(gb, 2);
553
554         if (!IS_IDR(s)) {
555             int short_term_ref_pic_set_sps_flag, poc;
556
557             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
558             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
559             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
560                 av_log(s->avctx, AV_LOG_WARNING,
561                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
562                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
563                     return AVERROR_INVALIDDATA;
564                 poc = s->poc;
565             }
566             s->poc = poc;
567
568             short_term_ref_pic_set_sps_flag = get_bits1(gb);
569             if (!short_term_ref_pic_set_sps_flag) {
570                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
571                 if (ret < 0)
572                     return ret;
573
574                 sh->short_term_rps = &sh->slice_rps;
575             } else {
576                 int numbits, rps_idx;
577
578                 if (!s->sps->nb_st_rps) {
579                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
580                     return AVERROR_INVALIDDATA;
581                 }
582
583                 numbits = av_ceil_log2(s->sps->nb_st_rps);
584                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
585                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
586             }
587
588             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
589             if (ret < 0) {
590                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
591                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
592                     return AVERROR_INVALIDDATA;
593             }
594
595             if (s->sps->sps_temporal_mvp_enabled_flag)
596                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
597             else
598                 sh->slice_temporal_mvp_enabled_flag = 0;
599         } else {
600             s->sh.short_term_rps = NULL;
601             s->poc               = 0;
602         }
603
604         /* 8.3.1 */
605         if (s->temporal_id == 0 &&
606             s->nal_unit_type != NAL_TRAIL_N &&
607             s->nal_unit_type != NAL_TSA_N   &&
608             s->nal_unit_type != NAL_STSA_N  &&
609             s->nal_unit_type != NAL_RADL_N  &&
610             s->nal_unit_type != NAL_RADL_R  &&
611             s->nal_unit_type != NAL_RASL_N  &&
612             s->nal_unit_type != NAL_RASL_R)
613             s->pocTid0 = s->poc;
614
615         if (s->sps->sao_enabled) {
616             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
617             sh->slice_sample_adaptive_offset_flag[1] =
618             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
619         } else {
620             sh->slice_sample_adaptive_offset_flag[0] = 0;
621             sh->slice_sample_adaptive_offset_flag[1] = 0;
622             sh->slice_sample_adaptive_offset_flag[2] = 0;
623         }
624
625         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
626         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
627             int nb_refs;
628
629             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
630             if (sh->slice_type == B_SLICE)
631                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
632
633             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
634                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
635                 if (sh->slice_type == B_SLICE)
636                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
637             }
638             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
639                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
640                        sh->nb_refs[L0], sh->nb_refs[L1]);
641                 return AVERROR_INVALIDDATA;
642             }
643
644             sh->rpl_modification_flag[0] = 0;
645             sh->rpl_modification_flag[1] = 0;
646             nb_refs = ff_hevc_frame_nb_refs(s);
647             if (!nb_refs) {
648                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
649                 return AVERROR_INVALIDDATA;
650             }
651
652             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
653                 sh->rpl_modification_flag[0] = get_bits1(gb);
654                 if (sh->rpl_modification_flag[0]) {
655                     for (i = 0; i < sh->nb_refs[L0]; i++)
656                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
657                 }
658
659                 if (sh->slice_type == B_SLICE) {
660                     sh->rpl_modification_flag[1] = get_bits1(gb);
661                     if (sh->rpl_modification_flag[1] == 1)
662                         for (i = 0; i < sh->nb_refs[L1]; i++)
663                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
664                 }
665             }
666
667             if (sh->slice_type == B_SLICE)
668                 sh->mvd_l1_zero_flag = get_bits1(gb);
669
670             if (s->pps->cabac_init_present_flag)
671                 sh->cabac_init_flag = get_bits1(gb);
672             else
673                 sh->cabac_init_flag = 0;
674
675             sh->collocated_ref_idx = 0;
676             if (sh->slice_temporal_mvp_enabled_flag) {
677                 sh->collocated_list = L0;
678                 if (sh->slice_type == B_SLICE)
679                     sh->collocated_list = !get_bits1(gb);
680
681                 if (sh->nb_refs[sh->collocated_list] > 1) {
682                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
683                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
684                         av_log(s->avctx, AV_LOG_ERROR,
685                                "Invalid collocated_ref_idx: %d.\n",
686                                sh->collocated_ref_idx);
687                         return AVERROR_INVALIDDATA;
688                     }
689                 }
690             }
691
692             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
693                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
694                 pred_weight_table(s, gb);
695             }
696
697             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
698             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
699                 av_log(s->avctx, AV_LOG_ERROR,
700                        "Invalid number of merging MVP candidates: %d.\n",
701                        sh->max_num_merge_cand);
702                 return AVERROR_INVALIDDATA;
703             }
704         }
705
706         sh->slice_qp_delta = get_se_golomb(gb);
707
708         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
709             sh->slice_cb_qp_offset = get_se_golomb(gb);
710             sh->slice_cr_qp_offset = get_se_golomb(gb);
711         } else {
712             sh->slice_cb_qp_offset = 0;
713             sh->slice_cr_qp_offset = 0;
714         }
715
716         if (s->pps->deblocking_filter_control_present_flag) {
717             int deblocking_filter_override_flag = 0;
718
719             if (s->pps->deblocking_filter_override_enabled_flag)
720                 deblocking_filter_override_flag = get_bits1(gb);
721
722             if (deblocking_filter_override_flag) {
723                 sh->disable_deblocking_filter_flag = get_bits1(gb);
724                 if (!sh->disable_deblocking_filter_flag) {
725                     sh->beta_offset = get_se_golomb(gb) * 2;
726                     sh->tc_offset   = get_se_golomb(gb) * 2;
727                 }
728             } else {
729                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
730                 sh->beta_offset                    = s->pps->beta_offset;
731                 sh->tc_offset                      = s->pps->tc_offset;
732             }
733         } else {
734             sh->disable_deblocking_filter_flag = 0;
735             sh->beta_offset                    = 0;
736             sh->tc_offset                      = 0;
737         }
738
739         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
740             (sh->slice_sample_adaptive_offset_flag[0] ||
741              sh->slice_sample_adaptive_offset_flag[1] ||
742              !sh->disable_deblocking_filter_flag)) {
743             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
744         } else {
745             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
746         }
747     } else if (!s->slice_initialized) {
748         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
749         return AVERROR_INVALIDDATA;
750     }
751
752     sh->num_entry_point_offsets = 0;
753     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
754         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
755         if (sh->num_entry_point_offsets > 0) {
756             int offset_len = get_ue_golomb_long(gb) + 1;
757
758             for (i = 0; i < sh->num_entry_point_offsets; i++)
759                 skip_bits(gb, offset_len);
760         }
761     }
762
763     if (s->pps->slice_header_extension_present_flag) {
764         unsigned int length = get_ue_golomb_long(gb);
765         for (i = 0; i < length; i++)
766             skip_bits(gb, 8);  // slice_header_extension_data_byte
767     }
768
769     // Inferred parameters
770     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
771     if (sh->slice_qp > 51 ||
772         sh->slice_qp < -s->sps->qp_bd_offset) {
773         av_log(s->avctx, AV_LOG_ERROR,
774                "The slice_qp %d is outside the valid range "
775                "[%d, 51].\n",
776                sh->slice_qp,
777                -s->sps->qp_bd_offset);
778         return AVERROR_INVALIDDATA;
779     }
780
781     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
782
783     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
784         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
785         return AVERROR_INVALIDDATA;
786     }
787
788     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
789
790     if (!s->pps->cu_qp_delta_enabled_flag)
791         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
792                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
793
794     s->slice_initialized = 1;
795
796     return 0;
797 }
798
/*
 * Access the entry for column x, row y of a table laid out row by row with
 * s->sps->ctb_width entries per row. Relies on a variable named `s` being
 * in scope at the point of use.
 */
#define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
800
/*
 * Set one field of the current SAO parameters.
 *
 * Without any merge flag the field is assigned `value`; with the left merge
 * flag it is copied from the entry at (rx - 1, ry), otherwise with the up
 * merge flag from the entry at (rx, ry - 1). `value` is evaluated only in
 * the first case, so a decode call passed as `value` is not executed when
 * the field is merged.
 *
 * The final `else` branch cannot be reached: when the first condition is
 * false at least one of the two merge flags is set.
 *
 * Expects `s`, `sao`, `rx`, `ry`, `sao_merge_left_flag` and
 * `sao_merge_up_flag` to be in scope at the point of use.
 */
#define SET_SAO(elem, value)                            \
do {                                                    \
    if (!sao_merge_up_flag && !sao_merge_left_flag)     \
        sao->elem = value;                              \
    else if (sao_merge_left_flag)                       \
        sao->elem = CTB(s->sao, rx-1, ry).elem;         \
    else if (sao_merge_up_flag)                         \
        sao->elem = CTB(s->sao, rx, ry-1).elem;         \
    else                                                \
        sao->elem = 0;                                  \
} while (0)
812
813 static void hls_sao_param(HEVCContext *s, int rx, int ry)
814 {
815     HEVCLocalContext *lc    = &s->HEVClc;
816     int sao_merge_left_flag = 0;
817     int sao_merge_up_flag   = 0;
818     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
819     SAOParams *sao          = &CTB(s->sao, rx, ry);
820     int c_idx, i;
821
822     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
823         s->sh.slice_sample_adaptive_offset_flag[1]) {
824         if (rx > 0) {
825             if (lc->ctb_left_flag)
826                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
827         }
828         if (ry > 0 && !sao_merge_left_flag) {
829             if (lc->ctb_up_flag)
830                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
831         }
832     }
833
834     for (c_idx = 0; c_idx < 3; c_idx++) {
835         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
836             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
837             continue;
838         }
839
840         if (c_idx == 2) {
841             sao->type_idx[2] = sao->type_idx[1];
842             sao->eo_class[2] = sao->eo_class[1];
843         } else {
844             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
845         }
846
847         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
848             continue;
849
850         for (i = 0; i < 4; i++)
851             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
852
853         if (sao->type_idx[c_idx] == SAO_BAND) {
854             for (i = 0; i < 4; i++) {
855                 if (sao->offset_abs[c_idx][i]) {
856                     SET_SAO(offset_sign[c_idx][i],
857                             ff_hevc_sao_offset_sign_decode(s));
858                 } else {
859                     sao->offset_sign[c_idx][i] = 0;
860                 }
861             }
862             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
863         } else if (c_idx != 2) {
864             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
865         }
866
867         // Inferred parameters
868         sao->offset_val[c_idx][0] = 0;
869         for (i = 0; i < 4; i++) {
870             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
871             if (sao->type_idx[c_idx] == SAO_EDGE) {
872                 if (i > 1)
873                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
874             } else if (sao->offset_sign[c_idx][i]) {
875                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
876             }
877         }
878     }
879 }
880
881 #undef SET_SAO
882 #undef CTB
883
884 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
885                                 int log2_trafo_size, enum ScanType scan_idx,
886                                 int c_idx)
887 {
888 #define GET_COORD(offset, n)                                    \
889     do {                                                        \
890         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
891         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
892     } while (0)
893     HEVCLocalContext *lc    = &s->HEVClc;
894     int transform_skip_flag = 0;
895
896     int last_significant_coeff_x, last_significant_coeff_y;
897     int last_scan_pos;
898     int n_end;
899     int num_coeff    = 0;
900     int greater1_ctx = 1;
901
902     int num_last_subset;
903     int x_cg_last_sig, y_cg_last_sig;
904
905     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
906
907     ptrdiff_t stride = s->frame->linesize[c_idx];
908     int hshift       = s->sps->hshift[c_idx];
909     int vshift       = s->sps->vshift[c_idx];
910     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
911                                               ((x0 >> hshift) << s->sps->pixel_shift)];
912     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
913     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
914
915     int trafo_size = 1 << log2_trafo_size;
916     int i, qp, shift, add, scale, scale_m;
917     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
918     const uint8_t *scale_matrix;
919     uint8_t dc_scale;
920
921     // Derive QP for dequant
922     if (!lc->cu.cu_transquant_bypass_flag) {
923         static const int qp_c[] = {
924             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
925         };
926
927         static const uint8_t rem6[51 + 2 * 6 + 1] = {
928             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
929             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
930             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
931         };
932
933         static const uint8_t div6[51 + 2 * 6 + 1] = {
934             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
935             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
936             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
937         };
938         int qp_y = lc->qp_y;
939
940         if (c_idx == 0) {
941             qp = qp_y + s->sps->qp_bd_offset;
942         } else {
943             int qp_i, offset;
944
945             if (c_idx == 1)
946                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
947             else
948                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
949
950             qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
951             if (qp_i < 30)
952                 qp = qp_i;
953             else if (qp_i > 43)
954                 qp = qp_i - 6;
955             else
956                 qp = qp_c[qp_i - 30];
957
958             qp += s->sps->qp_bd_offset;
959         }
960
961         shift    = s->sps->bit_depth + log2_trafo_size - 5;
962         add      = 1 << (shift - 1);
963         scale    = level_scale[rem6[qp]] << (div6[qp]);
964         scale_m  = 16; // default when no custom scaling lists.
965         dc_scale = 16;
966
967         if (s->sps->scaling_list_enable_flag) {
968             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
969                                     &s->pps->scaling_list : &s->sps->scaling_list;
970             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
971
972             if (log2_trafo_size != 5)
973                 matrix_id = 3 * matrix_id + c_idx;
974
975             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
976             if (log2_trafo_size >= 4)
977                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
978         }
979     }
980
981     if (s->pps->transform_skip_enabled_flag &&
982         !lc->cu.cu_transquant_bypass_flag   &&
983         log2_trafo_size == 2) {
984         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
985     }
986
987     last_significant_coeff_x =
988         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
989     last_significant_coeff_y =
990         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
991
992     if (last_significant_coeff_x > 3) {
993         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
994         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
995                                    (2 + (last_significant_coeff_x & 1)) +
996                                    suffix;
997     }
998
999     if (last_significant_coeff_y > 3) {
1000         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1001         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1002                                    (2 + (last_significant_coeff_y & 1)) +
1003                                    suffix;
1004     }
1005
1006     if (scan_idx == SCAN_VERT)
1007         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1008
1009     x_cg_last_sig = last_significant_coeff_x >> 2;
1010     y_cg_last_sig = last_significant_coeff_y >> 2;
1011
1012     switch (scan_idx) {
1013     case SCAN_DIAG: {
1014         int last_x_c = last_significant_coeff_x & 3;
1015         int last_y_c = last_significant_coeff_y & 3;
1016
1017         scan_x_off = ff_hevc_diag_scan4x4_x;
1018         scan_y_off = ff_hevc_diag_scan4x4_y;
1019         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1020         if (trafo_size == 4) {
1021             scan_x_cg = scan_1x1;
1022             scan_y_cg = scan_1x1;
1023         } else if (trafo_size == 8) {
1024             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1025             scan_x_cg  = diag_scan2x2_x;
1026             scan_y_cg  = diag_scan2x2_y;
1027         } else if (trafo_size == 16) {
1028             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1029             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1030             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1031         } else { // trafo_size == 32
1032             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1033             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1034             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1035         }
1036         break;
1037     }
1038     case SCAN_HORIZ:
1039         scan_x_cg  = horiz_scan2x2_x;
1040         scan_y_cg  = horiz_scan2x2_y;
1041         scan_x_off = horiz_scan4x4_x;
1042         scan_y_off = horiz_scan4x4_y;
1043         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1044         break;
1045     default: //SCAN_VERT
1046         scan_x_cg  = horiz_scan2x2_y;
1047         scan_y_cg  = horiz_scan2x2_x;
1048         scan_x_off = horiz_scan4x4_y;
1049         scan_y_off = horiz_scan4x4_x;
1050         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1051         break;
1052     }
1053     num_coeff++;
1054     num_last_subset = (num_coeff - 1) >> 4;
1055
1056     for (i = num_last_subset; i >= 0; i--) {
1057         int n, m;
1058         int x_cg, y_cg, x_c, y_c;
1059         int implicit_non_zero_coeff = 0;
1060         int64_t trans_coeff_level;
1061         int prev_sig = 0;
1062         int offset   = i << 4;
1063
1064         uint8_t significant_coeff_flag_idx[16];
1065         uint8_t nb_significant_coeff_flag = 0;
1066
1067         x_cg = scan_x_cg[i];
1068         y_cg = scan_y_cg[i];
1069
1070         if (i < num_last_subset && i > 0) {
1071             int ctx_cg = 0;
1072             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1073                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1074             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1075                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1076
1077             significant_coeff_group_flag[x_cg][y_cg] =
1078                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1079             implicit_non_zero_coeff = 1;
1080         } else {
1081             significant_coeff_group_flag[x_cg][y_cg] =
1082                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1083                  (x_cg == 0 && y_cg == 0));
1084         }
1085
1086         last_scan_pos = num_coeff - offset - 1;
1087
1088         if (i == num_last_subset) {
1089             n_end                         = last_scan_pos - 1;
1090             significant_coeff_flag_idx[0] = last_scan_pos;
1091             nb_significant_coeff_flag     = 1;
1092         } else {
1093             n_end = 15;
1094         }
1095
1096         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1097             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1098         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1099             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1100
1101         for (n = n_end; n >= 0; n--) {
1102             GET_COORD(offset, n);
1103
1104             if (significant_coeff_group_flag[x_cg][y_cg] &&
1105                 (n > 0 || implicit_non_zero_coeff == 0)) {
1106                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1107                                                           log2_trafo_size,
1108                                                           scan_idx,
1109                                                           prev_sig) == 1) {
1110                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1111                     nb_significant_coeff_flag++;
1112                     implicit_non_zero_coeff = 0;
1113                 }
1114             } else {
1115                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1116                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1117                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1118                     nb_significant_coeff_flag++;
1119                 }
1120             }
1121         }
1122
1123         n_end = nb_significant_coeff_flag;
1124
1125         if (n_end) {
1126             int first_nz_pos_in_cg = 16;
1127             int last_nz_pos_in_cg = -1;
1128             int c_rice_param = 0;
1129             int first_greater1_coeff_idx = -1;
1130             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1131             uint16_t coeff_sign_flag;
1132             int sum_abs = 0;
1133             int sign_hidden = 0;
1134
1135             // initialize first elem of coeff_bas_level_greater1_flag
1136             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1137
1138             if (!(i == num_last_subset) && greater1_ctx == 0)
1139                 ctx_set++;
1140             greater1_ctx      = 1;
1141             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1142
1143             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1144                 int n_idx = significant_coeff_flag_idx[m];
1145                 int inc   = (ctx_set << 2) + greater1_ctx;
1146                 coeff_abs_level_greater1_flag[n_idx] =
1147                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1148                 if (coeff_abs_level_greater1_flag[n_idx]) {
1149                     greater1_ctx = 0;
1150                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1151                     greater1_ctx++;
1152                 }
1153
1154                 if (coeff_abs_level_greater1_flag[n_idx] &&
1155                     first_greater1_coeff_idx == -1)
1156                     first_greater1_coeff_idx = n_idx;
1157             }
1158             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1159             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1160                                  !lc->cu.cu_transquant_bypass_flag;
1161
1162             if (first_greater1_coeff_idx != -1) {
1163                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1164             }
1165             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1166                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1167             } else {
1168                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1169             }
1170
1171             for (m = 0; m < n_end; m++) {
1172                 n = significant_coeff_flag_idx[m];
1173                 GET_COORD(offset, n);
1174                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1175                 if (trans_coeff_level == ((m < 8) ?
1176                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1177                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1178
1179                     trans_coeff_level += last_coeff_abs_level_remaining;
1180                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1181                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1182                 }
1183                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1184                     sum_abs += trans_coeff_level;
1185                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1186                         trans_coeff_level = -trans_coeff_level;
1187                 }
1188                 if (coeff_sign_flag >> 15)
1189                     trans_coeff_level = -trans_coeff_level;
1190                 coeff_sign_flag <<= 1;
1191                 if (!lc->cu.cu_transquant_bypass_flag) {
1192                     if (s->sps->scaling_list_enable_flag) {
1193                         if (y_c || x_c || log2_trafo_size < 4) {
1194                             int pos;
1195                             switch (log2_trafo_size) {
1196                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1197                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1198                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1199                             default: pos = (y_c        << 2) +  x_c;
1200                             }
1201                             scale_m = scale_matrix[pos];
1202                         } else {
1203                             scale_m = dc_scale;
1204                         }
1205                     }
1206                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1207                     if(trans_coeff_level < 0) {
1208                         if((~trans_coeff_level) & 0xFffffffffff8000)
1209                             trans_coeff_level = -32768;
1210                     } else {
1211                         if (trans_coeff_level & 0xffffffffffff8000)
1212                             trans_coeff_level = 32767;
1213                     }
1214                 }
1215                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1216             }
1217         }
1218     }
1219
1220     if (lc->cu.cu_transquant_bypass_flag) {
1221         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1222     } else {
1223         if (transform_skip_flag)
1224             s->hevcdsp.transform_skip(dst, coeffs, stride);
1225         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1226                  log2_trafo_size == 2)
1227             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1228         else
1229             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1230     }
1231 }
1232
1233 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1234                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1235                               int log2_cb_size, int log2_trafo_size,
1236                               int trafo_depth, int blk_idx,
1237                               int cbf_cb, int cbf_cr)
1238 {
1239     HEVCLocalContext *lc = &s->HEVClc;
1240
1241     if (lc->cu.pred_mode == MODE_INTRA) {
1242         int trafo_size = 1 << log2_trafo_size;
1243         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1244
1245         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1246         if (log2_trafo_size > 2) {
1247             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1248             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1249             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1250             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1251         } else if (blk_idx == 3) {
1252             trafo_size = trafo_size << s->sps->hshift[1];
1253             ff_hevc_set_neighbour_available(s, xBase, yBase,
1254                                             trafo_size, trafo_size);
1255             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1256             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1257         }
1258     }
1259
1260     if (lc->tt.cbf_luma || cbf_cb || cbf_cr) {
1261         int scan_idx   = SCAN_DIAG;
1262         int scan_idx_c = SCAN_DIAG;
1263
1264         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1265             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1266             if (lc->tu.cu_qp_delta != 0)
1267                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1268                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1269             lc->tu.is_cu_qp_delta_coded = 1;
1270
1271             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
1272                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
1273                 av_log(s->avctx, AV_LOG_ERROR,
1274                        "The cu_qp_delta %d is outside the valid range "
1275                        "[%d, %d].\n",
1276                        lc->tu.cu_qp_delta,
1277                        -(26 + s->sps->qp_bd_offset / 2),
1278                         (25 + s->sps->qp_bd_offset / 2));
1279                 return AVERROR_INVALIDDATA;
1280             }
1281
1282             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1283         }
1284
1285         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1286             if (lc->tu.cur_intra_pred_mode >= 6 &&
1287                 lc->tu.cur_intra_pred_mode <= 14) {
1288                 scan_idx = SCAN_VERT;
1289             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1290                        lc->tu.cur_intra_pred_mode <= 30) {
1291                 scan_idx = SCAN_HORIZ;
1292             }
1293
1294             if (lc->pu.intra_pred_mode_c >=  6 &&
1295                 lc->pu.intra_pred_mode_c <= 14) {
1296                 scan_idx_c = SCAN_VERT;
1297             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1298                        lc->pu.intra_pred_mode_c <= 30) {
1299                 scan_idx_c = SCAN_HORIZ;
1300             }
1301         }
1302
1303         if (lc->tt.cbf_luma)
1304             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1305         if (log2_trafo_size > 2) {
1306             if (cbf_cb)
1307                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1308             if (cbf_cr)
1309                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1310         } else if (blk_idx == 3) {
1311             if (cbf_cb)
1312                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1313             if (cbf_cr)
1314                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1315         }
1316     }
1317     return 0;
1318 }
1319
1320 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1321 {
1322     int cb_size          = 1 << log2_cb_size;
1323     int log2_min_pu_size = s->sps->log2_min_pu_size;
1324
1325     int min_pu_width     = s->sps->min_pu_width;
1326     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1327     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1328     int i, j;
1329
1330     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1331         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1332             s->is_pcm[i + j * min_pu_width] = 2;
1333 }
1334
1335 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1336                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1337                               int log2_cb_size, int log2_trafo_size,
1338                               int trafo_depth, int blk_idx,
1339                               int cbf_cb, int cbf_cr)
1340 {
1341     HEVCLocalContext *lc = &s->HEVClc;
1342     uint8_t split_transform_flag;
1343     int ret;
1344
1345     if (lc->cu.intra_split_flag) {
1346         if (trafo_depth == 1)
1347             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1348     } else {
1349         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1350     }
1351
1352     lc->tt.cbf_luma = 1;
1353
1354     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1355         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1356         trafo_depth     < lc->cu.max_trafo_depth       &&
1357         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1358         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1359     } else {
1360         int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1361                           lc->cu.pred_mode == MODE_INTER &&
1362                           lc->cu.part_mode != PART_2Nx2N &&
1363                           trafo_depth == 0;
1364
1365         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1366                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1367                                inter_split;
1368     }
1369
1370     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1371         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1372     else if (log2_trafo_size > 2 || trafo_depth == 0)
1373         cbf_cb = 0;
1374     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1375         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1376     else if (log2_trafo_size > 2 || trafo_depth == 0)
1377         cbf_cr = 0;
1378
1379     if (split_transform_flag) {
1380         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1381         const int x1 = x0 + trafo_size_split;
1382         const int y1 = y0 + trafo_size_split;
1383
1384 #define SUBDIVIDE(x, y, idx)                                                    \
1385 do {                                                                            \
1386     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1387                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1388                              cbf_cb, cbf_cr);                                   \
1389     if (ret < 0)                                                                \
1390         return ret;                                                             \
1391 } while (0)
1392
1393         SUBDIVIDE(x0, y0, 0);
1394         SUBDIVIDE(x1, y0, 1);
1395         SUBDIVIDE(x0, y1, 2);
1396         SUBDIVIDE(x1, y1, 3);
1397
1398 #undef SUBDIVIDE
1399     } else {
1400         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1401         int log2_min_tu_size = s->sps->log2_min_tb_size;
1402         int min_tu_width     = s->sps->min_tb_width;
1403
1404         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1405             cbf_cb || cbf_cr)
1406             lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1407
1408         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1409                                  log2_cb_size, log2_trafo_size, trafo_depth,
1410                                  blk_idx, cbf_cb, cbf_cr);
1411         if (ret < 0)
1412             return ret;
1413         // TODO: store cbf_luma somewhere else
1414         if (lc->tt.cbf_luma) {
1415             int i, j;
1416             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1417                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1418                     int x_tu = (x0 + j) >> log2_min_tu_size;
1419                     int y_tu = (y0 + i) >> log2_min_tu_size;
1420                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1421                 }
1422         }
1423         if (!s->sh.disable_deblocking_filter_flag) {
1424             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
1425                                                   lc->slice_or_tiles_up_boundary,
1426                                                   lc->slice_or_tiles_left_boundary);
1427             if (s->pps->transquant_bypass_enable_flag &&
1428                 lc->cu.cu_transquant_bypass_flag)
1429                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1430         }
1431     }
1432     return 0;
1433 }
1434
1435 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1436 {
1437     //TODO: non-4:2:0 support
1438     HEVCLocalContext *lc = &s->HEVClc;
1439     GetBitContext gb;
1440     int cb_size   = 1 << log2_cb_size;
1441     int stride0   = s->frame->linesize[0];
1442     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1443     int   stride1 = s->frame->linesize[1];
1444     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1445     int   stride2 = s->frame->linesize[2];
1446     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1447
1448     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1449     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1450     int ret;
1451
1452     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
1453                                           lc->slice_or_tiles_up_boundary,
1454                                           lc->slice_or_tiles_left_boundary);
1455
1456     ret = init_get_bits(&gb, pcm, length);
1457     if (ret < 0)
1458         return ret;
1459
1460     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1461     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1462     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1463     return 0;
1464 }
1465
1466 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1467 {
1468     HEVCLocalContext *lc = &s->HEVClc;
1469     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1470     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1471
1472     if (x)
1473         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1474     if (y)
1475         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1476
1477     switch (x) {
1478     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1479     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1480     case 0: lc->pu.mvd.x = 0;                               break;
1481     }
1482
1483     switch (y) {
1484     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1485     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1486     case 0: lc->pu.mvd.y = 0;                               break;
1487     }
1488 }
1489
1490 /**
1491  * 8.5.3.2.2.1 Luma sample interpolation process
1492  *
1493  * @param s HEVC decoding context
1494  * @param dst target buffer for block data at block position
1495  * @param dststride stride of the dst buffer
1496  * @param ref reference picture buffer at origin (0, 0)
1497  * @param mv motion vector (relative to block position) to get pixel data from
1498  * @param x_off horizontal position of block from origin (0, 0)
1499  * @param y_off vertical position of block from origin (0, 0)
1500  * @param block_w width of block
1501  * @param block_h height of block
1502  */
1503 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1504                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1505                     int block_w, int block_h)
1506 {
1507     HEVCLocalContext *lc = &s->HEVClc;
1508     uint8_t *src         = ref->data[0];
1509     ptrdiff_t srcstride  = ref->linesize[0];
1510     int pic_width        = s->sps->width;
1511     int pic_height       = s->sps->height;
1512
1513     int mx         = mv->x & 3;
1514     int my         = mv->y & 3;
1515     int extra_left = ff_hevc_qpel_extra_before[mx];
1516     int extra_top  = ff_hevc_qpel_extra_before[my];
1517
1518     x_off += mv->x >> 2;
1519     y_off += mv->y >> 2;
1520     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1521
1522     if (x_off < extra_left || y_off < extra_top ||
1523         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1524         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1525         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1526         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1527         int buf_offset = extra_top *
1528                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1529
1530         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1531                                  edge_emu_stride, srcstride,
1532                                  block_w + ff_hevc_qpel_extra[mx],
1533                                  block_h + ff_hevc_qpel_extra[my],
1534                                  x_off - extra_left, y_off - extra_top,
1535                                  pic_width, pic_height);
1536         src = lc->edge_emu_buffer + buf_offset;
1537         srcstride = edge_emu_stride;
1538     }
1539     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1540                                      block_h, lc->mc_buffer);
1541 }
1542
1543 /**
1544  * 8.5.3.2.2.2 Chroma sample interpolation process
1545  *
1546  * @param s HEVC decoding context
1547  * @param dst1 target buffer for block data at block position (U plane)
1548  * @param dst2 target buffer for block data at block position (V plane)
1549  * @param dststride stride of the dst1 and dst2 buffers
1550  * @param ref reference picture buffer at origin (0, 0)
1551  * @param mv motion vector (relative to block position) to get pixel data from
1552  * @param x_off horizontal position of block from origin (0, 0)
1553  * @param y_off vertical position of block from origin (0, 0)
1554  * @param block_w width of block
1555  * @param block_h height of block
1556  */
1557 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1558                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1559                       int x_off, int y_off, int block_w, int block_h)
1560 {
1561     HEVCLocalContext *lc = &s->HEVClc;
1562     uint8_t *src1        = ref->data[1];
1563     uint8_t *src2        = ref->data[2];
1564     ptrdiff_t src1stride = ref->linesize[1];
1565     ptrdiff_t src2stride = ref->linesize[2];
1566     int pic_width        = s->sps->width >> 1;
1567     int pic_height       = s->sps->height >> 1;
1568
1569     int mx = mv->x & 7;
1570     int my = mv->y & 7;
1571
1572     x_off += mv->x >> 3;
1573     y_off += mv->y >> 3;
1574     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1575     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1576
1577     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1578         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1579         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1580         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1581         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1582         int buf_offset1 = EPEL_EXTRA_BEFORE *
1583                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1584         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1585         int buf_offset2 = EPEL_EXTRA_BEFORE *
1586                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1587
1588         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1589                                  edge_emu_stride, src1stride,
1590                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1591                                  x_off - EPEL_EXTRA_BEFORE,
1592                                  y_off - EPEL_EXTRA_BEFORE,
1593                                  pic_width, pic_height);
1594
1595         src1 = lc->edge_emu_buffer + buf_offset1;
1596         src1stride = edge_emu_stride;
1597         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1598                                              block_w, block_h, mx, my, lc->mc_buffer);
1599
1600         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1601                                  edge_emu_stride, src2stride,
1602                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1603                                  x_off - EPEL_EXTRA_BEFORE,
1604                                  y_off - EPEL_EXTRA_BEFORE,
1605                                  pic_width, pic_height);
1606         src2 = lc->edge_emu_buffer + buf_offset2;
1607         src2stride = edge_emu_stride;
1608
1609         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1610                                              block_w, block_h, mx, my,
1611                                              lc->mc_buffer);
1612     } else {
1613         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1614                                              block_w, block_h, mx, my,
1615                                              lc->mc_buffer);
1616         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1617                                              block_w, block_h, mx, my,
1618                                              lc->mc_buffer);
1619     }
1620 }
1621
1622 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1623                                 const Mv *mv, int y0, int height)
1624 {
1625     int y = (mv->y >> 2) + y0 + height + 9;
1626     ff_thread_await_progress(&ref->tf, y, 0);
1627 }
1628
1629 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1630                                 int nPbW, int nPbH,
1631                                 int log2_cb_size, int partIdx)
1632 {
1633 #define POS(c_idx, x, y)                                                              \
1634     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1635                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1636     HEVCLocalContext *lc = &s->HEVClc;
1637     int merge_idx = 0;
1638     struct MvField current_mv = {{{ 0 }}};
1639
1640     int min_pu_width = s->sps->min_pu_width;
1641
1642     MvField *tab_mvf = s->ref->tab_mvf;
1643     RefPicList  *refPicList = s->ref->refPicList;
1644     HEVCFrame *ref0, *ref1;
1645
1646     int tmpstride = MAX_PB_SIZE;
1647
1648     uint8_t *dst0 = POS(0, x0, y0);
1649     uint8_t *dst1 = POS(1, x0, y0);
1650     uint8_t *dst2 = POS(2, x0, y0);
1651     int log2_min_cb_size = s->sps->log2_min_cb_size;
1652     int min_cb_width     = s->sps->min_cb_width;
1653     int x_cb             = x0 >> log2_min_cb_size;
1654     int y_cb             = y0 >> log2_min_cb_size;
1655     int ref_idx[2];
1656     int mvp_flag[2];
1657     int x_pu, y_pu;
1658     int i, j;
1659
1660     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1661         if (s->sh.max_num_merge_cand > 1)
1662             merge_idx = ff_hevc_merge_idx_decode(s);
1663         else
1664             merge_idx = 0;
1665
1666         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1667                                    1 << log2_cb_size,
1668                                    1 << log2_cb_size,
1669                                    log2_cb_size, partIdx,
1670                                    merge_idx, &current_mv);
1671         x_pu = x0 >> s->sps->log2_min_pu_size;
1672         y_pu = y0 >> s->sps->log2_min_pu_size;
1673
1674         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1675             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1676                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1677     } else { /* MODE_INTER */
1678         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1679         if (lc->pu.merge_flag) {
1680             if (s->sh.max_num_merge_cand > 1)
1681                 merge_idx = ff_hevc_merge_idx_decode(s);
1682             else
1683                 merge_idx = 0;
1684
1685             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1686                                        partIdx, merge_idx, &current_mv);
1687             x_pu = x0 >> s->sps->log2_min_pu_size;
1688             y_pu = y0 >> s->sps->log2_min_pu_size;
1689
1690             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1691                 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1692                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1693         } else {
1694             enum InterPredIdc inter_pred_idc = PRED_L0;
1695             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1696             if (s->sh.slice_type == B_SLICE)
1697                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1698
1699             if (inter_pred_idc != PRED_L1) {
1700                 if (s->sh.nb_refs[L0]) {
1701                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1702                     current_mv.ref_idx[0] = ref_idx[0];
1703                 }
1704                 current_mv.pred_flag[0] = 1;
1705                 hls_mvd_coding(s, x0, y0, 0);
1706                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1707                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1708                                          partIdx, merge_idx, &current_mv,
1709                                          mvp_flag[0], 0);
1710                 current_mv.mv[0].x += lc->pu.mvd.x;
1711                 current_mv.mv[0].y += lc->pu.mvd.y;
1712             }
1713
1714             if (inter_pred_idc != PRED_L0) {
1715                 if (s->sh.nb_refs[L1]) {
1716                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1717                     current_mv.ref_idx[1] = ref_idx[1];
1718                 }
1719
1720                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1721                     lc->pu.mvd.x = 0;
1722                     lc->pu.mvd.y = 0;
1723                 } else {
1724                     hls_mvd_coding(s, x0, y0, 1);
1725                 }
1726
1727                 current_mv.pred_flag[1] = 1;
1728                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1729                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1730                                          partIdx, merge_idx, &current_mv,
1731                                          mvp_flag[1], 1);
1732                 current_mv.mv[1].x += lc->pu.mvd.x;
1733                 current_mv.mv[1].y += lc->pu.mvd.y;
1734             }
1735
1736             x_pu = x0 >> s->sps->log2_min_pu_size;
1737             y_pu = y0 >> s->sps->log2_min_pu_size;
1738
1739             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1740                 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1741                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1742         }
1743     }
1744
1745     if (current_mv.pred_flag[0]) {
1746         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1747         if (!ref0)
1748             return;
1749         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1750     }
1751     if (current_mv.pred_flag[1]) {
1752         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1753         if (!ref1)
1754             return;
1755         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1756     }
1757
1758     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1759         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1760         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1761
1762         luma_mc(s, tmp, tmpstride, ref0->frame,
1763                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1764
1765         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1766             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1767             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1768                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1769                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1770                                      dst0, s->frame->linesize[0], tmp,
1771                                      tmpstride, nPbW, nPbH);
1772         } else {
1773             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1774         }
1775         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1776                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1777
1778         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1779             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1780             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1781                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1782                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1783                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1784                                      nPbW / 2, nPbH / 2);
1785             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1786                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1787                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1788                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1789                                      nPbW / 2, nPbH / 2);
1790         } else {
1791             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1792             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1793         }
1794     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1795         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1796         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1797
1798         if (!ref1)
1799             return;
1800
1801         luma_mc(s, tmp, tmpstride, ref1->frame,
1802                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1803
1804         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1805             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1806             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1807                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1808                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1809                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1810                                       nPbW, nPbH);
1811         } else {
1812             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1813         }
1814
1815         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1816                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1817
1818         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1819             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1820             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1821                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1822                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1823                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1824             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1825                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1826                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1827                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1828         } else {
1829             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1830             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1831         }
1832     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1833         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1834         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1835         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1836         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1837         HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1838         HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1839
1840         if (!ref0 || !ref1)
1841             return;
1842
1843         luma_mc(s, tmp, tmpstride, ref0->frame,
1844                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1845         luma_mc(s, tmp2, tmpstride, ref1->frame,
1846                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1847
1848         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1849             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1850             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1851                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1852                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1853                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1854                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1855                                          dst0, s->frame->linesize[0],
1856                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1857         } else {
1858             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1859                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1860         }
1861
1862         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1863                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1864         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1865                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1866
1867         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1868             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1869             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1870                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1871                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1872                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1873                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1874                                          dst1, s->frame->linesize[1], tmp, tmp3,
1875                                          tmpstride, nPbW / 2, nPbH / 2);
1876             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1877                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1878                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1879                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1880                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1881                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1882                                          tmpstride, nPbW / 2, nPbH / 2);
1883         } else {
1884             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1885             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1886         }
1887     }
1888 }
1889
1890 /**
1891  * 8.4.1
1892  */
1893 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1894                                 int prev_intra_luma_pred_flag)
1895 {
1896     HEVCLocalContext *lc = &s->HEVClc;
1897     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1898     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1899     int min_pu_width     = s->sps->min_pu_width;
1900     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1901     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1902     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1903
1904     int cand_up   = (lc->ctb_up_flag || y0b) ?
1905                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1906     int cand_left = (lc->ctb_left_flag || x0b) ?
1907                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1908
1909     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1910
1911     MvField *tab_mvf = s->ref->tab_mvf;
1912     int intra_pred_mode;
1913     int candidate[3];
1914     int i, j;
1915
1916     // intra_pred_mode prediction does not cross vertical CTB boundaries
1917     if ((y0 - 1) < y_ctb)
1918         cand_up = INTRA_DC;
1919
1920     if (cand_left == cand_up) {
1921         if (cand_left < 2) {
1922             candidate[0] = INTRA_PLANAR;
1923             candidate[1] = INTRA_DC;
1924             candidate[2] = INTRA_ANGULAR_26;
1925         } else {
1926             candidate[0] = cand_left;
1927             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1928             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1929         }
1930     } else {
1931         candidate[0] = cand_left;
1932         candidate[1] = cand_up;
1933         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1934             candidate[2] = INTRA_PLANAR;
1935         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1936             candidate[2] = INTRA_DC;
1937         } else {
1938             candidate[2] = INTRA_ANGULAR_26;
1939         }
1940     }
1941
1942     if (prev_intra_luma_pred_flag) {
1943         intra_pred_mode = candidate[lc->pu.mpm_idx];
1944     } else {
1945         if (candidate[0] > candidate[1])
1946             FFSWAP(uint8_t, candidate[0], candidate[1]);
1947         if (candidate[0] > candidate[2])
1948             FFSWAP(uint8_t, candidate[0], candidate[2]);
1949         if (candidate[1] > candidate[2])
1950             FFSWAP(uint8_t, candidate[1], candidate[2]);
1951
1952         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1953         for (i = 0; i < 3; i++)
1954             if (intra_pred_mode >= candidate[i])
1955                 intra_pred_mode++;
1956     }
1957
1958     /* write the intra prediction units into the mv array */
1959     if (!size_in_pus)
1960         size_in_pus = 1;
1961     for (i = 0; i < size_in_pus; i++) {
1962         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1963                intra_pred_mode, size_in_pus);
1964
1965         for (j = 0; j < size_in_pus; j++) {
1966             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1967             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1968             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1969             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1970             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1971             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1972             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1973             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1974             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1975         }
1976     }
1977
1978     return intra_pred_mode;
1979 }
1980
1981 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1982                                           int log2_cb_size, int ct_depth)
1983 {
1984     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1985     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1986     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1987     int y;
1988
1989     for (y = 0; y < length; y++)
1990         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1991                ct_depth, length);
1992 }
1993
1994 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1995                                   int log2_cb_size)
1996 {
1997     HEVCLocalContext *lc = &s->HEVClc;
1998     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1999     uint8_t prev_intra_luma_pred_flag[4];
2000     int split   = lc->cu.part_mode == PART_NxN;
2001     int pb_size = (1 << log2_cb_size) >> split;
2002     int side    = split + 1;
2003     int chroma_mode;
2004     int i, j;
2005
2006     for (i = 0; i < side; i++)
2007         for (j = 0; j < side; j++)
2008             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2009
2010     for (i = 0; i < side; i++) {
2011         for (j = 0; j < side; j++) {
2012             if (prev_intra_luma_pred_flag[2 * i + j])
2013                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2014             else
2015                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2016
2017             lc->pu.intra_pred_mode[2 * i + j] =
2018                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2019                                      prev_intra_luma_pred_flag[2 * i + j]);
2020         }
2021     }
2022
2023     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2024     if (chroma_mode != 4) {
2025         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2026             lc->pu.intra_pred_mode_c = 34;
2027         else
2028             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2029     } else {
2030         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2031     }
2032 }
2033
2034 static void intra_prediction_unit_default_value(HEVCContext *s,
2035                                                 int x0, int y0,
2036                                                 int log2_cb_size)
2037 {
2038     HEVCLocalContext *lc = &s->HEVClc;
2039     int pb_size          = 1 << log2_cb_size;
2040     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2041     int min_pu_width     = s->sps->min_pu_width;
2042     MvField *tab_mvf     = s->ref->tab_mvf;
2043     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2044     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2045     int j, k;
2046
2047     if (size_in_pus == 0)
2048         size_in_pus = 1;
2049     for (j = 0; j < size_in_pus; j++) {
2050         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2051         for (k = 0; k < size_in_pus; k++)
2052             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2053     }
2054 }
2055
2056 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2057 {
2058     int cb_size          = 1 << log2_cb_size;
2059     HEVCLocalContext *lc = &s->HEVClc;
2060     int log2_min_cb_size = s->sps->log2_min_cb_size;
2061     int length           = cb_size >> log2_min_cb_size;
2062     int min_cb_width     = s->sps->min_cb_width;
2063     int x_cb             = x0 >> log2_min_cb_size;
2064     int y_cb             = y0 >> log2_min_cb_size;
2065     int x, y, ret;
2066
2067     lc->cu.x                = x0;
2068     lc->cu.y                = y0;
2069     lc->cu.rqt_root_cbf     = 1;
2070     lc->cu.pred_mode        = MODE_INTRA;
2071     lc->cu.part_mode        = PART_2Nx2N;
2072     lc->cu.intra_split_flag = 0;
2073     lc->cu.pcm_flag         = 0;
2074
2075     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2076     for (x = 0; x < 4; x++)
2077         lc->pu.intra_pred_mode[x] = 1;
2078     if (s->pps->transquant_bypass_enable_flag) {
2079         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2080         if (lc->cu.cu_transquant_bypass_flag)
2081             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2082     } else
2083         lc->cu.cu_transquant_bypass_flag = 0;
2084
2085     if (s->sh.slice_type != I_SLICE) {
2086         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2087
2088         lc->cu.pred_mode = MODE_SKIP;
2089         x = y_cb * min_cb_width + x_cb;
2090         for (y = 0; y < length; y++) {
2091             memset(&s->skip_flag[x], skip_flag, length);
2092             x += min_cb_width;
2093         }
2094         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2095     }
2096
2097     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2098         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2099         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2100
2101         if (!s->sh.disable_deblocking_filter_flag)
2102             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2103                                                   lc->slice_or_tiles_up_boundary,
2104                                                   lc->slice_or_tiles_left_boundary);
2105     } else {
2106         if (s->sh.slice_type != I_SLICE)
2107             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2108         if (lc->cu.pred_mode != MODE_INTRA ||
2109             log2_cb_size == s->sps->log2_min_cb_size) {
2110             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2111             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2112                                       lc->cu.pred_mode == MODE_INTRA;
2113         }
2114
2115         if (lc->cu.pred_mode == MODE_INTRA) {
2116             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2117                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2118                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2119                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2120             }
2121             if (lc->cu.pcm_flag) {
2122                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2123                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2124                 if (s->sps->pcm.loop_filter_disable_flag)
2125                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2126
2127                 if (ret < 0)
2128                     return ret;
2129             } else {
2130                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2131             }
2132         } else {
2133             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2134             switch (lc->cu.part_mode) {
2135             case PART_2Nx2N:
2136                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2137                 break;
2138             case PART_2NxN:
2139                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2140                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2141                 break;
2142             case PART_Nx2N:
2143                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2144                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2145                 break;
2146             case PART_2NxnU:
2147                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2148                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2149                 break;
2150             case PART_2NxnD:
2151                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2152                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2153                 break;
2154             case PART_nLx2N:
2155                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2156                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2157                 break;
2158             case PART_nRx2N:
2159                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2160                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2161                 break;
2162             case PART_NxN:
2163                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2164                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2165                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2166                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2167                 break;
2168             }
2169         }
2170
2171         if (!lc->cu.pcm_flag) {
2172             if (lc->cu.pred_mode != MODE_INTRA &&
2173                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2174                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2175             }
2176             if (lc->cu.rqt_root_cbf) {
2177                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2178                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2179                                          s->sps->max_transform_hierarchy_depth_inter;
2180                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2181                                          log2_cb_size,
2182                                          log2_cb_size, 0, 0, 0, 0);
2183                 if (ret < 0)
2184                     return ret;
2185             } else {
2186                 if (!s->sh.disable_deblocking_filter_flag)
2187                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
2188                                                           lc->slice_or_tiles_up_boundary,
2189                                                           lc->slice_or_tiles_left_boundary);
2190             }
2191         }
2192     }
2193
2194     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2195         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2196
2197     x = y_cb * min_cb_width + x_cb;
2198     for (y = 0; y < length; y++) {
2199         memset(&s->qp_y_tab[x], lc->qp_y, length);
2200         x += min_cb_width;
2201     }
2202
2203     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2204
2205     return 0;
2206 }
2207
2208 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2209                                int log2_cb_size, int cb_depth)
2210 {
2211     HEVCLocalContext *lc = &s->HEVClc;
2212     const int cb_size    = 1 << log2_cb_size;
2213     int split_cu;
2214
2215     lc->ct.depth = cb_depth;
2216     if (x0 + cb_size <= s->sps->width  &&
2217         y0 + cb_size <= s->sps->height &&
2218         log2_cb_size > s->sps->log2_min_cb_size) {
2219         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2220     } else {
2221         split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
2222     }
2223     if (s->pps->cu_qp_delta_enabled_flag &&
2224         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2225         lc->tu.is_cu_qp_delta_coded = 0;
2226         lc->tu.cu_qp_delta          = 0;
2227     }
2228
2229     if (split_cu) {
2230         const int cb_size_split = cb_size >> 1;
2231         const int x1 = x0 + cb_size_split;
2232         const int y1 = y0 + cb_size_split;
2233
2234         log2_cb_size--;
2235         cb_depth++;
2236
2237 #define SUBDIVIDE(x, y)                                                \
2238 do {                                                                   \
2239     if (x < s->sps->width && y < s->sps->height) {                     \
2240         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2241         if (ret < 0)                                                   \
2242             return ret;                                                \
2243     }                                                                  \
2244 } while (0)
2245
2246         SUBDIVIDE(x0, y0);
2247         SUBDIVIDE(x1, y0);
2248         SUBDIVIDE(x0, y1);
2249         SUBDIVIDE(x1, y1);
2250     } else {
2251         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2252         if (ret < 0)
2253             return ret;
2254     }
2255
2256     return 0;
2257 }
2258
2259 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2260                                  int ctb_addr_ts)
2261 {
2262     HEVCLocalContext *lc  = &s->HEVClc;
2263     int ctb_size          = 1 << s->sps->log2_ctb_size;
2264     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2265     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2266
2267     int tile_left_boundary, tile_up_boundary;
2268     int slice_left_boundary, slice_up_boundary;
2269
2270     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2271
2272     if (s->pps->entropy_coding_sync_enabled_flag) {
2273         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2274             lc->first_qp_group = 1;
2275         lc->end_of_tiles_x = s->sps->width;
2276     } else if (s->pps->tiles_enabled_flag) {
2277         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2278             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2279             lc->start_of_tiles_x = x_ctb;
2280             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2281             lc->first_qp_group   = 1;
2282         }
2283     } else {
2284         lc->end_of_tiles_x = s->sps->width;
2285     }
2286
2287     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2288
2289     if (s->pps->tiles_enabled_flag) {
2290         tile_left_boundary  = x_ctb > 0 &&
2291                               s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
2292         slice_left_boundary = x_ctb > 0 &&
2293                               s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
2294         tile_up_boundary  = y_ctb > 0 &&
2295                             s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2296         slice_up_boundary = y_ctb > 0 &&
2297                             s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2298     } else {
2299         tile_left_boundary  =
2300         tile_up_boundary    = 1;
2301         slice_left_boundary = ctb_addr_in_slice > 0;
2302         slice_up_boundary   = ctb_addr_in_slice >= s->sps->ctb_width;
2303     }
2304     lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
2305     lc->slice_or_tiles_up_boundary   = (!slice_up_boundary + (!tile_up_boundary << 1));
2306     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
2307     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
2308     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2309     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2310 }
2311
2312 static int hls_slice_data(HEVCContext *s)
2313 {
2314     int ctb_size    = 1 << s->sps->log2_ctb_size;
2315     int more_data   = 1;
2316     int x_ctb       = 0;
2317     int y_ctb       = 0;
2318     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2319     int ret;
2320
2321     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2322         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2323
2324         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2325         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2326         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2327
2328         ff_hevc_cabac_init(s, ctb_addr_ts);
2329
2330         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2331
2332         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2333         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2334         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2335
2336         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2337         if (ret < 0)
2338             return ret;
2339         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2340
2341         ctb_addr_ts++;
2342         ff_hevc_save_states(s, ctb_addr_ts);
2343         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2344     }
2345
2346     if (x_ctb + ctb_size >= s->sps->width &&
2347         y_ctb + ctb_size >= s->sps->height)
2348         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2349
2350     return ctb_addr_ts;
2351 }
2352
2353 /**
2354  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2355  * 0 if the unit should be skipped, 1 otherwise
2356  */
2357 static int hls_nal_unit(HEVCContext *s)
2358 {
2359     GetBitContext *gb = &s->HEVClc.gb;
2360     int nuh_layer_id;
2361
2362     if (get_bits1(gb) != 0)
2363         return AVERROR_INVALIDDATA;
2364
2365     s->nal_unit_type = get_bits(gb, 6);
2366
2367     nuh_layer_id   = get_bits(gb, 6);
2368     s->temporal_id = get_bits(gb, 3) - 1;
2369     if (s->temporal_id < 0)
2370         return AVERROR_INVALIDDATA;
2371
2372     av_log(s->avctx, AV_LOG_DEBUG,
2373            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2374            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2375
2376     return nuh_layer_id == 0;
2377 }
2378
2379 static void restore_tqb_pixels(HEVCContext *s)
2380 {
2381     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2382     int x, y, c_idx;
2383
2384     for (c_idx = 0; c_idx < 3; c_idx++) {
2385         ptrdiff_t stride = s->frame->linesize[c_idx];
2386         int hshift       = s->sps->hshift[c_idx];
2387         int vshift       = s->sps->vshift[c_idx];
2388         for (y = 0; y < s->sps->min_pu_height; y++) {
2389             for (x = 0; x < s->sps->min_pu_width; x++) {
2390                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2391                     int n;
2392                     int len      = min_pu_size >> hshift;
2393                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2394                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2395                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2396                         memcpy(dst, src, len);
2397                         src += stride;
2398                         dst += stride;
2399                     }
2400                 }
2401             }
2402         }
2403     }
2404 }
2405
2406 static int set_side_data(HEVCContext *s)
2407 {
2408     AVFrame *out = s->ref->frame;
2409
2410     if (s->sei_frame_packing_present &&
2411         s->frame_packing_arrangement_type >= 3 &&
2412         s->frame_packing_arrangement_type <= 5 &&
2413         s->content_interpretation_type > 0 &&
2414         s->content_interpretation_type < 3) {
2415         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2416         if (!stereo)
2417             return AVERROR(ENOMEM);
2418
2419         switch (s->frame_packing_arrangement_type) {
2420         case 3:
2421             if (s->quincunx_subsampling)
2422                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2423             else
2424                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2425             break;
2426         case 4:
2427             stereo->type = AV_STEREO3D_TOPBOTTOM;
2428             break;
2429         case 5:
2430             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2431             break;
2432         }
2433
2434         if (s->content_interpretation_type == 2)
2435             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2436     }
2437
2438     if (s->sei_display_orientation_present &&
2439         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2440         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2441         AVFrameSideData *rotation = av_frame_new_side_data(out,
2442                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2443                                                            sizeof(int32_t) * 9);
2444         if (!rotation)
2445             return AVERROR(ENOMEM);
2446
2447         av_display_rotation_set((int32_t *)rotation->data, angle);
2448         av_display_matrix_flip((int32_t *)rotation->data,
2449                                s->sei_vflip, s->sei_hflip);
2450     }
2451
2452     return 0;
2453 }
2454
2455 static int hevc_frame_start(HEVCContext *s)
2456 {
2457     HEVCLocalContext *lc = &s->HEVClc;
2458     int ret;
2459
2460     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2461     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2462     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2463     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2464
2465     lc->start_of_tiles_x = 0;
2466     s->is_decoded        = 0;
2467     s->first_nal_type    = s->nal_unit_type;
2468
2469     if (s->pps->tiles_enabled_flag)
2470         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2471
2472     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2473                               s->poc);
2474     if (ret < 0)
2475         goto fail;
2476
2477     ret = ff_hevc_frame_rps(s);
2478     if (ret < 0) {
2479         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2480         goto fail;
2481     }
2482
2483     s->ref->frame->key_frame = IS_IRAP(s);
2484
2485     ret = set_side_data(s);
2486     if (ret < 0)
2487         goto fail;
2488
2489     av_frame_unref(s->output_frame);
2490     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2491     if (ret < 0)
2492         goto fail;
2493
2494     ff_thread_finish_setup(s->avctx);
2495
2496     return 0;
2497
2498 fail:
2499     if (s->ref)
2500         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2501     s->ref = NULL;
2502     return ret;
2503 }
2504
2505 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2506 {
2507     HEVCLocalContext *lc = &s->HEVClc;
2508     GetBitContext *gb    = &lc->gb;
2509     int ctb_addr_ts, ret;
2510
2511     ret = init_get_bits8(gb, nal, length);
2512     if (ret < 0)
2513         return ret;
2514
2515     ret = hls_nal_unit(s);
2516     if (ret < 0) {
2517         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2518                s->nal_unit_type);
2519         goto fail;
2520     } else if (!ret)
2521         return 0;
2522
2523     switch (s->nal_unit_type) {
2524     case NAL_VPS:
2525         ret = ff_hevc_decode_nal_vps(s);
2526         if (ret < 0)
2527             goto fail;
2528         break;
2529     case NAL_SPS:
2530         ret = ff_hevc_decode_nal_sps(s);
2531         if (ret < 0)
2532             goto fail;
2533         break;
2534     case NAL_PPS:
2535         ret = ff_hevc_decode_nal_pps(s);
2536         if (ret < 0)
2537             goto fail;
2538         break;
2539     case NAL_SEI_PREFIX:
2540     case NAL_SEI_SUFFIX:
2541         ret = ff_hevc_decode_nal_sei(s);
2542         if (ret < 0)
2543             goto fail;
2544         break;
2545     case NAL_TRAIL_R:
2546     case NAL_TRAIL_N:
2547     case NAL_TSA_N:
2548     case NAL_TSA_R:
2549     case NAL_STSA_N:
2550     case NAL_STSA_R:
2551     case NAL_BLA_W_LP:
2552     case NAL_BLA_W_RADL:
2553     case NAL_BLA_N_LP:
2554     case NAL_IDR_W_RADL:
2555     case NAL_IDR_N_LP:
2556     case NAL_CRA_NUT:
2557     case NAL_RADL_N:
2558     case NAL_RADL_R:
2559     case NAL_RASL_N:
2560     case NAL_RASL_R:
2561         ret = hls_slice_header(s);
2562         if (ret < 0)
2563             return ret;
2564
2565         if (s->max_ra == INT_MAX) {
2566             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2567                 s->max_ra = s->poc;
2568             } else {
2569                 if (IS_IDR(s))
2570                     s->max_ra = INT_MIN;
2571             }
2572         }
2573
2574         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2575             s->poc <= s->max_ra) {
2576             s->is_decoded = 0;
2577             break;
2578         } else {
2579             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2580                 s->max_ra = INT_MIN;
2581         }
2582
2583         if (s->sh.first_slice_in_pic_flag) {
2584             ret = hevc_frame_start(s);
2585             if (ret < 0)
2586                 return ret;
2587         } else if (!s->ref) {
2588             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2589             goto fail;
2590         }
2591
2592         if (s->nal_unit_type != s->first_nal_type) {
2593             av_log(s->avctx, AV_LOG_ERROR,
2594                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2595                    s->first_nal_type, s->nal_unit_type);
2596             return AVERROR_INVALIDDATA;
2597         }
2598
2599         if (!s->sh.dependent_slice_segment_flag &&
2600             s->sh.slice_type != I_SLICE) {
2601             ret = ff_hevc_slice_rpl(s);
2602             if (ret < 0) {
2603                 av_log(s->avctx, AV_LOG_WARNING,
2604                        "Error constructing the reference lists for the current slice.\n");
2605                 goto fail;
2606             }
2607         }
2608
2609         ctb_addr_ts = hls_slice_data(s);
2610         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2611             s->is_decoded = 1;
2612             if ((s->pps->transquant_bypass_enable_flag ||
2613                  (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2614                 s->sps->sao_enabled)
2615                 restore_tqb_pixels(s);
2616         }
2617
2618         if (ctb_addr_ts < 0) {
2619             ret = ctb_addr_ts;
2620             goto fail;
2621         }
2622         break;
2623     case NAL_EOS_NUT:
2624     case NAL_EOB_NUT:
2625         s->seq_decode = (s->seq_decode + 1) & 0xff;
2626         s->max_ra     = INT_MAX;
2627         break;
2628     case NAL_AUD:
2629     case NAL_FD_NUT:
2630         break;
2631     default:
2632         av_log(s->avctx, AV_LOG_INFO,
2633                "Skipping NAL unit %d\n", s->nal_unit_type);
2634     }
2635
2636     return 0;
2637 fail:
2638     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2639         return ret;
2640     return 0;
2641 }
2642
2643 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2644  * between these functions would be nice. */
2645 static int extract_rbsp(const uint8_t *src, int length,
2646                         HEVCNAL *nal)
2647 {
2648     int i, si, di;
2649     uint8_t *dst;
2650
2651 #define STARTCODE_TEST                                                  \
2652         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2653             if (src[i + 2] != 3) {                                      \
2654                 /* startcode, so we must be past the end */             \
2655                 length = i;                                             \
2656             }                                                           \
2657             break;                                                      \
2658         }
2659 #if HAVE_FAST_UNALIGNED
2660 #define FIND_FIRST_ZERO                                                 \
2661         if (i > 0 && !src[i])                                           \
2662             i--;                                                        \
2663         while (src[i])                                                  \
2664             i++
2665 #if HAVE_FAST_64BIT
2666     for (i = 0; i + 1 < length; i += 9) {
2667         if (!((~AV_RN64A(src + i) &
2668                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2669               0x8000800080008080ULL))
2670             continue;
2671         FIND_FIRST_ZERO;
2672         STARTCODE_TEST;
2673         i -= 7;
2674     }
2675 #else
2676     for (i = 0; i + 1 < length; i += 5) {
2677         if (!((~AV_RN32A(src + i) &
2678                (AV_RN32A(src + i) - 0x01000101U)) &
2679               0x80008080U))
2680             continue;
2681         FIND_FIRST_ZERO;
2682         STARTCODE_TEST;
2683         i -= 3;
2684     }
2685 #endif /* HAVE_FAST_64BIT */
2686 #else
2687     for (i = 0; i + 1 < length; i += 2) {
2688         if (src[i])
2689             continue;
2690         if (i > 0 && src[i - 1] == 0)
2691             i--;
2692         STARTCODE_TEST;
2693     }
2694 #endif /* HAVE_FAST_UNALIGNED */
2695
2696     if (i >= length - 1) { // no escaped 0
2697         nal->data = src;
2698         nal->size = length;
2699         return length;
2700     }
2701
2702     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2703                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2704     if (!nal->rbsp_buffer)
2705         return AVERROR(ENOMEM);
2706
2707     dst = nal->rbsp_buffer;
2708
2709     memcpy(dst, src, i);
2710     si = di = i;
2711     while (si + 2 < length) {
2712         // remove escapes (very rare 1:2^22)
2713         if (src[si + 2] > 3) {
2714             dst[di++] = src[si++];
2715             dst[di++] = src[si++];
2716         } else if (src[si] == 0 && src[si + 1] == 0) {
2717             if (src[si + 2] == 3) { // escape
2718                 dst[di++] = 0;
2719                 dst[di++] = 0;
2720                 si       += 3;
2721
2722                 continue;
2723             } else // next start code
2724                 goto nsc;
2725         }
2726
2727         dst[di++] = src[si++];
2728     }
2729     while (si < length)
2730         dst[di++] = src[si++];
2731
2732 nsc:
2733     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2734
2735     nal->data = dst;
2736     nal->size = di;
2737     return si;
2738 }
2739
2740 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2741 {
2742     int i, consumed, ret = 0;
2743
2744     s->ref = NULL;
2745     s->eos = 0;
2746
2747     /* split the input packet into NAL units, so we know the upper bound on the
2748      * number of slices in the frame */
2749     s->nb_nals = 0;
2750     while (length >= 4) {
2751         HEVCNAL *nal;
2752         int extract_length = 0;
2753
2754         if (s->is_nalff) {
2755             int i;
2756             for (i = 0; i < s->nal_length_size; i++)
2757                 extract_length = (extract_length << 8) | buf[i];
2758             buf    += s->nal_length_size;
2759             length -= s->nal_length_size;
2760
2761             if (extract_length > length) {
2762                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2763                 ret = AVERROR_INVALIDDATA;
2764                 goto fail;
2765             }
2766         } else {
2767             if (buf[2] == 0) {
2768                 length--;
2769                 buf++;
2770                 continue;
2771             }
2772             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2773                 ret = AVERROR_INVALIDDATA;
2774                 goto fail;
2775             }
2776
2777             buf           += 3;
2778             length        -= 3;
2779             extract_length = length;
2780         }
2781
2782         if (s->nals_allocated < s->nb_nals + 1) {
2783             int new_size = s->nals_allocated + 1;
2784             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2785             if (!tmp) {
2786                 ret = AVERROR(ENOMEM);
2787                 goto fail;
2788             }
2789             s->nals = tmp;
2790             memset(s->nals + s->nals_allocated, 0,
2791                    (new_size - s->nals_allocated) * sizeof(*tmp));
2792             s->nals_allocated = new_size;
2793         }
2794         nal = &s->nals[s->nb_nals++];
2795
2796         consumed = extract_rbsp(buf, extract_length, nal);
2797         if (consumed < 0) {
2798             ret = consumed;
2799             goto fail;
2800         }
2801
2802         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2803         if (ret < 0)
2804             goto fail;
2805         hls_nal_unit(s);
2806
2807         if (s->nal_unit_type == NAL_EOB_NUT ||
2808             s->nal_unit_type == NAL_EOS_NUT)
2809             s->eos = 1;
2810
2811         buf    += consumed;
2812         length -= consumed;
2813     }
2814
2815     /* parse the NAL units */
2816     for (i = 0; i < s->nb_nals; i++) {
2817         int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2818         if (ret < 0) {
2819             av_log(s->avctx, AV_LOG_WARNING,
2820                    "Error parsing NAL unit #%d.\n", i);
2821             goto fail;
2822         }
2823     }
2824
2825 fail:
2826     if (s->ref)
2827         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2828
2829     return ret;
2830 }
2831
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits.
 *
 * @param log_ctx context passed through to av_log()
 * @param level   log level passed through to av_log()
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2838
2839 static int verify_md5(HEVCContext *s, AVFrame *frame)
2840 {
2841     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2842     int pixel_shift;
2843     int i, j;
2844
2845     if (!desc)
2846         return AVERROR(EINVAL);
2847
2848     pixel_shift = desc->comp[0].depth_minus1 > 7;
2849
2850     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2851            s->poc);
2852
2853     /* the checksums are LE, so we have to byteswap for >8bpp formats
2854      * on BE arches */
2855 #if HAVE_BIGENDIAN
2856     if (pixel_shift && !s->checksum_buf) {
2857         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2858                        FFMAX3(frame->linesize[0], frame->linesize[1],
2859                               frame->linesize[2]));
2860         if (!s->checksum_buf)
2861             return AVERROR(ENOMEM);
2862     }
2863 #endif
2864
2865     for (i = 0; frame->data[i]; i++) {
2866         int width  = s->avctx->coded_width;
2867         int height = s->avctx->coded_height;
2868         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2869         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2870         uint8_t md5[16];
2871
2872         av_md5_init(s->md5_ctx);
2873         for (j = 0; j < h; j++) {
2874             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2875 #if HAVE_BIGENDIAN
2876             if (pixel_shift) {
2877                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2878                                     (const uint16_t *) src, w);
2879                 src = s->checksum_buf;
2880             }
2881 #endif
2882             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2883         }
2884         av_md5_final(s->md5_ctx, md5);
2885
2886         if (!memcmp(md5, s->md5[i], 16)) {
2887             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2888             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2889             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2890         } else {
2891             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2892             print_md5(s->avctx, AV_LOG_ERROR, md5);
2893             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2894             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2895             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2896             return AVERROR_INVALIDDATA;
2897         }
2898     }
2899
2900     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2901
2902     return 0;
2903 }
2904
2905 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2906                              AVPacket *avpkt)
2907 {
2908     int ret;
2909     HEVCContext *s = avctx->priv_data;
2910
2911     if (!avpkt->size) {
2912         ret = ff_hevc_output_frame(s, data, 1);
2913         if (ret < 0)
2914             return ret;
2915
2916         *got_output = ret;
2917         return 0;
2918     }
2919
2920     s->ref = NULL;
2921     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2922     if (ret < 0)
2923         return ret;
2924
2925     /* verify the SEI checksum */
2926     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2927         s->is_md5) {
2928         ret = verify_md5(s, s->ref->frame);
2929         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2930             ff_hevc_unref_frame(s, s->ref, ~0);
2931             return ret;
2932         }
2933     }
2934     s->is_md5 = 0;
2935
2936     if (s->is_decoded) {
2937         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2938         s->is_decoded = 0;
2939     }
2940
2941     if (s->output_frame->buf[0]) {
2942         av_frame_move_ref(data, s->output_frame);
2943         *got_output = 1;
2944     }
2945
2946     return avpkt->size;
2947 }
2948
2949 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2950 {
2951     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2952     if (ret < 0)
2953         return ret;
2954
2955     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2956     if (!dst->tab_mvf_buf)
2957         goto fail;
2958     dst->tab_mvf = src->tab_mvf;
2959
2960     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2961     if (!dst->rpl_tab_buf)
2962         goto fail;
2963     dst->rpl_tab = src->rpl_tab;
2964
2965     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2966     if (!dst->rpl_buf)
2967         goto fail;
2968
2969     dst->poc        = src->poc;
2970     dst->ctb_count  = src->ctb_count;
2971     dst->window     = src->window;
2972     dst->flags      = src->flags;
2973     dst->sequence   = src->sequence;
2974
2975     return 0;
2976 fail:
2977     ff_hevc_unref_frame(s, dst, ~0);
2978     return AVERROR(ENOMEM);
2979 }
2980
2981 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2982 {
2983     HEVCContext       *s = avctx->priv_data;
2984     int i;
2985
2986     pic_arrays_free(s);
2987
2988     av_freep(&s->md5_ctx);
2989
2990     av_frame_free(&s->tmp_frame);
2991     av_frame_free(&s->output_frame);
2992
2993     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2994         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2995         av_frame_free(&s->DPB[i].frame);
2996     }
2997
2998     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2999         av_buffer_unref(&s->vps_list[i]);
3000     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3001         av_buffer_unref(&s->sps_list[i]);
3002     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3003         av_buffer_unref(&s->pps_list[i]);
3004
3005     for (i = 0; i < s->nals_allocated; i++)
3006         av_freep(&s->nals[i].rbsp_buffer);
3007     av_freep(&s->nals);
3008     s->nals_allocated = 0;
3009
3010     return 0;
3011 }
3012
3013 static av_cold int hevc_init_context(AVCodecContext *avctx)
3014 {
3015     HEVCContext *s = avctx->priv_data;
3016     int i;
3017
3018     s->avctx = avctx;
3019
3020     s->tmp_frame = av_frame_alloc();
3021     if (!s->tmp_frame)
3022         goto fail;
3023
3024     s->output_frame = av_frame_alloc();
3025     if (!s->output_frame)
3026         goto fail;
3027
3028     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3029         s->DPB[i].frame = av_frame_alloc();
3030         if (!s->DPB[i].frame)
3031             goto fail;
3032         s->DPB[i].tf.f = s->DPB[i].frame;
3033     }
3034
3035     s->max_ra = INT_MAX;
3036
3037     s->md5_ctx = av_md5_alloc();
3038     if (!s->md5_ctx)
3039         goto fail;
3040
3041     ff_bswapdsp_init(&s->bdsp);
3042
3043     s->context_initialized = 1;
3044
3045     return 0;
3046
3047 fail:
3048     hevc_decode_free(avctx);
3049     return AVERROR(ENOMEM);
3050 }
3051
3052 static int hevc_update_thread_context(AVCodecContext *dst,
3053                                       const AVCodecContext *src)
3054 {
3055     HEVCContext *s  = dst->priv_data;
3056     HEVCContext *s0 = src->priv_data;
3057     int i, ret;
3058
3059     if (!s->context_initialized) {
3060         ret = hevc_init_context(dst);
3061         if (ret < 0)
3062             return ret;
3063     }
3064
3065     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3066         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3067         if (s0->DPB[i].frame->buf[0]) {
3068             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3069             if (ret < 0)
3070                 return ret;
3071         }
3072     }
3073
3074     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3075         av_buffer_unref(&s->vps_list[i]);
3076         if (s0->vps_list[i]) {
3077             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3078             if (!s->vps_list[i])
3079                 return AVERROR(ENOMEM);
3080         }
3081     }
3082
3083     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3084         av_buffer_unref(&s->sps_list[i]);
3085         if (s0->sps_list[i]) {
3086             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3087             if (!s->sps_list[i])
3088                 return AVERROR(ENOMEM);
3089         }
3090     }
3091
3092     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3093         av_buffer_unref(&s->pps_list[i]);
3094         if (s0->pps_list[i]) {
3095             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3096             if (!s->pps_list[i])
3097                 return AVERROR(ENOMEM);
3098         }
3099     }
3100
3101     if (s->sps != s0->sps)
3102         ret = set_sps(s, s0->sps);
3103
3104     s->seq_decode = s0->seq_decode;
3105     s->seq_output = s0->seq_output;
3106     s->pocTid0    = s0->pocTid0;
3107     s->max_ra     = s0->max_ra;
3108
3109     s->is_nalff        = s0->is_nalff;
3110     s->nal_length_size = s0->nal_length_size;
3111
3112     if (s0->eos) {
3113         s->seq_decode = (s->seq_decode + 1) & 0xff;
3114         s->max_ra = INT_MAX;
3115     }
3116
3117     return 0;
3118 }
3119
3120 static int hevc_decode_extradata(HEVCContext *s)
3121 {
3122     AVCodecContext *avctx = s->avctx;
3123     GetByteContext gb;
3124     int ret;
3125
3126     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3127
3128     if (avctx->extradata_size > 3 &&
3129         (avctx->extradata[0] || avctx->extradata[1] ||
3130          avctx->extradata[2] > 1)) {
3131         /* It seems the extradata is encoded as hvcC format.
3132          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3133          * is finalized. When finalized, configurationVersion will be 1 and we
3134          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3135         int i, j, num_arrays, nal_len_size;
3136
3137         s->is_nalff = 1;
3138
3139         bytestream2_skip(&gb, 21);
3140         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3141         num_arrays   = bytestream2_get_byte(&gb);
3142
3143         /* nal units in the hvcC always have length coded with 2 bytes,
3144          * so put a fake nal_length_size = 2 while parsing them */
3145         s->nal_length_size = 2;
3146
3147         /* Decode nal units from hvcC. */
3148         for (i = 0; i < num_arrays; i++) {
3149             int type = bytestream2_get_byte(&gb) & 0x3f;
3150             int cnt  = bytestream2_get_be16(&gb);
3151
3152             for (j = 0; j < cnt; j++) {
3153                 // +2 for the nal size field
3154                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3155                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3156                     av_log(s->avctx, AV_LOG_ERROR,
3157                            "Invalid NAL unit size in extradata.\n");
3158                     return AVERROR_INVALIDDATA;
3159                 }
3160
3161                 ret = decode_nal_units(s, gb.buffer, nalsize);
3162                 if (ret < 0) {
3163                     av_log(avctx, AV_LOG_ERROR,
3164                            "Decoding nal unit %d %d from hvcC failed\n",
3165                            type, i);
3166                     return ret;
3167                 }
3168                 bytestream2_skip(&gb, nalsize);
3169             }
3170         }
3171
3172         /* Now store right nal length size, that will be used to parse
3173          * all other nals */
3174         s->nal_length_size = nal_len_size;
3175     } else {
3176         s->is_nalff = 0;
3177         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3178         if (ret < 0)
3179             return ret;
3180     }
3181     return 0;
3182 }
3183
3184 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3185 {
3186     HEVCContext *s = avctx->priv_data;
3187     int ret;
3188
3189     ff_init_cabac_states();
3190
3191     avctx->internal->allocate_progress = 1;
3192
3193     ret = hevc_init_context(avctx);
3194     if (ret < 0)
3195         return ret;
3196
3197     if (avctx->extradata_size > 0 && avctx->extradata) {
3198         ret = hevc_decode_extradata(s);
3199         if (ret < 0) {
3200             hevc_decode_free(avctx);
3201             return ret;
3202         }
3203     }
3204
3205     return 0;
3206 }
3207
3208 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3209 {
3210     HEVCContext *s = avctx->priv_data;
3211     int ret;
3212
3213     memset(s, 0, sizeof(*s));
3214
3215     ret = hevc_init_context(avctx);
3216     if (ret < 0)
3217         return ret;
3218
3219     return 0;
3220 }
3221
3222 static void hevc_decode_flush(AVCodecContext *avctx)
3223 {
3224     HEVCContext *s = avctx->priv_data;
3225     ff_hevc_flush_dpb(s);
3226     s->max_ra = INT_MAX;
3227 }
3228
/* Byte offset of member x inside HEVCContext; used by the option table. */
#define OFFSET(x) offsetof(HEVCContext, x)
/* Option flags shared by the decoder options: decoding + video parameter. */
#define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3231
3232 static const AVProfile profiles[] = {
3233     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3234     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3235     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3236     { FF_PROFILE_UNKNOWN },
3237 };
3238
3239 static const AVOption options[] = {
3240     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3241         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3242     { NULL },
3243 };
3244
/* AVClass of the decoder's private context; it exposes the `options`
 * table above and is referenced by the codec descriptor's .priv_class. */
static const AVClass hevc_decoder_class = {
    .class_name = "HEVC decoder",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};
3251
/* Codec descriptor of the HEVC decoder. It wires up the init, close,
 * decode, flush and thread-context callbacks defined in this file, sizes
 * the private data as one HEVCContext, and advertises the DR1, delay and
 * frame-threading capabilities. */
AVCodec ff_hevc_decoder = {
    .name                  = "hevc",
    .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_HEVC,
    .priv_data_size        = sizeof(HEVCContext),
    .priv_class            = &hevc_decoder_class,
    .init                  = hevc_decode_init,
    .close                 = hevc_decode_free,
    .decode                = hevc_decode_frame,
    .flush                 = hevc_decode_flush,
    /* callbacks that copy state between codec contexts (threading) */
    .update_thread_context = hevc_update_thread_context,
    .init_thread_copy      = hevc_init_thread_copy,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
                             CODEC_CAP_FRAME_THREADS,
    .profiles              = NULL_IF_CONFIG_SMALL(profiles),
};