]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c
hevc: check slice address length
[ffmpeg] / libavcodec / hevc.c
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/md5.h"
31 #include "libavutil/opt.h"
32 #include "libavutil/pixdesc.h"
33 #include "libavutil/stereo3d.h"
34
35 #include "bswapdsp.h"
36 #include "bytestream.h"
37 #include "cabac_functions.h"
38 #include "golomb.h"
39 #include "hevc.h"
40
/*
 * Extra samples used by the quarter-pel interpolation, one entry per
 * table index 0..3 (presumably the fractional sample position -- confirm
 * against the users of these tables).
 * ff_hevc_qpel_extra[] is the per-index total of the "before" and "after"
 * amounts.
 */
const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
const uint8_t ff_hevc_qpel_extra_after[4] = { 0, 3, 4, 4 };
const uint8_t ff_hevc_qpel_extra[4] = { 0 + 0, 3 + 3, 3 + 4, 2 + 4 };
44
/* Trivial scan of a single position. */
static const uint8_t scan_1x1[1] = { 0 };

/* Horizontal (row by row) scan of a 2x2 grid: x and y of each scan index. */
static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };

/* Horizontal (row by row) scan of a 4x4 grid: x and y of each scan index. */
static const uint8_t horiz_scan4x4_x[16] = {
    0, 1, 2, 3,   0, 1, 2, 3,
    0, 1, 2, 3,   0, 1, 2, 3,
};

static const uint8_t horiz_scan4x4_y[16] = {
    0, 0, 0, 0,   1, 1, 1, 1,
    2, 2, 2, 2,   3, 3, 3, 3,
};

/*
 * Inverse horizontal scan of an 8x8 grid, indexed as [y][x].
 * The grid is walked one 4x4 quadrant at a time (top-left, top-right,
 * bottom-left, bottom-right), row by row inside each quadrant.
 */
static const uint8_t horiz_scan8x8_inv[8][8] = {
    /* upper quadrants */
    {  0,  1,  2,  3,   16, 17, 18, 19, },
    {  4,  5,  6,  7,   20, 21, 22, 23, },
    {  8,  9, 10, 11,   24, 25, 26, 27, },
    { 12, 13, 14, 15,   28, 29, 30, 31, },
    /* lower quadrants */
    { 32, 33, 34, 35,   48, 49, 50, 51, },
    { 36, 37, 38, 39,   52, 53, 54, 55, },
    { 40, 41, 42, 43,   56, 57, 58, 59, },
    { 44, 45, 46, 47,   60, 61, 62, 63, },
};
75
/* Up-right diagonal scan of a 2x2 grid: x and y of each scan index. */
static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };

/* Inverse of the 2x2 diagonal scan, indexed as [y][x]. */
static const uint8_t diag_scan2x2_inv[2][2] = {
    { 0, 2, },
    { 1, 3, },
};

/*
 * Up-right diagonal scan of a 4x4 grid: x coordinate of each scan index.
 * One source line per anti-diagonal (x + y constant).
 */
const uint8_t ff_hevc_diag_scan4x4_x[16] = {
    0,
    0, 1,
    0, 1, 2,
    0, 1, 2, 3,
    1, 2, 3,
    2, 3,
    3,
};

/* Matching y coordinate of each scan index, same layout as above. */
const uint8_t ff_hevc_diag_scan4x4_y[16] = {
    0,
    1, 0,
    2, 1, 0,
    3, 2, 1, 0,
    3, 2, 1,
    3, 2,
    3,
};

/* Inverse of the 4x4 diagonal scan, indexed as [y][x]. */
static const uint8_t diag_scan4x4_inv[4][4] = {
    { 0,  2,  5,  9, },
    { 1,  4,  8, 12, },
    { 3,  7, 11, 14, },
    { 6, 10, 13, 15, },
};
105
/*
 * Up-right diagonal scan of an 8x8 grid: x coordinate of each scan index.
 * One source line per anti-diagonal (x + y constant).
 */
const uint8_t ff_hevc_diag_scan8x8_x[64] = {
    0,
    0, 1,
    0, 1, 2,
    0, 1, 2, 3,
    0, 1, 2, 3, 4,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5, 6,
    0, 1, 2, 3, 4, 5, 6, 7,
    1, 2, 3, 4, 5, 6, 7,
    2, 3, 4, 5, 6, 7,
    3, 4, 5, 6, 7,
    4, 5, 6, 7,
    5, 6, 7,
    6, 7,
    7,
};

/* Matching y coordinate of each scan index, same layout as above. */
const uint8_t ff_hevc_diag_scan8x8_y[64] = {
    0,
    1, 0,
    2, 1, 0,
    3, 2, 1, 0,
    4, 3, 2, 1, 0,
    5, 4, 3, 2, 1, 0,
    6, 5, 4, 3, 2, 1, 0,
    7, 6, 5, 4, 3, 2, 1, 0,
    7, 6, 5, 4, 3, 2, 1,
    7, 6, 5, 4, 3, 2,
    7, 6, 5, 4, 3,
    7, 6, 5, 4,
    7, 6, 5,
    7, 6,
    7,
};

/* Inverse of the 8x8 diagonal scan, indexed as [y][x]. */
static const uint8_t diag_scan8x8_inv[8][8] = {
    {  0,  2,  5,  9, 14, 20, 27, 35, },
    {  1,  4,  8, 13, 19, 26, 34, 42, },
    {  3,  7, 12, 18, 25, 33, 41, 48, },
    {  6, 11, 17, 24, 32, 40, 47, 53, },
    { 10, 16, 23, 31, 39, 46, 52, 57, },
    { 15, 22, 30, 38, 45, 51, 56, 60, },
    { 21, 29, 37, 44, 50, 55, 59, 62, },
    { 28, 36, 43, 49, 54, 58, 61, 63, },
};
154
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
159
160 /**
161  * Section 5.7
162  */
163
164 /* free everything allocated  by pic_arrays_init() */
165 static void pic_arrays_free(HEVCContext *s)
166 {
167     av_freep(&s->sao);
168     av_freep(&s->deblock);
169
170     av_freep(&s->skip_flag);
171     av_freep(&s->tab_ct_depth);
172
173     av_freep(&s->tab_ipm);
174     av_freep(&s->cbf_luma);
175     av_freep(&s->is_pcm);
176
177     av_freep(&s->qp_y_tab);
178     av_freep(&s->tab_slice_address);
179     av_freep(&s->filter_slice_edges);
180
181     av_freep(&s->horizontal_bs);
182     av_freep(&s->vertical_bs);
183
184     av_buffer_pool_uninit(&s->tab_mvf_pool);
185     av_buffer_pool_uninit(&s->rpl_tab_pool);
186 }
187
188 /* allocate arrays that depend on frame dimensions */
189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
190 {
191     int log2_min_cb_size = sps->log2_min_cb_size;
192     int width            = sps->width;
193     int height           = sps->height;
194     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
195                            ((height >> log2_min_cb_size) + 1);
196     int ctb_count        = sps->ctb_width * sps->ctb_height;
197     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
198
199     s->bs_width  = width  >> 3;
200     s->bs_height = height >> 3;
201
202     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
203     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
204     if (!s->sao || !s->deblock)
205         goto fail;
206
207     s->skip_flag    = av_malloc(pic_size_in_ctb);
208     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
209     if (!s->skip_flag || !s->tab_ct_depth)
210         goto fail;
211
212     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
213     s->tab_ipm  = av_mallocz(min_pu_size);
214     s->is_pcm   = av_malloc(min_pu_size);
215     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
216         goto fail;
217
218     s->filter_slice_edges = av_malloc(ctb_count);
219     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
220                                       sizeof(*s->tab_slice_address));
221     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
222                                       sizeof(*s->qp_y_tab));
223     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
224         goto fail;
225
226     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
227     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
228     if (!s->horizontal_bs || !s->vertical_bs)
229         goto fail;
230
231     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
232                                           av_buffer_alloc);
233     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
234                                           av_buffer_allocz);
235     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
236         goto fail;
237
238     return 0;
239
240 fail:
241     pic_arrays_free(s);
242     return AVERROR(ENOMEM);
243 }
244
245 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
246 {
247     int i = 0;
248     int j = 0;
249     uint8_t luma_weight_l0_flag[16];
250     uint8_t chroma_weight_l0_flag[16];
251     uint8_t luma_weight_l1_flag[16];
252     uint8_t chroma_weight_l1_flag[16];
253
254     s->sh.luma_log2_weight_denom = av_clip(get_ue_golomb_long(gb), 0, 7);
255     if (s->sps->chroma_format_idc != 0) {
256         int delta = get_se_golomb(gb);
257         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
258     }
259
260     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
261         luma_weight_l0_flag[i] = get_bits1(gb);
262         if (!luma_weight_l0_flag[i]) {
263             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
264             s->sh.luma_offset_l0[i] = 0;
265         }
266     }
267     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
268         for (i = 0; i < s->sh.nb_refs[L0]; i++)
269             chroma_weight_l0_flag[i] = get_bits1(gb);
270     } else {
271         for (i = 0; i < s->sh.nb_refs[L0]; i++)
272             chroma_weight_l0_flag[i] = 0;
273     }
274     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
275         if (luma_weight_l0_flag[i]) {
276             int delta_luma_weight_l0 = get_se_golomb(gb);
277             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
278             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
279         }
280         if (chroma_weight_l0_flag[i]) {
281             for (j = 0; j < 2; j++) {
282                 int delta_chroma_weight_l0 = get_se_golomb(gb);
283                 int delta_chroma_offset_l0 = get_se_golomb(gb);
284                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
285                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
286                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
287             }
288         } else {
289             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
290             s->sh.chroma_offset_l0[i][0] = 0;
291             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
292             s->sh.chroma_offset_l0[i][1] = 0;
293         }
294     }
295     if (s->sh.slice_type == B_SLICE) {
296         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
297             luma_weight_l1_flag[i] = get_bits1(gb);
298             if (!luma_weight_l1_flag[i]) {
299                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
300                 s->sh.luma_offset_l1[i] = 0;
301             }
302         }
303         if (s->sps->chroma_format_idc != 0) {
304             for (i = 0; i < s->sh.nb_refs[L1]; i++)
305                 chroma_weight_l1_flag[i] = get_bits1(gb);
306         } else {
307             for (i = 0; i < s->sh.nb_refs[L1]; i++)
308                 chroma_weight_l1_flag[i] = 0;
309         }
310         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
311             if (luma_weight_l1_flag[i]) {
312                 int delta_luma_weight_l1 = get_se_golomb(gb);
313                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
314                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
315             }
316             if (chroma_weight_l1_flag[i]) {
317                 for (j = 0; j < 2; j++) {
318                     int delta_chroma_weight_l1 = get_se_golomb(gb);
319                     int delta_chroma_offset_l1 = get_se_golomb(gb);
320                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
321                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
322                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
323                 }
324             } else {
325                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
326                 s->sh.chroma_offset_l1[i][0] = 0;
327                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
328                 s->sh.chroma_offset_l1[i][1] = 0;
329             }
330         }
331     }
332 }
333
334 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
335 {
336     const HEVCSPS *sps = s->sps;
337     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
338     int prev_delta_msb = 0;
339     unsigned int nb_sps = 0, nb_sh;
340     int i;
341
342     rps->nb_refs = 0;
343     if (!sps->long_term_ref_pics_present_flag)
344         return 0;
345
346     if (sps->num_long_term_ref_pics_sps > 0)
347         nb_sps = get_ue_golomb_long(gb);
348     nb_sh = get_ue_golomb_long(gb);
349
350     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
351         return AVERROR_INVALIDDATA;
352
353     rps->nb_refs = nb_sh + nb_sps;
354
355     for (i = 0; i < rps->nb_refs; i++) {
356         uint8_t delta_poc_msb_present;
357
358         if (i < nb_sps) {
359             uint8_t lt_idx_sps = 0;
360
361             if (sps->num_long_term_ref_pics_sps > 1)
362                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
363
364             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
365             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
366         } else {
367             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
368             rps->used[i] = get_bits1(gb);
369         }
370
371         delta_poc_msb_present = get_bits1(gb);
372         if (delta_poc_msb_present) {
373             int delta = get_ue_golomb_long(gb);
374
375             if (i && i != nb_sps)
376                 delta += prev_delta_msb;
377
378             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
379             prev_delta_msb = delta;
380         }
381     }
382
383     return 0;
384 }
385
386 static void export_stream_params(AVCodecContext *avctx,
387                                  const HEVCContext *s, const HEVCSPS *sps)
388 {
389     const HEVCVPS *vps = (const HEVCVPS*)s->vps_list[sps->vps_id]->data;
390     unsigned int num = 0, den = 0;
391
392     avctx->pix_fmt             = sps->pix_fmt;
393     avctx->coded_width         = sps->width;
394     avctx->coded_height        = sps->height;
395     avctx->width               = sps->output_width;
396     avctx->height              = sps->output_height;
397     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
398     avctx->profile             = sps->ptl.general_ptl.profile_idc;
399     avctx->level               = sps->ptl.general_ptl.level_idc;
400
401     ff_set_sar(avctx, sps->vui.sar);
402
403     if (sps->vui.video_signal_type_present_flag)
404         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
405                                                             : AVCOL_RANGE_MPEG;
406     else
407         avctx->color_range = AVCOL_RANGE_MPEG;
408
409     if (sps->vui.colour_description_present_flag) {
410         avctx->color_primaries = sps->vui.colour_primaries;
411         avctx->color_trc       = sps->vui.transfer_characteristic;
412         avctx->colorspace      = sps->vui.matrix_coeffs;
413     } else {
414         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
415         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
416         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
417     }
418
419     if (vps->vps_timing_info_present_flag) {
420         num = vps->vps_num_units_in_tick;
421         den = vps->vps_time_scale;
422     } else if (sps->vui.vui_timing_info_present_flag) {
423         num = sps->vui.vui_num_units_in_tick;
424         den = sps->vui.vui_time_scale;
425     }
426
427     if (num != 0 && den != 0)
428         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
429                   num, den, 1 << 30);
430 }
431
432 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
433 {
434     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL)
435     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
436     int ret;
437
438     export_stream_params(s->avctx, s, sps);
439
440     pic_arrays_free(s);
441     ret = pic_arrays_init(s, sps);
442     if (ret < 0)
443         goto fail;
444
445     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
446 #if CONFIG_HEVC_DXVA2_HWACCEL
447         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
448 #endif
449 #if CONFIG_HEVC_D3D11VA_HWACCEL
450         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
451 #endif
452     }
453
454     *fmt++ = sps->pix_fmt;
455     *fmt = AV_PIX_FMT_NONE;
456
457     ret = ff_get_format(s->avctx, pix_fmts);
458     if (ret < 0)
459         goto fail;
460     s->avctx->pix_fmt = ret;
461
462     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
463     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
464     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
465
466     if (sps->sao_enabled && !s->avctx->hwaccel) {
467         av_frame_unref(s->tmp_frame);
468         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
469         if (ret < 0)
470             goto fail;
471         s->frame = s->tmp_frame;
472     }
473
474     s->sps = sps;
475     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
476
477     return 0;
478
479 fail:
480     pic_arrays_free(s);
481     s->sps = NULL;
482     return ret;
483 }
484
485 static int hls_slice_header(HEVCContext *s)
486 {
487     GetBitContext *gb = &s->HEVClc.gb;
488     SliceHeader *sh   = &s->sh;
489     int i, ret;
490
491     // Coded parameters
492     sh->first_slice_in_pic_flag = get_bits1(gb);
493     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
494         s->seq_decode = (s->seq_decode + 1) & 0xff;
495         s->max_ra     = INT_MAX;
496         if (IS_IDR(s))
497             ff_hevc_clear_refs(s);
498     }
499     if (IS_IRAP(s))
500         sh->no_output_of_prior_pics_flag = get_bits1(gb);
501
502     sh->pps_id = get_ue_golomb_long(gb);
503     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
504         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
505         return AVERROR_INVALIDDATA;
506     }
507     if (!sh->first_slice_in_pic_flag &&
508         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
509         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
510         return AVERROR_INVALIDDATA;
511     }
512     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
513
514     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
515         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
516
517         ff_hevc_clear_refs(s);
518         ret = set_sps(s, s->sps);
519         if (ret < 0)
520             return ret;
521
522         s->seq_decode = (s->seq_decode + 1) & 0xff;
523         s->max_ra     = INT_MAX;
524     }
525
526     sh->dependent_slice_segment_flag = 0;
527     if (!sh->first_slice_in_pic_flag) {
528         int slice_address_length;
529
530         if (s->pps->dependent_slice_segments_enabled_flag)
531             sh->dependent_slice_segment_flag = get_bits1(gb);
532
533         slice_address_length = av_ceil_log2(s->sps->ctb_width *
534                                             s->sps->ctb_height);
535         sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
536         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
537             av_log(s->avctx, AV_LOG_ERROR,
538                    "Invalid slice segment address: %u.\n",
539                    sh->slice_segment_addr);
540             return AVERROR_INVALIDDATA;
541         }
542
543         if (!sh->dependent_slice_segment_flag) {
544             sh->slice_addr = sh->slice_segment_addr;
545             s->slice_idx++;
546         }
547     } else {
548         sh->slice_segment_addr = sh->slice_addr = 0;
549         s->slice_idx           = 0;
550         s->slice_initialized   = 0;
551     }
552
553     if (!sh->dependent_slice_segment_flag) {
554         s->slice_initialized = 0;
555
556         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
557             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
558
559         sh->slice_type = get_ue_golomb_long(gb);
560         if (!(sh->slice_type == I_SLICE ||
561               sh->slice_type == P_SLICE ||
562               sh->slice_type == B_SLICE)) {
563             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
564                    sh->slice_type);
565             return AVERROR_INVALIDDATA;
566         }
567         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
568             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
569             return AVERROR_INVALIDDATA;
570         }
571
572         // when flag is not present, picture is inferred to be output
573         sh->pic_output_flag = 1;
574         if (s->pps->output_flag_present_flag)
575             sh->pic_output_flag = get_bits1(gb);
576
577         if (s->sps->separate_colour_plane_flag)
578             sh->colour_plane_id = get_bits(gb, 2);
579
580         if (!IS_IDR(s)) {
581             int poc;
582
583             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
584             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
585             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
586                 av_log(s->avctx, AV_LOG_WARNING,
587                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
588                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
589                     return AVERROR_INVALIDDATA;
590                 poc = s->poc;
591             }
592             s->poc = poc;
593
594             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
595             if (!sh->short_term_ref_pic_set_sps_flag) {
596                 int pos = get_bits_left(gb);
597                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->sps, 1);
598                 if (ret < 0)
599                     return ret;
600
601                 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
602                 sh->short_term_rps = &sh->slice_rps;
603             } else {
604                 int numbits, rps_idx;
605
606                 if (!s->sps->nb_st_rps) {
607                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
608                     return AVERROR_INVALIDDATA;
609                 }
610
611                 numbits = av_ceil_log2(s->sps->nb_st_rps);
612                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
613                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
614             }
615
616             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
617             if (ret < 0) {
618                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
619                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
620                     return AVERROR_INVALIDDATA;
621             }
622
623             if (s->sps->sps_temporal_mvp_enabled_flag)
624                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
625             else
626                 sh->slice_temporal_mvp_enabled_flag = 0;
627         } else {
628             s->sh.short_term_rps = NULL;
629             s->poc               = 0;
630         }
631
632         /* 8.3.1 */
633         if (s->temporal_id == 0 &&
634             s->nal_unit_type != NAL_TRAIL_N &&
635             s->nal_unit_type != NAL_TSA_N   &&
636             s->nal_unit_type != NAL_STSA_N  &&
637             s->nal_unit_type != NAL_RADL_N  &&
638             s->nal_unit_type != NAL_RADL_R  &&
639             s->nal_unit_type != NAL_RASL_N  &&
640             s->nal_unit_type != NAL_RASL_R)
641             s->pocTid0 = s->poc;
642
643         if (s->sps->sao_enabled) {
644             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
645             sh->slice_sample_adaptive_offset_flag[1] =
646             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
647         } else {
648             sh->slice_sample_adaptive_offset_flag[0] = 0;
649             sh->slice_sample_adaptive_offset_flag[1] = 0;
650             sh->slice_sample_adaptive_offset_flag[2] = 0;
651         }
652
653         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
654         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
655             int nb_refs;
656
657             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
658             if (sh->slice_type == B_SLICE)
659                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
660
661             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
662                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
663                 if (sh->slice_type == B_SLICE)
664                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
665             }
666             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
667                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
668                        sh->nb_refs[L0], sh->nb_refs[L1]);
669                 return AVERROR_INVALIDDATA;
670             }
671
672             sh->rpl_modification_flag[0] = 0;
673             sh->rpl_modification_flag[1] = 0;
674             nb_refs = ff_hevc_frame_nb_refs(s);
675             if (!nb_refs) {
676                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
677                 return AVERROR_INVALIDDATA;
678             }
679
680             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
681                 sh->rpl_modification_flag[0] = get_bits1(gb);
682                 if (sh->rpl_modification_flag[0]) {
683                     for (i = 0; i < sh->nb_refs[L0]; i++)
684                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
685                 }
686
687                 if (sh->slice_type == B_SLICE) {
688                     sh->rpl_modification_flag[1] = get_bits1(gb);
689                     if (sh->rpl_modification_flag[1] == 1)
690                         for (i = 0; i < sh->nb_refs[L1]; i++)
691                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
692                 }
693             }
694
695             if (sh->slice_type == B_SLICE)
696                 sh->mvd_l1_zero_flag = get_bits1(gb);
697
698             if (s->pps->cabac_init_present_flag)
699                 sh->cabac_init_flag = get_bits1(gb);
700             else
701                 sh->cabac_init_flag = 0;
702
703             sh->collocated_ref_idx = 0;
704             if (sh->slice_temporal_mvp_enabled_flag) {
705                 sh->collocated_list = L0;
706                 if (sh->slice_type == B_SLICE)
707                     sh->collocated_list = !get_bits1(gb);
708
709                 if (sh->nb_refs[sh->collocated_list] > 1) {
710                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
711                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
712                         av_log(s->avctx, AV_LOG_ERROR,
713                                "Invalid collocated_ref_idx: %d.\n",
714                                sh->collocated_ref_idx);
715                         return AVERROR_INVALIDDATA;
716                     }
717                 }
718             }
719
720             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
721                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
722                 pred_weight_table(s, gb);
723             }
724
725             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
726             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
727                 av_log(s->avctx, AV_LOG_ERROR,
728                        "Invalid number of merging MVP candidates: %d.\n",
729                        sh->max_num_merge_cand);
730                 return AVERROR_INVALIDDATA;
731             }
732         }
733
734         sh->slice_qp_delta = get_se_golomb(gb);
735
736         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
737             sh->slice_cb_qp_offset = get_se_golomb(gb);
738             sh->slice_cr_qp_offset = get_se_golomb(gb);
739         } else {
740             sh->slice_cb_qp_offset = 0;
741             sh->slice_cr_qp_offset = 0;
742         }
743
744         if (s->pps->deblocking_filter_control_present_flag) {
745             int deblocking_filter_override_flag = 0;
746
747             if (s->pps->deblocking_filter_override_enabled_flag)
748                 deblocking_filter_override_flag = get_bits1(gb);
749
750             if (deblocking_filter_override_flag) {
751                 sh->disable_deblocking_filter_flag = get_bits1(gb);
752                 if (!sh->disable_deblocking_filter_flag) {
753                     sh->beta_offset = get_se_golomb(gb) * 2;
754                     sh->tc_offset   = get_se_golomb(gb) * 2;
755                 }
756             } else {
757                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
758                 sh->beta_offset                    = s->pps->beta_offset;
759                 sh->tc_offset                      = s->pps->tc_offset;
760             }
761         } else {
762             sh->disable_deblocking_filter_flag = 0;
763             sh->beta_offset                    = 0;
764             sh->tc_offset                      = 0;
765         }
766
767         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
768             (sh->slice_sample_adaptive_offset_flag[0] ||
769              sh->slice_sample_adaptive_offset_flag[1] ||
770              !sh->disable_deblocking_filter_flag)) {
771             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
772         } else {
773             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
774         }
775     } else if (!s->slice_initialized) {
776         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
777         return AVERROR_INVALIDDATA;
778     }
779
780     sh->num_entry_point_offsets = 0;
781     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
782         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
783         if (sh->num_entry_point_offsets > 0) {
784             int offset_len = get_ue_golomb_long(gb) + 1;
785
786             for (i = 0; i < sh->num_entry_point_offsets; i++)
787                 skip_bits(gb, offset_len);
788         }
789     }
790
791     if (s->pps->slice_header_extension_present_flag) {
792         unsigned int length = get_ue_golomb_long(gb);
793         for (i = 0; i < length; i++)
794             skip_bits(gb, 8);  // slice_header_extension_data_byte
795     }
796
797     // Inferred parameters
798     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
799     if (sh->slice_qp > 51 ||
800         sh->slice_qp < -s->sps->qp_bd_offset) {
801         av_log(s->avctx, AV_LOG_ERROR,
802                "The slice_qp %d is outside the valid range "
803                "[%d, 51].\n",
804                sh->slice_qp,
805                -s->sps->qp_bd_offset);
806         return AVERROR_INVALIDDATA;
807     }
808
809     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
810
811     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
812         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
813         return AVERROR_INVALIDDATA;
814     }
815
816     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
817
818     if (!s->pps->cu_qp_delta_enabled_flag)
819         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
820                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
821
822     s->slice_initialized = 1;
823
824     return 0;
825 }
826
827 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
828
829 #define SET_SAO(elem, value)                            \
830 do {                                                    \
831     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
832         sao->elem = value;                              \
833     else if (sao_merge_left_flag)                       \
834         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
835     else if (sao_merge_up_flag)                         \
836         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
837     else                                                \
838         sao->elem = 0;                                  \
839 } while (0)
840
841 static void hls_sao_param(HEVCContext *s, int rx, int ry)
842 {
843     HEVCLocalContext *lc    = &s->HEVClc;
844     int sao_merge_left_flag = 0;
845     int sao_merge_up_flag   = 0;
846     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
847     SAOParams *sao          = &CTB(s->sao, rx, ry);
848     int c_idx, i;
849
850     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
851         s->sh.slice_sample_adaptive_offset_flag[1]) {
852         if (rx > 0) {
853             if (lc->ctb_left_flag)
854                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
855         }
856         if (ry > 0 && !sao_merge_left_flag) {
857             if (lc->ctb_up_flag)
858                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
859         }
860     }
861
862     for (c_idx = 0; c_idx < 3; c_idx++) {
863         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
864             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
865             continue;
866         }
867
868         if (c_idx == 2) {
869             sao->type_idx[2] = sao->type_idx[1];
870             sao->eo_class[2] = sao->eo_class[1];
871         } else {
872             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
873         }
874
875         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
876             continue;
877
878         for (i = 0; i < 4; i++)
879             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
880
881         if (sao->type_idx[c_idx] == SAO_BAND) {
882             for (i = 0; i < 4; i++) {
883                 if (sao->offset_abs[c_idx][i]) {
884                     SET_SAO(offset_sign[c_idx][i],
885                             ff_hevc_sao_offset_sign_decode(s));
886                 } else {
887                     sao->offset_sign[c_idx][i] = 0;
888                 }
889             }
890             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
891         } else if (c_idx != 2) {
892             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
893         }
894
895         // Inferred parameters
896         sao->offset_val[c_idx][0] = 0;
897         for (i = 0; i < 4; i++) {
898             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
899             if (sao->type_idx[c_idx] == SAO_EDGE) {
900                 if (i > 1)
901                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
902             } else if (sao->offset_sign[c_idx][i]) {
903                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
904             }
905         }
906     }
907 }
908
909 #undef SET_SAO
910 #undef CTB
911
912 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
913                                 int log2_trafo_size, enum ScanType scan_idx,
914                                 int c_idx)
915 {
916 #define GET_COORD(offset, n)                                    \
917     do {                                                        \
918         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
919         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
920     } while (0)
921     HEVCLocalContext *lc    = &s->HEVClc;
922     int transform_skip_flag = 0;
923
924     int last_significant_coeff_x, last_significant_coeff_y;
925     int last_scan_pos;
926     int n_end;
927     int num_coeff    = 0;
928     int greater1_ctx = 1;
929
930     int num_last_subset;
931     int x_cg_last_sig, y_cg_last_sig;
932
933     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
934
935     ptrdiff_t stride = s->frame->linesize[c_idx];
936     int hshift       = s->sps->hshift[c_idx];
937     int vshift       = s->sps->vshift[c_idx];
938     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
939                                               ((x0 >> hshift) << s->sps->pixel_shift)];
940     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
941     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
942
943     int trafo_size = 1 << log2_trafo_size;
944     int i, qp, shift, add, scale, scale_m;
945     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
946     const uint8_t *scale_matrix;
947     uint8_t dc_scale;
948
949     // Derive QP for dequant
950     if (!lc->cu.cu_transquant_bypass_flag) {
951         static const int qp_c[] = {
952             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
953         };
954
955         static const uint8_t rem6[51 + 2 * 6 + 1] = {
956             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
957             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
958             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
959         };
960
961         static const uint8_t div6[51 + 2 * 6 + 1] = {
962             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
963             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
964             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
965         };
966         int qp_y = lc->qp_y;
967
968         if (c_idx == 0) {
969             qp = qp_y + s->sps->qp_bd_offset;
970         } else {
971             int qp_i, offset;
972
973             if (c_idx == 1)
974                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
975             else
976                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
977
978             qp_i = av_clip(qp_y + offset, -s->sps->qp_bd_offset, 57);
979             if (qp_i < 30)
980                 qp = qp_i;
981             else if (qp_i > 43)
982                 qp = qp_i - 6;
983             else
984                 qp = qp_c[qp_i - 30];
985
986             qp += s->sps->qp_bd_offset;
987         }
988
989         shift    = s->sps->bit_depth + log2_trafo_size - 5;
990         add      = 1 << (shift - 1);
991         scale    = level_scale[rem6[qp]] << (div6[qp]);
992         scale_m  = 16; // default when no custom scaling lists.
993         dc_scale = 16;
994
995         if (s->sps->scaling_list_enable_flag) {
996             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
997                                     &s->pps->scaling_list : &s->sps->scaling_list;
998             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
999
1000             if (log2_trafo_size != 5)
1001                 matrix_id = 3 * matrix_id + c_idx;
1002
1003             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
1004             if (log2_trafo_size >= 4)
1005                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
1006         }
1007     }
1008
1009     if (s->pps->transform_skip_enabled_flag &&
1010         !lc->cu.cu_transquant_bypass_flag   &&
1011         log2_trafo_size == 2) {
1012         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
1013     }
1014
1015     last_significant_coeff_x =
1016         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
1017     last_significant_coeff_y =
1018         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
1019
1020     if (last_significant_coeff_x > 3) {
1021         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
1022         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
1023                                    (2 + (last_significant_coeff_x & 1)) +
1024                                    suffix;
1025     }
1026
1027     if (last_significant_coeff_y > 3) {
1028         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1029         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1030                                    (2 + (last_significant_coeff_y & 1)) +
1031                                    suffix;
1032     }
1033
1034     if (scan_idx == SCAN_VERT)
1035         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1036
1037     x_cg_last_sig = last_significant_coeff_x >> 2;
1038     y_cg_last_sig = last_significant_coeff_y >> 2;
1039
1040     switch (scan_idx) {
1041     case SCAN_DIAG: {
1042         int last_x_c = last_significant_coeff_x & 3;
1043         int last_y_c = last_significant_coeff_y & 3;
1044
1045         scan_x_off = ff_hevc_diag_scan4x4_x;
1046         scan_y_off = ff_hevc_diag_scan4x4_y;
1047         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1048         if (trafo_size == 4) {
1049             scan_x_cg = scan_1x1;
1050             scan_y_cg = scan_1x1;
1051         } else if (trafo_size == 8) {
1052             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1053             scan_x_cg  = diag_scan2x2_x;
1054             scan_y_cg  = diag_scan2x2_y;
1055         } else if (trafo_size == 16) {
1056             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1057             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1058             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1059         } else { // trafo_size == 32
1060             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1061             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1062             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1063         }
1064         break;
1065     }
1066     case SCAN_HORIZ:
1067         scan_x_cg  = horiz_scan2x2_x;
1068         scan_y_cg  = horiz_scan2x2_y;
1069         scan_x_off = horiz_scan4x4_x;
1070         scan_y_off = horiz_scan4x4_y;
1071         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1072         break;
1073     default: //SCAN_VERT
1074         scan_x_cg  = horiz_scan2x2_y;
1075         scan_y_cg  = horiz_scan2x2_x;
1076         scan_x_off = horiz_scan4x4_y;
1077         scan_y_off = horiz_scan4x4_x;
1078         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1079         break;
1080     }
1081     num_coeff++;
1082     num_last_subset = (num_coeff - 1) >> 4;
1083
1084     for (i = num_last_subset; i >= 0; i--) {
1085         int n, m;
1086         int x_cg, y_cg, x_c, y_c;
1087         int implicit_non_zero_coeff = 0;
1088         int64_t trans_coeff_level;
1089         int prev_sig = 0;
1090         int offset   = i << 4;
1091
1092         uint8_t significant_coeff_flag_idx[16];
1093         uint8_t nb_significant_coeff_flag = 0;
1094
1095         x_cg = scan_x_cg[i];
1096         y_cg = scan_y_cg[i];
1097
1098         if (i < num_last_subset && i > 0) {
1099             int ctx_cg = 0;
1100             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1101                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1102             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1103                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1104
1105             significant_coeff_group_flag[x_cg][y_cg] =
1106                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1107             implicit_non_zero_coeff = 1;
1108         } else {
1109             significant_coeff_group_flag[x_cg][y_cg] =
1110                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1111                  (x_cg == 0 && y_cg == 0));
1112         }
1113
1114         last_scan_pos = num_coeff - offset - 1;
1115
1116         if (i == num_last_subset) {
1117             n_end                         = last_scan_pos - 1;
1118             significant_coeff_flag_idx[0] = last_scan_pos;
1119             nb_significant_coeff_flag     = 1;
1120         } else {
1121             n_end = 15;
1122         }
1123
1124         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1125             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1126         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1127             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1128
1129         for (n = n_end; n >= 0; n--) {
1130             GET_COORD(offset, n);
1131
1132             if (significant_coeff_group_flag[x_cg][y_cg] &&
1133                 (n > 0 || implicit_non_zero_coeff == 0)) {
1134                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1135                                                           log2_trafo_size,
1136                                                           scan_idx,
1137                                                           prev_sig) == 1) {
1138                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1139                     nb_significant_coeff_flag++;
1140                     implicit_non_zero_coeff = 0;
1141                 }
1142             } else {
1143                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1144                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1145                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1146                     nb_significant_coeff_flag++;
1147                 }
1148             }
1149         }
1150
1151         n_end = nb_significant_coeff_flag;
1152
1153         if (n_end) {
1154             int first_nz_pos_in_cg = 16;
1155             int last_nz_pos_in_cg = -1;
1156             int c_rice_param = 0;
1157             int first_greater1_coeff_idx = -1;
1158             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1159             uint16_t coeff_sign_flag;
1160             int sum_abs = 0;
1161             int sign_hidden = 0;
1162
1163             // initialize first elem of coeff_bas_level_greater1_flag
1164             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1165
1166             if (!(i == num_last_subset) && greater1_ctx == 0)
1167                 ctx_set++;
1168             greater1_ctx      = 1;
1169             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1170
1171             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1172                 int n_idx = significant_coeff_flag_idx[m];
1173                 int inc   = (ctx_set << 2) + greater1_ctx;
1174                 coeff_abs_level_greater1_flag[n_idx] =
1175                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1176                 if (coeff_abs_level_greater1_flag[n_idx]) {
1177                     greater1_ctx = 0;
1178                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1179                     greater1_ctx++;
1180                 }
1181
1182                 if (coeff_abs_level_greater1_flag[n_idx] &&
1183                     first_greater1_coeff_idx == -1)
1184                     first_greater1_coeff_idx = n_idx;
1185             }
1186             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1187             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1188                                  !lc->cu.cu_transquant_bypass_flag;
1189
1190             if (first_greater1_coeff_idx != -1) {
1191                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1192             }
1193             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1194                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1195             } else {
1196                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1197             }
1198
1199             for (m = 0; m < n_end; m++) {
1200                 n = significant_coeff_flag_idx[m];
1201                 GET_COORD(offset, n);
1202                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1203                 if (trans_coeff_level == ((m < 8) ?
1204                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1205                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1206
1207                     trans_coeff_level += last_coeff_abs_level_remaining;
1208                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1209                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1210                 }
1211                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1212                     sum_abs += trans_coeff_level;
1213                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1214                         trans_coeff_level = -trans_coeff_level;
1215                 }
1216                 if (coeff_sign_flag >> 15)
1217                     trans_coeff_level = -trans_coeff_level;
1218                 coeff_sign_flag <<= 1;
1219                 if (!lc->cu.cu_transquant_bypass_flag) {
1220                     if (s->sps->scaling_list_enable_flag) {
1221                         if (y_c || x_c || log2_trafo_size < 4) {
1222                             int pos;
1223                             switch (log2_trafo_size) {
1224                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1225                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1226                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1227                             default: pos = (y_c        << 2) +  x_c;
1228                             }
1229                             scale_m = scale_matrix[pos];
1230                         } else {
1231                             scale_m = dc_scale;
1232                         }
1233                     }
1234                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1235                     if(trans_coeff_level < 0) {
1236                         if((~trans_coeff_level) & 0xFffffffffff8000)
1237                             trans_coeff_level = -32768;
1238                     } else {
1239                         if (trans_coeff_level & 0xffffffffffff8000)
1240                             trans_coeff_level = 32767;
1241                     }
1242                 }
1243                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1244             }
1245         }
1246     }
1247
1248     if (lc->cu.cu_transquant_bypass_flag) {
1249         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1250     } else {
1251         if (transform_skip_flag)
1252             s->hevcdsp.transform_skip(dst, coeffs, stride);
1253         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1254                  log2_trafo_size == 2)
1255             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1256         else
1257             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1258     }
1259 }
1260
1261 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1262                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1263                               int log2_cb_size, int log2_trafo_size,
1264                               int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr)
1265 {
1266     HEVCLocalContext *lc = &s->HEVClc;
1267
1268     if (lc->cu.pred_mode == MODE_INTRA) {
1269         int trafo_size = 1 << log2_trafo_size;
1270         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1271
1272         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1273         if (log2_trafo_size > 2) {
1274             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1275             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1276             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1277             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1278         } else if (blk_idx == 3) {
1279             trafo_size = trafo_size << s->sps->hshift[1];
1280             ff_hevc_set_neighbour_available(s, xBase, yBase,
1281                                             trafo_size, trafo_size);
1282             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1283             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1284         }
1285     }
1286
1287     if (cbf_luma || cbf_cb || cbf_cr) {
1288         int scan_idx   = SCAN_DIAG;
1289         int scan_idx_c = SCAN_DIAG;
1290
1291         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1292             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1293             if (lc->tu.cu_qp_delta != 0)
1294                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1295                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1296             lc->tu.is_cu_qp_delta_coded = 1;
1297
1298             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
1299                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
1300                 av_log(s->avctx, AV_LOG_ERROR,
1301                        "The cu_qp_delta %d is outside the valid range "
1302                        "[%d, %d].\n",
1303                        lc->tu.cu_qp_delta,
1304                        -(26 + s->sps->qp_bd_offset / 2),
1305                         (25 + s->sps->qp_bd_offset / 2));
1306                 return AVERROR_INVALIDDATA;
1307             }
1308
1309             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1310         }
1311
1312         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1313             if (lc->tu.cur_intra_pred_mode >= 6 &&
1314                 lc->tu.cur_intra_pred_mode <= 14) {
1315                 scan_idx = SCAN_VERT;
1316             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1317                        lc->tu.cur_intra_pred_mode <= 30) {
1318                 scan_idx = SCAN_HORIZ;
1319             }
1320
1321             if (lc->pu.intra_pred_mode_c >=  6 &&
1322                 lc->pu.intra_pred_mode_c <= 14) {
1323                 scan_idx_c = SCAN_VERT;
1324             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1325                        lc->pu.intra_pred_mode_c <= 30) {
1326                 scan_idx_c = SCAN_HORIZ;
1327             }
1328         }
1329
1330         if (cbf_luma)
1331             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1332         if (log2_trafo_size > 2) {
1333             if (cbf_cb)
1334                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1335             if (cbf_cr)
1336                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1337         } else if (blk_idx == 3) {
1338             if (cbf_cb)
1339                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1340             if (cbf_cr)
1341                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1342         }
1343     }
1344     return 0;
1345 }
1346
1347 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1348 {
1349     int cb_size          = 1 << log2_cb_size;
1350     int log2_min_pu_size = s->sps->log2_min_pu_size;
1351
1352     int min_pu_width     = s->sps->min_pu_width;
1353     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1354     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1355     int i, j;
1356
1357     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1358         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1359             s->is_pcm[i + j * min_pu_width] = 2;
1360 }
1361
1362 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1363                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1364                               int log2_cb_size, int log2_trafo_size,
1365                               int trafo_depth, int blk_idx,
1366                               int cbf_cb, int cbf_cr)
1367 {
1368     HEVCLocalContext *lc = &s->HEVClc;
1369     uint8_t split_transform_flag;
1370     int ret;
1371
1372     if (lc->cu.intra_split_flag) {
1373         if (trafo_depth == 1)
1374             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1375     } else {
1376         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1377     }
1378
1379     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1380         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1381         trafo_depth     < lc->cu.max_trafo_depth       &&
1382         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1383         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1384     } else {
1385         int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1386                           lc->cu.pred_mode == MODE_INTER &&
1387                           lc->cu.part_mode != PART_2Nx2N &&
1388                           trafo_depth == 0;
1389
1390         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1391                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1392                                inter_split;
1393     }
1394
1395     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1396         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1397     else if (log2_trafo_size > 2 || trafo_depth == 0)
1398         cbf_cb = 0;
1399     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1400         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1401     else if (log2_trafo_size > 2 || trafo_depth == 0)
1402         cbf_cr = 0;
1403
1404     if (split_transform_flag) {
1405         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1406         const int x1 = x0 + trafo_size_split;
1407         const int y1 = y0 + trafo_size_split;
1408
1409 #define SUBDIVIDE(x, y, idx)                                                    \
1410 do {                                                                            \
1411     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1412                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1413                              cbf_cb, cbf_cr);                                   \
1414     if (ret < 0)                                                                \
1415         return ret;                                                             \
1416 } while (0)
1417
1418         SUBDIVIDE(x0, y0, 0);
1419         SUBDIVIDE(x1, y0, 1);
1420         SUBDIVIDE(x0, y1, 2);
1421         SUBDIVIDE(x1, y1, 3);
1422
1423 #undef SUBDIVIDE
1424     } else {
1425         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1426         int log2_min_tu_size = s->sps->log2_min_tb_size;
1427         int min_tu_width     = s->sps->min_tb_width;
1428         int cbf_luma         = 1;
1429
1430         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1431             cbf_cb || cbf_cr)
1432             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1433
1434         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1435                                  log2_cb_size, log2_trafo_size,
1436                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1437         if (ret < 0)
1438             return ret;
1439         // TODO: store cbf_luma somewhere else
1440         if (cbf_luma) {
1441             int i, j;
1442             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1443                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1444                     int x_tu = (x0 + j) >> log2_min_tu_size;
1445                     int y_tu = (y0 + i) >> log2_min_tu_size;
1446                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1447                 }
1448         }
1449         if (!s->sh.disable_deblocking_filter_flag) {
1450             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1451             if (s->pps->transquant_bypass_enable_flag &&
1452                 lc->cu.cu_transquant_bypass_flag)
1453                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1454         }
1455     }
1456     return 0;
1457 }
1458
1459 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1460 {
1461     //TODO: non-4:2:0 support
1462     HEVCLocalContext *lc = &s->HEVClc;
1463     GetBitContext gb;
1464     int cb_size   = 1 << log2_cb_size;
1465     int stride0   = s->frame->linesize[0];
1466     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1467     int   stride1 = s->frame->linesize[1];
1468     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1469     int   stride2 = s->frame->linesize[2];
1470     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1471
1472     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1473     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1474     int ret;
1475
1476     if (!s->sh.disable_deblocking_filter_flag)
1477         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1478
1479     ret = init_get_bits(&gb, pcm, length);
1480     if (ret < 0)
1481         return ret;
1482
1483     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1484     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1485     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1486     return 0;
1487 }
1488
1489 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1490 {
1491     HEVCLocalContext *lc = &s->HEVClc;
1492     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1493     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1494
1495     if (x)
1496         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1497     if (y)
1498         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1499
1500     switch (x) {
1501     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1502     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1503     case 0: lc->pu.mvd.x = 0;                               break;
1504     }
1505
1506     switch (y) {
1507     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1508     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1509     case 0: lc->pu.mvd.y = 0;                               break;
1510     }
1511 }
1512
1513 /**
1514  * 8.5.3.2.2.1 Luma sample interpolation process
1515  *
1516  * @param s HEVC decoding context
1517  * @param dst target buffer for block data at block position
1518  * @param dststride stride of the dst buffer
1519  * @param ref reference picture buffer at origin (0, 0)
1520  * @param mv motion vector (relative to block position) to get pixel data from
1521  * @param x_off horizontal position of block from origin (0, 0)
1522  * @param y_off vertical position of block from origin (0, 0)
1523  * @param block_w width of block
1524  * @param block_h height of block
1525  */
1526 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1527                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1528                     int block_w, int block_h)
1529 {
1530     HEVCLocalContext *lc = &s->HEVClc;
1531     uint8_t *src         = ref->data[0];
1532     ptrdiff_t srcstride  = ref->linesize[0];
1533     int pic_width        = s->sps->width;
1534     int pic_height       = s->sps->height;
1535
1536     int mx         = mv->x & 3;
1537     int my         = mv->y & 3;
1538     int extra_left = ff_hevc_qpel_extra_before[mx];
1539     int extra_top  = ff_hevc_qpel_extra_before[my];
1540
1541     x_off += mv->x >> 2;
1542     y_off += mv->y >> 2;
1543     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1544
1545     if (x_off < extra_left || y_off < extra_top ||
1546         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1547         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1548         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1549         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1550         int buf_offset = extra_top *
1551                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1552
1553         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1554                                  edge_emu_stride, srcstride,
1555                                  block_w + ff_hevc_qpel_extra[mx],
1556                                  block_h + ff_hevc_qpel_extra[my],
1557                                  x_off - extra_left, y_off - extra_top,
1558                                  pic_width, pic_height);
1559         src = lc->edge_emu_buffer + buf_offset;
1560         srcstride = edge_emu_stride;
1561     }
1562     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1563                                      block_h, lc->mc_buffer);
1564 }
1565
1566 /**
1567  * 8.5.3.2.2.2 Chroma sample interpolation process
1568  *
1569  * @param s HEVC decoding context
1570  * @param dst1 target buffer for block data at block position (U plane)
1571  * @param dst2 target buffer for block data at block position (V plane)
1572  * @param dststride stride of the dst1 and dst2 buffers
1573  * @param ref reference picture buffer at origin (0, 0)
1574  * @param mv motion vector (relative to block position) to get pixel data from
1575  * @param x_off horizontal position of block from origin (0, 0)
1576  * @param y_off vertical position of block from origin (0, 0)
1577  * @param block_w width of block
1578  * @param block_h height of block
1579  */
1580 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1581                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1582                       int x_off, int y_off, int block_w, int block_h)
1583 {
1584     HEVCLocalContext *lc = &s->HEVClc;
1585     uint8_t *src1        = ref->data[1];
1586     uint8_t *src2        = ref->data[2];
1587     ptrdiff_t src1stride = ref->linesize[1];
1588     ptrdiff_t src2stride = ref->linesize[2];
1589     int pic_width        = s->sps->width >> 1;
1590     int pic_height       = s->sps->height >> 1;
1591
1592     int mx = mv->x & 7;
1593     int my = mv->y & 7;
1594
1595     x_off += mv->x >> 3;
1596     y_off += mv->y >> 3;
1597     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1598     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1599
1600     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1601         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1602         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1603         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1604         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1605         int buf_offset1 = EPEL_EXTRA_BEFORE *
1606                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1607         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1608         int buf_offset2 = EPEL_EXTRA_BEFORE *
1609                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1610
1611         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1612                                  edge_emu_stride, src1stride,
1613                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1614                                  x_off - EPEL_EXTRA_BEFORE,
1615                                  y_off - EPEL_EXTRA_BEFORE,
1616                                  pic_width, pic_height);
1617
1618         src1 = lc->edge_emu_buffer + buf_offset1;
1619         src1stride = edge_emu_stride;
1620         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1621                                              block_w, block_h, mx, my, lc->mc_buffer);
1622
1623         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1624                                  edge_emu_stride, src2stride,
1625                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1626                                  x_off - EPEL_EXTRA_BEFORE,
1627                                  y_off - EPEL_EXTRA_BEFORE,
1628                                  pic_width, pic_height);
1629         src2 = lc->edge_emu_buffer + buf_offset2;
1630         src2stride = edge_emu_stride;
1631
1632         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1633                                              block_w, block_h, mx, my,
1634                                              lc->mc_buffer);
1635     } else {
1636         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1637                                              block_w, block_h, mx, my,
1638                                              lc->mc_buffer);
1639         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1640                                              block_w, block_h, mx, my,
1641                                              lc->mc_buffer);
1642     }
1643 }
1644
1645 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1646                                 const Mv *mv, int y0, int height)
1647 {
1648     int y = (mv->y >> 2) + y0 + height + 9;
1649     ff_thread_await_progress(&ref->tf, y, 0);
1650 }
1651
1652 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1653                                   int nPbH, int log2_cb_size, int part_idx,
1654                                   int merge_idx, MvField *mv)
1655 {
1656     HEVCLocalContext *lc             = &s->HEVClc;
1657     enum InterPredIdc inter_pred_idc = PRED_L0;
1658     int mvp_flag;
1659
1660     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1661     if (s->sh.slice_type == B_SLICE)
1662         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1663
1664     if (inter_pred_idc != PRED_L1) {
1665         if (s->sh.nb_refs[L0])
1666             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1667
1668         mv->pred_flag[0] = 1;
1669         hls_mvd_coding(s, x0, y0, 0);
1670         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1671         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1672                                  part_idx, merge_idx, mv, mvp_flag, 0);
1673         mv->mv[0].x += lc->pu.mvd.x;
1674         mv->mv[0].y += lc->pu.mvd.y;
1675     }
1676
1677     if (inter_pred_idc != PRED_L0) {
1678         if (s->sh.nb_refs[L1])
1679             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1680
1681         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1682             AV_ZERO32(&lc->pu.mvd);
1683         } else {
1684             hls_mvd_coding(s, x0, y0, 1);
1685         }
1686
1687         mv->pred_flag[1] = 1;
1688         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1689         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1690                                  part_idx, merge_idx, mv, mvp_flag, 1);
1691         mv->mv[1].x += lc->pu.mvd.x;
1692         mv->mv[1].y += lc->pu.mvd.y;
1693     }
1694 }
1695
1696 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1697                                 int nPbW, int nPbH,
1698                                 int log2_cb_size, int partIdx)
1699 {
1700 #define POS(c_idx, x, y)                                                              \
1701     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1702                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1703     HEVCLocalContext *lc = &s->HEVClc;
1704     int merge_idx = 0;
1705     struct MvField current_mv = {{{ 0 }}};
1706
1707     int min_pu_width = s->sps->min_pu_width;
1708
1709     MvField *tab_mvf = s->ref->tab_mvf;
1710     RefPicList  *refPicList = s->ref->refPicList;
1711     HEVCFrame *ref0, *ref1;
1712
1713     int tmpstride = MAX_PB_SIZE;
1714
1715     uint8_t *dst0 = POS(0, x0, y0);
1716     uint8_t *dst1 = POS(1, x0, y0);
1717     uint8_t *dst2 = POS(2, x0, y0);
1718     int log2_min_cb_size = s->sps->log2_min_cb_size;
1719     int min_cb_width     = s->sps->min_cb_width;
1720     int x_cb             = x0 >> log2_min_cb_size;
1721     int y_cb             = y0 >> log2_min_cb_size;
1722     int x_pu, y_pu;
1723     int i, j;
1724
1725     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1726
1727     if (!skip_flag)
1728         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1729
1730     if (skip_flag || lc->pu.merge_flag) {
1731         if (s->sh.max_num_merge_cand > 1)
1732             merge_idx = ff_hevc_merge_idx_decode(s);
1733         else
1734             merge_idx = 0;
1735
1736         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1737                                    partIdx, merge_idx, &current_mv);
1738     } else {
1739         hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1740                               partIdx, merge_idx, &current_mv);
1741     }
1742
1743     x_pu = x0 >> s->sps->log2_min_pu_size;
1744     y_pu = y0 >> s->sps->log2_min_pu_size;
1745
1746     for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1747         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1748             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1749
1750     if (current_mv.pred_flag[0]) {
1751         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1752         if (!ref0)
1753             return;
1754         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1755     }
1756     if (current_mv.pred_flag[1]) {
1757         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1758         if (!ref1)
1759             return;
1760         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1761     }
1762
1763     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1764         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1765         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1766
1767         luma_mc(s, tmp, tmpstride, ref0->frame,
1768                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1769
1770         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1771             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1772             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1773                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1774                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1775                                      dst0, s->frame->linesize[0], tmp,
1776                                      tmpstride, nPbW, nPbH);
1777         } else {
1778             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1779         }
1780         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1781                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1782
1783         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1784             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1785             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1786                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1787                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1788                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1789                                      nPbW / 2, nPbH / 2);
1790             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1791                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1792                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1793                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1794                                      nPbW / 2, nPbH / 2);
1795         } else {
1796             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1797             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1798         }
1799     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1800         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1801         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1802
1803         luma_mc(s, tmp, tmpstride, ref1->frame,
1804                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1805
1806         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1807             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1808             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1809                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1810                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1811                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1812                                       nPbW, nPbH);
1813         } else {
1814             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1815         }
1816
1817         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1818                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1819
1820         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1821             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1822             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1823                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1824                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1825                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1826             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1827                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1828                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1829                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1830         } else {
1831             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1832             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1833         }
1834     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1835         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1836         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1837         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1838         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1839
1840         luma_mc(s, tmp, tmpstride, ref0->frame,
1841                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1842         luma_mc(s, tmp2, tmpstride, ref1->frame,
1843                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1844
1845         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1846             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1847             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1848                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1849                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1850                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1851                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1852                                          dst0, s->frame->linesize[0],
1853                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1854         } else {
1855             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1856                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1857         }
1858
1859         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1860                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1861         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1862                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1863
1864         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1865             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1866             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1867                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1868                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1869                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1870                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1871                                          dst1, s->frame->linesize[1], tmp, tmp3,
1872                                          tmpstride, nPbW / 2, nPbH / 2);
1873             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1874                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1875                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1876                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1877                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1878                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1879                                          tmpstride, nPbW / 2, nPbH / 2);
1880         } else {
1881             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1882             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1883         }
1884     }
1885 }
1886
1887 /**
1888  * 8.4.1
1889  */
1890 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1891                                 int prev_intra_luma_pred_flag)
1892 {
1893     HEVCLocalContext *lc = &s->HEVClc;
1894     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1895     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1896     int min_pu_width     = s->sps->min_pu_width;
1897     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1898     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1899     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1900
1901     int cand_up   = (lc->ctb_up_flag || y0b) ?
1902                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1903     int cand_left = (lc->ctb_left_flag || x0b) ?
1904                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1905
1906     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1907
1908     MvField *tab_mvf = s->ref->tab_mvf;
1909     int intra_pred_mode;
1910     int candidate[3];
1911     int i, j;
1912
1913     // intra_pred_mode prediction does not cross vertical CTB boundaries
1914     if ((y0 - 1) < y_ctb)
1915         cand_up = INTRA_DC;
1916
1917     if (cand_left == cand_up) {
1918         if (cand_left < 2) {
1919             candidate[0] = INTRA_PLANAR;
1920             candidate[1] = INTRA_DC;
1921             candidate[2] = INTRA_ANGULAR_26;
1922         } else {
1923             candidate[0] = cand_left;
1924             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1925             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1926         }
1927     } else {
1928         candidate[0] = cand_left;
1929         candidate[1] = cand_up;
1930         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1931             candidate[2] = INTRA_PLANAR;
1932         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1933             candidate[2] = INTRA_DC;
1934         } else {
1935             candidate[2] = INTRA_ANGULAR_26;
1936         }
1937     }
1938
1939     if (prev_intra_luma_pred_flag) {
1940         intra_pred_mode = candidate[lc->pu.mpm_idx];
1941     } else {
1942         if (candidate[0] > candidate[1])
1943             FFSWAP(uint8_t, candidate[0], candidate[1]);
1944         if (candidate[0] > candidate[2])
1945             FFSWAP(uint8_t, candidate[0], candidate[2]);
1946         if (candidate[1] > candidate[2])
1947             FFSWAP(uint8_t, candidate[1], candidate[2]);
1948
1949         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1950         for (i = 0; i < 3; i++)
1951             if (intra_pred_mode >= candidate[i])
1952                 intra_pred_mode++;
1953     }
1954
1955     /* write the intra prediction units into the mv array */
1956     if (!size_in_pus)
1957         size_in_pus = 1;
1958     for (i = 0; i < size_in_pus; i++) {
1959         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1960                intra_pred_mode, size_in_pus);
1961
1962         for (j = 0; j < size_in_pus; j++) {
1963             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1964             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1965             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1966             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1967             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1968             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1969             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1970             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1971             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1972         }
1973     }
1974
1975     return intra_pred_mode;
1976 }
1977
1978 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1979                                           int log2_cb_size, int ct_depth)
1980 {
1981     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1982     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1983     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1984     int y;
1985
1986     for (y = 0; y < length; y++)
1987         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1988                ct_depth, length);
1989 }
1990
1991 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1992                                   int log2_cb_size)
1993 {
1994     HEVCLocalContext *lc = &s->HEVClc;
1995     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1996     uint8_t prev_intra_luma_pred_flag[4];
1997     int split   = lc->cu.part_mode == PART_NxN;
1998     int pb_size = (1 << log2_cb_size) >> split;
1999     int side    = split + 1;
2000     int chroma_mode;
2001     int i, j;
2002
2003     for (i = 0; i < side; i++)
2004         for (j = 0; j < side; j++)
2005             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2006
2007     for (i = 0; i < side; i++) {
2008         for (j = 0; j < side; j++) {
2009             if (prev_intra_luma_pred_flag[2 * i + j])
2010                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2011             else
2012                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2013
2014             lc->pu.intra_pred_mode[2 * i + j] =
2015                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2016                                      prev_intra_luma_pred_flag[2 * i + j]);
2017         }
2018     }
2019
2020     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2021     if (chroma_mode != 4) {
2022         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2023             lc->pu.intra_pred_mode_c = 34;
2024         else
2025             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2026     } else {
2027         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2028     }
2029 }
2030
2031 static void intra_prediction_unit_default_value(HEVCContext *s,
2032                                                 int x0, int y0,
2033                                                 int log2_cb_size)
2034 {
2035     HEVCLocalContext *lc = &s->HEVClc;
2036     int pb_size          = 1 << log2_cb_size;
2037     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2038     int min_pu_width     = s->sps->min_pu_width;
2039     MvField *tab_mvf     = s->ref->tab_mvf;
2040     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2041     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2042     int j, k;
2043
2044     if (size_in_pus == 0)
2045         size_in_pus = 1;
2046     for (j = 0; j < size_in_pus; j++) {
2047         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2048         for (k = 0; k < size_in_pus; k++)
2049             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2050     }
2051 }
2052
2053 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2054 {
2055     int cb_size          = 1 << log2_cb_size;
2056     HEVCLocalContext *lc = &s->HEVClc;
2057     int log2_min_cb_size = s->sps->log2_min_cb_size;
2058     int length           = cb_size >> log2_min_cb_size;
2059     int min_cb_width     = s->sps->min_cb_width;
2060     int x_cb             = x0 >> log2_min_cb_size;
2061     int y_cb             = y0 >> log2_min_cb_size;
2062     int x, y, ret;
2063
2064     lc->cu.x                = x0;
2065     lc->cu.y                = y0;
2066     lc->cu.pred_mode        = MODE_INTRA;
2067     lc->cu.part_mode        = PART_2Nx2N;
2068     lc->cu.intra_split_flag = 0;
2069
2070     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2071     for (x = 0; x < 4; x++)
2072         lc->pu.intra_pred_mode[x] = 1;
2073     if (s->pps->transquant_bypass_enable_flag) {
2074         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2075         if (lc->cu.cu_transquant_bypass_flag)
2076             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2077     } else
2078         lc->cu.cu_transquant_bypass_flag = 0;
2079
2080     if (s->sh.slice_type != I_SLICE) {
2081         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2082
2083         x = y_cb * min_cb_width + x_cb;
2084         for (y = 0; y < length; y++) {
2085             memset(&s->skip_flag[x], skip_flag, length);
2086             x += min_cb_width;
2087         }
2088         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2089     }
2090
2091     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2092         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2093         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2094
2095         if (!s->sh.disable_deblocking_filter_flag)
2096             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2097     } else {
2098         int pcm_flag = 0;
2099
2100         if (s->sh.slice_type != I_SLICE)
2101             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2102         if (lc->cu.pred_mode != MODE_INTRA ||
2103             log2_cb_size == s->sps->log2_min_cb_size) {
2104             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2105             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2106                                       lc->cu.pred_mode == MODE_INTRA;
2107         }
2108
2109         if (lc->cu.pred_mode == MODE_INTRA) {
2110             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2111                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2112                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2113                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2114             }
2115             if (pcm_flag) {
2116                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2117                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2118                 if (s->sps->pcm.loop_filter_disable_flag)
2119                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2120
2121                 if (ret < 0)
2122                     return ret;
2123             } else {
2124                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2125             }
2126         } else {
2127             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2128             switch (lc->cu.part_mode) {
2129             case PART_2Nx2N:
2130                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2131                 break;
2132             case PART_2NxN:
2133                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2134                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2135                 break;
2136             case PART_Nx2N:
2137                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2138                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2139                 break;
2140             case PART_2NxnU:
2141                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2142                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2143                 break;
2144             case PART_2NxnD:
2145                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2146                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2147                 break;
2148             case PART_nLx2N:
2149                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2150                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2151                 break;
2152             case PART_nRx2N:
2153                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2154                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2155                 break;
2156             case PART_NxN:
2157                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2158                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2159                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2160                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2161                 break;
2162             }
2163         }
2164
2165         if (!pcm_flag) {
2166             int rqt_root_cbf = 1;
2167
2168             if (lc->cu.pred_mode != MODE_INTRA &&
2169                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2170                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2171             }
2172             if (rqt_root_cbf) {
2173                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2174                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2175                                          s->sps->max_transform_hierarchy_depth_inter;
2176                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2177                                          log2_cb_size,
2178                                          log2_cb_size, 0, 0, 0, 0);
2179                 if (ret < 0)
2180                     return ret;
2181             } else {
2182                 if (!s->sh.disable_deblocking_filter_flag)
2183                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2184             }
2185         }
2186     }
2187
2188     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2189         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2190
2191     x = y_cb * min_cb_width + x_cb;
2192     for (y = 0; y < length; y++) {
2193         memset(&s->qp_y_tab[x], lc->qp_y, length);
2194         x += min_cb_width;
2195     }
2196
2197     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2198
2199     return 0;
2200 }
2201
2202 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2203                                int log2_cb_size, int cb_depth)
2204 {
2205     HEVCLocalContext *lc = &s->HEVClc;
2206     const int cb_size    = 1 << log2_cb_size;
2207     int split_cu;
2208
2209     lc->ct.depth = cb_depth;
2210     if (x0 + cb_size <= s->sps->width  &&
2211         y0 + cb_size <= s->sps->height &&
2212         log2_cb_size > s->sps->log2_min_cb_size) {
2213         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2214     } else {
2215         split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
2216     }
2217     if (s->pps->cu_qp_delta_enabled_flag &&
2218         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2219         lc->tu.is_cu_qp_delta_coded = 0;
2220         lc->tu.cu_qp_delta          = 0;
2221     }
2222
2223     if (split_cu) {
2224         const int cb_size_split = cb_size >> 1;
2225         const int x1 = x0 + cb_size_split;
2226         const int y1 = y0 + cb_size_split;
2227
2228         log2_cb_size--;
2229         cb_depth++;
2230
2231 #define SUBDIVIDE(x, y)                                                \
2232 do {                                                                   \
2233     if (x < s->sps->width && y < s->sps->height) {                     \
2234         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2235         if (ret < 0)                                                   \
2236             return ret;                                                \
2237     }                                                                  \
2238 } while (0)
2239
2240         SUBDIVIDE(x0, y0);
2241         SUBDIVIDE(x1, y0);
2242         SUBDIVIDE(x0, y1);
2243         SUBDIVIDE(x1, y1);
2244     } else {
2245         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2246         if (ret < 0)
2247             return ret;
2248     }
2249
2250     return 0;
2251 }
2252
2253 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2254                                  int ctb_addr_ts)
2255 {
2256     HEVCLocalContext *lc  = &s->HEVClc;
2257     int ctb_size          = 1 << s->sps->log2_ctb_size;
2258     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2259     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2260
2261     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2262
2263     if (s->pps->entropy_coding_sync_enabled_flag) {
2264         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2265             lc->first_qp_group = 1;
2266         lc->end_of_tiles_x = s->sps->width;
2267     } else if (s->pps->tiles_enabled_flag) {
2268         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2269             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2270             lc->start_of_tiles_x = x_ctb;
2271             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2272             lc->first_qp_group   = 1;
2273         }
2274     } else {
2275         lc->end_of_tiles_x = s->sps->width;
2276     }
2277
2278     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2279
2280     lc->boundary_flags = 0;
2281     if (s->pps->tiles_enabled_flag) {
2282         if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2283             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2284         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2285             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2286         if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
2287             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2288         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
2289             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2290     } else {
2291         if (!ctb_addr_in_slice > 0)
2292             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2293         if (ctb_addr_in_slice < s->sps->ctb_width)
2294             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2295     }
2296
2297     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2298     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2299     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2300     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2301 }
2302
2303 static int hls_slice_data(HEVCContext *s)
2304 {
2305     int ctb_size    = 1 << s->sps->log2_ctb_size;
2306     int more_data   = 1;
2307     int x_ctb       = 0;
2308     int y_ctb       = 0;
2309     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2310     int ret;
2311
2312     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2313         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2314
2315         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2316         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2317         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2318
2319         ff_hevc_cabac_init(s, ctb_addr_ts);
2320
2321         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2322
2323         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2324         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2325         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2326
2327         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2328         if (ret < 0)
2329             return ret;
2330         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2331
2332         ctb_addr_ts++;
2333         ff_hevc_save_states(s, ctb_addr_ts);
2334         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2335     }
2336
2337     if (x_ctb + ctb_size >= s->sps->width &&
2338         y_ctb + ctb_size >= s->sps->height)
2339         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2340
2341     return ctb_addr_ts;
2342 }
2343
2344 /**
2345  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2346  * 0 if the unit should be skipped, 1 otherwise
2347  */
2348 static int hls_nal_unit(HEVCContext *s)
2349 {
2350     GetBitContext *gb = &s->HEVClc.gb;
2351     int nuh_layer_id;
2352
2353     if (get_bits1(gb) != 0)
2354         return AVERROR_INVALIDDATA;
2355
2356     s->nal_unit_type = get_bits(gb, 6);
2357
2358     nuh_layer_id   = get_bits(gb, 6);
2359     s->temporal_id = get_bits(gb, 3) - 1;
2360     if (s->temporal_id < 0)
2361         return AVERROR_INVALIDDATA;
2362
2363     av_log(s->avctx, AV_LOG_DEBUG,
2364            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2365            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2366
2367     return nuh_layer_id == 0;
2368 }
2369
2370 static void restore_tqb_pixels(HEVCContext *s)
2371 {
2372     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2373     int x, y, c_idx;
2374
2375     for (c_idx = 0; c_idx < 3; c_idx++) {
2376         ptrdiff_t stride = s->frame->linesize[c_idx];
2377         int hshift       = s->sps->hshift[c_idx];
2378         int vshift       = s->sps->vshift[c_idx];
2379         for (y = 0; y < s->sps->min_pu_height; y++) {
2380             for (x = 0; x < s->sps->min_pu_width; x++) {
2381                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2382                     int n;
2383                     int len      = min_pu_size >> hshift;
2384                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2385                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2386                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2387                         memcpy(dst, src, len);
2388                         src += stride;
2389                         dst += stride;
2390                     }
2391                 }
2392             }
2393         }
2394     }
2395 }
2396
2397 static int set_side_data(HEVCContext *s)
2398 {
2399     AVFrame *out = s->ref->frame;
2400
2401     if (s->sei_frame_packing_present &&
2402         s->frame_packing_arrangement_type >= 3 &&
2403         s->frame_packing_arrangement_type <= 5 &&
2404         s->content_interpretation_type > 0 &&
2405         s->content_interpretation_type < 3) {
2406         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2407         if (!stereo)
2408             return AVERROR(ENOMEM);
2409
2410         switch (s->frame_packing_arrangement_type) {
2411         case 3:
2412             if (s->quincunx_subsampling)
2413                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2414             else
2415                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2416             break;
2417         case 4:
2418             stereo->type = AV_STEREO3D_TOPBOTTOM;
2419             break;
2420         case 5:
2421             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2422             break;
2423         }
2424
2425         if (s->content_interpretation_type == 2)
2426             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2427     }
2428
2429     if (s->sei_display_orientation_present &&
2430         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2431         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2432         AVFrameSideData *rotation = av_frame_new_side_data(out,
2433                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2434                                                            sizeof(int32_t) * 9);
2435         if (!rotation)
2436             return AVERROR(ENOMEM);
2437
2438         av_display_rotation_set((int32_t *)rotation->data, angle);
2439         av_display_matrix_flip((int32_t *)rotation->data,
2440                                s->sei_hflip, s->sei_vflip);
2441     }
2442
2443     return 0;
2444 }
2445
2446 static int hevc_frame_start(HEVCContext *s)
2447 {
2448     HEVCLocalContext *lc = &s->HEVClc;
2449     int ret;
2450
2451     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2452     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2453     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2454     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2455
2456     lc->start_of_tiles_x = 0;
2457     s->is_decoded        = 0;
2458     s->first_nal_type    = s->nal_unit_type;
2459
2460     if (s->pps->tiles_enabled_flag)
2461         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2462
2463     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2464                               s->poc);
2465     if (ret < 0)
2466         goto fail;
2467
2468     ret = ff_hevc_frame_rps(s);
2469     if (ret < 0) {
2470         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2471         goto fail;
2472     }
2473
2474     s->ref->frame->key_frame = IS_IRAP(s);
2475
2476     ret = set_side_data(s);
2477     if (ret < 0)
2478         goto fail;
2479
2480     av_frame_unref(s->output_frame);
2481     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2482     if (ret < 0)
2483         goto fail;
2484
2485     ff_thread_finish_setup(s->avctx);
2486
2487     return 0;
2488
2489 fail:
2490     if (s->ref)
2491         ff_hevc_unref_frame(s, s->ref, ~0);
2492     s->ref = NULL;
2493     return ret;
2494 }
2495
2496 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2497 {
2498     HEVCLocalContext *lc = &s->HEVClc;
2499     GetBitContext *gb    = &lc->gb;
2500     int ctb_addr_ts, ret;
2501
2502     ret = init_get_bits8(gb, nal->data, nal->size);
2503     if (ret < 0)
2504         return ret;
2505
2506     ret = hls_nal_unit(s);
2507     if (ret < 0) {
2508         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2509                s->nal_unit_type);
2510         goto fail;
2511     } else if (!ret)
2512         return 0;
2513
2514     switch (s->nal_unit_type) {
2515     case NAL_VPS:
2516         ret = ff_hevc_decode_nal_vps(s);
2517         if (ret < 0)
2518             goto fail;
2519         break;
2520     case NAL_SPS:
2521         ret = ff_hevc_decode_nal_sps(s);
2522         if (ret < 0)
2523             goto fail;
2524         break;
2525     case NAL_PPS:
2526         ret = ff_hevc_decode_nal_pps(s);
2527         if (ret < 0)
2528             goto fail;
2529         break;
2530     case NAL_SEI_PREFIX:
2531     case NAL_SEI_SUFFIX:
2532         ret = ff_hevc_decode_nal_sei(s);
2533         if (ret < 0)
2534             goto fail;
2535         break;
2536     case NAL_TRAIL_R:
2537     case NAL_TRAIL_N:
2538     case NAL_TSA_N:
2539     case NAL_TSA_R:
2540     case NAL_STSA_N:
2541     case NAL_STSA_R:
2542     case NAL_BLA_W_LP:
2543     case NAL_BLA_W_RADL:
2544     case NAL_BLA_N_LP:
2545     case NAL_IDR_W_RADL:
2546     case NAL_IDR_N_LP:
2547     case NAL_CRA_NUT:
2548     case NAL_RADL_N:
2549     case NAL_RADL_R:
2550     case NAL_RASL_N:
2551     case NAL_RASL_R:
2552         ret = hls_slice_header(s);
2553         if (ret < 0)
2554             return ret;
2555
2556         if (s->max_ra == INT_MAX) {
2557             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2558                 s->max_ra = s->poc;
2559             } else {
2560                 if (IS_IDR(s))
2561                     s->max_ra = INT_MIN;
2562             }
2563         }
2564
2565         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2566             s->poc <= s->max_ra) {
2567             s->is_decoded = 0;
2568             break;
2569         } else {
2570             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2571                 s->max_ra = INT_MIN;
2572         }
2573
2574         if (s->sh.first_slice_in_pic_flag) {
2575             ret = hevc_frame_start(s);
2576             if (ret < 0)
2577                 return ret;
2578         } else if (!s->ref) {
2579             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2580             goto fail;
2581         }
2582
2583         if (s->nal_unit_type != s->first_nal_type) {
2584             av_log(s->avctx, AV_LOG_ERROR,
2585                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2586                    s->first_nal_type, s->nal_unit_type);
2587             return AVERROR_INVALIDDATA;
2588         }
2589
2590         if (!s->sh.dependent_slice_segment_flag &&
2591             s->sh.slice_type != I_SLICE) {
2592             ret = ff_hevc_slice_rpl(s);
2593             if (ret < 0) {
2594                 av_log(s->avctx, AV_LOG_WARNING,
2595                        "Error constructing the reference lists for the current slice.\n");
2596                 goto fail;
2597             }
2598         }
2599
2600         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2601             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2602             if (ret < 0)
2603                 goto fail;
2604         }
2605
2606         if (s->avctx->hwaccel) {
2607             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2608             if (ret < 0)
2609                 goto fail;
2610         } else {
2611             ctb_addr_ts = hls_slice_data(s);
2612             if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2613                 s->is_decoded = 1;
2614                 if ((s->pps->transquant_bypass_enable_flag ||
2615                      (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2616                     s->sps->sao_enabled)
2617                     restore_tqb_pixels(s);
2618             }
2619
2620             if (ctb_addr_ts < 0) {
2621                 ret = ctb_addr_ts;
2622                 goto fail;
2623             }
2624         }
2625         break;
2626     case NAL_EOS_NUT:
2627     case NAL_EOB_NUT:
2628         s->seq_decode = (s->seq_decode + 1) & 0xff;
2629         s->max_ra     = INT_MAX;
2630         break;
2631     case NAL_AUD:
2632     case NAL_FD_NUT:
2633         break;
2634     default:
2635         av_log(s->avctx, AV_LOG_INFO,
2636                "Skipping NAL unit %d\n", s->nal_unit_type);
2637     }
2638
2639     return 0;
2640 fail:
2641     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2642         return ret;
2643     return 0;
2644 }
2645
2646 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2647 {
2648     int i, consumed, ret = 0;
2649
2650     s->ref = NULL;
2651     s->eos = 0;
2652
2653     /* split the input packet into NAL units, so we know the upper bound on the
2654      * number of slices in the frame */
2655     s->nb_nals = 0;
2656     while (length >= 4) {
2657         HEVCNAL *nal;
2658         int extract_length = 0;
2659
2660         if (s->is_nalff) {
2661             int i;
2662             for (i = 0; i < s->nal_length_size; i++)
2663                 extract_length = (extract_length << 8) | buf[i];
2664             buf    += s->nal_length_size;
2665             length -= s->nal_length_size;
2666
2667             if (extract_length > length) {
2668                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2669                 ret = AVERROR_INVALIDDATA;
2670                 goto fail;
2671             }
2672         } else {
2673             if (buf[2] == 0) {
2674                 length--;
2675                 buf++;
2676                 continue;
2677             }
2678             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2679                 ret = AVERROR_INVALIDDATA;
2680                 goto fail;
2681             }
2682
2683             buf           += 3;
2684             length        -= 3;
2685             extract_length = length;
2686         }
2687
2688         if (s->nals_allocated < s->nb_nals + 1) {
2689             int new_size = s->nals_allocated + 1;
2690             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2691             if (!tmp) {
2692                 ret = AVERROR(ENOMEM);
2693                 goto fail;
2694             }
2695             s->nals = tmp;
2696             memset(s->nals + s->nals_allocated, 0,
2697                    (new_size - s->nals_allocated) * sizeof(*tmp));
2698             s->nals_allocated = new_size;
2699         }
2700         nal = &s->nals[s->nb_nals++];
2701
2702         consumed = ff_hevc_extract_rbsp(buf, extract_length, nal);
2703         if (consumed < 0) {
2704             ret = consumed;
2705             goto fail;
2706         }
2707
2708         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2709         if (ret < 0)
2710             goto fail;
2711         hls_nal_unit(s);
2712
2713         if (s->nal_unit_type == NAL_EOB_NUT ||
2714             s->nal_unit_type == NAL_EOS_NUT)
2715             s->eos = 1;
2716
2717         buf    += consumed;
2718         length -= consumed;
2719     }
2720
2721     /* parse the NAL units */
2722     for (i = 0; i < s->nb_nals; i++) {
2723         ret = decode_nal_unit(s, &s->nals[i]);
2724         if (ret < 0) {
2725             av_log(s->avctx, AV_LOG_WARNING,
2726                    "Error parsing NAL unit #%d.\n", i);
2727             goto fail;
2728         }
2729     }
2730
2731 fail:
2732     if (s->ref)
2733         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2734
2735     return ret;
2736 }
2737
/**
 * Log the 16 bytes of an MD5 digest as two-digit lowercase hex values,
 * without separators or a trailing newline.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2744
2745 static int verify_md5(HEVCContext *s, AVFrame *frame)
2746 {
2747     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2748     int pixel_shift;
2749     int i, j;
2750
2751     if (!desc)
2752         return AVERROR(EINVAL);
2753
2754     pixel_shift = desc->comp[0].depth_minus1 > 7;
2755
2756     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2757            s->poc);
2758
2759     /* the checksums are LE, so we have to byteswap for >8bpp formats
2760      * on BE arches */
2761 #if HAVE_BIGENDIAN
2762     if (pixel_shift && !s->checksum_buf) {
2763         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2764                        FFMAX3(frame->linesize[0], frame->linesize[1],
2765                               frame->linesize[2]));
2766         if (!s->checksum_buf)
2767             return AVERROR(ENOMEM);
2768     }
2769 #endif
2770
2771     for (i = 0; frame->data[i]; i++) {
2772         int width  = s->avctx->coded_width;
2773         int height = s->avctx->coded_height;
2774         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2775         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2776         uint8_t md5[16];
2777
2778         av_md5_init(s->md5_ctx);
2779         for (j = 0; j < h; j++) {
2780             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2781 #if HAVE_BIGENDIAN
2782             if (pixel_shift) {
2783                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2784                                     (const uint16_t *) src, w);
2785                 src = s->checksum_buf;
2786             }
2787 #endif
2788             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2789         }
2790         av_md5_final(s->md5_ctx, md5);
2791
2792         if (!memcmp(md5, s->md5[i], 16)) {
2793             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2794             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2795             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2796         } else {
2797             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2798             print_md5(s->avctx, AV_LOG_ERROR, md5);
2799             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2800             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2801             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2802             return AVERROR_INVALIDDATA;
2803         }
2804     }
2805
2806     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2807
2808     return 0;
2809 }
2810
2811 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2812                              AVPacket *avpkt)
2813 {
2814     int ret;
2815     HEVCContext *s = avctx->priv_data;
2816
2817     if (!avpkt->size) {
2818         ret = ff_hevc_output_frame(s, data, 1);
2819         if (ret < 0)
2820             return ret;
2821
2822         *got_output = ret;
2823         return 0;
2824     }
2825
2826     s->ref = NULL;
2827     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2828     if (ret < 0)
2829         return ret;
2830
2831     if (avctx->hwaccel) {
2832         if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2833             av_log(avctx, AV_LOG_ERROR,
2834                    "hardware accelerator failed to decode picture\n");
2835     } else {
2836         /* verify the SEI checksum */
2837         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2838             s->is_md5) {
2839             ret = verify_md5(s, s->ref->frame);
2840             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2841                 ff_hevc_unref_frame(s, s->ref, ~0);
2842                 return ret;
2843             }
2844         }
2845     }
2846     s->is_md5 = 0;
2847
2848     if (s->is_decoded) {
2849         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2850         s->is_decoded = 0;
2851     }
2852
2853     if (s->output_frame->buf[0]) {
2854         av_frame_move_ref(data, s->output_frame);
2855         *got_output = 1;
2856     }
2857
2858     return avpkt->size;
2859 }
2860
2861 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2862 {
2863     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2864     if (ret < 0)
2865         return ret;
2866
2867     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2868     if (!dst->tab_mvf_buf)
2869         goto fail;
2870     dst->tab_mvf = src->tab_mvf;
2871
2872     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2873     if (!dst->rpl_tab_buf)
2874         goto fail;
2875     dst->rpl_tab = src->rpl_tab;
2876
2877     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2878     if (!dst->rpl_buf)
2879         goto fail;
2880
2881     dst->poc        = src->poc;
2882     dst->ctb_count  = src->ctb_count;
2883     dst->window     = src->window;
2884     dst->flags      = src->flags;
2885     dst->sequence   = src->sequence;
2886
2887     if (src->hwaccel_picture_private) {
2888         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2889         if (!dst->hwaccel_priv_buf)
2890             goto fail;
2891         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
2892     }
2893
2894     return 0;
2895 fail:
2896     ff_hevc_unref_frame(s, dst, ~0);
2897     return AVERROR(ENOMEM);
2898 }
2899
2900 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2901 {
2902     HEVCContext       *s = avctx->priv_data;
2903     int i;
2904
2905     pic_arrays_free(s);
2906
2907     av_freep(&s->md5_ctx);
2908
2909     av_frame_free(&s->tmp_frame);
2910     av_frame_free(&s->output_frame);
2911
2912     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2913         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2914         av_frame_free(&s->DPB[i].frame);
2915     }
2916
2917     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
2918         av_buffer_unref(&s->vps_list[i]);
2919     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
2920         av_buffer_unref(&s->sps_list[i]);
2921     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
2922         av_buffer_unref(&s->pps_list[i]);
2923
2924     for (i = 0; i < s->nals_allocated; i++)
2925         av_freep(&s->nals[i].rbsp_buffer);
2926     av_freep(&s->nals);
2927     s->nals_allocated = 0;
2928
2929     return 0;
2930 }
2931
2932 static av_cold int hevc_init_context(AVCodecContext *avctx)
2933 {
2934     HEVCContext *s = avctx->priv_data;
2935     int i;
2936
2937     s->avctx = avctx;
2938
2939     s->tmp_frame = av_frame_alloc();
2940     if (!s->tmp_frame)
2941         goto fail;
2942
2943     s->output_frame = av_frame_alloc();
2944     if (!s->output_frame)
2945         goto fail;
2946
2947     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2948         s->DPB[i].frame = av_frame_alloc();
2949         if (!s->DPB[i].frame)
2950             goto fail;
2951         s->DPB[i].tf.f = s->DPB[i].frame;
2952     }
2953
2954     s->max_ra = INT_MAX;
2955
2956     s->md5_ctx = av_md5_alloc();
2957     if (!s->md5_ctx)
2958         goto fail;
2959
2960     ff_bswapdsp_init(&s->bdsp);
2961
2962     s->context_initialized = 1;
2963
2964     return 0;
2965
2966 fail:
2967     hevc_decode_free(avctx);
2968     return AVERROR(ENOMEM);
2969 }
2970
2971 static int hevc_update_thread_context(AVCodecContext *dst,
2972                                       const AVCodecContext *src)
2973 {
2974     HEVCContext *s  = dst->priv_data;
2975     HEVCContext *s0 = src->priv_data;
2976     int i, ret;
2977
2978     if (!s->context_initialized) {
2979         ret = hevc_init_context(dst);
2980         if (ret < 0)
2981             return ret;
2982     }
2983
2984     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2985         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2986         if (s0->DPB[i].frame->buf[0]) {
2987             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
2988             if (ret < 0)
2989                 return ret;
2990         }
2991     }
2992
2993     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
2994         av_buffer_unref(&s->vps_list[i]);
2995         if (s0->vps_list[i]) {
2996             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
2997             if (!s->vps_list[i])
2998                 return AVERROR(ENOMEM);
2999         }
3000     }
3001
3002     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3003         av_buffer_unref(&s->sps_list[i]);
3004         if (s0->sps_list[i]) {
3005             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3006             if (!s->sps_list[i])
3007                 return AVERROR(ENOMEM);
3008         }
3009     }
3010
3011     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3012         av_buffer_unref(&s->pps_list[i]);
3013         if (s0->pps_list[i]) {
3014             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3015             if (!s->pps_list[i])
3016                 return AVERROR(ENOMEM);
3017         }
3018     }
3019
3020     if (s->sps != s0->sps)
3021         ret = set_sps(s, s0->sps);
3022
3023     s->seq_decode = s0->seq_decode;
3024     s->seq_output = s0->seq_output;
3025     s->pocTid0    = s0->pocTid0;
3026     s->max_ra     = s0->max_ra;
3027
3028     s->is_nalff        = s0->is_nalff;
3029     s->nal_length_size = s0->nal_length_size;
3030
3031     if (s0->eos) {
3032         s->seq_decode = (s->seq_decode + 1) & 0xff;
3033         s->max_ra = INT_MAX;
3034     }
3035
3036     return 0;
3037 }
3038
3039 static int hevc_decode_extradata(HEVCContext *s)
3040 {
3041     AVCodecContext *avctx = s->avctx;
3042     GetByteContext gb;
3043     int ret, i;
3044
3045     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3046
3047     if (avctx->extradata_size > 3 &&
3048         (avctx->extradata[0] || avctx->extradata[1] ||
3049          avctx->extradata[2] > 1)) {
3050         /* It seems the extradata is encoded as hvcC format.
3051          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3052          * is finalized. When finalized, configurationVersion will be 1 and we
3053          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3054         int i, j, num_arrays, nal_len_size;
3055
3056         s->is_nalff = 1;
3057
3058         bytestream2_skip(&gb, 21);
3059         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3060         num_arrays   = bytestream2_get_byte(&gb);
3061
3062         /* nal units in the hvcC always have length coded with 2 bytes,
3063          * so put a fake nal_length_size = 2 while parsing them */
3064         s->nal_length_size = 2;
3065
3066         /* Decode nal units from hvcC. */
3067         for (i = 0; i < num_arrays; i++) {
3068             int type = bytestream2_get_byte(&gb) & 0x3f;
3069             int cnt  = bytestream2_get_be16(&gb);
3070
3071             for (j = 0; j < cnt; j++) {
3072                 // +2 for the nal size field
3073                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3074                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3075                     av_log(s->avctx, AV_LOG_ERROR,
3076                            "Invalid NAL unit size in extradata.\n");
3077                     return AVERROR_INVALIDDATA;
3078                 }
3079
3080                 ret = decode_nal_units(s, gb.buffer, nalsize);
3081                 if (ret < 0) {
3082                     av_log(avctx, AV_LOG_ERROR,
3083                            "Decoding nal unit %d %d from hvcC failed\n",
3084                            type, i);
3085                     return ret;
3086                 }
3087                 bytestream2_skip(&gb, nalsize);
3088             }
3089         }
3090
3091         /* Now store right nal length size, that will be used to parse
3092          * all other nals */
3093         s->nal_length_size = nal_len_size;
3094     } else {
3095         s->is_nalff = 0;
3096         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3097         if (ret < 0)
3098             return ret;
3099     }
3100
3101     /* export stream parameters from the first SPS */
3102     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3103         if (s->sps_list[i]) {
3104             const HEVCSPS *sps = (const HEVCSPS*)s->sps_list[i]->data;
3105             export_stream_params(s->avctx, s, sps);
3106             break;
3107         }
3108     }
3109
3110     return 0;
3111 }
3112
3113 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3114 {
3115     HEVCContext *s = avctx->priv_data;
3116     int ret;
3117
3118     ff_init_cabac_states();
3119
3120     avctx->internal->allocate_progress = 1;
3121
3122     ret = hevc_init_context(avctx);
3123     if (ret < 0)
3124         return ret;
3125
3126     if (avctx->extradata_size > 0 && avctx->extradata) {
3127         ret = hevc_decode_extradata(s);
3128         if (ret < 0) {
3129             hevc_decode_free(avctx);
3130             return ret;
3131         }
3132     }
3133
3134     return 0;
3135 }
3136
3137 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3138 {
3139     HEVCContext *s = avctx->priv_data;
3140     int ret;
3141
3142     memset(s, 0, sizeof(*s));
3143
3144     ret = hevc_init_context(avctx);
3145     if (ret < 0)
3146         return ret;
3147
3148     return 0;
3149 }
3150
3151 static void hevc_decode_flush(AVCodecContext *avctx)
3152 {
3153     HEVCContext *s = avctx->priv_data;
3154     ff_hevc_flush_dpb(s);
3155     s->max_ra = INT_MAX;
3156 }
3157
/* Byte offset of a HEVCContext member. */
#define OFFSET(field) offsetof(HEVCContext, field)
/* Option flags: video parameter, decoding parameter. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3160
3161 static const AVProfile profiles[] = {
3162     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3163     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3164     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3165     { FF_PROFILE_UNKNOWN },
3166 };
3167
3168 static const AVOption options[] = {
3169     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3170         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3171     { NULL },
3172 };
3173
3174 static const AVClass hevc_decoder_class = {
3175     .class_name = "HEVC decoder",
3176     .item_name  = av_default_item_name,
3177     .option     = options,
3178     .version    = LIBAVUTIL_VERSION_INT,
3179 };
3180
3181 AVCodec ff_hevc_decoder = {
3182     .name                  = "hevc",
3183     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3184     .type                  = AVMEDIA_TYPE_VIDEO,
3185     .id                    = AV_CODEC_ID_HEVC,
3186     .priv_data_size        = sizeof(HEVCContext),
3187     .priv_class            = &hevc_decoder_class,
3188     .init                  = hevc_decode_init,
3189     .close                 = hevc_decode_free,
3190     .decode                = hevc_decode_frame,
3191     .flush                 = hevc_decode_flush,
3192     .update_thread_context = hevc_update_thread_context,
3193     .init_thread_copy      = hevc_init_thread_copy,
3194     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3195                              CODEC_CAP_FRAME_THREADS,
3196     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3197 };