]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c
hevc: Use generic av_clip function, not C implementation
[ffmpeg] / libavcodec / hevc.c
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/md5.h"
31 #include "libavutil/opt.h"
32 #include "libavutil/pixdesc.h"
33 #include "libavutil/stereo3d.h"
34
35 #include "bswapdsp.h"
36 #include "bytestream.h"
37 #include "cabac_functions.h"
38 #include "golomb.h"
39 #include "hevc.h"
40
/*
 * Extra sample counts used by quarter-pel interpolation, one entry per
 * table index 0..3 (presumably the fractional sample position -- confirm
 * against the callers). For every index, extra == before + after.
 */
const uint8_t ff_hevc_qpel_extra_before[4] = {
    0, 3, 3, 2,
};
const uint8_t ff_hevc_qpel_extra_after[4] = {
    0, 3, 4, 4,
};
const uint8_t ff_hevc_qpel_extra[4] = {
    0, 6, 7, 6,
};
44
/* Scan order of a block holding a single position. */
static const uint8_t scan_1x1[1] = { 0 };

/*
 * Horizontal (row by row) scans: the _x/_y tables give the column and row
 * of each successive scan position.
 */
static const uint8_t horiz_scan2x2_x[4] = {
    0, 1,
    0, 1,
};

static const uint8_t horiz_scan2x2_y[4] = {
    0, 0,
    1, 1,
};

static const uint8_t horiz_scan4x4_x[16] = {
    0, 1, 2, 3,   0, 1, 2, 3,
    0, 1, 2, 3,   0, 1, 2, 3,
};

static const uint8_t horiz_scan4x4_y[16] = {
    0, 0, 0, 0,   1, 1, 1, 1,
    2, 2, 2, 2,   3, 3, 3, 3,
};

/*
 * Inverse horizontal scan of an 8x8 block, indexed [y][x]: the block is
 * walked as four 4x4 sub-blocks in horizontal order, each of them scanned
 * horizontally, so entry [y][x] is 16 * sub-block index + position inside
 * the sub-block.
 */
static const uint8_t horiz_scan8x8_inv[8][8] = {
    /* y = 0 */ {  0,  1,  2,  3, 16, 17, 18, 19 },
    /* y = 1 */ {  4,  5,  6,  7, 20, 21, 22, 23 },
    /* y = 2 */ {  8,  9, 10, 11, 24, 25, 26, 27 },
    /* y = 3 */ { 12, 13, 14, 15, 28, 29, 30, 31 },
    /* y = 4 */ { 32, 33, 34, 35, 48, 49, 50, 51 },
    /* y = 5 */ { 36, 37, 38, 39, 52, 53, 54, 55 },
    /* y = 6 */ { 40, 41, 42, 43, 56, 57, 58, 59 },
    /* y = 7 */ { 44, 45, 46, 47, 60, 61, 62, 63 },
};
75
/*
 * Diagonal scans: positions are visited one anti-diagonal (constant x + y)
 * at a time, and within an anti-diagonal x increases while y decreases.
 * The _x/_y tables give the column and row of each scan position; the _inv
 * tables are indexed [y][x] and give the scan position back.
 */
static const uint8_t diag_scan2x2_x[4] = {
    0,
    0, 1,
    1,
};

static const uint8_t diag_scan2x2_y[4] = {
    0,
    1, 0,
    1,
};

static const uint8_t diag_scan2x2_inv[2][2] = {
    /* y = 0 */ { 0, 2 },
    /* y = 1 */ { 1, 3 },
};

const uint8_t ff_hevc_diag_scan4x4_x[16] = {
    0,
    0, 1,
    0, 1, 2,
    0, 1, 2, 3,
    1, 2, 3,
    2, 3,
    3,
};

const uint8_t ff_hevc_diag_scan4x4_y[16] = {
    0,
    1, 0,
    2, 1, 0,
    3, 2, 1, 0,
    3, 2, 1,
    3, 2,
    3,
};

static const uint8_t diag_scan4x4_inv[4][4] = {
    /* y = 0 */ { 0,  2,  5,  9 },
    /* y = 1 */ { 1,  4,  8, 12 },
    /* y = 2 */ { 3,  7, 11, 14 },
    /* y = 3 */ { 6, 10, 13, 15 },
};
105
/*
 * Diagonal scan of an 8x8 grid, one anti-diagonal (constant x + y) per
 * source row: column of each scan position.
 */
const uint8_t ff_hevc_diag_scan8x8_x[64] = {
    0,
    0, 1,
    0, 1, 2,
    0, 1, 2, 3,
    0, 1, 2, 3, 4,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5, 6,
    0, 1, 2, 3, 4, 5, 6, 7,
    1, 2, 3, 4, 5, 6, 7,
    2, 3, 4, 5, 6, 7,
    3, 4, 5, 6, 7,
    4, 5, 6, 7,
    5, 6, 7,
    6, 7,
    7,
};

/* Row of each scan position, laid out like ff_hevc_diag_scan8x8_x. */
const uint8_t ff_hevc_diag_scan8x8_y[64] = {
    0,
    1, 0,
    2, 1, 0,
    3, 2, 1, 0,
    4, 3, 2, 1, 0,
    5, 4, 3, 2, 1, 0,
    6, 5, 4, 3, 2, 1, 0,
    7, 6, 5, 4, 3, 2, 1, 0,
    7, 6, 5, 4, 3, 2, 1,
    7, 6, 5, 4, 3, 2,
    7, 6, 5, 4, 3,
    7, 6, 5, 4,
    7, 6, 5,
    7, 6,
    7,
};

/* Inverse of the 8x8 diagonal scan, indexed [y][x]. */
static const uint8_t diag_scan8x8_inv[8][8] = {
    /* y = 0 */ {  0,  2,  5,  9, 14, 20, 27, 35 },
    /* y = 1 */ {  1,  4,  8, 13, 19, 26, 34, 42 },
    /* y = 2 */ {  3,  7, 12, 18, 25, 33, 41, 48 },
    /* y = 3 */ {  6, 11, 17, 24, 32, 40, 47, 53 },
    /* y = 4 */ { 10, 16, 23, 31, 39, 46, 52, 57 },
    /* y = 5 */ { 15, 22, 30, 38, 45, 51, 56, 60 },
    /* y = 6 */ { 21, 29, 37, 44, 50, 55, 59, 62 },
    /* y = 7 */ { 28, 36, 43, 49, 54, 58, 61, 63 },
};
154
155 /**
156  * NOTE: Each function hls_foo corresponds to the function foo in the
157  * specification (HLS stands for High Level Syntax).
158  */
159
160 /**
161  * Section 5.7
162  */
163
164 /* free everything allocated  by pic_arrays_init() */
165 static void pic_arrays_free(HEVCContext *s)
166 {
167     av_freep(&s->sao);
168     av_freep(&s->deblock);
169
170     av_freep(&s->skip_flag);
171     av_freep(&s->tab_ct_depth);
172
173     av_freep(&s->tab_ipm);
174     av_freep(&s->cbf_luma);
175     av_freep(&s->is_pcm);
176
177     av_freep(&s->qp_y_tab);
178     av_freep(&s->tab_slice_address);
179     av_freep(&s->filter_slice_edges);
180
181     av_freep(&s->horizontal_bs);
182     av_freep(&s->vertical_bs);
183
184     av_buffer_pool_uninit(&s->tab_mvf_pool);
185     av_buffer_pool_uninit(&s->rpl_tab_pool);
186 }
187
188 /* allocate arrays that depend on frame dimensions */
189 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
190 {
191     int log2_min_cb_size = sps->log2_min_cb_size;
192     int width            = sps->width;
193     int height           = sps->height;
194     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
195                            ((height >> log2_min_cb_size) + 1);
196     int ctb_count        = sps->ctb_width * sps->ctb_height;
197     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
198
199     s->bs_width  = width  >> 3;
200     s->bs_height = height >> 3;
201
202     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
203     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
204     if (!s->sao || !s->deblock)
205         goto fail;
206
207     s->skip_flag    = av_malloc(pic_size_in_ctb);
208     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
209     if (!s->skip_flag || !s->tab_ct_depth)
210         goto fail;
211
212     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
213     s->tab_ipm  = av_mallocz(min_pu_size);
214     s->is_pcm   = av_malloc(min_pu_size);
215     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
216         goto fail;
217
218     s->filter_slice_edges = av_malloc(ctb_count);
219     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
220                                       sizeof(*s->tab_slice_address));
221     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
222                                       sizeof(*s->qp_y_tab));
223     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
224         goto fail;
225
226     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
227     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
228     if (!s->horizontal_bs || !s->vertical_bs)
229         goto fail;
230
231     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
232                                           av_buffer_alloc);
233     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
234                                           av_buffer_allocz);
235     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
236         goto fail;
237
238     return 0;
239
240 fail:
241     pic_arrays_free(s);
242     return AVERROR(ENOMEM);
243 }
244
245 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
246 {
247     int i = 0;
248     int j = 0;
249     uint8_t luma_weight_l0_flag[16];
250     uint8_t chroma_weight_l0_flag[16];
251     uint8_t luma_weight_l1_flag[16];
252     uint8_t chroma_weight_l1_flag[16];
253
254     s->sh.luma_log2_weight_denom = av_clip(get_ue_golomb_long(gb), 0, 7);
255     if (s->sps->chroma_format_idc != 0) {
256         int delta = get_se_golomb(gb);
257         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
258     }
259
260     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
261         luma_weight_l0_flag[i] = get_bits1(gb);
262         if (!luma_weight_l0_flag[i]) {
263             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
264             s->sh.luma_offset_l0[i] = 0;
265         }
266     }
267     if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
268         for (i = 0; i < s->sh.nb_refs[L0]; i++)
269             chroma_weight_l0_flag[i] = get_bits1(gb);
270     } else {
271         for (i = 0; i < s->sh.nb_refs[L0]; i++)
272             chroma_weight_l0_flag[i] = 0;
273     }
274     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
275         if (luma_weight_l0_flag[i]) {
276             int delta_luma_weight_l0 = get_se_golomb(gb);
277             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
278             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
279         }
280         if (chroma_weight_l0_flag[i]) {
281             for (j = 0; j < 2; j++) {
282                 int delta_chroma_weight_l0 = get_se_golomb(gb);
283                 int delta_chroma_offset_l0 = get_se_golomb(gb);
284                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
285                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
286                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
287             }
288         } else {
289             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
290             s->sh.chroma_offset_l0[i][0] = 0;
291             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
292             s->sh.chroma_offset_l0[i][1] = 0;
293         }
294     }
295     if (s->sh.slice_type == B_SLICE) {
296         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
297             luma_weight_l1_flag[i] = get_bits1(gb);
298             if (!luma_weight_l1_flag[i]) {
299                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
300                 s->sh.luma_offset_l1[i] = 0;
301             }
302         }
303         if (s->sps->chroma_format_idc != 0) {
304             for (i = 0; i < s->sh.nb_refs[L1]; i++)
305                 chroma_weight_l1_flag[i] = get_bits1(gb);
306         } else {
307             for (i = 0; i < s->sh.nb_refs[L1]; i++)
308                 chroma_weight_l1_flag[i] = 0;
309         }
310         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
311             if (luma_weight_l1_flag[i]) {
312                 int delta_luma_weight_l1 = get_se_golomb(gb);
313                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
314                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
315             }
316             if (chroma_weight_l1_flag[i]) {
317                 for (j = 0; j < 2; j++) {
318                     int delta_chroma_weight_l1 = get_se_golomb(gb);
319                     int delta_chroma_offset_l1 = get_se_golomb(gb);
320                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
321                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
322                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
323                 }
324             } else {
325                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
326                 s->sh.chroma_offset_l1[i][0] = 0;
327                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
328                 s->sh.chroma_offset_l1[i][1] = 0;
329             }
330         }
331     }
332 }
333
334 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
335 {
336     const HEVCSPS *sps = s->sps;
337     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
338     int prev_delta_msb = 0;
339     unsigned int nb_sps = 0, nb_sh;
340     int i;
341
342     rps->nb_refs = 0;
343     if (!sps->long_term_ref_pics_present_flag)
344         return 0;
345
346     if (sps->num_long_term_ref_pics_sps > 0)
347         nb_sps = get_ue_golomb_long(gb);
348     nb_sh = get_ue_golomb_long(gb);
349
350     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
351         return AVERROR_INVALIDDATA;
352
353     rps->nb_refs = nb_sh + nb_sps;
354
355     for (i = 0; i < rps->nb_refs; i++) {
356         uint8_t delta_poc_msb_present;
357
358         if (i < nb_sps) {
359             uint8_t lt_idx_sps = 0;
360
361             if (sps->num_long_term_ref_pics_sps > 1)
362                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
363
364             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
365             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
366         } else {
367             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
368             rps->used[i] = get_bits1(gb);
369         }
370
371         delta_poc_msb_present = get_bits1(gb);
372         if (delta_poc_msb_present) {
373             int delta = get_ue_golomb_long(gb);
374
375             if (i && i != nb_sps)
376                 delta += prev_delta_msb;
377
378             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
379             prev_delta_msb = delta;
380         }
381     }
382
383     return 0;
384 }
385
386 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
387 {
388     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL)
389     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
390     int ret;
391     unsigned int num = 0, den = 0;
392
393     pic_arrays_free(s);
394     ret = pic_arrays_init(s, sps);
395     if (ret < 0)
396         goto fail;
397
398     s->avctx->coded_width         = sps->width;
399     s->avctx->coded_height        = sps->height;
400     s->avctx->width               = sps->output_width;
401     s->avctx->height              = sps->output_height;
402     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
403
404     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
405 #if CONFIG_HEVC_DXVA2_HWACCEL
406         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
407 #endif
408     }
409
410     *fmt++ = sps->pix_fmt;
411     *fmt = AV_PIX_FMT_NONE;
412
413     ret = ff_get_format(s->avctx, pix_fmts);
414     if (ret < 0)
415         goto fail;
416     s->avctx->pix_fmt = ret;
417
418     ff_set_sar(s->avctx, sps->vui.sar);
419
420     if (sps->vui.video_signal_type_present_flag)
421         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
422                                                                : AVCOL_RANGE_MPEG;
423     else
424         s->avctx->color_range = AVCOL_RANGE_MPEG;
425
426     if (sps->vui.colour_description_present_flag) {
427         s->avctx->color_primaries = sps->vui.colour_primaries;
428         s->avctx->color_trc       = sps->vui.transfer_characteristic;
429         s->avctx->colorspace      = sps->vui.matrix_coeffs;
430     } else {
431         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
432         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
433         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
434     }
435
436     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
437     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
438     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
439
440     if (sps->sao_enabled && !s->avctx->hwaccel) {
441         av_frame_unref(s->tmp_frame);
442         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
443         if (ret < 0)
444             goto fail;
445         s->frame = s->tmp_frame;
446     }
447
448     s->sps = sps;
449     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
450
451     if (s->vps->vps_timing_info_present_flag) {
452         num = s->vps->vps_num_units_in_tick;
453         den = s->vps->vps_time_scale;
454     } else if (sps->vui.vui_timing_info_present_flag) {
455         num = sps->vui.vui_num_units_in_tick;
456         den = sps->vui.vui_time_scale;
457     }
458
459     if (num != 0 && den != 0)
460         av_reduce(&s->avctx->framerate.den, &s->avctx->framerate.num,
461                   num, den, 1 << 30);
462
463     return 0;
464
465 fail:
466     pic_arrays_free(s);
467     s->sps = NULL;
468     return ret;
469 }
470
471 static int hls_slice_header(HEVCContext *s)
472 {
473     GetBitContext *gb = &s->HEVClc.gb;
474     SliceHeader *sh   = &s->sh;
475     int i, ret;
476
477     // Coded parameters
478     sh->first_slice_in_pic_flag = get_bits1(gb);
479     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
480         s->seq_decode = (s->seq_decode + 1) & 0xff;
481         s->max_ra     = INT_MAX;
482         if (IS_IDR(s))
483             ff_hevc_clear_refs(s);
484     }
485     if (IS_IRAP(s))
486         sh->no_output_of_prior_pics_flag = get_bits1(gb);
487
488     sh->pps_id = get_ue_golomb_long(gb);
489     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
490         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
491         return AVERROR_INVALIDDATA;
492     }
493     if (!sh->first_slice_in_pic_flag &&
494         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
495         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
496         return AVERROR_INVALIDDATA;
497     }
498     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
499
500     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
501         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
502
503         ff_hevc_clear_refs(s);
504         ret = set_sps(s, s->sps);
505         if (ret < 0)
506             return ret;
507
508         s->seq_decode = (s->seq_decode + 1) & 0xff;
509         s->max_ra     = INT_MAX;
510     }
511
512     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
513     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
514
515     sh->dependent_slice_segment_flag = 0;
516     if (!sh->first_slice_in_pic_flag) {
517         int slice_address_length;
518
519         if (s->pps->dependent_slice_segments_enabled_flag)
520             sh->dependent_slice_segment_flag = get_bits1(gb);
521
522         slice_address_length = av_ceil_log2(s->sps->ctb_width *
523                                             s->sps->ctb_height);
524         sh->slice_segment_addr = get_bits(gb, slice_address_length);
525         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
526             av_log(s->avctx, AV_LOG_ERROR,
527                    "Invalid slice segment address: %u.\n",
528                    sh->slice_segment_addr);
529             return AVERROR_INVALIDDATA;
530         }
531
532         if (!sh->dependent_slice_segment_flag) {
533             sh->slice_addr = sh->slice_segment_addr;
534             s->slice_idx++;
535         }
536     } else {
537         sh->slice_segment_addr = sh->slice_addr = 0;
538         s->slice_idx           = 0;
539         s->slice_initialized   = 0;
540     }
541
542     if (!sh->dependent_slice_segment_flag) {
543         s->slice_initialized = 0;
544
545         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
546             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
547
548         sh->slice_type = get_ue_golomb_long(gb);
549         if (!(sh->slice_type == I_SLICE ||
550               sh->slice_type == P_SLICE ||
551               sh->slice_type == B_SLICE)) {
552             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
553                    sh->slice_type);
554             return AVERROR_INVALIDDATA;
555         }
556         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
557             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
558             return AVERROR_INVALIDDATA;
559         }
560
561         // when flag is not present, picture is inferred to be output
562         sh->pic_output_flag = 1;
563         if (s->pps->output_flag_present_flag)
564             sh->pic_output_flag = get_bits1(gb);
565
566         if (s->sps->separate_colour_plane_flag)
567             sh->colour_plane_id = get_bits(gb, 2);
568
569         if (!IS_IDR(s)) {
570             int poc;
571
572             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
573             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
574             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
575                 av_log(s->avctx, AV_LOG_WARNING,
576                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
577                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
578                     return AVERROR_INVALIDDATA;
579                 poc = s->poc;
580             }
581             s->poc = poc;
582
583             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
584             if (!sh->short_term_ref_pic_set_sps_flag) {
585                 int pos = get_bits_left(gb);
586                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
587                 if (ret < 0)
588                     return ret;
589
590                 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
591                 sh->short_term_rps = &sh->slice_rps;
592             } else {
593                 int numbits, rps_idx;
594
595                 if (!s->sps->nb_st_rps) {
596                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
597                     return AVERROR_INVALIDDATA;
598                 }
599
600                 numbits = av_ceil_log2(s->sps->nb_st_rps);
601                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
602                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
603             }
604
605             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
606             if (ret < 0) {
607                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
608                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
609                     return AVERROR_INVALIDDATA;
610             }
611
612             if (s->sps->sps_temporal_mvp_enabled_flag)
613                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
614             else
615                 sh->slice_temporal_mvp_enabled_flag = 0;
616         } else {
617             s->sh.short_term_rps = NULL;
618             s->poc               = 0;
619         }
620
621         /* 8.3.1 */
622         if (s->temporal_id == 0 &&
623             s->nal_unit_type != NAL_TRAIL_N &&
624             s->nal_unit_type != NAL_TSA_N   &&
625             s->nal_unit_type != NAL_STSA_N  &&
626             s->nal_unit_type != NAL_RADL_N  &&
627             s->nal_unit_type != NAL_RADL_R  &&
628             s->nal_unit_type != NAL_RASL_N  &&
629             s->nal_unit_type != NAL_RASL_R)
630             s->pocTid0 = s->poc;
631
632         if (s->sps->sao_enabled) {
633             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
634             sh->slice_sample_adaptive_offset_flag[1] =
635             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
636         } else {
637             sh->slice_sample_adaptive_offset_flag[0] = 0;
638             sh->slice_sample_adaptive_offset_flag[1] = 0;
639             sh->slice_sample_adaptive_offset_flag[2] = 0;
640         }
641
642         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
643         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
644             int nb_refs;
645
646             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
647             if (sh->slice_type == B_SLICE)
648                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
649
650             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
651                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
652                 if (sh->slice_type == B_SLICE)
653                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
654             }
655             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
656                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
657                        sh->nb_refs[L0], sh->nb_refs[L1]);
658                 return AVERROR_INVALIDDATA;
659             }
660
661             sh->rpl_modification_flag[0] = 0;
662             sh->rpl_modification_flag[1] = 0;
663             nb_refs = ff_hevc_frame_nb_refs(s);
664             if (!nb_refs) {
665                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
666                 return AVERROR_INVALIDDATA;
667             }
668
669             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
670                 sh->rpl_modification_flag[0] = get_bits1(gb);
671                 if (sh->rpl_modification_flag[0]) {
672                     for (i = 0; i < sh->nb_refs[L0]; i++)
673                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
674                 }
675
676                 if (sh->slice_type == B_SLICE) {
677                     sh->rpl_modification_flag[1] = get_bits1(gb);
678                     if (sh->rpl_modification_flag[1] == 1)
679                         for (i = 0; i < sh->nb_refs[L1]; i++)
680                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
681                 }
682             }
683
684             if (sh->slice_type == B_SLICE)
685                 sh->mvd_l1_zero_flag = get_bits1(gb);
686
687             if (s->pps->cabac_init_present_flag)
688                 sh->cabac_init_flag = get_bits1(gb);
689             else
690                 sh->cabac_init_flag = 0;
691
692             sh->collocated_ref_idx = 0;
693             if (sh->slice_temporal_mvp_enabled_flag) {
694                 sh->collocated_list = L0;
695                 if (sh->slice_type == B_SLICE)
696                     sh->collocated_list = !get_bits1(gb);
697
698                 if (sh->nb_refs[sh->collocated_list] > 1) {
699                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
700                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
701                         av_log(s->avctx, AV_LOG_ERROR,
702                                "Invalid collocated_ref_idx: %d.\n",
703                                sh->collocated_ref_idx);
704                         return AVERROR_INVALIDDATA;
705                     }
706                 }
707             }
708
709             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
710                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
711                 pred_weight_table(s, gb);
712             }
713
714             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
715             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
716                 av_log(s->avctx, AV_LOG_ERROR,
717                        "Invalid number of merging MVP candidates: %d.\n",
718                        sh->max_num_merge_cand);
719                 return AVERROR_INVALIDDATA;
720             }
721         }
722
723         sh->slice_qp_delta = get_se_golomb(gb);
724
725         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
726             sh->slice_cb_qp_offset = get_se_golomb(gb);
727             sh->slice_cr_qp_offset = get_se_golomb(gb);
728         } else {
729             sh->slice_cb_qp_offset = 0;
730             sh->slice_cr_qp_offset = 0;
731         }
732
733         if (s->pps->deblocking_filter_control_present_flag) {
734             int deblocking_filter_override_flag = 0;
735
736             if (s->pps->deblocking_filter_override_enabled_flag)
737                 deblocking_filter_override_flag = get_bits1(gb);
738
739             if (deblocking_filter_override_flag) {
740                 sh->disable_deblocking_filter_flag = get_bits1(gb);
741                 if (!sh->disable_deblocking_filter_flag) {
742                     sh->beta_offset = get_se_golomb(gb) * 2;
743                     sh->tc_offset   = get_se_golomb(gb) * 2;
744                 }
745             } else {
746                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
747                 sh->beta_offset                    = s->pps->beta_offset;
748                 sh->tc_offset                      = s->pps->tc_offset;
749             }
750         } else {
751             sh->disable_deblocking_filter_flag = 0;
752             sh->beta_offset                    = 0;
753             sh->tc_offset                      = 0;
754         }
755
756         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
757             (sh->slice_sample_adaptive_offset_flag[0] ||
758              sh->slice_sample_adaptive_offset_flag[1] ||
759              !sh->disable_deblocking_filter_flag)) {
760             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
761         } else {
762             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
763         }
764     } else if (!s->slice_initialized) {
765         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
766         return AVERROR_INVALIDDATA;
767     }
768
769     sh->num_entry_point_offsets = 0;
770     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
771         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
772         if (sh->num_entry_point_offsets > 0) {
773             int offset_len = get_ue_golomb_long(gb) + 1;
774
775             for (i = 0; i < sh->num_entry_point_offsets; i++)
776                 skip_bits(gb, offset_len);
777         }
778     }
779
780     if (s->pps->slice_header_extension_present_flag) {
781         unsigned int length = get_ue_golomb_long(gb);
782         for (i = 0; i < length; i++)
783             skip_bits(gb, 8);  // slice_header_extension_data_byte
784     }
785
786     // Inferred parameters
787     sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
788     if (sh->slice_qp > 51 ||
789         sh->slice_qp < -s->sps->qp_bd_offset) {
790         av_log(s->avctx, AV_LOG_ERROR,
791                "The slice_qp %d is outside the valid range "
792                "[%d, 51].\n",
793                sh->slice_qp,
794                -s->sps->qp_bd_offset);
795         return AVERROR_INVALIDDATA;
796     }
797
798     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
799
800     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
801         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
802         return AVERROR_INVALIDDATA;
803     }
804
805     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
806
807     if (!s->pps->cu_qp_delta_enabled_flag)
808         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
809                                 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
810
811     s->slice_initialized = 1;
812
813     return 0;
814 }
815
/* Element of a per-CTB table at CTB coordinates (x, y); expects a context
 * pointer named s in the enclosing scope. */
#define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])

/*
 * Set one field of the current CTB's SAO parameters. With the merge-left
 * flag set the field is copied from the left neighbour, otherwise with the
 * merge-up flag set from the neighbour above; only when neither flag is
 * set is `value` evaluated and stored. Expects sao, rx, ry, s and both
 * merge flags in the enclosing scope.
 */
#define SET_SAO(elem, value)                            \
do {                                                    \
    if (sao_merge_left_flag)                            \
        sao->elem = CTB(s->sao, rx-1, ry).elem;         \
    else if (sao_merge_up_flag)                         \
        sao->elem = CTB(s->sao, rx, ry-1).elem;         \
    else                                                \
        sao->elem = value;                              \
} while (0)
829
830 static void hls_sao_param(HEVCContext *s, int rx, int ry)
831 {
832     HEVCLocalContext *lc    = &s->HEVClc;
833     int sao_merge_left_flag = 0;
834     int sao_merge_up_flag   = 0;
835     int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
836     SAOParams *sao          = &CTB(s->sao, rx, ry);
837     int c_idx, i;
838
839     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
840         s->sh.slice_sample_adaptive_offset_flag[1]) {
841         if (rx > 0) {
842             if (lc->ctb_left_flag)
843                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
844         }
845         if (ry > 0 && !sao_merge_left_flag) {
846             if (lc->ctb_up_flag)
847                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
848         }
849     }
850
851     for (c_idx = 0; c_idx < 3; c_idx++) {
852         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
853             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
854             continue;
855         }
856
857         if (c_idx == 2) {
858             sao->type_idx[2] = sao->type_idx[1];
859             sao->eo_class[2] = sao->eo_class[1];
860         } else {
861             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
862         }
863
864         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
865             continue;
866
867         for (i = 0; i < 4; i++)
868             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
869
870         if (sao->type_idx[c_idx] == SAO_BAND) {
871             for (i = 0; i < 4; i++) {
872                 if (sao->offset_abs[c_idx][i]) {
873                     SET_SAO(offset_sign[c_idx][i],
874                             ff_hevc_sao_offset_sign_decode(s));
875                 } else {
876                     sao->offset_sign[c_idx][i] = 0;
877                 }
878             }
879             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
880         } else if (c_idx != 2) {
881             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
882         }
883
884         // Inferred parameters
885         sao->offset_val[c_idx][0] = 0;
886         for (i = 0; i < 4; i++) {
887             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
888             if (sao->type_idx[c_idx] == SAO_EDGE) {
889                 if (i > 1)
890                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
891             } else if (sao->offset_sign[c_idx][i]) {
892                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
893             }
894         }
895     }
896 }
897
898 #undef SET_SAO
899 #undef CTB
900
901 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
902                                 int log2_trafo_size, enum ScanType scan_idx,
903                                 int c_idx)
904 {
905 #define GET_COORD(offset, n)                                    \
906     do {                                                        \
907         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
908         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
909     } while (0)
910     HEVCLocalContext *lc    = &s->HEVClc;
911     int transform_skip_flag = 0;
912
913     int last_significant_coeff_x, last_significant_coeff_y;
914     int last_scan_pos;
915     int n_end;
916     int num_coeff    = 0;
917     int greater1_ctx = 1;
918
919     int num_last_subset;
920     int x_cg_last_sig, y_cg_last_sig;
921
922     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
923
924     ptrdiff_t stride = s->frame->linesize[c_idx];
925     int hshift       = s->sps->hshift[c_idx];
926     int vshift       = s->sps->vshift[c_idx];
927     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
928                                               ((x0 >> hshift) << s->sps->pixel_shift)];
929     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
930     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
931
932     int trafo_size = 1 << log2_trafo_size;
933     int i, qp, shift, add, scale, scale_m;
934     const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
935     const uint8_t *scale_matrix;
936     uint8_t dc_scale;
937
938     // Derive QP for dequant
939     if (!lc->cu.cu_transquant_bypass_flag) {
940         static const int qp_c[] = {
941             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
942         };
943
944         static const uint8_t rem6[51 + 2 * 6 + 1] = {
945             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
946             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
947             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
948         };
949
950         static const uint8_t div6[51 + 2 * 6 + 1] = {
951             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
952             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
953             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
954         };
955         int qp_y = lc->qp_y;
956
957         if (c_idx == 0) {
958             qp = qp_y + s->sps->qp_bd_offset;
959         } else {
960             int qp_i, offset;
961
962             if (c_idx == 1)
963                 offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
964             else
965                 offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
966
967             qp_i = av_clip(qp_y + offset, -s->sps->qp_bd_offset, 57);
968             if (qp_i < 30)
969                 qp = qp_i;
970             else if (qp_i > 43)
971                 qp = qp_i - 6;
972             else
973                 qp = qp_c[qp_i - 30];
974
975             qp += s->sps->qp_bd_offset;
976         }
977
978         shift    = s->sps->bit_depth + log2_trafo_size - 5;
979         add      = 1 << (shift - 1);
980         scale    = level_scale[rem6[qp]] << (div6[qp]);
981         scale_m  = 16; // default when no custom scaling lists.
982         dc_scale = 16;
983
984         if (s->sps->scaling_list_enable_flag) {
985             const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
986                                     &s->pps->scaling_list : &s->sps->scaling_list;
987             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
988
989             if (log2_trafo_size != 5)
990                 matrix_id = 3 * matrix_id + c_idx;
991
992             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
993             if (log2_trafo_size >= 4)
994                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
995         }
996     }
997
998     if (s->pps->transform_skip_enabled_flag &&
999         !lc->cu.cu_transquant_bypass_flag   &&
1000         log2_trafo_size == 2) {
1001         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
1002     }
1003
1004     last_significant_coeff_x =
1005         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
1006     last_significant_coeff_y =
1007         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
1008
1009     if (last_significant_coeff_x > 3) {
1010         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
1011         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
1012                                    (2 + (last_significant_coeff_x & 1)) +
1013                                    suffix;
1014     }
1015
1016     if (last_significant_coeff_y > 3) {
1017         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
1018         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
1019                                    (2 + (last_significant_coeff_y & 1)) +
1020                                    suffix;
1021     }
1022
1023     if (scan_idx == SCAN_VERT)
1024         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1025
1026     x_cg_last_sig = last_significant_coeff_x >> 2;
1027     y_cg_last_sig = last_significant_coeff_y >> 2;
1028
1029     switch (scan_idx) {
1030     case SCAN_DIAG: {
1031         int last_x_c = last_significant_coeff_x & 3;
1032         int last_y_c = last_significant_coeff_y & 3;
1033
1034         scan_x_off = ff_hevc_diag_scan4x4_x;
1035         scan_y_off = ff_hevc_diag_scan4x4_y;
1036         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1037         if (trafo_size == 4) {
1038             scan_x_cg = scan_1x1;
1039             scan_y_cg = scan_1x1;
1040         } else if (trafo_size == 8) {
1041             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1042             scan_x_cg  = diag_scan2x2_x;
1043             scan_y_cg  = diag_scan2x2_y;
1044         } else if (trafo_size == 16) {
1045             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1046             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1047             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1048         } else { // trafo_size == 32
1049             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1050             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1051             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1052         }
1053         break;
1054     }
1055     case SCAN_HORIZ:
1056         scan_x_cg  = horiz_scan2x2_x;
1057         scan_y_cg  = horiz_scan2x2_y;
1058         scan_x_off = horiz_scan4x4_x;
1059         scan_y_off = horiz_scan4x4_y;
1060         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1061         break;
1062     default: //SCAN_VERT
1063         scan_x_cg  = horiz_scan2x2_y;
1064         scan_y_cg  = horiz_scan2x2_x;
1065         scan_x_off = horiz_scan4x4_y;
1066         scan_y_off = horiz_scan4x4_x;
1067         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1068         break;
1069     }
1070     num_coeff++;
1071     num_last_subset = (num_coeff - 1) >> 4;
1072
1073     for (i = num_last_subset; i >= 0; i--) {
1074         int n, m;
1075         int x_cg, y_cg, x_c, y_c;
1076         int implicit_non_zero_coeff = 0;
1077         int64_t trans_coeff_level;
1078         int prev_sig = 0;
1079         int offset   = i << 4;
1080
1081         uint8_t significant_coeff_flag_idx[16];
1082         uint8_t nb_significant_coeff_flag = 0;
1083
1084         x_cg = scan_x_cg[i];
1085         y_cg = scan_y_cg[i];
1086
1087         if (i < num_last_subset && i > 0) {
1088             int ctx_cg = 0;
1089             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1090                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1091             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1092                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1093
1094             significant_coeff_group_flag[x_cg][y_cg] =
1095                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1096             implicit_non_zero_coeff = 1;
1097         } else {
1098             significant_coeff_group_flag[x_cg][y_cg] =
1099                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1100                  (x_cg == 0 && y_cg == 0));
1101         }
1102
1103         last_scan_pos = num_coeff - offset - 1;
1104
1105         if (i == num_last_subset) {
1106             n_end                         = last_scan_pos - 1;
1107             significant_coeff_flag_idx[0] = last_scan_pos;
1108             nb_significant_coeff_flag     = 1;
1109         } else {
1110             n_end = 15;
1111         }
1112
1113         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1114             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1115         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1116             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1117
1118         for (n = n_end; n >= 0; n--) {
1119             GET_COORD(offset, n);
1120
1121             if (significant_coeff_group_flag[x_cg][y_cg] &&
1122                 (n > 0 || implicit_non_zero_coeff == 0)) {
1123                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1124                                                           log2_trafo_size,
1125                                                           scan_idx,
1126                                                           prev_sig) == 1) {
1127                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1128                     nb_significant_coeff_flag++;
1129                     implicit_non_zero_coeff = 0;
1130                 }
1131             } else {
1132                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1133                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1134                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1135                     nb_significant_coeff_flag++;
1136                 }
1137             }
1138         }
1139
1140         n_end = nb_significant_coeff_flag;
1141
1142         if (n_end) {
1143             int first_nz_pos_in_cg = 16;
1144             int last_nz_pos_in_cg = -1;
1145             int c_rice_param = 0;
1146             int first_greater1_coeff_idx = -1;
1147             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1148             uint16_t coeff_sign_flag;
1149             int sum_abs = 0;
1150             int sign_hidden = 0;
1151
1152             // initialize first elem of coeff_bas_level_greater1_flag
1153             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1154
1155             if (!(i == num_last_subset) && greater1_ctx == 0)
1156                 ctx_set++;
1157             greater1_ctx      = 1;
1158             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1159
1160             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1161                 int n_idx = significant_coeff_flag_idx[m];
1162                 int inc   = (ctx_set << 2) + greater1_ctx;
1163                 coeff_abs_level_greater1_flag[n_idx] =
1164                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1165                 if (coeff_abs_level_greater1_flag[n_idx]) {
1166                     greater1_ctx = 0;
1167                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1168                     greater1_ctx++;
1169                 }
1170
1171                 if (coeff_abs_level_greater1_flag[n_idx] &&
1172                     first_greater1_coeff_idx == -1)
1173                     first_greater1_coeff_idx = n_idx;
1174             }
1175             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1176             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1177                                  !lc->cu.cu_transquant_bypass_flag;
1178
1179             if (first_greater1_coeff_idx != -1) {
1180                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1181             }
1182             if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
1183                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1184             } else {
1185                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1186             }
1187
1188             for (m = 0; m < n_end; m++) {
1189                 n = significant_coeff_flag_idx[m];
1190                 GET_COORD(offset, n);
1191                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1192                 if (trans_coeff_level == ((m < 8) ?
1193                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1194                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1195
1196                     trans_coeff_level += last_coeff_abs_level_remaining;
1197                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1198                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1199                 }
1200                 if (s->pps->sign_data_hiding_flag && sign_hidden) {
1201                     sum_abs += trans_coeff_level;
1202                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1203                         trans_coeff_level = -trans_coeff_level;
1204                 }
1205                 if (coeff_sign_flag >> 15)
1206                     trans_coeff_level = -trans_coeff_level;
1207                 coeff_sign_flag <<= 1;
1208                 if (!lc->cu.cu_transquant_bypass_flag) {
1209                     if (s->sps->scaling_list_enable_flag) {
1210                         if (y_c || x_c || log2_trafo_size < 4) {
1211                             int pos;
1212                             switch (log2_trafo_size) {
1213                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1214                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1215                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1216                             default: pos = (y_c        << 2) +  x_c;
1217                             }
1218                             scale_m = scale_matrix[pos];
1219                         } else {
1220                             scale_m = dc_scale;
1221                         }
1222                     }
1223                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1224                     if(trans_coeff_level < 0) {
1225                         if((~trans_coeff_level) & 0xFffffffffff8000)
1226                             trans_coeff_level = -32768;
1227                     } else {
1228                         if (trans_coeff_level & 0xffffffffffff8000)
1229                             trans_coeff_level = 32767;
1230                     }
1231                 }
1232                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1233             }
1234         }
1235     }
1236
1237     if (lc->cu.cu_transquant_bypass_flag) {
1238         s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
1239     } else {
1240         if (transform_skip_flag)
1241             s->hevcdsp.transform_skip(dst, coeffs, stride);
1242         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1243                  log2_trafo_size == 2)
1244             s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
1245         else
1246             s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
1247     }
1248 }
1249
1250 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1251                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1252                               int log2_cb_size, int log2_trafo_size,
1253                               int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr)
1254 {
1255     HEVCLocalContext *lc = &s->HEVClc;
1256
1257     if (lc->cu.pred_mode == MODE_INTRA) {
1258         int trafo_size = 1 << log2_trafo_size;
1259         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1260
1261         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1262         if (log2_trafo_size > 2) {
1263             trafo_size = trafo_size << (s->sps->hshift[1] - 1);
1264             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1265             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1266             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1267         } else if (blk_idx == 3) {
1268             trafo_size = trafo_size << s->sps->hshift[1];
1269             ff_hevc_set_neighbour_available(s, xBase, yBase,
1270                                             trafo_size, trafo_size);
1271             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1272             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1273         }
1274     }
1275
1276     if (cbf_luma || cbf_cb || cbf_cr) {
1277         int scan_idx   = SCAN_DIAG;
1278         int scan_idx_c = SCAN_DIAG;
1279
1280         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1281             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1282             if (lc->tu.cu_qp_delta != 0)
1283                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1284                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1285             lc->tu.is_cu_qp_delta_coded = 1;
1286
1287             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
1288                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
1289                 av_log(s->avctx, AV_LOG_ERROR,
1290                        "The cu_qp_delta %d is outside the valid range "
1291                        "[%d, %d].\n",
1292                        lc->tu.cu_qp_delta,
1293                        -(26 + s->sps->qp_bd_offset / 2),
1294                         (25 + s->sps->qp_bd_offset / 2));
1295                 return AVERROR_INVALIDDATA;
1296             }
1297
1298             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1299         }
1300
1301         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1302             if (lc->tu.cur_intra_pred_mode >= 6 &&
1303                 lc->tu.cur_intra_pred_mode <= 14) {
1304                 scan_idx = SCAN_VERT;
1305             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1306                        lc->tu.cur_intra_pred_mode <= 30) {
1307                 scan_idx = SCAN_HORIZ;
1308             }
1309
1310             if (lc->pu.intra_pred_mode_c >=  6 &&
1311                 lc->pu.intra_pred_mode_c <= 14) {
1312                 scan_idx_c = SCAN_VERT;
1313             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1314                        lc->pu.intra_pred_mode_c <= 30) {
1315                 scan_idx_c = SCAN_HORIZ;
1316             }
1317         }
1318
1319         if (cbf_luma)
1320             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1321         if (log2_trafo_size > 2) {
1322             if (cbf_cb)
1323                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1324             if (cbf_cr)
1325                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1326         } else if (blk_idx == 3) {
1327             if (cbf_cb)
1328                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1329             if (cbf_cr)
1330                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1331         }
1332     }
1333     return 0;
1334 }
1335
1336 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1337 {
1338     int cb_size          = 1 << log2_cb_size;
1339     int log2_min_pu_size = s->sps->log2_min_pu_size;
1340
1341     int min_pu_width     = s->sps->min_pu_width;
1342     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1343     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1344     int i, j;
1345
1346     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1347         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1348             s->is_pcm[i + j * min_pu_width] = 2;
1349 }
1350
1351 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1352                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1353                               int log2_cb_size, int log2_trafo_size,
1354                               int trafo_depth, int blk_idx,
1355                               int cbf_cb, int cbf_cr)
1356 {
1357     HEVCLocalContext *lc = &s->HEVClc;
1358     uint8_t split_transform_flag;
1359     int ret;
1360
1361     if (lc->cu.intra_split_flag) {
1362         if (trafo_depth == 1)
1363             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1364     } else {
1365         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1366     }
1367
1368     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1369         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1370         trafo_depth     < lc->cu.max_trafo_depth       &&
1371         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1372         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1373     } else {
1374         int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1375                           lc->cu.pred_mode == MODE_INTER &&
1376                           lc->cu.part_mode != PART_2Nx2N &&
1377                           trafo_depth == 0;
1378
1379         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1380                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1381                                inter_split;
1382     }
1383
1384     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1385         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1386     else if (log2_trafo_size > 2 || trafo_depth == 0)
1387         cbf_cb = 0;
1388     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1389         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1390     else if (log2_trafo_size > 2 || trafo_depth == 0)
1391         cbf_cr = 0;
1392
1393     if (split_transform_flag) {
1394         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1395         const int x1 = x0 + trafo_size_split;
1396         const int y1 = y0 + trafo_size_split;
1397
1398 #define SUBDIVIDE(x, y, idx)                                                    \
1399 do {                                                                            \
1400     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1401                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1402                              cbf_cb, cbf_cr);                                   \
1403     if (ret < 0)                                                                \
1404         return ret;                                                             \
1405 } while (0)
1406
1407         SUBDIVIDE(x0, y0, 0);
1408         SUBDIVIDE(x1, y0, 1);
1409         SUBDIVIDE(x0, y1, 2);
1410         SUBDIVIDE(x1, y1, 3);
1411
1412 #undef SUBDIVIDE
1413     } else {
1414         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1415         int log2_min_tu_size = s->sps->log2_min_tb_size;
1416         int min_tu_width     = s->sps->min_tb_width;
1417         int cbf_luma         = 1;
1418
1419         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1420             cbf_cb || cbf_cr)
1421             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1422
1423         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1424                                  log2_cb_size, log2_trafo_size,
1425                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1426         if (ret < 0)
1427             return ret;
1428         // TODO: store cbf_luma somewhere else
1429         if (cbf_luma) {
1430             int i, j;
1431             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1432                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1433                     int x_tu = (x0 + j) >> log2_min_tu_size;
1434                     int y_tu = (y0 + i) >> log2_min_tu_size;
1435                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1436                 }
1437         }
1438         if (!s->sh.disable_deblocking_filter_flag) {
1439             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1440             if (s->pps->transquant_bypass_enable_flag &&
1441                 lc->cu.cu_transquant_bypass_flag)
1442                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1443         }
1444     }
1445     return 0;
1446 }
1447
1448 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1449 {
1450     //TODO: non-4:2:0 support
1451     HEVCLocalContext *lc = &s->HEVClc;
1452     GetBitContext gb;
1453     int cb_size   = 1 << log2_cb_size;
1454     int stride0   = s->frame->linesize[0];
1455     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1456     int   stride1 = s->frame->linesize[1];
1457     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1458     int   stride2 = s->frame->linesize[2];
1459     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1460
1461     int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
1462     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1463     int ret;
1464
1465     if (!s->sh.disable_deblocking_filter_flag)
1466         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1467
1468     ret = init_get_bits(&gb, pcm, length);
1469     if (ret < 0)
1470         return ret;
1471
1472     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
1473     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1474     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
1475     return 0;
1476 }
1477
1478 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1479 {
1480     HEVCLocalContext *lc = &s->HEVClc;
1481     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1482     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1483
1484     if (x)
1485         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1486     if (y)
1487         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1488
1489     switch (x) {
1490     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1491     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1492     case 0: lc->pu.mvd.x = 0;                               break;
1493     }
1494
1495     switch (y) {
1496     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1497     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1498     case 0: lc->pu.mvd.y = 0;                               break;
1499     }
1500 }
1501
1502 /**
1503  * 8.5.3.2.2.1 Luma sample interpolation process
1504  *
1505  * @param s HEVC decoding context
1506  * @param dst target buffer for block data at block position
1507  * @param dststride stride of the dst buffer
1508  * @param ref reference picture buffer at origin (0, 0)
1509  * @param mv motion vector (relative to block position) to get pixel data from
1510  * @param x_off horizontal position of block from origin (0, 0)
1511  * @param y_off vertical position of block from origin (0, 0)
1512  * @param block_w width of block
1513  * @param block_h height of block
1514  */
1515 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1516                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1517                     int block_w, int block_h)
1518 {
1519     HEVCLocalContext *lc = &s->HEVClc;
1520     uint8_t *src         = ref->data[0];
1521     ptrdiff_t srcstride  = ref->linesize[0];
1522     int pic_width        = s->sps->width;
1523     int pic_height       = s->sps->height;
1524
1525     int mx         = mv->x & 3;
1526     int my         = mv->y & 3;
1527     int extra_left = ff_hevc_qpel_extra_before[mx];
1528     int extra_top  = ff_hevc_qpel_extra_before[my];
1529
1530     x_off += mv->x >> 2;
1531     y_off += mv->y >> 2;
1532     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1533
1534     if (x_off < extra_left || y_off < extra_top ||
1535         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1536         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1537         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1538         int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
1539         int buf_offset = extra_top *
1540                          edge_emu_stride + (extra_left << s->sps->pixel_shift);
1541
1542         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1543                                  edge_emu_stride, srcstride,
1544                                  block_w + ff_hevc_qpel_extra[mx],
1545                                  block_h + ff_hevc_qpel_extra[my],
1546                                  x_off - extra_left, y_off - extra_top,
1547                                  pic_width, pic_height);
1548         src = lc->edge_emu_buffer + buf_offset;
1549         srcstride = edge_emu_stride;
1550     }
1551     s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
1552                                      block_h, lc->mc_buffer);
1553 }
1554
1555 /**
1556  * 8.5.3.2.2.2 Chroma sample interpolation process
1557  *
1558  * @param s HEVC decoding context
1559  * @param dst1 target buffer for block data at block position (U plane)
1560  * @param dst2 target buffer for block data at block position (V plane)
1561  * @param dststride stride of the dst1 and dst2 buffers
1562  * @param ref reference picture buffer at origin (0, 0)
1563  * @param mv motion vector (relative to block position) to get pixel data from
1564  * @param x_off horizontal position of block from origin (0, 0)
1565  * @param y_off vertical position of block from origin (0, 0)
1566  * @param block_w width of block
1567  * @param block_h height of block
1568  */
1569 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1570                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1571                       int x_off, int y_off, int block_w, int block_h)
1572 {
1573     HEVCLocalContext *lc = &s->HEVClc;
1574     uint8_t *src1        = ref->data[1];
1575     uint8_t *src2        = ref->data[2];
1576     ptrdiff_t src1stride = ref->linesize[1];
1577     ptrdiff_t src2stride = ref->linesize[2];
1578     int pic_width        = s->sps->width >> 1;
1579     int pic_height       = s->sps->height >> 1;
1580
1581     int mx = mv->x & 7;
1582     int my = mv->y & 7;
1583
1584     x_off += mv->x >> 3;
1585     y_off += mv->y >> 3;
1586     src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
1587     src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
1588
1589     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1590         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1591         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1592         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1593         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1594         int buf_offset1 = EPEL_EXTRA_BEFORE *
1595                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1596         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1597         int buf_offset2 = EPEL_EXTRA_BEFORE *
1598                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1599
1600         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1601                                  edge_emu_stride, src1stride,
1602                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1603                                  x_off - EPEL_EXTRA_BEFORE,
1604                                  y_off - EPEL_EXTRA_BEFORE,
1605                                  pic_width, pic_height);
1606
1607         src1 = lc->edge_emu_buffer + buf_offset1;
1608         src1stride = edge_emu_stride;
1609         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1610                                              block_w, block_h, mx, my, lc->mc_buffer);
1611
1612         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1613                                  edge_emu_stride, src2stride,
1614                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1615                                  x_off - EPEL_EXTRA_BEFORE,
1616                                  y_off - EPEL_EXTRA_BEFORE,
1617                                  pic_width, pic_height);
1618         src2 = lc->edge_emu_buffer + buf_offset2;
1619         src2stride = edge_emu_stride;
1620
1621         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1622                                              block_w, block_h, mx, my,
1623                                              lc->mc_buffer);
1624     } else {
1625         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
1626                                              block_w, block_h, mx, my,
1627                                              lc->mc_buffer);
1628         s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
1629                                              block_w, block_h, mx, my,
1630                                              lc->mc_buffer);
1631     }
1632 }
1633
1634 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1635                                 const Mv *mv, int y0, int height)
1636 {
1637     int y = (mv->y >> 2) + y0 + height + 9;
1638     ff_thread_await_progress(&ref->tf, y, 0);
1639 }
1640
1641 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1642                                   int nPbH, int log2_cb_size, int part_idx,
1643                                   int merge_idx, MvField *mv)
1644 {
1645     HEVCLocalContext *lc             = &s->HEVClc;
1646     enum InterPredIdc inter_pred_idc = PRED_L0;
1647     int mvp_flag;
1648
1649     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1650     if (s->sh.slice_type == B_SLICE)
1651         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1652
1653     if (inter_pred_idc != PRED_L1) {
1654         if (s->sh.nb_refs[L0])
1655             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1656
1657         mv->pred_flag[0] = 1;
1658         hls_mvd_coding(s, x0, y0, 0);
1659         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1660         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1661                                  part_idx, merge_idx, mv, mvp_flag, 0);
1662         mv->mv[0].x += lc->pu.mvd.x;
1663         mv->mv[0].y += lc->pu.mvd.y;
1664     }
1665
1666     if (inter_pred_idc != PRED_L0) {
1667         if (s->sh.nb_refs[L1])
1668             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1669
1670         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1671             AV_ZERO32(&lc->pu.mvd);
1672         } else {
1673             hls_mvd_coding(s, x0, y0, 1);
1674         }
1675
1676         mv->pred_flag[1] = 1;
1677         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1678         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1679                                  part_idx, merge_idx, mv, mvp_flag, 1);
1680         mv->mv[1].x += lc->pu.mvd.x;
1681         mv->mv[1].y += lc->pu.mvd.y;
1682     }
1683 }
1684
1685 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1686                                 int nPbW, int nPbH,
1687                                 int log2_cb_size, int partIdx)
1688 {
1689 #define POS(c_idx, x, y)                                                              \
1690     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1691                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1692     HEVCLocalContext *lc = &s->HEVClc;
1693     int merge_idx = 0;
1694     struct MvField current_mv = {{{ 0 }}};
1695
1696     int min_pu_width = s->sps->min_pu_width;
1697
1698     MvField *tab_mvf = s->ref->tab_mvf;
1699     RefPicList  *refPicList = s->ref->refPicList;
1700     HEVCFrame *ref0, *ref1;
1701
1702     int tmpstride = MAX_PB_SIZE;
1703
1704     uint8_t *dst0 = POS(0, x0, y0);
1705     uint8_t *dst1 = POS(1, x0, y0);
1706     uint8_t *dst2 = POS(2, x0, y0);
1707     int log2_min_cb_size = s->sps->log2_min_cb_size;
1708     int min_cb_width     = s->sps->min_cb_width;
1709     int x_cb             = x0 >> log2_min_cb_size;
1710     int y_cb             = y0 >> log2_min_cb_size;
1711     int x_pu, y_pu;
1712     int i, j;
1713
1714     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1715
1716     if (!skip_flag)
1717         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1718
1719     if (skip_flag || lc->pu.merge_flag) {
1720         if (s->sh.max_num_merge_cand > 1)
1721             merge_idx = ff_hevc_merge_idx_decode(s);
1722         else
1723             merge_idx = 0;
1724
1725         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1726                                    partIdx, merge_idx, &current_mv);
1727     } else {
1728         hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1729                               partIdx, merge_idx, &current_mv);
1730     }
1731
1732     x_pu = x0 >> s->sps->log2_min_pu_size;
1733     y_pu = y0 >> s->sps->log2_min_pu_size;
1734
1735     for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1736         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1737             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1738
1739     if (current_mv.pred_flag[0]) {
1740         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1741         if (!ref0)
1742             return;
1743         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1744     }
1745     if (current_mv.pred_flag[1]) {
1746         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1747         if (!ref1)
1748             return;
1749         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1750     }
1751
1752     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1753         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1754         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1755
1756         luma_mc(s, tmp, tmpstride, ref0->frame,
1757                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1758
1759         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1760             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1761             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1762                                      s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1763                                      s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1764                                      dst0, s->frame->linesize[0], tmp,
1765                                      tmpstride, nPbW, nPbH);
1766         } else {
1767             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1768         }
1769         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1770                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1771
1772         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1773             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1774             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1775                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1776                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1777                                      dst1, s->frame->linesize[1], tmp, tmpstride,
1778                                      nPbW / 2, nPbH / 2);
1779             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1780                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1781                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1782                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
1783                                      nPbW / 2, nPbH / 2);
1784         } else {
1785             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1786             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1787         }
1788     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1789         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1790         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1791
1792         luma_mc(s, tmp, tmpstride, ref1->frame,
1793                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1794
1795         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1796             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1797             s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
1798                                       s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1799                                       s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1800                                       dst0, s->frame->linesize[0], tmp, tmpstride,
1801                                       nPbW, nPbH);
1802         } else {
1803             s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
1804         }
1805
1806         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1807                   &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
1808
1809         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1810             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1811             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1812                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1813                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1814                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1815             s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
1816                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1817                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1818                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1819         } else {
1820             s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
1821             s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
1822         }
1823     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1824         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1825         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1826         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1827         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1828
1829         luma_mc(s, tmp, tmpstride, ref0->frame,
1830                 &current_mv.mv[0], x0, y0, nPbW, nPbH);
1831         luma_mc(s, tmp2, tmpstride, ref1->frame,
1832                 &current_mv.mv[1], x0, y0, nPbW, nPbH);
1833
1834         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1835             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1836             s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
1837                                          s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1838                                          s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1839                                          s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1840                                          s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1841                                          dst0, s->frame->linesize[0],
1842                                          tmp, tmp2, tmpstride, nPbW, nPbH);
1843         } else {
1844             s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
1845                                              tmp, tmp2, tmpstride, nPbW, nPbH);
1846         }
1847
1848         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1849                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1850         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1851                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
1852
1853         if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1854             (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
1855             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1856                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1857                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1858                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1859                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1860                                          dst1, s->frame->linesize[1], tmp, tmp3,
1861                                          tmpstride, nPbW / 2, nPbH / 2);
1862             s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
1863                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1864                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1865                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1866                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1867                                          dst2, s->frame->linesize[2], tmp2, tmp4,
1868                                          tmpstride, nPbW / 2, nPbH / 2);
1869         } else {
1870             s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
1871             s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
1872         }
1873     }
1874 }
1875
1876 /**
1877  * 8.4.1
1878  */
1879 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1880                                 int prev_intra_luma_pred_flag)
1881 {
1882     HEVCLocalContext *lc = &s->HEVClc;
1883     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1884     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1885     int min_pu_width     = s->sps->min_pu_width;
1886     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1887     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1888     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1889
1890     int cand_up   = (lc->ctb_up_flag || y0b) ?
1891                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1892     int cand_left = (lc->ctb_left_flag || x0b) ?
1893                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1894
1895     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1896
1897     MvField *tab_mvf = s->ref->tab_mvf;
1898     int intra_pred_mode;
1899     int candidate[3];
1900     int i, j;
1901
1902     // intra_pred_mode prediction does not cross vertical CTB boundaries
1903     if ((y0 - 1) < y_ctb)
1904         cand_up = INTRA_DC;
1905
1906     if (cand_left == cand_up) {
1907         if (cand_left < 2) {
1908             candidate[0] = INTRA_PLANAR;
1909             candidate[1] = INTRA_DC;
1910             candidate[2] = INTRA_ANGULAR_26;
1911         } else {
1912             candidate[0] = cand_left;
1913             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1914             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1915         }
1916     } else {
1917         candidate[0] = cand_left;
1918         candidate[1] = cand_up;
1919         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1920             candidate[2] = INTRA_PLANAR;
1921         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1922             candidate[2] = INTRA_DC;
1923         } else {
1924             candidate[2] = INTRA_ANGULAR_26;
1925         }
1926     }
1927
1928     if (prev_intra_luma_pred_flag) {
1929         intra_pred_mode = candidate[lc->pu.mpm_idx];
1930     } else {
1931         if (candidate[0] > candidate[1])
1932             FFSWAP(uint8_t, candidate[0], candidate[1]);
1933         if (candidate[0] > candidate[2])
1934             FFSWAP(uint8_t, candidate[0], candidate[2]);
1935         if (candidate[1] > candidate[2])
1936             FFSWAP(uint8_t, candidate[1], candidate[2]);
1937
1938         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1939         for (i = 0; i < 3; i++)
1940             if (intra_pred_mode >= candidate[i])
1941                 intra_pred_mode++;
1942     }
1943
1944     /* write the intra prediction units into the mv array */
1945     if (!size_in_pus)
1946         size_in_pus = 1;
1947     for (i = 0; i < size_in_pus; i++) {
1948         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1949                intra_pred_mode, size_in_pus);
1950
1951         for (j = 0; j < size_in_pus; j++) {
1952             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1953             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1954             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1955             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1956             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1957             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1958             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1959             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1960             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1961         }
1962     }
1963
1964     return intra_pred_mode;
1965 }
1966
1967 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1968                                           int log2_cb_size, int ct_depth)
1969 {
1970     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1971     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1972     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1973     int y;
1974
1975     for (y = 0; y < length; y++)
1976         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1977                ct_depth, length);
1978 }
1979
1980 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1981                                   int log2_cb_size)
1982 {
1983     HEVCLocalContext *lc = &s->HEVClc;
1984     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1985     uint8_t prev_intra_luma_pred_flag[4];
1986     int split   = lc->cu.part_mode == PART_NxN;
1987     int pb_size = (1 << log2_cb_size) >> split;
1988     int side    = split + 1;
1989     int chroma_mode;
1990     int i, j;
1991
1992     for (i = 0; i < side; i++)
1993         for (j = 0; j < side; j++)
1994             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1995
1996     for (i = 0; i < side; i++) {
1997         for (j = 0; j < side; j++) {
1998             if (prev_intra_luma_pred_flag[2 * i + j])
1999                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2000             else
2001                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2002
2003             lc->pu.intra_pred_mode[2 * i + j] =
2004                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2005                                      prev_intra_luma_pred_flag[2 * i + j]);
2006         }
2007     }
2008
2009     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2010     if (chroma_mode != 4) {
2011         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2012             lc->pu.intra_pred_mode_c = 34;
2013         else
2014             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2015     } else {
2016         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2017     }
2018 }
2019
2020 static void intra_prediction_unit_default_value(HEVCContext *s,
2021                                                 int x0, int y0,
2022                                                 int log2_cb_size)
2023 {
2024     HEVCLocalContext *lc = &s->HEVClc;
2025     int pb_size          = 1 << log2_cb_size;
2026     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
2027     int min_pu_width     = s->sps->min_pu_width;
2028     MvField *tab_mvf     = s->ref->tab_mvf;
2029     int x_pu             = x0 >> s->sps->log2_min_pu_size;
2030     int y_pu             = y0 >> s->sps->log2_min_pu_size;
2031     int j, k;
2032
2033     if (size_in_pus == 0)
2034         size_in_pus = 1;
2035     for (j = 0; j < size_in_pus; j++) {
2036         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2037         for (k = 0; k < size_in_pus; k++)
2038             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2039     }
2040 }
2041
2042 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2043 {
2044     int cb_size          = 1 << log2_cb_size;
2045     HEVCLocalContext *lc = &s->HEVClc;
2046     int log2_min_cb_size = s->sps->log2_min_cb_size;
2047     int length           = cb_size >> log2_min_cb_size;
2048     int min_cb_width     = s->sps->min_cb_width;
2049     int x_cb             = x0 >> log2_min_cb_size;
2050     int y_cb             = y0 >> log2_min_cb_size;
2051     int x, y, ret;
2052
2053     lc->cu.x                = x0;
2054     lc->cu.y                = y0;
2055     lc->cu.pred_mode        = MODE_INTRA;
2056     lc->cu.part_mode        = PART_2Nx2N;
2057     lc->cu.intra_split_flag = 0;
2058
2059     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2060     for (x = 0; x < 4; x++)
2061         lc->pu.intra_pred_mode[x] = 1;
2062     if (s->pps->transquant_bypass_enable_flag) {
2063         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2064         if (lc->cu.cu_transquant_bypass_flag)
2065             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2066     } else
2067         lc->cu.cu_transquant_bypass_flag = 0;
2068
2069     if (s->sh.slice_type != I_SLICE) {
2070         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2071
2072         x = y_cb * min_cb_width + x_cb;
2073         for (y = 0; y < length; y++) {
2074             memset(&s->skip_flag[x], skip_flag, length);
2075             x += min_cb_width;
2076         }
2077         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2078     }
2079
2080     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2081         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2082         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2083
2084         if (!s->sh.disable_deblocking_filter_flag)
2085             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2086     } else {
2087         int pcm_flag = 0;
2088
2089         if (s->sh.slice_type != I_SLICE)
2090             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2091         if (lc->cu.pred_mode != MODE_INTRA ||
2092             log2_cb_size == s->sps->log2_min_cb_size) {
2093             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2094             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2095                                       lc->cu.pred_mode == MODE_INTRA;
2096         }
2097
2098         if (lc->cu.pred_mode == MODE_INTRA) {
2099             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2100                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2101                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2102                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2103             }
2104             if (pcm_flag) {
2105                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2106                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2107                 if (s->sps->pcm.loop_filter_disable_flag)
2108                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2109
2110                 if (ret < 0)
2111                     return ret;
2112             } else {
2113                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2114             }
2115         } else {
2116             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2117             switch (lc->cu.part_mode) {
2118             case PART_2Nx2N:
2119                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2120                 break;
2121             case PART_2NxN:
2122                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2123                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2124                 break;
2125             case PART_Nx2N:
2126                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2127                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2128                 break;
2129             case PART_2NxnU:
2130                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2131                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2132                 break;
2133             case PART_2NxnD:
2134                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2135                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2136                 break;
2137             case PART_nLx2N:
2138                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2139                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2140                 break;
2141             case PART_nRx2N:
2142                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2143                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2144                 break;
2145             case PART_NxN:
2146                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2147                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2148                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2149                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2150                 break;
2151             }
2152         }
2153
2154         if (!pcm_flag) {
2155             int rqt_root_cbf = 1;
2156
2157             if (lc->cu.pred_mode != MODE_INTRA &&
2158                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2159                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2160             }
2161             if (rqt_root_cbf) {
2162                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2163                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2164                                          s->sps->max_transform_hierarchy_depth_inter;
2165                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2166                                          log2_cb_size,
2167                                          log2_cb_size, 0, 0, 0, 0);
2168                 if (ret < 0)
2169                     return ret;
2170             } else {
2171                 if (!s->sh.disable_deblocking_filter_flag)
2172                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2173             }
2174         }
2175     }
2176
2177     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2178         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2179
2180     x = y_cb * min_cb_width + x_cb;
2181     for (y = 0; y < length; y++) {
2182         memset(&s->qp_y_tab[x], lc->qp_y, length);
2183         x += min_cb_width;
2184     }
2185
2186     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2187
2188     return 0;
2189 }
2190
2191 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2192                                int log2_cb_size, int cb_depth)
2193 {
2194     HEVCLocalContext *lc = &s->HEVClc;
2195     const int cb_size    = 1 << log2_cb_size;
2196     int split_cu;
2197
2198     lc->ct.depth = cb_depth;
2199     if (x0 + cb_size <= s->sps->width  &&
2200         y0 + cb_size <= s->sps->height &&
2201         log2_cb_size > s->sps->log2_min_cb_size) {
2202         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2203     } else {
2204         split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
2205     }
2206     if (s->pps->cu_qp_delta_enabled_flag &&
2207         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2208         lc->tu.is_cu_qp_delta_coded = 0;
2209         lc->tu.cu_qp_delta          = 0;
2210     }
2211
2212     if (split_cu) {
2213         const int cb_size_split = cb_size >> 1;
2214         const int x1 = x0 + cb_size_split;
2215         const int y1 = y0 + cb_size_split;
2216
2217         log2_cb_size--;
2218         cb_depth++;
2219
2220 #define SUBDIVIDE(x, y)                                                \
2221 do {                                                                   \
2222     if (x < s->sps->width && y < s->sps->height) {                     \
2223         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2224         if (ret < 0)                                                   \
2225             return ret;                                                \
2226     }                                                                  \
2227 } while (0)
2228
2229         SUBDIVIDE(x0, y0);
2230         SUBDIVIDE(x1, y0);
2231         SUBDIVIDE(x0, y1);
2232         SUBDIVIDE(x1, y1);
2233     } else {
2234         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2235         if (ret < 0)
2236             return ret;
2237     }
2238
2239     return 0;
2240 }
2241
2242 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2243                                  int ctb_addr_ts)
2244 {
2245     HEVCLocalContext *lc  = &s->HEVClc;
2246     int ctb_size          = 1 << s->sps->log2_ctb_size;
2247     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2248     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2249
2250     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2251
2252     if (s->pps->entropy_coding_sync_enabled_flag) {
2253         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2254             lc->first_qp_group = 1;
2255         lc->end_of_tiles_x = s->sps->width;
2256     } else if (s->pps->tiles_enabled_flag) {
2257         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2258             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2259             lc->start_of_tiles_x = x_ctb;
2260             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2261             lc->first_qp_group   = 1;
2262         }
2263     } else {
2264         lc->end_of_tiles_x = s->sps->width;
2265     }
2266
2267     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2268
2269     lc->boundary_flags = 0;
2270     if (s->pps->tiles_enabled_flag) {
2271         if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2272             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2273         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2274             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2275         if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
2276             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2277         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
2278             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2279     } else {
2280         if (!ctb_addr_in_slice > 0)
2281             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2282         if (ctb_addr_in_slice < s->sps->ctb_width)
2283             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2284     }
2285
2286     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2287     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2288     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2289     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2290 }
2291
2292 static int hls_slice_data(HEVCContext *s)
2293 {
2294     int ctb_size    = 1 << s->sps->log2_ctb_size;
2295     int more_data   = 1;
2296     int x_ctb       = 0;
2297     int y_ctb       = 0;
2298     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2299     int ret;
2300
2301     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2302         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2303
2304         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2305         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2306         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2307
2308         ff_hevc_cabac_init(s, ctb_addr_ts);
2309
2310         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2311
2312         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2313         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2314         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2315
2316         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2317         if (ret < 0)
2318             return ret;
2319         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2320
2321         ctb_addr_ts++;
2322         ff_hevc_save_states(s, ctb_addr_ts);
2323         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2324     }
2325
2326     if (x_ctb + ctb_size >= s->sps->width &&
2327         y_ctb + ctb_size >= s->sps->height)
2328         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2329
2330     return ctb_addr_ts;
2331 }
2332
2333 /**
2334  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2335  * 0 if the unit should be skipped, 1 otherwise
2336  */
2337 static int hls_nal_unit(HEVCContext *s)
2338 {
2339     GetBitContext *gb = &s->HEVClc.gb;
2340     int nuh_layer_id;
2341
2342     if (get_bits1(gb) != 0)
2343         return AVERROR_INVALIDDATA;
2344
2345     s->nal_unit_type = get_bits(gb, 6);
2346
2347     nuh_layer_id   = get_bits(gb, 6);
2348     s->temporal_id = get_bits(gb, 3) - 1;
2349     if (s->temporal_id < 0)
2350         return AVERROR_INVALIDDATA;
2351
2352     av_log(s->avctx, AV_LOG_DEBUG,
2353            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2354            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2355
2356     return nuh_layer_id == 0;
2357 }
2358
2359 static void restore_tqb_pixels(HEVCContext *s)
2360 {
2361     int min_pu_size = 1 << s->sps->log2_min_pu_size;
2362     int x, y, c_idx;
2363
2364     for (c_idx = 0; c_idx < 3; c_idx++) {
2365         ptrdiff_t stride = s->frame->linesize[c_idx];
2366         int hshift       = s->sps->hshift[c_idx];
2367         int vshift       = s->sps->vshift[c_idx];
2368         for (y = 0; y < s->sps->min_pu_height; y++) {
2369             for (x = 0; x < s->sps->min_pu_width; x++) {
2370                 if (s->is_pcm[y * s->sps->min_pu_width + x]) {
2371                     int n;
2372                     int len      = min_pu_size >> hshift;
2373                     uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2374                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
2375                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2376                         memcpy(dst, src, len);
2377                         src += stride;
2378                         dst += stride;
2379                     }
2380                 }
2381             }
2382         }
2383     }
2384 }
2385
2386 static int set_side_data(HEVCContext *s)
2387 {
2388     AVFrame *out = s->ref->frame;
2389
2390     if (s->sei_frame_packing_present &&
2391         s->frame_packing_arrangement_type >= 3 &&
2392         s->frame_packing_arrangement_type <= 5 &&
2393         s->content_interpretation_type > 0 &&
2394         s->content_interpretation_type < 3) {
2395         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2396         if (!stereo)
2397             return AVERROR(ENOMEM);
2398
2399         switch (s->frame_packing_arrangement_type) {
2400         case 3:
2401             if (s->quincunx_subsampling)
2402                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2403             else
2404                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2405             break;
2406         case 4:
2407             stereo->type = AV_STEREO3D_TOPBOTTOM;
2408             break;
2409         case 5:
2410             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2411             break;
2412         }
2413
2414         if (s->content_interpretation_type == 2)
2415             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2416     }
2417
2418     if (s->sei_display_orientation_present &&
2419         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2420         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2421         AVFrameSideData *rotation = av_frame_new_side_data(out,
2422                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2423                                                            sizeof(int32_t) * 9);
2424         if (!rotation)
2425             return AVERROR(ENOMEM);
2426
2427         av_display_rotation_set((int32_t *)rotation->data, angle);
2428         av_display_matrix_flip((int32_t *)rotation->data,
2429                                s->sei_hflip, s->sei_vflip);
2430     }
2431
2432     return 0;
2433 }
2434
2435 static int hevc_frame_start(HEVCContext *s)
2436 {
2437     HEVCLocalContext *lc = &s->HEVClc;
2438     int ret;
2439
2440     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2441     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2442     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2443     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2444
2445     lc->start_of_tiles_x = 0;
2446     s->is_decoded        = 0;
2447     s->first_nal_type    = s->nal_unit_type;
2448
2449     if (s->pps->tiles_enabled_flag)
2450         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2451
2452     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2453                               s->poc);
2454     if (ret < 0)
2455         goto fail;
2456
2457     ret = ff_hevc_frame_rps(s);
2458     if (ret < 0) {
2459         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2460         goto fail;
2461     }
2462
2463     s->ref->frame->key_frame = IS_IRAP(s);
2464
2465     ret = set_side_data(s);
2466     if (ret < 0)
2467         goto fail;
2468
2469     av_frame_unref(s->output_frame);
2470     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2471     if (ret < 0)
2472         goto fail;
2473
2474     ff_thread_finish_setup(s->avctx);
2475
2476     return 0;
2477
2478 fail:
2479     if (s->ref)
2480         ff_hevc_unref_frame(s, s->ref, ~0);
2481     s->ref = NULL;
2482     return ret;
2483 }
2484
2485 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2486 {
2487     HEVCLocalContext *lc = &s->HEVClc;
2488     GetBitContext *gb    = &lc->gb;
2489     int ctb_addr_ts, ret;
2490
2491     ret = init_get_bits8(gb, nal->data, nal->size);
2492     if (ret < 0)
2493         return ret;
2494
2495     ret = hls_nal_unit(s);
2496     if (ret < 0) {
2497         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2498                s->nal_unit_type);
2499         goto fail;
2500     } else if (!ret)
2501         return 0;
2502
2503     switch (s->nal_unit_type) {
2504     case NAL_VPS:
2505         ret = ff_hevc_decode_nal_vps(s);
2506         if (ret < 0)
2507             goto fail;
2508         break;
2509     case NAL_SPS:
2510         ret = ff_hevc_decode_nal_sps(s);
2511         if (ret < 0)
2512             goto fail;
2513         break;
2514     case NAL_PPS:
2515         ret = ff_hevc_decode_nal_pps(s);
2516         if (ret < 0)
2517             goto fail;
2518         break;
2519     case NAL_SEI_PREFIX:
2520     case NAL_SEI_SUFFIX:
2521         ret = ff_hevc_decode_nal_sei(s);
2522         if (ret < 0)
2523             goto fail;
2524         break;
2525     case NAL_TRAIL_R:
2526     case NAL_TRAIL_N:
2527     case NAL_TSA_N:
2528     case NAL_TSA_R:
2529     case NAL_STSA_N:
2530     case NAL_STSA_R:
2531     case NAL_BLA_W_LP:
2532     case NAL_BLA_W_RADL:
2533     case NAL_BLA_N_LP:
2534     case NAL_IDR_W_RADL:
2535     case NAL_IDR_N_LP:
2536     case NAL_CRA_NUT:
2537     case NAL_RADL_N:
2538     case NAL_RADL_R:
2539     case NAL_RASL_N:
2540     case NAL_RASL_R:
2541         ret = hls_slice_header(s);
2542         if (ret < 0)
2543             return ret;
2544
2545         if (s->max_ra == INT_MAX) {
2546             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2547                 s->max_ra = s->poc;
2548             } else {
2549                 if (IS_IDR(s))
2550                     s->max_ra = INT_MIN;
2551             }
2552         }
2553
2554         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2555             s->poc <= s->max_ra) {
2556             s->is_decoded = 0;
2557             break;
2558         } else {
2559             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2560                 s->max_ra = INT_MIN;
2561         }
2562
2563         if (s->sh.first_slice_in_pic_flag) {
2564             ret = hevc_frame_start(s);
2565             if (ret < 0)
2566                 return ret;
2567         } else if (!s->ref) {
2568             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2569             goto fail;
2570         }
2571
2572         if (s->nal_unit_type != s->first_nal_type) {
2573             av_log(s->avctx, AV_LOG_ERROR,
2574                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2575                    s->first_nal_type, s->nal_unit_type);
2576             return AVERROR_INVALIDDATA;
2577         }
2578
2579         if (!s->sh.dependent_slice_segment_flag &&
2580             s->sh.slice_type != I_SLICE) {
2581             ret = ff_hevc_slice_rpl(s);
2582             if (ret < 0) {
2583                 av_log(s->avctx, AV_LOG_WARNING,
2584                        "Error constructing the reference lists for the current slice.\n");
2585                 goto fail;
2586             }
2587         }
2588
2589         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2590             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2591             if (ret < 0)
2592                 goto fail;
2593         }
2594
2595         if (s->avctx->hwaccel) {
2596             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2597             if (ret < 0)
2598                 goto fail;
2599         } else {
2600             ctb_addr_ts = hls_slice_data(s);
2601             if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2602                 s->is_decoded = 1;
2603                 if ((s->pps->transquant_bypass_enable_flag ||
2604                      (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
2605                     s->sps->sao_enabled)
2606                     restore_tqb_pixels(s);
2607             }
2608
2609             if (ctb_addr_ts < 0) {
2610                 ret = ctb_addr_ts;
2611                 goto fail;
2612             }
2613         }
2614         break;
2615     case NAL_EOS_NUT:
2616     case NAL_EOB_NUT:
2617         s->seq_decode = (s->seq_decode + 1) & 0xff;
2618         s->max_ra     = INT_MAX;
2619         break;
2620     case NAL_AUD:
2621     case NAL_FD_NUT:
2622         break;
2623     default:
2624         av_log(s->avctx, AV_LOG_INFO,
2625                "Skipping NAL unit %d\n", s->nal_unit_type);
2626     }
2627
2628     return 0;
2629 fail:
2630     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2631         return ret;
2632     return 0;
2633 }
2634
/* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
 * between these functions would be nice. */
/**
 * Locate the end of a NAL unit in src and remove the escape bytes from it.
 *
 * The input is scanned for the first 00 00 xx sequence with xx <= 3. If no
 * such sequence is found, the NAL structure is pointed directly at src and
 * nothing is copied. Otherwise the payload is copied into nal->rbsp_buffer
 * with every 00 00 03 sequence replaced by 00 00; copying stops at a
 * 00 00 xx sequence with xx < 3, which is treated as the next start code.
 *
 * @param src    input bytes
 * @param length number of bytes available at src
 * @param nal    receives data/size (unescaped payload) and raw_data/raw_size
 *               (the consumed part of src); its rbsp_buffer may be
 *               (re)allocated
 * @return the number of bytes consumed from src, or AVERROR(ENOMEM) if the
 *         output buffer could not be allocated
 */
static int extract_rbsp(const uint8_t *src, int length,
                        HEVCNAL *nal)
{
    int i, si, di;
    uint8_t *dst;

/* With i at a zero byte: if 00 00 xx (xx <= 3) follows, leave the scan loop;
 * unless xx is the escape byte 3, the input is truncated at i first. */
#define STARTCODE_TEST                                                  \
        if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
            if (src[i + 2] != 3) {                                      \
                /* startcode, so we must be past the end */             \
                length = i;                                             \
            }                                                           \
            break;                                                      \
        }
#if HAVE_FAST_UNALIGNED
/* Step back one byte if already on a zero, then advance to the next zero. */
#define FIND_FIRST_ZERO                                                 \
        if (i > 0 && !src[i])                                           \
            i--;                                                        \
        while (src[i])                                                  \
            i++
#if HAVE_FAST_64BIT
    /* Word-at-a-time scan; presumably the mask test rejects words that cannot
     * contain a candidate zero byte -- not verified here. */
    for (i = 0; i + 1 < length; i += 9) {
        if (!((~AV_RN64A(src + i) &
               (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
              0x8000800080008080ULL))
            continue;
        FIND_FIRST_ZERO;
        STARTCODE_TEST;
        i -= 7;
    }
#else
    /* same scan as above using 32-bit reads */
    for (i = 0; i + 1 < length; i += 5) {
        if (!((~AV_RN32A(src + i) &
               (AV_RN32A(src + i) - 0x01000101U)) &
              0x80008080U))
            continue;
        FIND_FIRST_ZERO;
        STARTCODE_TEST;
        i -= 3;
    }
#endif /* HAVE_FAST_64BIT */
#else
    /* Portable scan: inspect every second byte and step back by one when the
     * preceding byte is zero as well. */
    for (i = 0; i + 1 < length; i += 2) {
        if (src[i])
            continue;
        if (i > 0 && src[i - 1] == 0)
            i--;
        STARTCODE_TEST;
    }
#endif /* HAVE_FAST_UNALIGNED */

    /* the scan ran to the end: use the input in place, without copying */
    if (i >= length - 1) { // no escaped 0
        nal->data     =
        nal->raw_data = src;
        nal->size     =
        nal->raw_size = length;
        return length;
    }

    av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
                   length + FF_INPUT_BUFFER_PADDING_SIZE);
    if (!nal->rbsp_buffer)
        return AVERROR(ENOMEM);

    dst = nal->rbsp_buffer;

    /* everything before the first candidate sequence is copied verbatim */
    memcpy(dst, src, i);
    si = di = i;
    while (si + 2 < length) {
        // remove escapes (very rare 1:2^22)
        if (src[si + 2] > 3) {
            /* the third byte rules out a 00 00 xx match here: copy two bytes
             * now, the third one is copied at the bottom of the loop */
            dst[di++] = src[si++];
            dst[di++] = src[si++];
        } else if (src[si] == 0 && src[si + 1] == 0) {
            if (src[si + 2] == 3) { // escape
                /* emit the two zero bytes and drop the escape byte */
                dst[di++] = 0;
                dst[di++] = 0;
                si       += 3;

                continue;
            } else // next start code
                goto nsc;
        }

        dst[di++] = src[si++];
    }
    /* copy the trailing bytes that are too short to form a sequence */
    while (si < length)
        dst[di++] = src[si++];

nsc:
    /* zero the padding that follows the unescaped payload */
    memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    nal->data = dst;
    nal->size = di;
    nal->raw_data = src;
    nal->raw_size = si;
    return si;
}
2735
2736 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2737 {
2738     int i, consumed, ret = 0;
2739
2740     s->ref = NULL;
2741     s->eos = 0;
2742
2743     /* split the input packet into NAL units, so we know the upper bound on the
2744      * number of slices in the frame */
2745     s->nb_nals = 0;
2746     while (length >= 4) {
2747         HEVCNAL *nal;
2748         int extract_length = 0;
2749
2750         if (s->is_nalff) {
2751             int i;
2752             for (i = 0; i < s->nal_length_size; i++)
2753                 extract_length = (extract_length << 8) | buf[i];
2754             buf    += s->nal_length_size;
2755             length -= s->nal_length_size;
2756
2757             if (extract_length > length) {
2758                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2759                 ret = AVERROR_INVALIDDATA;
2760                 goto fail;
2761             }
2762         } else {
2763             if (buf[2] == 0) {
2764                 length--;
2765                 buf++;
2766                 continue;
2767             }
2768             if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2769                 ret = AVERROR_INVALIDDATA;
2770                 goto fail;
2771             }
2772
2773             buf           += 3;
2774             length        -= 3;
2775             extract_length = length;
2776         }
2777
2778         if (s->nals_allocated < s->nb_nals + 1) {
2779             int new_size = s->nals_allocated + 1;
2780             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2781             if (!tmp) {
2782                 ret = AVERROR(ENOMEM);
2783                 goto fail;
2784             }
2785             s->nals = tmp;
2786             memset(s->nals + s->nals_allocated, 0,
2787                    (new_size - s->nals_allocated) * sizeof(*tmp));
2788             s->nals_allocated = new_size;
2789         }
2790         nal = &s->nals[s->nb_nals++];
2791
2792         consumed = extract_rbsp(buf, extract_length, nal);
2793         if (consumed < 0) {
2794             ret = consumed;
2795             goto fail;
2796         }
2797
2798         ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
2799         if (ret < 0)
2800             goto fail;
2801         hls_nal_unit(s);
2802
2803         if (s->nal_unit_type == NAL_EOB_NUT ||
2804             s->nal_unit_type == NAL_EOS_NUT)
2805             s->eos = 1;
2806
2807         buf    += consumed;
2808         length -= consumed;
2809     }
2810
2811     /* parse the NAL units */
2812     for (i = 0; i < s->nb_nals; i++) {
2813         int ret = decode_nal_unit(s, &s->nals[i]);
2814         if (ret < 0) {
2815             av_log(s->avctx, AV_LOG_WARNING,
2816                    "Error parsing NAL unit #%d.\n", i);
2817             goto fail;
2818         }
2819     }
2820
2821 fail:
2822     if (s->ref)
2823         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2824
2825     return ret;
2826 }
2827
/**
 * Log a 16-byte MD5 digest as 32 hexadecimal digits, without a newline.
 *
 * @param log_ctx context passed to av_log()
 * @param level   log level passed to av_log()
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2834
2835 static int verify_md5(HEVCContext *s, AVFrame *frame)
2836 {
2837     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2838     int pixel_shift;
2839     int i, j;
2840
2841     if (!desc)
2842         return AVERROR(EINVAL);
2843
2844     pixel_shift = desc->comp[0].depth_minus1 > 7;
2845
2846     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2847            s->poc);
2848
2849     /* the checksums are LE, so we have to byteswap for >8bpp formats
2850      * on BE arches */
2851 #if HAVE_BIGENDIAN
2852     if (pixel_shift && !s->checksum_buf) {
2853         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2854                        FFMAX3(frame->linesize[0], frame->linesize[1],
2855                               frame->linesize[2]));
2856         if (!s->checksum_buf)
2857             return AVERROR(ENOMEM);
2858     }
2859 #endif
2860
2861     for (i = 0; frame->data[i]; i++) {
2862         int width  = s->avctx->coded_width;
2863         int height = s->avctx->coded_height;
2864         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2865         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2866         uint8_t md5[16];
2867
2868         av_md5_init(s->md5_ctx);
2869         for (j = 0; j < h; j++) {
2870             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2871 #if HAVE_BIGENDIAN
2872             if (pixel_shift) {
2873                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2874                                     (const uint16_t *) src, w);
2875                 src = s->checksum_buf;
2876             }
2877 #endif
2878             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2879         }
2880         av_md5_final(s->md5_ctx, md5);
2881
2882         if (!memcmp(md5, s->md5[i], 16)) {
2883             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2884             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2885             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2886         } else {
2887             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2888             print_md5(s->avctx, AV_LOG_ERROR, md5);
2889             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2890             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2891             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2892             return AVERROR_INVALIDDATA;
2893         }
2894     }
2895
2896     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2897
2898     return 0;
2899 }
2900
2901 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2902                              AVPacket *avpkt)
2903 {
2904     int ret;
2905     HEVCContext *s = avctx->priv_data;
2906
2907     if (!avpkt->size) {
2908         ret = ff_hevc_output_frame(s, data, 1);
2909         if (ret < 0)
2910             return ret;
2911
2912         *got_output = ret;
2913         return 0;
2914     }
2915
2916     s->ref = NULL;
2917     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2918     if (ret < 0)
2919         return ret;
2920
2921     if (avctx->hwaccel) {
2922         if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2923             av_log(avctx, AV_LOG_ERROR,
2924                    "hardware accelerator failed to decode picture\n");
2925     } else {
2926         /* verify the SEI checksum */
2927         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2928             s->is_md5) {
2929             ret = verify_md5(s, s->ref->frame);
2930             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2931                 ff_hevc_unref_frame(s, s->ref, ~0);
2932                 return ret;
2933             }
2934         }
2935     }
2936     s->is_md5 = 0;
2937
2938     if (s->is_decoded) {
2939         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2940         s->is_decoded = 0;
2941     }
2942
2943     if (s->output_frame->buf[0]) {
2944         av_frame_move_ref(data, s->output_frame);
2945         *got_output = 1;
2946     }
2947
2948     return avpkt->size;
2949 }
2950
2951 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2952 {
2953     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2954     if (ret < 0)
2955         return ret;
2956
2957     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2958     if (!dst->tab_mvf_buf)
2959         goto fail;
2960     dst->tab_mvf = src->tab_mvf;
2961
2962     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2963     if (!dst->rpl_tab_buf)
2964         goto fail;
2965     dst->rpl_tab = src->rpl_tab;
2966
2967     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2968     if (!dst->rpl_buf)
2969         goto fail;
2970
2971     dst->poc        = src->poc;
2972     dst->ctb_count  = src->ctb_count;
2973     dst->window     = src->window;
2974     dst->flags      = src->flags;
2975     dst->sequence   = src->sequence;
2976
2977     if (src->hwaccel_picture_private) {
2978         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2979         if (!dst->hwaccel_priv_buf)
2980             goto fail;
2981         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
2982     }
2983
2984     return 0;
2985 fail:
2986     ff_hevc_unref_frame(s, dst, ~0);
2987     return AVERROR(ENOMEM);
2988 }
2989
2990 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2991 {
2992     HEVCContext       *s = avctx->priv_data;
2993     int i;
2994
2995     pic_arrays_free(s);
2996
2997     av_freep(&s->md5_ctx);
2998
2999     av_frame_free(&s->tmp_frame);
3000     av_frame_free(&s->output_frame);
3001
3002     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3003         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3004         av_frame_free(&s->DPB[i].frame);
3005     }
3006
3007     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3008         av_buffer_unref(&s->vps_list[i]);
3009     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3010         av_buffer_unref(&s->sps_list[i]);
3011     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3012         av_buffer_unref(&s->pps_list[i]);
3013
3014     for (i = 0; i < s->nals_allocated; i++)
3015         av_freep(&s->nals[i].rbsp_buffer);
3016     av_freep(&s->nals);
3017     s->nals_allocated = 0;
3018
3019     return 0;
3020 }
3021
3022 static av_cold int hevc_init_context(AVCodecContext *avctx)
3023 {
3024     HEVCContext *s = avctx->priv_data;
3025     int i;
3026
3027     s->avctx = avctx;
3028
3029     s->tmp_frame = av_frame_alloc();
3030     if (!s->tmp_frame)
3031         goto fail;
3032
3033     s->output_frame = av_frame_alloc();
3034     if (!s->output_frame)
3035         goto fail;
3036
3037     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3038         s->DPB[i].frame = av_frame_alloc();
3039         if (!s->DPB[i].frame)
3040             goto fail;
3041         s->DPB[i].tf.f = s->DPB[i].frame;
3042     }
3043
3044     s->max_ra = INT_MAX;
3045
3046     s->md5_ctx = av_md5_alloc();
3047     if (!s->md5_ctx)
3048         goto fail;
3049
3050     ff_bswapdsp_init(&s->bdsp);
3051
3052     s->context_initialized = 1;
3053
3054     return 0;
3055
3056 fail:
3057     hevc_decode_free(avctx);
3058     return AVERROR(ENOMEM);
3059 }
3060
3061 static int hevc_update_thread_context(AVCodecContext *dst,
3062                                       const AVCodecContext *src)
3063 {
3064     HEVCContext *s  = dst->priv_data;
3065     HEVCContext *s0 = src->priv_data;
3066     int i, ret;
3067
3068     if (!s->context_initialized) {
3069         ret = hevc_init_context(dst);
3070         if (ret < 0)
3071             return ret;
3072     }
3073
3074     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3075         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3076         if (s0->DPB[i].frame->buf[0]) {
3077             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3078             if (ret < 0)
3079                 return ret;
3080         }
3081     }
3082
3083     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3084         av_buffer_unref(&s->vps_list[i]);
3085         if (s0->vps_list[i]) {
3086             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3087             if (!s->vps_list[i])
3088                 return AVERROR(ENOMEM);
3089         }
3090     }
3091
3092     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3093         av_buffer_unref(&s->sps_list[i]);
3094         if (s0->sps_list[i]) {
3095             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3096             if (!s->sps_list[i])
3097                 return AVERROR(ENOMEM);
3098         }
3099     }
3100
3101     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3102         av_buffer_unref(&s->pps_list[i]);
3103         if (s0->pps_list[i]) {
3104             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3105             if (!s->pps_list[i])
3106                 return AVERROR(ENOMEM);
3107         }
3108     }
3109
3110     if (s->sps != s0->sps)
3111         ret = set_sps(s, s0->sps);
3112
3113     s->seq_decode = s0->seq_decode;
3114     s->seq_output = s0->seq_output;
3115     s->pocTid0    = s0->pocTid0;
3116     s->max_ra     = s0->max_ra;
3117
3118     s->is_nalff        = s0->is_nalff;
3119     s->nal_length_size = s0->nal_length_size;
3120
3121     if (s0->eos) {
3122         s->seq_decode = (s->seq_decode + 1) & 0xff;
3123         s->max_ra = INT_MAX;
3124     }
3125
3126     return 0;
3127 }
3128
3129 static int hevc_decode_extradata(HEVCContext *s)
3130 {
3131     AVCodecContext *avctx = s->avctx;
3132     GetByteContext gb;
3133     int ret;
3134
3135     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3136
3137     if (avctx->extradata_size > 3 &&
3138         (avctx->extradata[0] || avctx->extradata[1] ||
3139          avctx->extradata[2] > 1)) {
3140         /* It seems the extradata is encoded as hvcC format.
3141          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3142          * is finalized. When finalized, configurationVersion will be 1 and we
3143          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3144         int i, j, num_arrays, nal_len_size;
3145
3146         s->is_nalff = 1;
3147
3148         bytestream2_skip(&gb, 21);
3149         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3150         num_arrays   = bytestream2_get_byte(&gb);
3151
3152         /* nal units in the hvcC always have length coded with 2 bytes,
3153          * so put a fake nal_length_size = 2 while parsing them */
3154         s->nal_length_size = 2;
3155
3156         /* Decode nal units from hvcC. */
3157         for (i = 0; i < num_arrays; i++) {
3158             int type = bytestream2_get_byte(&gb) & 0x3f;
3159             int cnt  = bytestream2_get_be16(&gb);
3160
3161             for (j = 0; j < cnt; j++) {
3162                 // +2 for the nal size field
3163                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3164                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3165                     av_log(s->avctx, AV_LOG_ERROR,
3166                            "Invalid NAL unit size in extradata.\n");
3167                     return AVERROR_INVALIDDATA;
3168                 }
3169
3170                 ret = decode_nal_units(s, gb.buffer, nalsize);
3171                 if (ret < 0) {
3172                     av_log(avctx, AV_LOG_ERROR,
3173                            "Decoding nal unit %d %d from hvcC failed\n",
3174                            type, i);
3175                     return ret;
3176                 }
3177                 bytestream2_skip(&gb, nalsize);
3178             }
3179         }
3180
3181         /* Now store right nal length size, that will be used to parse
3182          * all other nals */
3183         s->nal_length_size = nal_len_size;
3184     } else {
3185         s->is_nalff = 0;
3186         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3187         if (ret < 0)
3188             return ret;
3189     }
3190     return 0;
3191 }
3192
3193 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3194 {
3195     HEVCContext *s = avctx->priv_data;
3196     int ret;
3197
3198     ff_init_cabac_states();
3199
3200     avctx->internal->allocate_progress = 1;
3201
3202     ret = hevc_init_context(avctx);
3203     if (ret < 0)
3204         return ret;
3205
3206     if (avctx->extradata_size > 0 && avctx->extradata) {
3207         ret = hevc_decode_extradata(s);
3208         if (ret < 0) {
3209             hevc_decode_free(avctx);
3210             return ret;
3211         }
3212     }
3213
3214     return 0;
3215 }
3216
3217 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3218 {
3219     HEVCContext *s = avctx->priv_data;
3220     int ret;
3221
3222     memset(s, 0, sizeof(*s));
3223
3224     ret = hevc_init_context(avctx);
3225     if (ret < 0)
3226         return ret;
3227
3228     return 0;
3229 }
3230
3231 static void hevc_decode_flush(AVCodecContext *avctx)
3232 {
3233     HEVCContext *s = avctx->priv_data;
3234     ff_hevc_flush_dpb(s);
3235     s->max_ra = INT_MAX;
3236 }
3237
/* Byte offset of a member inside HEVCContext, for the AVOption table. */
#define OFFSET(field) offsetof(HEVCContext, field)
/* Flag set shared by the decoder's options: video + decoding parameter. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3240
3241 static const AVProfile profiles[] = {
3242     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3243     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3244     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3245     { FF_PROFILE_UNKNOWN },
3246 };
3247
3248 static const AVOption options[] = {
3249     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3250         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3251     { NULL },
3252 };
3253
3254 static const AVClass hevc_decoder_class = {
3255     .class_name = "HEVC decoder",
3256     .item_name  = av_default_item_name,
3257     .option     = options,
3258     .version    = LIBAVUTIL_VERSION_INT,
3259 };
3260
3261 AVCodec ff_hevc_decoder = {
3262     .name                  = "hevc",
3263     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3264     .type                  = AVMEDIA_TYPE_VIDEO,
3265     .id                    = AV_CODEC_ID_HEVC,
3266     .priv_data_size        = sizeof(HEVCContext),
3267     .priv_class            = &hevc_decoder_class,
3268     .init                  = hevc_decode_init,
3269     .close                 = hevc_decode_free,
3270     .decode                = hevc_decode_frame,
3271     .flush                 = hevc_decode_flush,
3272     .update_thread_context = hevc_update_thread_context,
3273     .init_thread_copy      = hevc_init_thread_copy,
3274     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3275                              CODEC_CAP_FRAME_THREADS,
3276     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3277 };