]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c
lavc: Remove old vaapi decode infrastructure
[ffmpeg] / libavcodec / hevc.c
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/md5.h"
31 #include "libavutil/opt.h"
32 #include "libavutil/pixdesc.h"
33 #include "libavutil/stereo3d.h"
34
35 #include "bswapdsp.h"
36 #include "bytestream.h"
37 #include "cabac_functions.h"
38 #include "golomb.h"
39 #include "hevc.h"
40 #include "profiles.h"
41
/*
 * Per-index sample margins for the qpel code. Entry 0 is zero in all three
 * tables; for the other entries ff_hevc_qpel_extra[i] equals
 * ff_hevc_qpel_extra_before[i] + ff_hevc_qpel_extra_after[i] (3 + 4 = 7).
 * NOTE(review): presumably indexed by the fractional (quarter-sample)
 * position of a motion vector -- confirm against the users of these tables.
 */
42 const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 3 };
43 const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 4, 4, 4 };
44 const uint8_t ff_hevc_qpel_extra[4]        = { 0, 7, 7, 7 };
45
/* Scan of a single position: the only coordinate is 0. */
46 static const uint8_t scan_1x1[1] = { 0 };
47
/*
 * Row-by-row ("horizontal") scan of a 2x2 grid: entry n of the _x / _y
 * tables is the x / y coordinate of the n-th scanned position.
 */
48 static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
49
50 static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
51
/* Row-by-row scan of a 4x4 grid: x coordinate of the n-th scanned position. */
52 static const uint8_t horiz_scan4x4_x[16] = {
53     0, 1, 2, 3,
54     0, 1, 2, 3,
55     0, 1, 2, 3,
56     0, 1, 2, 3,
57 };
58
/* Row-by-row scan of a 4x4 grid: y coordinate of the n-th scanned position. */
59 static const uint8_t horiz_scan4x4_y[16] = {
60     0, 0, 0, 0,
61     1, 1, 1, 1,
62     2, 2, 2, 2,
63     3, 3, 3, 3,
64 };
65
/*
 * Inverse horizontal scan of an 8x8 grid: the entry at [a][b] is the scan
 * position of that cell. The grid is split into four 4x4 quadrants that are
 * numbered 0-15, 16-31, 32-47 and 48-63 (first index 0-3 / second index 0-3,
 * then second index 4-7, then first index 4-7 likewise); inside a quadrant
 * the second index varies fastest.
 */
66 static const uint8_t horiz_scan8x8_inv[8][8] = {
67     {  0,  1,  2,  3, 16, 17, 18, 19, },
68     {  4,  5,  6,  7, 20, 21, 22, 23, },
69     {  8,  9, 10, 11, 24, 25, 26, 27, },
70     { 12, 13, 14, 15, 28, 29, 30, 31, },
71     { 32, 33, 34, 35, 48, 49, 50, 51, },
72     { 36, 37, 38, 39, 52, 53, 54, 55, },
73     { 40, 41, 42, 43, 56, 57, 58, 59, },
74     { 44, 45, 46, 47, 60, 61, 62, 63, },
75 };
76
/*
 * Diagonal scan of a 2x2 grid: entry n of the _x / _y tables is the x / y
 * coordinate of the n-th scanned position, i.e. (0,0), (0,1), (1,0), (1,1).
 */
77 static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
78
79 static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
80
/*
 * Inverse of the 2x2 diagonal scan above: diag_scan2x2_inv[y][x] is the scan
 * position n for which diag_scan2x2_x[n] == x and diag_scan2x2_y[n] == y.
 */
81 static const uint8_t diag_scan2x2_inv[2][2] = {
82     { 0, 2, },
83     { 1, 3, },
84 };
85
/*
 * Inverse diagonal scan of a 4x4 grid: the entry at [a][b] is the scan
 * position of that cell. Anti-diagonals (constant a + b) are visited in
 * increasing order of a + b; along one anti-diagonal the first index
 * decreases while the second increases.
 */
86 static const uint8_t diag_scan4x4_inv[4][4] = {
87     { 0,  2,  5,  9, },
88     { 1,  4,  8, 12, },
89     { 3,  7, 11, 14, },
90     { 6, 10, 13, 15, },
91 };
92
/* Inverse diagonal scan of an 8x8 grid; same ordering rule as the 4x4 table. */
93 static const uint8_t diag_scan8x8_inv[8][8] = {
94     {  0,  2,  5,  9, 14, 20, 27, 35, },
95     {  1,  4,  8, 13, 19, 26, 34, 42, },
96     {  3,  7, 12, 18, 25, 33, 41, 48, },
97     {  6, 11, 17, 24, 32, 40, 47, 53, },
98     { 10, 16, 23, 31, 39, 46, 52, 57, },
99     { 15, 22, 30, 38, 45, 51, 56, 60, },
100     { 21, 29, 37, 44, 50, 55, 59, 62, },
101     { 28, 36, 43, 49, 54, 58, 61, 63, },
102 };
103
104 /**
105  * NOTE: Each function hls_foo corresponds to the function foo in the
106  * specification (HLS stands for High Level Syntax).
107  */
108
109 /**
110  * Section 5.7
111  */
112
113 /* free everything allocated  by pic_arrays_init() */
114 static void pic_arrays_free(HEVCContext *s)
115 {
116     av_freep(&s->sao);
117     av_freep(&s->deblock);
118
119     av_freep(&s->skip_flag);
120     av_freep(&s->tab_ct_depth);
121
122     av_freep(&s->tab_ipm);
123     av_freep(&s->cbf_luma);
124     av_freep(&s->is_pcm);
125
126     av_freep(&s->qp_y_tab);
127     av_freep(&s->tab_slice_address);
128     av_freep(&s->filter_slice_edges);
129
130     av_freep(&s->horizontal_bs);
131     av_freep(&s->vertical_bs);
132
133     av_buffer_pool_uninit(&s->tab_mvf_pool);
134     av_buffer_pool_uninit(&s->rpl_tab_pool);
135 }
136
137 /* allocate arrays that depend on frame dimensions */
138 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
139 {
140     int log2_min_cb_size = sps->log2_min_cb_size;
141     int width            = sps->width;
142     int height           = sps->height;
143     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
144                            ((height >> log2_min_cb_size) + 1);
145     int ctb_count        = sps->ctb_width * sps->ctb_height;
146     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
147
148     s->bs_width  = width  >> 3;
149     s->bs_height = height >> 3;
150
151     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
152     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
153     if (!s->sao || !s->deblock)
154         goto fail;
155
156     s->skip_flag    = av_malloc(pic_size_in_ctb);
157     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
158     if (!s->skip_flag || !s->tab_ct_depth)
159         goto fail;
160
161     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
162     s->tab_ipm  = av_mallocz(min_pu_size);
163     s->is_pcm   = av_malloc(min_pu_size);
164     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
165         goto fail;
166
167     s->filter_slice_edges = av_malloc(ctb_count);
168     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
169                                       sizeof(*s->tab_slice_address));
170     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
171                                       sizeof(*s->qp_y_tab));
172     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
173         goto fail;
174
175     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
176     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
177     if (!s->horizontal_bs || !s->vertical_bs)
178         goto fail;
179
180     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
181                                           av_buffer_alloc);
182     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
183                                           av_buffer_allocz);
184     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
185         goto fail;
186
187     return 0;
188
189 fail:
190     pic_arrays_free(s);
191     return AVERROR(ENOMEM);
192 }
193
194 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
195 {
196     int i = 0;
197     int j = 0;
198     uint8_t luma_weight_l0_flag[16];
199     uint8_t chroma_weight_l0_flag[16];
200     uint8_t luma_weight_l1_flag[16];
201     uint8_t chroma_weight_l1_flag[16];
202
203     s->sh.luma_log2_weight_denom = av_clip(get_ue_golomb_long(gb), 0, 7);
204     if (s->ps.sps->chroma_format_idc != 0) {
205         int delta = get_se_golomb(gb);
206         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
207     }
208
209     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
210         luma_weight_l0_flag[i] = get_bits1(gb);
211         if (!luma_weight_l0_flag[i]) {
212             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
213             s->sh.luma_offset_l0[i] = 0;
214         }
215     }
216     if (s->ps.sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
217         for (i = 0; i < s->sh.nb_refs[L0]; i++)
218             chroma_weight_l0_flag[i] = get_bits1(gb);
219     } else {
220         for (i = 0; i < s->sh.nb_refs[L0]; i++)
221             chroma_weight_l0_flag[i] = 0;
222     }
223     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
224         if (luma_weight_l0_flag[i]) {
225             int delta_luma_weight_l0 = get_se_golomb(gb);
226             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
227             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
228         }
229         if (chroma_weight_l0_flag[i]) {
230             for (j = 0; j < 2; j++) {
231                 int delta_chroma_weight_l0 = get_se_golomb(gb);
232                 int delta_chroma_offset_l0 = get_se_golomb(gb);
233                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
234                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
235                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
236             }
237         } else {
238             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
239             s->sh.chroma_offset_l0[i][0] = 0;
240             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
241             s->sh.chroma_offset_l0[i][1] = 0;
242         }
243     }
244     if (s->sh.slice_type == B_SLICE) {
245         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
246             luma_weight_l1_flag[i] = get_bits1(gb);
247             if (!luma_weight_l1_flag[i]) {
248                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
249                 s->sh.luma_offset_l1[i] = 0;
250             }
251         }
252         if (s->ps.sps->chroma_format_idc != 0) {
253             for (i = 0; i < s->sh.nb_refs[L1]; i++)
254                 chroma_weight_l1_flag[i] = get_bits1(gb);
255         } else {
256             for (i = 0; i < s->sh.nb_refs[L1]; i++)
257                 chroma_weight_l1_flag[i] = 0;
258         }
259         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
260             if (luma_weight_l1_flag[i]) {
261                 int delta_luma_weight_l1 = get_se_golomb(gb);
262                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
263                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
264             }
265             if (chroma_weight_l1_flag[i]) {
266                 for (j = 0; j < 2; j++) {
267                     int delta_chroma_weight_l1 = get_se_golomb(gb);
268                     int delta_chroma_offset_l1 = get_se_golomb(gb);
269                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
270                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
271                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
272                 }
273             } else {
274                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
275                 s->sh.chroma_offset_l1[i][0] = 0;
276                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
277                 s->sh.chroma_offset_l1[i][1] = 0;
278             }
279         }
280     }
281 }
282
283 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
284 {
285     const HEVCSPS *sps = s->ps.sps;
286     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
287     int prev_delta_msb = 0;
288     unsigned int nb_sps = 0, nb_sh;
289     int i;
290
291     rps->nb_refs = 0;
292     if (!sps->long_term_ref_pics_present_flag)
293         return 0;
294
295     if (sps->num_long_term_ref_pics_sps > 0)
296         nb_sps = get_ue_golomb_long(gb);
297     nb_sh = get_ue_golomb_long(gb);
298
299     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
300         return AVERROR_INVALIDDATA;
301
302     rps->nb_refs = nb_sh + nb_sps;
303
304     for (i = 0; i < rps->nb_refs; i++) {
305         uint8_t delta_poc_msb_present;
306
307         if (i < nb_sps) {
308             uint8_t lt_idx_sps = 0;
309
310             if (sps->num_long_term_ref_pics_sps > 1)
311                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
312
313             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
314             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
315         } else {
316             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
317             rps->used[i] = get_bits1(gb);
318         }
319
320         delta_poc_msb_present = get_bits1(gb);
321         if (delta_poc_msb_present) {
322             int delta = get_ue_golomb_long(gb);
323
324             if (i && i != nb_sps)
325                 delta += prev_delta_msb;
326
327             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
328             prev_delta_msb = delta;
329         }
330     }
331
332     return 0;
333 }
334
335 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
336                                  const HEVCSPS *sps)
337 {
338     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
339     unsigned int num = 0, den = 0;
340
341     avctx->pix_fmt             = sps->pix_fmt;
342     avctx->coded_width         = sps->width;
343     avctx->coded_height        = sps->height;
344     avctx->width               = sps->output_width;
345     avctx->height              = sps->output_height;
346     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
347     avctx->profile             = sps->ptl.general_ptl.profile_idc;
348     avctx->level               = sps->ptl.general_ptl.level_idc;
349
350     ff_set_sar(avctx, sps->vui.sar);
351
352     if (sps->vui.video_signal_type_present_flag)
353         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
354                                                             : AVCOL_RANGE_MPEG;
355     else
356         avctx->color_range = AVCOL_RANGE_MPEG;
357
358     if (sps->vui.colour_description_present_flag) {
359         avctx->color_primaries = sps->vui.colour_primaries;
360         avctx->color_trc       = sps->vui.transfer_characteristic;
361         avctx->colorspace      = sps->vui.matrix_coeffs;
362     } else {
363         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
364         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
365         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
366     }
367
368     if (vps->vps_timing_info_present_flag) {
369         num = vps->vps_num_units_in_tick;
370         den = vps->vps_time_scale;
371     } else if (sps->vui.vui_timing_info_present_flag) {
372         num = sps->vui.vui_num_units_in_tick;
373         den = sps->vui.vui_time_scale;
374     }
375
376     if (num != 0 && den != 0)
377         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
378                   num, den, 1 << 30);
379 }
380
381 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
382 {
383     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
384     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
385     int ret;
386
387     pic_arrays_free(s);
388     s->ps.sps = NULL;
389     s->ps.vps = NULL;
390
391     if (!sps)
392         return 0;
393
394     ret = pic_arrays_init(s, sps);
395     if (ret < 0)
396         goto fail;
397
398     export_stream_params(s->avctx, &s->ps, sps);
399
400     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P ||
401         sps->pix_fmt == AV_PIX_FMT_YUV420P10) {
402 #if CONFIG_HEVC_DXVA2_HWACCEL
403         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
404 #endif
405     }
406     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
407 #if CONFIG_HEVC_D3D11VA_HWACCEL
408         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
409 #endif
410 #if CONFIG_HEVC_VDPAU_HWACCEL
411         *fmt++ = AV_PIX_FMT_VDPAU;
412 #endif
413     }
414
415     *fmt++ = sps->pix_fmt;
416     *fmt = AV_PIX_FMT_NONE;
417
418     ret = ff_get_format(s->avctx, pix_fmts);
419     if (ret < 0)
420         goto fail;
421     s->avctx->pix_fmt = ret;
422
423     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
424     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
425     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
426
427     if (sps->sao_enabled && !s->avctx->hwaccel) {
428         av_frame_unref(s->tmp_frame);
429         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
430         if (ret < 0)
431             goto fail;
432         s->frame = s->tmp_frame;
433     }
434
435     s->ps.sps = sps;
436     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
437
438     return 0;
439
440 fail:
441     pic_arrays_free(s);
442     s->ps.sps = NULL;
443     return ret;
444 }
445
446 static int hls_slice_header(HEVCContext *s)
447 {
448     GetBitContext *gb = &s->HEVClc.gb;
449     SliceHeader *sh   = &s->sh;
450     int i, ret;
451
452     // Coded parameters
453     sh->first_slice_in_pic_flag = get_bits1(gb);
454     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
455         s->seq_decode = (s->seq_decode + 1) & 0xff;
456         s->max_ra     = INT_MAX;
457         if (IS_IDR(s))
458             ff_hevc_clear_refs(s);
459     }
460     if (IS_IRAP(s))
461         sh->no_output_of_prior_pics_flag = get_bits1(gb);
462
463     sh->pps_id = get_ue_golomb_long(gb);
464     if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
465         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
466         return AVERROR_INVALIDDATA;
467     }
468     if (!sh->first_slice_in_pic_flag &&
469         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
470         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
471         return AVERROR_INVALIDDATA;
472     }
473     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
474
475     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
476         s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
477
478         ff_hevc_clear_refs(s);
479         ret = set_sps(s, s->ps.sps);
480         if (ret < 0)
481             return ret;
482
483         s->seq_decode = (s->seq_decode + 1) & 0xff;
484         s->max_ra     = INT_MAX;
485     }
486
487     sh->dependent_slice_segment_flag = 0;
488     if (!sh->first_slice_in_pic_flag) {
489         int slice_address_length;
490
491         if (s->ps.pps->dependent_slice_segments_enabled_flag)
492             sh->dependent_slice_segment_flag = get_bits1(gb);
493
494         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
495                                             s->ps.sps->ctb_height);
496         sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
497         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
498             av_log(s->avctx, AV_LOG_ERROR,
499                    "Invalid slice segment address: %u.\n",
500                    sh->slice_segment_addr);
501             return AVERROR_INVALIDDATA;
502         }
503
504         if (!sh->dependent_slice_segment_flag) {
505             sh->slice_addr = sh->slice_segment_addr;
506             s->slice_idx++;
507         }
508     } else {
509         sh->slice_segment_addr = sh->slice_addr = 0;
510         s->slice_idx           = 0;
511         s->slice_initialized   = 0;
512     }
513
514     if (!sh->dependent_slice_segment_flag) {
515         s->slice_initialized = 0;
516
517         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
518             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
519
520         sh->slice_type = get_ue_golomb_long(gb);
521         if (!(sh->slice_type == I_SLICE ||
522               sh->slice_type == P_SLICE ||
523               sh->slice_type == B_SLICE)) {
524             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
525                    sh->slice_type);
526             return AVERROR_INVALIDDATA;
527         }
528         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
529             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
530             return AVERROR_INVALIDDATA;
531         }
532
533         // when flag is not present, picture is inferred to be output
534         sh->pic_output_flag = 1;
535         if (s->ps.pps->output_flag_present_flag)
536             sh->pic_output_flag = get_bits1(gb);
537
538         if (s->ps.sps->separate_colour_plane_flag)
539             sh->colour_plane_id = get_bits(gb, 2);
540
541         if (!IS_IDR(s)) {
542             int poc, pos;
543
544             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
545             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
546             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
547                 av_log(s->avctx, AV_LOG_WARNING,
548                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
549                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
550                     return AVERROR_INVALIDDATA;
551                 poc = s->poc;
552             }
553             s->poc = poc;
554
555             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
556             pos = get_bits_left(gb);
557             if (!sh->short_term_ref_pic_set_sps_flag) {
558                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
559                 if (ret < 0)
560                     return ret;
561
562                 sh->short_term_rps = &sh->slice_rps;
563             } else {
564                 int numbits, rps_idx;
565
566                 if (!s->ps.sps->nb_st_rps) {
567                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
568                     return AVERROR_INVALIDDATA;
569                 }
570
571                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
572                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
573                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
574             }
575             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
576
577             pos = get_bits_left(gb);
578             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
579             if (ret < 0) {
580                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
581                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
582                     return AVERROR_INVALIDDATA;
583             }
584             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
585
586             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
587                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
588             else
589                 sh->slice_temporal_mvp_enabled_flag = 0;
590         } else {
591             s->sh.short_term_rps = NULL;
592             s->poc               = 0;
593         }
594
595         /* 8.3.1 */
596         if (s->temporal_id == 0 &&
597             s->nal_unit_type != NAL_TRAIL_N &&
598             s->nal_unit_type != NAL_TSA_N   &&
599             s->nal_unit_type != NAL_STSA_N  &&
600             s->nal_unit_type != NAL_RADL_N  &&
601             s->nal_unit_type != NAL_RADL_R  &&
602             s->nal_unit_type != NAL_RASL_N  &&
603             s->nal_unit_type != NAL_RASL_R)
604             s->pocTid0 = s->poc;
605
606         if (s->ps.sps->sao_enabled) {
607             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
608             sh->slice_sample_adaptive_offset_flag[1] =
609             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
610         } else {
611             sh->slice_sample_adaptive_offset_flag[0] = 0;
612             sh->slice_sample_adaptive_offset_flag[1] = 0;
613             sh->slice_sample_adaptive_offset_flag[2] = 0;
614         }
615
616         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
617         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
618             int nb_refs;
619
620             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
621             if (sh->slice_type == B_SLICE)
622                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
623
624             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
625                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
626                 if (sh->slice_type == B_SLICE)
627                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
628             }
629             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
630                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
631                        sh->nb_refs[L0], sh->nb_refs[L1]);
632                 return AVERROR_INVALIDDATA;
633             }
634
635             sh->rpl_modification_flag[0] = 0;
636             sh->rpl_modification_flag[1] = 0;
637             nb_refs = ff_hevc_frame_nb_refs(s);
638             if (!nb_refs) {
639                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
640                 return AVERROR_INVALIDDATA;
641             }
642
643             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
644                 sh->rpl_modification_flag[0] = get_bits1(gb);
645                 if (sh->rpl_modification_flag[0]) {
646                     for (i = 0; i < sh->nb_refs[L0]; i++)
647                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
648                 }
649
650                 if (sh->slice_type == B_SLICE) {
651                     sh->rpl_modification_flag[1] = get_bits1(gb);
652                     if (sh->rpl_modification_flag[1] == 1)
653                         for (i = 0; i < sh->nb_refs[L1]; i++)
654                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
655                 }
656             }
657
658             if (sh->slice_type == B_SLICE)
659                 sh->mvd_l1_zero_flag = get_bits1(gb);
660
661             if (s->ps.pps->cabac_init_present_flag)
662                 sh->cabac_init_flag = get_bits1(gb);
663             else
664                 sh->cabac_init_flag = 0;
665
666             sh->collocated_ref_idx = 0;
667             if (sh->slice_temporal_mvp_enabled_flag) {
668                 sh->collocated_list = L0;
669                 if (sh->slice_type == B_SLICE)
670                     sh->collocated_list = !get_bits1(gb);
671
672                 if (sh->nb_refs[sh->collocated_list] > 1) {
673                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
674                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
675                         av_log(s->avctx, AV_LOG_ERROR,
676                                "Invalid collocated_ref_idx: %d.\n",
677                                sh->collocated_ref_idx);
678                         return AVERROR_INVALIDDATA;
679                     }
680                 }
681             }
682
683             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
684                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
685                 pred_weight_table(s, gb);
686             }
687
688             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
689             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
690                 av_log(s->avctx, AV_LOG_ERROR,
691                        "Invalid number of merging MVP candidates: %d.\n",
692                        sh->max_num_merge_cand);
693                 return AVERROR_INVALIDDATA;
694             }
695         }
696
697         sh->slice_qp_delta = get_se_golomb(gb);
698
699         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
700             sh->slice_cb_qp_offset = get_se_golomb(gb);
701             sh->slice_cr_qp_offset = get_se_golomb(gb);
702         } else {
703             sh->slice_cb_qp_offset = 0;
704             sh->slice_cr_qp_offset = 0;
705         }
706
707         if (s->ps.pps->deblocking_filter_control_present_flag) {
708             int deblocking_filter_override_flag = 0;
709
710             if (s->ps.pps->deblocking_filter_override_enabled_flag)
711                 deblocking_filter_override_flag = get_bits1(gb);
712
713             if (deblocking_filter_override_flag) {
714                 sh->disable_deblocking_filter_flag = get_bits1(gb);
715                 if (!sh->disable_deblocking_filter_flag) {
716                     sh->beta_offset = get_se_golomb(gb) * 2;
717                     sh->tc_offset   = get_se_golomb(gb) * 2;
718                 }
719             } else {
720                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
721                 sh->beta_offset                    = s->ps.pps->beta_offset;
722                 sh->tc_offset                      = s->ps.pps->tc_offset;
723             }
724         } else {
725             sh->disable_deblocking_filter_flag = 0;
726             sh->beta_offset                    = 0;
727             sh->tc_offset                      = 0;
728         }
729
730         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
731             (sh->slice_sample_adaptive_offset_flag[0] ||
732              sh->slice_sample_adaptive_offset_flag[1] ||
733              !sh->disable_deblocking_filter_flag)) {
734             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
735         } else {
736             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
737         }
738     } else if (!s->slice_initialized) {
739         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
740         return AVERROR_INVALIDDATA;
741     }
742
743     sh->num_entry_point_offsets = 0;
744     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
745         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
746         if (sh->num_entry_point_offsets > 0) {
747             int offset_len = get_ue_golomb_long(gb) + 1;
748
749             for (i = 0; i < sh->num_entry_point_offsets; i++)
750                 skip_bits(gb, offset_len);
751         }
752     }
753
754     if (s->ps.pps->slice_header_extension_present_flag) {
755         unsigned int length = get_ue_golomb_long(gb);
756         for (i = 0; i < length; i++)
757             skip_bits(gb, 8);  // slice_header_extension_data_byte
758     }
759
760     // Inferred parameters
761     sh->slice_qp = 26 + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
762     if (sh->slice_qp > 51 ||
763         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
764         av_log(s->avctx, AV_LOG_ERROR,
765                "The slice_qp %d is outside the valid range "
766                "[%d, 51].\n",
767                sh->slice_qp,
768                -s->ps.sps->qp_bd_offset);
769         return AVERROR_INVALIDDATA;
770     }
771
772     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
773
774     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
775         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
776         return AVERROR_INVALIDDATA;
777     }
778
779     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
780
781     if (!s->ps.pps->cu_qp_delta_enabled_flag)
782         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->ps.sps->qp_bd_offset,
783                                 52 + s->ps.sps->qp_bd_offset) - s->ps.sps->qp_bd_offset;
784
785     s->slice_initialized = 1;
786
787     return 0;
788 }
789
/*
 * Element (x, y) of a per-CTB table stored row by row with
 * s->ps.sps->ctb_width entries per row. Requires `s` in scope.
 */
#define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])

/*
 * Set one field of the current CTB's SAO parameters.
 *
 * With sao_merge_left_flag set the field is copied from the CTB to the
 * left, otherwise with sao_merge_up_flag set from the CTB above; only when
 * neither flag is set is `value` evaluated (exactly once) and stored.
 * Requires `s`, `sao`, `rx`, `ry` and both merge flags in scope.
 *
 * The three branches cover every flag combination, so no further fallback
 * branch is needed.
 */
#define SET_SAO(elem, value)                            \
do {                                                    \
    if (sao_merge_left_flag)                            \
        sao->elem = CTB(s->sao, rx - 1, ry).elem;       \
    else if (sao_merge_up_flag)                         \
        sao->elem = CTB(s->sao, rx, ry - 1).elem;       \
    else                                                \
        sao->elem = (value);                            \
} while (0)
803
804 static void hls_sao_param(HEVCContext *s, int rx, int ry)
805 {
806     HEVCLocalContext *lc    = &s->HEVClc;
807     int sao_merge_left_flag = 0;
808     int sao_merge_up_flag   = 0;
809     int shift               = s->ps.sps->bit_depth - FFMIN(s->ps.sps->bit_depth, 10);
810     SAOParams *sao          = &CTB(s->sao, rx, ry);
811     int c_idx, i;
812
813     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
814         s->sh.slice_sample_adaptive_offset_flag[1]) {
815         if (rx > 0) {
816             if (lc->ctb_left_flag)
817                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
818         }
819         if (ry > 0 && !sao_merge_left_flag) {
820             if (lc->ctb_up_flag)
821                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
822         }
823     }
824
825     for (c_idx = 0; c_idx < 3; c_idx++) {
826         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
827             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
828             continue;
829         }
830
831         if (c_idx == 2) {
832             sao->type_idx[2] = sao->type_idx[1];
833             sao->eo_class[2] = sao->eo_class[1];
834         } else {
835             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
836         }
837
838         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
839             continue;
840
841         for (i = 0; i < 4; i++)
842             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
843
844         if (sao->type_idx[c_idx] == SAO_BAND) {
845             for (i = 0; i < 4; i++) {
846                 if (sao->offset_abs[c_idx][i]) {
847                     SET_SAO(offset_sign[c_idx][i],
848                             ff_hevc_sao_offset_sign_decode(s));
849                 } else {
850                     sao->offset_sign[c_idx][i] = 0;
851                 }
852             }
853             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
854         } else if (c_idx != 2) {
855             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
856         }
857
858         // Inferred parameters
859         sao->offset_val[c_idx][0] = 0;
860         for (i = 0; i < 4; i++) {
861             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
862             if (sao->type_idx[c_idx] == SAO_EDGE) {
863                 if (i > 1)
864                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
865             } else if (sao->offset_sign[c_idx][i]) {
866                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
867             }
868         }
869     }
870 }
871
872 #undef SET_SAO
873 #undef CTB
874
875 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
876                                 int log2_trafo_size, enum ScanType scan_idx,
877                                 int c_idx)
878 {
879 #define GET_COORD(offset, n)                                    \
880     do {                                                        \
881         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
882         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
883     } while (0)
884     HEVCLocalContext *lc    = &s->HEVClc;
885     int transform_skip_flag = 0;
886
887     int last_significant_coeff_x, last_significant_coeff_y;
888     int last_scan_pos;
889     int n_end;
890     int num_coeff    = 0;
891     int greater1_ctx = 1;
892
893     int num_last_subset;
894     int x_cg_last_sig, y_cg_last_sig;
895
896     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
897
898     ptrdiff_t stride = s->frame->linesize[c_idx];
899     int hshift       = s->ps.sps->hshift[c_idx];
900     int vshift       = s->ps.sps->vshift[c_idx];
901     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
902                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
903     DECLARE_ALIGNED(32, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
904     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
905
906     int trafo_size = 1 << log2_trafo_size;
907     int i, qp, shift, add, scale, scale_m;
908     static const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
909     const uint8_t *scale_matrix;
910     uint8_t dc_scale;
911
912     // Derive QP for dequant
913     if (!lc->cu.cu_transquant_bypass_flag) {
914         static const int qp_c[] = {
915             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
916         };
917
918         static const uint8_t rem6[51 + 2 * 6 + 1] = {
919             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
920             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
921             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
922         };
923
924         static const uint8_t div6[51 + 2 * 6 + 1] = {
925             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
926             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
927             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
928         };
929         int qp_y = lc->qp_y;
930
931         if (c_idx == 0) {
932             qp = qp_y + s->ps.sps->qp_bd_offset;
933         } else {
934             int qp_i, offset;
935
936             if (c_idx == 1)
937                 offset = s->ps.pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
938             else
939                 offset = s->ps.pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
940
941             qp_i = av_clip(qp_y + offset, -s->ps.sps->qp_bd_offset, 57);
942             if (qp_i < 30)
943                 qp = qp_i;
944             else if (qp_i > 43)
945                 qp = qp_i - 6;
946             else
947                 qp = qp_c[qp_i - 30];
948
949             qp += s->ps.sps->qp_bd_offset;
950         }
951
952         shift    = s->ps.sps->bit_depth + log2_trafo_size - 5;
953         add      = 1 << (shift - 1);
954         scale    = level_scale[rem6[qp]] << (div6[qp]);
955         scale_m  = 16; // default when no custom scaling lists.
956         dc_scale = 16;
957
958         if (s->ps.sps->scaling_list_enable_flag) {
959             const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ?
960                                     &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
961             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
962
963             if (log2_trafo_size != 5)
964                 matrix_id = 3 * matrix_id + c_idx;
965
966             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
967             if (log2_trafo_size >= 4)
968                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
969         }
970     }
971
972     if (s->ps.pps->transform_skip_enabled_flag &&
973         !lc->cu.cu_transquant_bypass_flag   &&
974         log2_trafo_size == 2) {
975         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
976     }
977
978     last_significant_coeff_x =
979         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
980     last_significant_coeff_y =
981         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
982
983     if (last_significant_coeff_x > 3) {
984         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
985         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
986                                    (2 + (last_significant_coeff_x & 1)) +
987                                    suffix;
988     }
989
990     if (last_significant_coeff_y > 3) {
991         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
992         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
993                                    (2 + (last_significant_coeff_y & 1)) +
994                                    suffix;
995     }
996
997     if (scan_idx == SCAN_VERT)
998         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
999
1000     x_cg_last_sig = last_significant_coeff_x >> 2;
1001     y_cg_last_sig = last_significant_coeff_y >> 2;
1002
1003     switch (scan_idx) {
1004     case SCAN_DIAG: {
1005         int last_x_c = last_significant_coeff_x & 3;
1006         int last_y_c = last_significant_coeff_y & 3;
1007
1008         scan_x_off = ff_hevc_diag_scan4x4_x;
1009         scan_y_off = ff_hevc_diag_scan4x4_y;
1010         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1011         if (trafo_size == 4) {
1012             scan_x_cg = scan_1x1;
1013             scan_y_cg = scan_1x1;
1014         } else if (trafo_size == 8) {
1015             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1016             scan_x_cg  = diag_scan2x2_x;
1017             scan_y_cg  = diag_scan2x2_y;
1018         } else if (trafo_size == 16) {
1019             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1020             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1021             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1022         } else { // trafo_size == 32
1023             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1024             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1025             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1026         }
1027         break;
1028     }
1029     case SCAN_HORIZ:
1030         scan_x_cg  = horiz_scan2x2_x;
1031         scan_y_cg  = horiz_scan2x2_y;
1032         scan_x_off = horiz_scan4x4_x;
1033         scan_y_off = horiz_scan4x4_y;
1034         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1035         break;
1036     default: //SCAN_VERT
1037         scan_x_cg  = horiz_scan2x2_y;
1038         scan_y_cg  = horiz_scan2x2_x;
1039         scan_x_off = horiz_scan4x4_y;
1040         scan_y_off = horiz_scan4x4_x;
1041         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1042         break;
1043     }
1044     num_coeff++;
1045     num_last_subset = (num_coeff - 1) >> 4;
1046
1047     for (i = num_last_subset; i >= 0; i--) {
1048         int n, m;
1049         int x_cg, y_cg, x_c, y_c;
1050         int implicit_non_zero_coeff = 0;
1051         int64_t trans_coeff_level;
1052         int prev_sig = 0;
1053         int offset   = i << 4;
1054
1055         uint8_t significant_coeff_flag_idx[16];
1056         uint8_t nb_significant_coeff_flag = 0;
1057
1058         x_cg = scan_x_cg[i];
1059         y_cg = scan_y_cg[i];
1060
1061         if (i < num_last_subset && i > 0) {
1062             int ctx_cg = 0;
1063             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1064                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1065             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1066                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1067
1068             significant_coeff_group_flag[x_cg][y_cg] =
1069                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1070             implicit_non_zero_coeff = 1;
1071         } else {
1072             significant_coeff_group_flag[x_cg][y_cg] =
1073                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1074                  (x_cg == 0 && y_cg == 0));
1075         }
1076
1077         last_scan_pos = num_coeff - offset - 1;
1078
1079         if (i == num_last_subset) {
1080             n_end                         = last_scan_pos - 1;
1081             significant_coeff_flag_idx[0] = last_scan_pos;
1082             nb_significant_coeff_flag     = 1;
1083         } else {
1084             n_end = 15;
1085         }
1086
1087         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1088             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1089         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1090             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1091
1092         for (n = n_end; n >= 0; n--) {
1093             GET_COORD(offset, n);
1094
1095             if (significant_coeff_group_flag[x_cg][y_cg] &&
1096                 (n > 0 || implicit_non_zero_coeff == 0)) {
1097                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1098                                                           log2_trafo_size,
1099                                                           scan_idx,
1100                                                           prev_sig) == 1) {
1101                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1102                     nb_significant_coeff_flag++;
1103                     implicit_non_zero_coeff = 0;
1104                 }
1105             } else {
1106                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1107                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1108                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1109                     nb_significant_coeff_flag++;
1110                 }
1111             }
1112         }
1113
1114         n_end = nb_significant_coeff_flag;
1115
1116         if (n_end) {
1117             int first_nz_pos_in_cg = 16;
1118             int last_nz_pos_in_cg = -1;
1119             int c_rice_param = 0;
1120             int first_greater1_coeff_idx = -1;
1121             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1122             uint16_t coeff_sign_flag;
1123             int sum_abs = 0;
1124             int sign_hidden = 0;
1125
1126             // initialize first elem of coeff_bas_level_greater1_flag
1127             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1128
1129             if (!(i == num_last_subset) && greater1_ctx == 0)
1130                 ctx_set++;
1131             greater1_ctx      = 1;
1132             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1133
1134             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1135                 int n_idx = significant_coeff_flag_idx[m];
1136                 int inc   = (ctx_set << 2) + greater1_ctx;
1137                 coeff_abs_level_greater1_flag[n_idx] =
1138                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1139                 if (coeff_abs_level_greater1_flag[n_idx]) {
1140                     greater1_ctx = 0;
1141                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1142                     greater1_ctx++;
1143                 }
1144
1145                 if (coeff_abs_level_greater1_flag[n_idx] &&
1146                     first_greater1_coeff_idx == -1)
1147                     first_greater1_coeff_idx = n_idx;
1148             }
1149             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1150             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1151                                  !lc->cu.cu_transquant_bypass_flag;
1152
1153             if (first_greater1_coeff_idx != -1) {
1154                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1155             }
1156             if (!s->ps.pps->sign_data_hiding_flag || !sign_hidden) {
1157                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1158             } else {
1159                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1160             }
1161
1162             for (m = 0; m < n_end; m++) {
1163                 n = significant_coeff_flag_idx[m];
1164                 GET_COORD(offset, n);
1165                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1166                 if (trans_coeff_level == ((m < 8) ?
1167                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1168                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1169
1170                     trans_coeff_level += last_coeff_abs_level_remaining;
1171                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1172                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1173                 }
1174                 if (s->ps.pps->sign_data_hiding_flag && sign_hidden) {
1175                     sum_abs += trans_coeff_level;
1176                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1177                         trans_coeff_level = -trans_coeff_level;
1178                 }
1179                 if (coeff_sign_flag >> 15)
1180                     trans_coeff_level = -trans_coeff_level;
1181                 coeff_sign_flag <<= 1;
1182                 if (!lc->cu.cu_transquant_bypass_flag) {
1183                     if (s->ps.sps->scaling_list_enable_flag) {
1184                         if (y_c || x_c || log2_trafo_size < 4) {
1185                             int pos;
1186                             switch (log2_trafo_size) {
1187                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1188                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1189                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1190                             default: pos = (y_c        << 2) +  x_c;
1191                             }
1192                             scale_m = scale_matrix[pos];
1193                         } else {
1194                             scale_m = dc_scale;
1195                         }
1196                     }
1197                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1198                     if(trans_coeff_level < 0) {
1199                         if((~trans_coeff_level) & 0xFffffffffff8000)
1200                             trans_coeff_level = -32768;
1201                     } else {
1202                         if (trans_coeff_level & 0xffffffffffff8000)
1203                             trans_coeff_level = 32767;
1204                     }
1205                 }
1206                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1207             }
1208         }
1209     }
1210
1211     if (!lc->cu.cu_transquant_bypass_flag) {
1212         if (transform_skip_flag)
1213             s->hevcdsp.dequant(coeffs);
1214         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1215                  log2_trafo_size == 2)
1216             s->hevcdsp.transform_4x4_luma(coeffs);
1217         else {
1218             int max_xy = FFMAX(last_significant_coeff_x, last_significant_coeff_y);
1219             if (max_xy == 0)
1220                 s->hevcdsp.idct_dc[log2_trafo_size - 2](coeffs);
1221             else {
1222                 int col_limit = last_significant_coeff_x + last_significant_coeff_y + 4;
1223                 if (max_xy < 4)
1224                     col_limit = FFMIN(4, col_limit);
1225                 else if (max_xy < 8)
1226                     col_limit = FFMIN(8, col_limit);
1227                 else if (max_xy < 12)
1228                     col_limit = FFMIN(24, col_limit);
1229                 s->hevcdsp.idct[log2_trafo_size - 2](coeffs, col_limit);
1230             }
1231         }
1232     }
1233     s->hevcdsp.add_residual[log2_trafo_size - 2](dst, coeffs, stride);
1234 }
1235
1236 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1237                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1238                               int log2_cb_size, int log2_trafo_size,
1239                               int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr)
1240 {
1241     HEVCLocalContext *lc = &s->HEVClc;
1242
1243     if (lc->cu.pred_mode == MODE_INTRA) {
1244         int trafo_size = 1 << log2_trafo_size;
1245         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1246
1247         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1248         if (log2_trafo_size > 2) {
1249             trafo_size = trafo_size << (s->ps.sps->hshift[1] - 1);
1250             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1251             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1252             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1253         } else if (blk_idx == 3) {
1254             trafo_size = trafo_size << s->ps.sps->hshift[1];
1255             ff_hevc_set_neighbour_available(s, xBase, yBase,
1256                                             trafo_size, trafo_size);
1257             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1258             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1259         }
1260     }
1261
1262     if (cbf_luma || cbf_cb || cbf_cr) {
1263         int scan_idx   = SCAN_DIAG;
1264         int scan_idx_c = SCAN_DIAG;
1265
1266         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1267             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1268             if (lc->tu.cu_qp_delta != 0)
1269                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1270                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1271             lc->tu.is_cu_qp_delta_coded = 1;
1272
1273             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1274                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1275                 av_log(s->avctx, AV_LOG_ERROR,
1276                        "The cu_qp_delta %d is outside the valid range "
1277                        "[%d, %d].\n",
1278                        lc->tu.cu_qp_delta,
1279                        -(26 + s->ps.sps->qp_bd_offset / 2),
1280                         (25 + s->ps.sps->qp_bd_offset / 2));
1281                 return AVERROR_INVALIDDATA;
1282             }
1283
1284             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1285         }
1286
1287         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1288             if (lc->tu.cur_intra_pred_mode >= 6 &&
1289                 lc->tu.cur_intra_pred_mode <= 14) {
1290                 scan_idx = SCAN_VERT;
1291             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1292                        lc->tu.cur_intra_pred_mode <= 30) {
1293                 scan_idx = SCAN_HORIZ;
1294             }
1295
1296             if (lc->pu.intra_pred_mode_c >=  6 &&
1297                 lc->pu.intra_pred_mode_c <= 14) {
1298                 scan_idx_c = SCAN_VERT;
1299             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1300                        lc->pu.intra_pred_mode_c <= 30) {
1301                 scan_idx_c = SCAN_HORIZ;
1302             }
1303         }
1304
1305         if (cbf_luma)
1306             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1307         if (log2_trafo_size > 2) {
1308             if (cbf_cb)
1309                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1310             if (cbf_cr)
1311                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1312         } else if (blk_idx == 3) {
1313             if (cbf_cb)
1314                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1315             if (cbf_cr)
1316                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1317         }
1318     }
1319     return 0;
1320 }
1321
1322 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1323 {
1324     int cb_size          = 1 << log2_cb_size;
1325     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1326
1327     int min_pu_width     = s->ps.sps->min_pu_width;
1328     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1329     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1330     int i, j;
1331
1332     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1333         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1334             s->is_pcm[i + j * min_pu_width] = 2;
1335 }
1336
1337 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1338                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1339                               int log2_cb_size, int log2_trafo_size,
1340                               int trafo_depth, int blk_idx,
1341                               int cbf_cb, int cbf_cr)
1342 {
1343     HEVCLocalContext *lc = &s->HEVClc;
1344     uint8_t split_transform_flag;
1345     int ret;
1346
1347     if (lc->cu.intra_split_flag) {
1348         if (trafo_depth == 1)
1349             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1350     } else {
1351         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1352     }
1353
1354     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1355         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1356         trafo_depth     < lc->cu.max_trafo_depth       &&
1357         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1358         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1359     } else {
1360         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1361                           lc->cu.pred_mode == MODE_INTER &&
1362                           lc->cu.part_mode != PART_2Nx2N &&
1363                           trafo_depth == 0;
1364
1365         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1366                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1367                                inter_split;
1368     }
1369
1370     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1371         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1372     else if (log2_trafo_size > 2 || trafo_depth == 0)
1373         cbf_cb = 0;
1374     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1375         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1376     else if (log2_trafo_size > 2 || trafo_depth == 0)
1377         cbf_cr = 0;
1378
1379     if (split_transform_flag) {
1380         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1381         const int x1 = x0 + trafo_size_split;
1382         const int y1 = y0 + trafo_size_split;
1383
1384 #define SUBDIVIDE(x, y, idx)                                                    \
1385 do {                                                                            \
1386     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1387                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1388                              cbf_cb, cbf_cr);                                   \
1389     if (ret < 0)                                                                \
1390         return ret;                                                             \
1391 } while (0)
1392
1393         SUBDIVIDE(x0, y0, 0);
1394         SUBDIVIDE(x1, y0, 1);
1395         SUBDIVIDE(x0, y1, 2);
1396         SUBDIVIDE(x1, y1, 3);
1397
1398 #undef SUBDIVIDE
1399     } else {
1400         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1401         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1402         int min_tu_width     = s->ps.sps->min_tb_width;
1403         int cbf_luma         = 1;
1404
1405         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1406             cbf_cb || cbf_cr)
1407             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1408
1409         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1410                                  log2_cb_size, log2_trafo_size,
1411                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1412         if (ret < 0)
1413             return ret;
1414         // TODO: store cbf_luma somewhere else
1415         if (cbf_luma) {
1416             int i, j;
1417             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1418                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1419                     int x_tu = (x0 + j) >> log2_min_tu_size;
1420                     int y_tu = (y0 + i) >> log2_min_tu_size;
1421                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1422                 }
1423         }
1424         if (!s->sh.disable_deblocking_filter_flag) {
1425             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1426             if (s->ps.pps->transquant_bypass_enable_flag &&
1427                 lc->cu.cu_transquant_bypass_flag)
1428                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1429         }
1430     }
1431     return 0;
1432 }
1433
1434 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1435 {
1436     //TODO: non-4:2:0 support
1437     HEVCLocalContext *lc = &s->HEVClc;
1438     GetBitContext gb;
1439     int cb_size   = 1 << log2_cb_size;
1440     int stride0   = s->frame->linesize[0];
1441     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1442     int   stride1 = s->frame->linesize[1];
1443     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1444     int   stride2 = s->frame->linesize[2];
1445     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1446
1447     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->ps.sps->pcm.bit_depth_chroma;
1448     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1449     int ret;
1450
1451     if (!s->sh.disable_deblocking_filter_flag)
1452         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1453
1454     ret = init_get_bits(&gb, pcm, length);
1455     if (ret < 0)
1456         return ret;
1457
1458     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1459     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1460     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1461     return 0;
1462 }
1463
1464 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1465 {
1466     HEVCLocalContext *lc = &s->HEVClc;
1467     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1468     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1469
1470     if (x)
1471         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1472     if (y)
1473         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1474
1475     switch (x) {
1476     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1477     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1478     case 0: lc->pu.mvd.x = 0;                               break;
1479     }
1480
1481     switch (y) {
1482     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1483     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1484     case 0: lc->pu.mvd.y = 0;                               break;
1485     }
1486 }
1487
1488 /**
1489  * 8.5.3.2.2.1 Luma sample interpolation process
1490  *
1491  * @param s HEVC decoding context
1492  * @param dst target buffer for block data at block position
1493  * @param dststride stride of the dst buffer
1494  * @param ref reference picture buffer at origin (0, 0)
1495  * @param mv motion vector (relative to block position) to get pixel data from
1496  * @param x_off horizontal position of block from origin (0, 0)
1497  * @param y_off vertical position of block from origin (0, 0)
1498  * @param block_w width of block
1499  * @param block_h height of block
1500  */
1501 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1502                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1503                     int block_w, int block_h, int pred_idx)
1504 {
1505     HEVCLocalContext *lc = &s->HEVClc;
1506     uint8_t *src         = ref->data[0];
1507     ptrdiff_t srcstride  = ref->linesize[0];
1508     int pic_width        = s->ps.sps->width;
1509     int pic_height       = s->ps.sps->height;
1510
1511     int mx         = mv->x & 3;
1512     int my         = mv->y & 3;
1513     int extra_left = ff_hevc_qpel_extra_before[mx];
1514     int extra_top  = ff_hevc_qpel_extra_before[my];
1515
1516     x_off += mv->x >> 2;
1517     y_off += mv->y >> 2;
1518     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1519
1520     if (x_off < extra_left || y_off < extra_top ||
1521         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1522         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1523         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1524         int offset = extra_top * srcstride + (extra_left << s->ps.sps->pixel_shift);
1525         int buf_offset = extra_top *
1526                          edge_emu_stride + (extra_left << s->ps.sps->pixel_shift);
1527
1528         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1529                                  edge_emu_stride, srcstride,
1530                                  block_w + ff_hevc_qpel_extra[mx],
1531                                  block_h + ff_hevc_qpel_extra[my],
1532                                  x_off - extra_left, y_off - extra_top,
1533                                  pic_width, pic_height);
1534         src = lc->edge_emu_buffer + buf_offset;
1535         srcstride = edge_emu_stride;
1536     }
1537     s->hevcdsp.put_hevc_qpel[!!my][!!mx][pred_idx](dst, dststride, src, srcstride,
1538                                                    block_h, mx, my, lc->mc_buffer);
1539 }
1540
1541 /**
1542  * 8.5.3.2.2.2 Chroma sample interpolation process
1543  *
1544  * @param s HEVC decoding context
1545  * @param dst1 target buffer for block data at block position (U plane)
1546  * @param dst2 target buffer for block data at block position (V plane)
1547  * @param dststride stride of the dst1 and dst2 buffers
1548  * @param ref reference picture buffer at origin (0, 0)
1549  * @param mv motion vector (relative to block position) to get pixel data from
1550  * @param x_off horizontal position of block from origin (0, 0)
1551  * @param y_off vertical position of block from origin (0, 0)
1552  * @param block_w width of block
1553  * @param block_h height of block
1554  */
1555 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1556                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1557                       int x_off, int y_off, int block_w, int block_h, int pred_idx)
1558 {
1559     HEVCLocalContext *lc = &s->HEVClc;
1560     uint8_t *src1        = ref->data[1];
1561     uint8_t *src2        = ref->data[2];
1562     ptrdiff_t src1stride = ref->linesize[1];
1563     ptrdiff_t src2stride = ref->linesize[2];
1564     int pic_width        = s->ps.sps->width >> 1;
1565     int pic_height       = s->ps.sps->height >> 1;
1566
1567     int mx = mv->x & 7;
1568     int my = mv->y & 7;
1569
1570     x_off += mv->x >> 3;
1571     y_off += mv->y >> 3;
1572     src1  += y_off * src1stride + (x_off * (1 << s->ps.sps->pixel_shift));
1573     src2  += y_off * src2stride + (x_off * (1 << s->ps.sps->pixel_shift));
1574
1575     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1576         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1577         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1578         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1579         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1580         int buf_offset1 = EPEL_EXTRA_BEFORE *
1581                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1582         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1583         int buf_offset2 = EPEL_EXTRA_BEFORE *
1584                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1585
1586         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1587                                  edge_emu_stride, src1stride,
1588                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1589                                  x_off - EPEL_EXTRA_BEFORE,
1590                                  y_off - EPEL_EXTRA_BEFORE,
1591                                  pic_width, pic_height);
1592
1593         src1 = lc->edge_emu_buffer + buf_offset1;
1594         src1stride = edge_emu_stride;
1595         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
1596                                                        block_h, mx, my, lc->mc_buffer);
1597
1598         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1599                                  edge_emu_stride, src2stride,
1600                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1601                                  x_off - EPEL_EXTRA_BEFORE,
1602                                  y_off - EPEL_EXTRA_BEFORE,
1603                                  pic_width, pic_height);
1604         src2 = lc->edge_emu_buffer + buf_offset2;
1605         src2stride = edge_emu_stride;
1606
1607         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
1608                                                        block_h, mx, my, lc->mc_buffer);
1609     } else {
1610         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
1611                                                        block_h, mx, my, lc->mc_buffer);
1612         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
1613                                                        block_h, mx, my, lc->mc_buffer);
1614     }
1615 }
1616
1617 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1618                                 const Mv *mv, int y0, int height)
1619 {
1620     int y = (mv->y >> 2) + y0 + height + 9;
1621     ff_thread_await_progress(&ref->tf, y, 0);
1622 }
1623
1624 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1625                                   int nPbH, int log2_cb_size, int part_idx,
1626                                   int merge_idx, MvField *mv)
1627 {
1628     HEVCLocalContext *lc             = &s->HEVClc;
1629     enum InterPredIdc inter_pred_idc = PRED_L0;
1630     int mvp_flag;
1631
1632     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1633     if (s->sh.slice_type == B_SLICE)
1634         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1635
1636     if (inter_pred_idc != PRED_L1) {
1637         if (s->sh.nb_refs[L0])
1638             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1639
1640         mv->pred_flag[0] = 1;
1641         hls_mvd_coding(s, x0, y0, 0);
1642         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1643         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1644                                  part_idx, merge_idx, mv, mvp_flag, 0);
1645         mv->mv[0].x += lc->pu.mvd.x;
1646         mv->mv[0].y += lc->pu.mvd.y;
1647     }
1648
1649     if (inter_pred_idc != PRED_L0) {
1650         if (s->sh.nb_refs[L1])
1651             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1652
1653         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1654             AV_ZERO32(&lc->pu.mvd);
1655         } else {
1656             hls_mvd_coding(s, x0, y0, 1);
1657         }
1658
1659         mv->pred_flag[1] = 1;
1660         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1661         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1662                                  part_idx, merge_idx, mv, mvp_flag, 1);
1663         mv->mv[1].x += lc->pu.mvd.x;
1664         mv->mv[1].y += lc->pu.mvd.y;
1665     }
1666 }
1667
1668 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1669                                 int nPbW, int nPbH,
1670                                 int log2_cb_size, int partIdx)
1671 {
1672     static const int pred_indices[] = {
1673         [4] = 0, [8] = 1, [12] = 2, [16] = 3, [24] = 4, [32] = 5, [48] = 6, [64] = 7,
1674     };
1675     const int pred_idx = pred_indices[nPbW];
1676
1677 #define POS(c_idx, x, y)                                                              \
1678     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1679                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1680     HEVCLocalContext *lc = &s->HEVClc;
1681     int merge_idx = 0;
1682     struct MvField current_mv = {{{ 0 }}};
1683
1684     int min_pu_width = s->ps.sps->min_pu_width;
1685
1686     MvField *tab_mvf = s->ref->tab_mvf;
1687     RefPicList  *refPicList = s->ref->refPicList;
1688     HEVCFrame *ref0, *ref1;
1689
1690     int tmpstride = MAX_PB_SIZE * sizeof(int16_t);
1691
1692     uint8_t *dst0 = POS(0, x0, y0);
1693     uint8_t *dst1 = POS(1, x0, y0);
1694     uint8_t *dst2 = POS(2, x0, y0);
1695     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1696     int min_cb_width     = s->ps.sps->min_cb_width;
1697     int x_cb             = x0 >> log2_min_cb_size;
1698     int y_cb             = y0 >> log2_min_cb_size;
1699     int x_pu, y_pu;
1700     int i, j;
1701
1702     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1703
1704     if (!skip_flag)
1705         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1706
1707     if (skip_flag || lc->pu.merge_flag) {
1708         if (s->sh.max_num_merge_cand > 1)
1709             merge_idx = ff_hevc_merge_idx_decode(s);
1710         else
1711             merge_idx = 0;
1712
1713         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1714                                    partIdx, merge_idx, &current_mv);
1715     } else {
1716         hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1717                               partIdx, merge_idx, &current_mv);
1718     }
1719
1720     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1721     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1722
1723     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1724         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1725             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1726
1727     if (current_mv.pred_flag[0]) {
1728         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1729         if (!ref0)
1730             return;
1731         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1732     }
1733     if (current_mv.pred_flag[1]) {
1734         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1735         if (!ref1)
1736             return;
1737         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1738     }
1739
1740     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1741         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1742         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1743
1744         luma_mc(s, tmp, tmpstride, ref0->frame,
1745                 &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
1746
1747         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1748             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1749             s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
1750                                                s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1751                                                s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1752                                                dst0, s->frame->linesize[0], tmp,
1753                                                tmpstride, nPbH);
1754         } else {
1755             s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
1756         }
1757         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1758                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1759
1760         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1761             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1762             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1763                                                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1764                                                       s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1765                                                       dst1, s->frame->linesize[1], tmp, tmpstride,
1766                                                       nPbH / 2);
1767             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1768                                                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1769                                                       s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1770                                                       dst2, s->frame->linesize[2], tmp2, tmpstride,
1771                                                       nPbH / 2);
1772         } else {
1773             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp,  tmpstride, nPbH / 2);
1774             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
1775         }
1776     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1777         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1778         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1779
1780         luma_mc(s, tmp, tmpstride, ref1->frame,
1781                 &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
1782
1783         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1784             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1785             s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
1786                                                s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1787                                                s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1788                                                dst0, s->frame->linesize[0], tmp, tmpstride,
1789                                                nPbH);
1790         } else {
1791             s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
1792         }
1793
1794         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1795                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1796
1797         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1798             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1799             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1800                                                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1801                                                       s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1802                                                       dst1, s->frame->linesize[1], tmp, tmpstride, nPbH/2);
1803             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1804                                                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1805                                                       s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1806                                                       dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH/2);
1807         } else {
1808             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp,  tmpstride, nPbH / 2);
1809             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
1810         }
1811     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1812         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1813         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1814         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1815         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1816
1817         luma_mc(s, tmp, tmpstride, ref0->frame,
1818                 &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
1819         luma_mc(s, tmp2, tmpstride, ref1->frame,
1820                 &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
1821
1822         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1823             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1824             s->hevcdsp.weighted_pred_avg[pred_idx](s->sh.luma_log2_weight_denom,
1825                                                    s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1826                                                    s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1827                                                    s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1828                                                    s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1829                                                    dst0, s->frame->linesize[0],
1830                                                    tmp, tmp2, tmpstride, nPbH);
1831         } else {
1832             s->hevcdsp.put_unweighted_pred_avg[pred_idx](dst0, s->frame->linesize[0],
1833                                                          tmp, tmp2, tmpstride, nPbH);
1834         }
1835
1836         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1837                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1838         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1839                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1840
1841         if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1842             (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
1843             s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1844                                                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1845                                                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1846                                                           s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1847                                                           s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1848                                                           dst1, s->frame->linesize[1], tmp, tmp3,
1849                                                           tmpstride, nPbH / 2);
1850             s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1851                                                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1852                                                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1853                                                           s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1854                                                           s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1855                                                           dst2, s->frame->linesize[2], tmp2, tmp4,
1856                                                           tmpstride, nPbH / 2);
1857         } else {
1858             s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst1, s->frame->linesize[1], tmp, tmp3,  tmpstride, nPbH/2);
1859             s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbH/2);
1860         }
1861     }
1862 }
1863
1864 /**
1865  * 8.4.1
1866  */
1867 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1868                                 int prev_intra_luma_pred_flag)
1869 {
1870     HEVCLocalContext *lc = &s->HEVClc;
1871     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1872     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1873     int min_pu_width     = s->ps.sps->min_pu_width;
1874     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1875     int x0b              = x0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1876     int y0b              = y0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1877
1878     int cand_up   = (lc->ctb_up_flag || y0b) ?
1879                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1880     int cand_left = (lc->ctb_left_flag || x0b) ?
1881                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1882
1883     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1884
1885     MvField *tab_mvf = s->ref->tab_mvf;
1886     int intra_pred_mode;
1887     int candidate[3];
1888     int i, j;
1889
1890     // intra_pred_mode prediction does not cross vertical CTB boundaries
1891     if ((y0 - 1) < y_ctb)
1892         cand_up = INTRA_DC;
1893
1894     if (cand_left == cand_up) {
1895         if (cand_left < 2) {
1896             candidate[0] = INTRA_PLANAR;
1897             candidate[1] = INTRA_DC;
1898             candidate[2] = INTRA_ANGULAR_26;
1899         } else {
1900             candidate[0] = cand_left;
1901             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1902             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1903         }
1904     } else {
1905         candidate[0] = cand_left;
1906         candidate[1] = cand_up;
1907         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1908             candidate[2] = INTRA_PLANAR;
1909         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1910             candidate[2] = INTRA_DC;
1911         } else {
1912             candidate[2] = INTRA_ANGULAR_26;
1913         }
1914     }
1915
1916     if (prev_intra_luma_pred_flag) {
1917         intra_pred_mode = candidate[lc->pu.mpm_idx];
1918     } else {
1919         if (candidate[0] > candidate[1])
1920             FFSWAP(uint8_t, candidate[0], candidate[1]);
1921         if (candidate[0] > candidate[2])
1922             FFSWAP(uint8_t, candidate[0], candidate[2]);
1923         if (candidate[1] > candidate[2])
1924             FFSWAP(uint8_t, candidate[1], candidate[2]);
1925
1926         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1927         for (i = 0; i < 3; i++)
1928             if (intra_pred_mode >= candidate[i])
1929                 intra_pred_mode++;
1930     }
1931
1932     /* write the intra prediction units into the mv array */
1933     if (!size_in_pus)
1934         size_in_pus = 1;
1935     for (i = 0; i < size_in_pus; i++) {
1936         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1937                intra_pred_mode, size_in_pus);
1938
1939         for (j = 0; j < size_in_pus; j++) {
1940             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1941             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1942             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1943             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1944             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1945             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1946             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1947             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1948             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1949         }
1950     }
1951
1952     return intra_pred_mode;
1953 }
1954
1955 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1956                                           int log2_cb_size, int ct_depth)
1957 {
1958     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1959     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
1960     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
1961     int y;
1962
1963     for (y = 0; y < length; y++)
1964         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
1965                ct_depth, length);
1966 }
1967
1968 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1969                                   int log2_cb_size)
1970 {
1971     HEVCLocalContext *lc = &s->HEVClc;
1972     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1973     uint8_t prev_intra_luma_pred_flag[4];
1974     int split   = lc->cu.part_mode == PART_NxN;
1975     int pb_size = (1 << log2_cb_size) >> split;
1976     int side    = split + 1;
1977     int chroma_mode;
1978     int i, j;
1979
1980     for (i = 0; i < side; i++)
1981         for (j = 0; j < side; j++)
1982             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1983
1984     for (i = 0; i < side; i++) {
1985         for (j = 0; j < side; j++) {
1986             if (prev_intra_luma_pred_flag[2 * i + j])
1987                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1988             else
1989                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1990
1991             lc->pu.intra_pred_mode[2 * i + j] =
1992                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1993                                      prev_intra_luma_pred_flag[2 * i + j]);
1994         }
1995     }
1996
1997     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1998     if (chroma_mode != 4) {
1999         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2000             lc->pu.intra_pred_mode_c = 34;
2001         else
2002             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2003     } else {
2004         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2005     }
2006 }
2007
2008 static void intra_prediction_unit_default_value(HEVCContext *s,
2009                                                 int x0, int y0,
2010                                                 int log2_cb_size)
2011 {
2012     HEVCLocalContext *lc = &s->HEVClc;
2013     int pb_size          = 1 << log2_cb_size;
2014     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2015     int min_pu_width     = s->ps.sps->min_pu_width;
2016     MvField *tab_mvf     = s->ref->tab_mvf;
2017     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2018     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2019     int j, k;
2020
2021     if (size_in_pus == 0)
2022         size_in_pus = 1;
2023     for (j = 0; j < size_in_pus; j++) {
2024         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2025         for (k = 0; k < size_in_pus; k++)
2026             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2027     }
2028 }
2029
2030 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2031 {
2032     int cb_size          = 1 << log2_cb_size;
2033     HEVCLocalContext *lc = &s->HEVClc;
2034     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2035     int length           = cb_size >> log2_min_cb_size;
2036     int min_cb_width     = s->ps.sps->min_cb_width;
2037     int x_cb             = x0 >> log2_min_cb_size;
2038     int y_cb             = y0 >> log2_min_cb_size;
2039     int x, y, ret;
2040
2041     lc->cu.x                = x0;
2042     lc->cu.y                = y0;
2043     lc->cu.pred_mode        = MODE_INTRA;
2044     lc->cu.part_mode        = PART_2Nx2N;
2045     lc->cu.intra_split_flag = 0;
2046
2047     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2048     for (x = 0; x < 4; x++)
2049         lc->pu.intra_pred_mode[x] = 1;
2050     if (s->ps.pps->transquant_bypass_enable_flag) {
2051         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2052         if (lc->cu.cu_transquant_bypass_flag)
2053             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2054     } else
2055         lc->cu.cu_transquant_bypass_flag = 0;
2056
2057     if (s->sh.slice_type != I_SLICE) {
2058         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2059
2060         x = y_cb * min_cb_width + x_cb;
2061         for (y = 0; y < length; y++) {
2062             memset(&s->skip_flag[x], skip_flag, length);
2063             x += min_cb_width;
2064         }
2065         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2066     }
2067
2068     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2069         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2070         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2071
2072         if (!s->sh.disable_deblocking_filter_flag)
2073             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2074     } else {
2075         int pcm_flag = 0;
2076
2077         if (s->sh.slice_type != I_SLICE)
2078             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2079         if (lc->cu.pred_mode != MODE_INTRA ||
2080             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2081             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2082             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2083                                       lc->cu.pred_mode == MODE_INTRA;
2084         }
2085
2086         if (lc->cu.pred_mode == MODE_INTRA) {
2087             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2088                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2089                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2090                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2091             }
2092             if (pcm_flag) {
2093                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2094                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2095                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2096                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2097
2098                 if (ret < 0)
2099                     return ret;
2100             } else {
2101                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2102             }
2103         } else {
2104             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2105             switch (lc->cu.part_mode) {
2106             case PART_2Nx2N:
2107                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2108                 break;
2109             case PART_2NxN:
2110                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2111                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2112                 break;
2113             case PART_Nx2N:
2114                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2115                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2116                 break;
2117             case PART_2NxnU:
2118                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2119                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2120                 break;
2121             case PART_2NxnD:
2122                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2123                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2124                 break;
2125             case PART_nLx2N:
2126                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2127                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2128                 break;
2129             case PART_nRx2N:
2130                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2131                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2132                 break;
2133             case PART_NxN:
2134                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2135                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2136                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2137                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2138                 break;
2139             }
2140         }
2141
2142         if (!pcm_flag) {
2143             int rqt_root_cbf = 1;
2144
2145             if (lc->cu.pred_mode != MODE_INTRA &&
2146                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2147                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2148             }
2149             if (rqt_root_cbf) {
2150                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2151                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2152                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2153                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2154                                          log2_cb_size,
2155                                          log2_cb_size, 0, 0, 0, 0);
2156                 if (ret < 0)
2157                     return ret;
2158             } else {
2159                 if (!s->sh.disable_deblocking_filter_flag)
2160                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2161             }
2162         }
2163     }
2164
2165     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2166         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2167
2168     x = y_cb * min_cb_width + x_cb;
2169     for (y = 0; y < length; y++) {
2170         memset(&s->qp_y_tab[x], lc->qp_y, length);
2171         x += min_cb_width;
2172     }
2173
2174     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2175
2176     return 0;
2177 }
2178
/**
 * Parse one node of the coding quadtree whose top-left corner is (x0, y0).
 *
 * The node is either split into four half-size children, each of which is
 * parsed recursively when its top-left corner lies inside the picture, or it
 * is handed to hls_coding_unit() as a single coding unit.
 *
 * @param s            decoder context
 * @param x0           horizontal position of the node, in the units of
 *                     sps->width
 * @param y0           vertical position of the node, in the units of
 *                     sps->height
 * @param log2_cb_size log2 of the node size
 * @param cb_depth     depth of the node in the quadtree; stored in
 *                     lc->ct.depth
 * @return 0 on success, otherwise the negative value returned by a child
 *         node or by hls_coding_unit()
 */
2179 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2180                                int log2_cb_size, int cb_depth)
2181 {
2182     HEVCLocalContext *lc = &s->HEVClc;
2183     const int cb_size    = 1 << log2_cb_size;
2184     int split_cu;
2185
2186     lc->ct.depth = cb_depth;
         /* The split flag is read from the bitstream only when the block lies
          * completely inside the picture and is larger than the minimum coding
          * block size. Otherwise it is inferred: split as long as the block is
          * still larger than the minimum size. */
2187     if (x0 + cb_size <= s->ps.sps->width  &&
2188         y0 + cb_size <= s->ps.sps->height &&
2189         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2190         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2191     } else {
2192         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2193     }
         /* Reset the cu_qp_delta state when QP deltas are enabled and this node
          * is at least (log2_ctb_size - diff_cu_qp_delta_depth) in log2 size. */
2194     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2195         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2196         lc->tu.is_cu_qp_delta_coded = 0;
2197         lc->tu.cu_qp_delta          = 0;
2198     }
2199
2200     if (split_cu) {
2201         const int cb_size_split = cb_size >> 1;
2202         const int x1 = x0 + cb_size_split;
2203         const int y1 = y0 + cb_size_split;
2204
             /* The children are one level deeper and half the size. */
2205         log2_cb_size--;
2206         cb_depth++;
2207
/* Recurse into the child at (x, y) if its top-left corner is inside the
 * picture. NOTE: on error this macro returns from the enclosing function. */
2208 #define SUBDIVIDE(x, y)                                                \
2209 do {                                                                   \
2210     if (x < s->ps.sps->width && y < s->ps.sps->height) {                     \
2211         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2212         if (ret < 0)                                                   \
2213             return ret;                                                \
2214     }                                                                  \
2215 } while (0)
2216
             /* Children are visited in the order top-left, top-right,
              * bottom-left, bottom-right. */
2217         SUBDIVIDE(x0, y0);
2218         SUBDIVIDE(x1, y0);
2219         SUBDIVIDE(x0, y1);
2220         SUBDIVIDE(x1, y1);
2221     } else {
             /* Leaf node: parse it as one coding unit. */
2222         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2223         if (ret < 0)
2224             return ret;
2225     }
2226
2227     return 0;
2228 }
2229
/**
 * Set up the per-CTB neighbourhood state in the local context before a CTB
 * is decoded.
 *
 * Records the slice address of the CTB in s->tab_slice_address and updates
 * lc->first_qp_group, lc->start_of_tiles_x, lc->end_of_tiles_x,
 * lc->end_of_tiles_y, lc->boundary_flags and the four lc->ctb_*_flag
 * neighbour flags.
 *
 * @param s           decoder context
 * @param x_ctb       horizontal position of the CTB, in the units of
 *                    sps->width
 * @param y_ctb       vertical position of the CTB, in the units of
 *                    sps->height
 * @param ctb_addr_ts address of the CTB, used to index pps->ctb_addr_ts_to_rs
 *                    and pps->tile_id
 */
2230 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2231                                  int ctb_addr_ts)
2232 {
2233     HEVCLocalContext *lc  = &s->HEVClc;
2234     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2235     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
         /* Offset of this CTB from the slice address of the current slice. */
2236     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2237
2238     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2239
         /* Horizontal extent and first_qp_group marker. Entropy coding sync
          * takes precedence over tiles; with neither enabled, the extent is
          * the whole picture width. */
2240     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2241         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2242             lc->first_qp_group = 1;
2243         lc->end_of_tiles_x = s->ps.sps->width;
2244     } else if (s->ps.pps->tiles_enabled_flag) {
             /* The tile id differs from that of the preceding CTB: this CTB
              * starts a new tile, so refresh the tile's horizontal bounds. */
2245         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2246             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2247             lc->start_of_tiles_x = x_ctb;
2248             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2249             lc->first_qp_group   = 1;
2250         }
2251     } else {
2252         lc->end_of_tiles_x = s->ps.sps->width;
2253     }
2254
         /* Bottom edge of this CTB, clipped to the picture height. */
2255     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2256
         /* Flag the left/upper edges of this CTB that coincide with a tile or
          * slice boundary. With tiles the left and upper neighbours are looked
          * up explicitly; without tiles the slice boundaries follow from the
          * CTB's offset within the slice. */
2257     lc->boundary_flags = 0;
2258     if (s->ps.pps->tiles_enabled_flag) {
2259         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2260             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2261         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2262             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2263         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2264             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2265         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2266             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2267     } else {
2268         if (!ctb_addr_in_slice)
2269             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2270         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2271             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2272     }
2273
         /* Neighbour flags: each requires the neighbour position to be inside
          * the picture, the CTB to be far enough into the slice, and the
          * neighbour to be in the same tile (checked via the tile boundary
          * flags for left/up, and via tile_id for the two diagonal ones). */
2274     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2275     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2276     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2277     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2278 }
2279
2280 static int hls_slice_data(HEVCContext *s)
2281 {
2282     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2283     int more_data   = 1;
2284     int x_ctb       = 0;
2285     int y_ctb       = 0;
2286     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2287     int ret;
2288
2289     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2290         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2291
2292         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2293         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2294         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2295
2296         ff_hevc_cabac_init(s, ctb_addr_ts);
2297
2298         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2299
2300         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2301         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2302         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2303
2304         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2305         if (ret < 0)
2306             return ret;
2307         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2308
2309         ctb_addr_ts++;
2310         ff_hevc_save_states(s, ctb_addr_ts);
2311         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2312     }
2313
2314     if (x_ctb + ctb_size >= s->ps.sps->width &&
2315         y_ctb + ctb_size >= s->ps.sps->height)
2316         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2317
2318     return ctb_addr_ts;
2319 }
2320
2321 static void restore_tqb_pixels(HEVCContext *s)
2322 {
2323     int min_pu_size = 1 << s->ps.sps->log2_min_pu_size;
2324     int x, y, c_idx;
2325
2326     for (c_idx = 0; c_idx < 3; c_idx++) {
2327         ptrdiff_t stride = s->frame->linesize[c_idx];
2328         int hshift       = s->ps.sps->hshift[c_idx];
2329         int vshift       = s->ps.sps->vshift[c_idx];
2330         for (y = 0; y < s->ps.sps->min_pu_height; y++) {
2331             for (x = 0; x < s->ps.sps->min_pu_width; x++) {
2332                 if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
2333                     int n;
2334                     int len      = min_pu_size >> hshift;
2335                     uint8_t *src = &s->frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2336                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2337                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2338                         memcpy(dst, src, len);
2339                         src += stride;
2340                         dst += stride;
2341                     }
2342                 }
2343             }
2344         }
2345     }
2346 }
2347
2348 static int set_side_data(HEVCContext *s)
2349 {
2350     AVFrame *out = s->ref->frame;
2351
2352     if (s->sei_frame_packing_present &&
2353         s->frame_packing_arrangement_type >= 3 &&
2354         s->frame_packing_arrangement_type <= 5 &&
2355         s->content_interpretation_type > 0 &&
2356         s->content_interpretation_type < 3) {
2357         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2358         if (!stereo)
2359             return AVERROR(ENOMEM);
2360
2361         switch (s->frame_packing_arrangement_type) {
2362         case 3:
2363             if (s->quincunx_subsampling)
2364                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2365             else
2366                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2367             break;
2368         case 4:
2369             stereo->type = AV_STEREO3D_TOPBOTTOM;
2370             break;
2371         case 5:
2372             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2373             break;
2374         }
2375
2376         if (s->content_interpretation_type == 2)
2377             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2378     }
2379
2380     if (s->sei_display_orientation_present &&
2381         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2382         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2383         AVFrameSideData *rotation = av_frame_new_side_data(out,
2384                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2385                                                            sizeof(int32_t) * 9);
2386         if (!rotation)
2387             return AVERROR(ENOMEM);
2388
2389         av_display_rotation_set((int32_t *)rotation->data, angle);
2390         av_display_matrix_flip((int32_t *)rotation->data,
2391                                s->sei_hflip, s->sei_vflip);
2392     }
2393
2394     return 0;
2395 }
2396
/**
 * Prepare the decoder state for a new frame.
 *
 * Clears the per-frame tables, sets up a new reference frame and its
 * reference picture set, exports side data, fetches a frame into
 * s->output_frame and finally signals that frame setup is finished.
 *
 * @param s decoder context
 * @return 0 on success; on failure the negative error code of the failing
 *         step, after s->ref has been unreferenced and reset to NULL
 */
2397 static int hevc_frame_start(HEVCContext *s)
2398 {
2399     HEVCLocalContext *lc = &s->HEVClc;
2400     int ret;
2401
         /* Reset the deblocking boundary-strength, cbf_luma and is_pcm tables. */
2402     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2403     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2404     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2405     memset(s->is_pcm,        0, s->ps.sps->min_pu_width * s->ps.sps->min_pu_height);
2406
2407     lc->start_of_tiles_x = 0;
2408     s->is_decoded        = 0;
         /* Remembered so that later slices of the frame can be checked against
          * the NAL type of the first one. */
2409     s->first_nal_type    = s->nal_unit_type;
2410
         /* With tiles, the first tile ends after the first tile column. */
2411     if (s->ps.pps->tiles_enabled_flag)
2412         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2413
         /* The new reference is handed out through s->sao_frame when SAO is
          * enabled and through s->frame otherwise. */
2414     ret = ff_hevc_set_new_ref(s, s->ps.sps->sao_enabled ? &s->sao_frame : &s->frame,
2415                               s->poc);
2416     if (ret < 0)
2417         goto fail;
2418
2419     ret = ff_hevc_frame_rps(s);
2420     if (ret < 0) {
2421         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2422         goto fail;
2423     }
2424
2425     s->ref->frame->key_frame = IS_IRAP(s);
2426
2427     ret = set_side_data(s);
2428     if (ret < 0)
2429         goto fail;
2430
         /* Drop whatever is still held in output_frame, then try to fetch the
          * next frame to output. */
2431     av_frame_unref(s->output_frame);
2432     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2433     if (ret < 0)
2434         goto fail;
2435
2436     ff_thread_finish_setup(s->avctx);
2437
2438     return 0;
2439
2440 fail:
         /* Release the frame set up above, if any, so that no half-initialized
          * reference stays current. */
2441     if (s->ref)
2442         ff_hevc_unref_frame(s, s->ref, ~0);
2443     s->ref = NULL;
2444     return ret;
2445 }
2446
2447 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2448 {
2449     HEVCLocalContext *lc = &s->HEVClc;
2450     GetBitContext *gb    = &lc->gb;
2451     int ctb_addr_ts, ret;
2452
2453     *gb              = nal->gb;
2454     s->nal_unit_type = nal->type;
2455     s->temporal_id   = nal->temporal_id;
2456
2457     switch (s->nal_unit_type) {
2458     case NAL_VPS:
2459         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2460         if (ret < 0)
2461             goto fail;
2462         break;
2463     case NAL_SPS:
2464         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2465                                      s->apply_defdispwin);
2466         if (ret < 0)
2467             goto fail;
2468         break;
2469     case NAL_PPS:
2470         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2471         if (ret < 0)
2472             goto fail;
2473         break;
2474     case NAL_SEI_PREFIX:
2475     case NAL_SEI_SUFFIX:
2476         ret = ff_hevc_decode_nal_sei(s);
2477         if (ret < 0)
2478             goto fail;
2479         break;
2480     case NAL_TRAIL_R:
2481     case NAL_TRAIL_N:
2482     case NAL_TSA_N:
2483     case NAL_TSA_R:
2484     case NAL_STSA_N:
2485     case NAL_STSA_R:
2486     case NAL_BLA_W_LP:
2487     case NAL_BLA_W_RADL:
2488     case NAL_BLA_N_LP:
2489     case NAL_IDR_W_RADL:
2490     case NAL_IDR_N_LP:
2491     case NAL_CRA_NUT:
2492     case NAL_RADL_N:
2493     case NAL_RADL_R:
2494     case NAL_RASL_N:
2495     case NAL_RASL_R:
2496         ret = hls_slice_header(s);
2497         if (ret < 0)
2498             return ret;
2499
2500         if (s->max_ra == INT_MAX) {
2501             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2502                 s->max_ra = s->poc;
2503             } else {
2504                 if (IS_IDR(s))
2505                     s->max_ra = INT_MIN;
2506             }
2507         }
2508
2509         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2510             s->poc <= s->max_ra) {
2511             s->is_decoded = 0;
2512             break;
2513         } else {
2514             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2515                 s->max_ra = INT_MIN;
2516         }
2517
2518         if (s->sh.first_slice_in_pic_flag) {
2519             ret = hevc_frame_start(s);
2520             if (ret < 0)
2521                 return ret;
2522         } else if (!s->ref) {
2523             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2524             goto fail;
2525         }
2526
2527         if (s->nal_unit_type != s->first_nal_type) {
2528             av_log(s->avctx, AV_LOG_ERROR,
2529                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2530                    s->first_nal_type, s->nal_unit_type);
2531             return AVERROR_INVALIDDATA;
2532         }
2533
2534         if (!s->sh.dependent_slice_segment_flag &&
2535             s->sh.slice_type != I_SLICE) {
2536             ret = ff_hevc_slice_rpl(s);
2537             if (ret < 0) {
2538                 av_log(s->avctx, AV_LOG_WARNING,
2539                        "Error constructing the reference lists for the current slice.\n");
2540                 goto fail;
2541             }
2542         }
2543
2544         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2545             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2546             if (ret < 0)
2547                 goto fail;
2548         }
2549
2550         if (s->avctx->hwaccel) {
2551             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2552             if (ret < 0)
2553                 goto fail;
2554         } else {
2555             ctb_addr_ts = hls_slice_data(s);
2556             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2557                 s->is_decoded = 1;
2558                 if ((s->ps.pps->transquant_bypass_enable_flag ||
2559                      (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) &&
2560                     s->ps.sps->sao_enabled)
2561                     restore_tqb_pixels(s);
2562             }
2563
2564             if (ctb_addr_ts < 0) {
2565                 ret = ctb_addr_ts;
2566                 goto fail;
2567             }
2568         }
2569         break;
2570     case NAL_EOS_NUT:
2571     case NAL_EOB_NUT:
2572         s->seq_decode = (s->seq_decode + 1) & 0xff;
2573         s->max_ra     = INT_MAX;
2574         break;
2575     case NAL_AUD:
2576     case NAL_FD_NUT:
2577         break;
2578     default:
2579         av_log(s->avctx, AV_LOG_INFO,
2580                "Skipping NAL unit %d\n", s->nal_unit_type);
2581     }
2582
2583     return 0;
2584 fail:
2585     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2586         return ret;
2587     return 0;
2588 }
2589
2590 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2591 {
2592     int i, ret = 0;
2593
2594     s->ref = NULL;
2595     s->eos = 0;
2596
2597     /* split the input packet into NAL units, so we know the upper bound on the
2598      * number of slices in the frame */
2599     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
2600                                 s->nal_length_size, s->avctx->codec_id);
2601     if (ret < 0) {
2602         av_log(s->avctx, AV_LOG_ERROR,
2603                "Error splitting the input into NAL units.\n");
2604         return ret;
2605     }
2606
2607     for (i = 0; i < s->pkt.nb_nals; i++) {
2608         if (s->pkt.nals[i].type == NAL_EOB_NUT ||
2609             s->pkt.nals[i].type == NAL_EOS_NUT)
2610             s->eos = 1;
2611     }
2612
2613     /* decode the NAL units */
2614     for (i = 0; i < s->pkt.nb_nals; i++) {
2615         ret = decode_nal_unit(s, &s->pkt.nals[i]);
2616         if (ret < 0) {
2617             av_log(s->avctx, AV_LOG_WARNING,
2618                    "Error parsing NAL unit #%d.\n", i);
2619             goto fail;
2620         }
2621     }
2622
2623 fail:
2624     if (s->ref)
2625         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2626
2627     return ret;
2628 }
2629
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits.
 *
 * @param log_ctx context passed to av_log()
 * @param level   log level passed to av_log()
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *cur = md5;
    const uint8_t *end = md5 + 16;

    while (cur < end)
        av_log(log_ctx, level, "%02"PRIx8, *cur++);
}
2636
2637 static int verify_md5(HEVCContext *s, AVFrame *frame)
2638 {
2639     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2640     int pixel_shift;
2641     int i, j;
2642
2643     if (!desc)
2644         return AVERROR(EINVAL);
2645
2646     pixel_shift = desc->comp[0].depth > 8;
2647
2648     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2649            s->poc);
2650
2651     /* the checksums are LE, so we have to byteswap for >8bpp formats
2652      * on BE arches */
2653 #if HAVE_BIGENDIAN
2654     if (pixel_shift && !s->checksum_buf) {
2655         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2656                        FFMAX3(frame->linesize[0], frame->linesize[1],
2657                               frame->linesize[2]));
2658         if (!s->checksum_buf)
2659             return AVERROR(ENOMEM);
2660     }
2661 #endif
2662
2663     for (i = 0; frame->data[i]; i++) {
2664         int width  = s->avctx->coded_width;
2665         int height = s->avctx->coded_height;
2666         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2667         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2668         uint8_t md5[16];
2669
2670         av_md5_init(s->md5_ctx);
2671         for (j = 0; j < h; j++) {
2672             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2673 #if HAVE_BIGENDIAN
2674             if (pixel_shift) {
2675                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2676                                     (const uint16_t *) src, w);
2677                 src = s->checksum_buf;
2678             }
2679 #endif
2680             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2681         }
2682         av_md5_final(s->md5_ctx, md5);
2683
2684         if (!memcmp(md5, s->md5[i], 16)) {
2685             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2686             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2687             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2688         } else {
2689             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2690             print_md5(s->avctx, AV_LOG_ERROR, md5);
2691             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2692             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2693             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2694             return AVERROR_INVALIDDATA;
2695         }
2696     }
2697
2698     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2699
2700     return 0;
2701 }
2702
2703 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2704                              AVPacket *avpkt)
2705 {
2706     int ret;
2707     HEVCContext *s = avctx->priv_data;
2708
2709     if (!avpkt->size) {
2710         ret = ff_hevc_output_frame(s, data, 1);
2711         if (ret < 0)
2712             return ret;
2713
2714         *got_output = ret;
2715         return 0;
2716     }
2717
2718     s->ref = NULL;
2719     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2720     if (ret < 0)
2721         return ret;
2722
2723     if (avctx->hwaccel) {
2724         if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2725             av_log(avctx, AV_LOG_ERROR,
2726                    "hardware accelerator failed to decode picture\n");
2727     } else {
2728         /* verify the SEI checksum */
2729         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2730             s->is_md5) {
2731             ret = verify_md5(s, s->ref->frame);
2732             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2733                 ff_hevc_unref_frame(s, s->ref, ~0);
2734                 return ret;
2735             }
2736         }
2737     }
2738     s->is_md5 = 0;
2739
2740     if (s->is_decoded) {
2741         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2742         s->is_decoded = 0;
2743     }
2744
2745     if (s->output_frame->buf[0]) {
2746         av_frame_move_ref(data, s->output_frame);
2747         *got_output = 1;
2748     }
2749
2750     return avpkt->size;
2751 }
2752
2753 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2754 {
2755     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2756     if (ret < 0)
2757         return ret;
2758
2759     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2760     if (!dst->tab_mvf_buf)
2761         goto fail;
2762     dst->tab_mvf = src->tab_mvf;
2763
2764     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2765     if (!dst->rpl_tab_buf)
2766         goto fail;
2767     dst->rpl_tab = src->rpl_tab;
2768
2769     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2770     if (!dst->rpl_buf)
2771         goto fail;
2772
2773     dst->poc        = src->poc;
2774     dst->ctb_count  = src->ctb_count;
2775     dst->window     = src->window;
2776     dst->flags      = src->flags;
2777     dst->sequence   = src->sequence;
2778
2779     if (src->hwaccel_picture_private) {
2780         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2781         if (!dst->hwaccel_priv_buf)
2782             goto fail;
2783         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
2784     }
2785
2786     return 0;
2787 fail:
2788     ff_hevc_unref_frame(s, dst, ~0);
2789     return AVERROR(ENOMEM);
2790 }
2791
2792 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2793 {
2794     HEVCContext       *s = avctx->priv_data;
2795     int i;
2796
2797     pic_arrays_free(s);
2798
2799     av_freep(&s->md5_ctx);
2800
2801     av_frame_free(&s->tmp_frame);
2802     av_frame_free(&s->output_frame);
2803
2804     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2805         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2806         av_frame_free(&s->DPB[i].frame);
2807     }
2808
2809     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
2810         av_buffer_unref(&s->ps.vps_list[i]);
2811     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
2812         av_buffer_unref(&s->ps.sps_list[i]);
2813     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
2814         av_buffer_unref(&s->ps.pps_list[i]);
2815
2816     ff_h2645_packet_uninit(&s->pkt);
2817
2818     return 0;
2819 }
2820
2821 static av_cold int hevc_init_context(AVCodecContext *avctx)
2822 {
2823     HEVCContext *s = avctx->priv_data;
2824     int i;
2825
2826     s->avctx = avctx;
2827
2828     s->tmp_frame = av_frame_alloc();
2829     if (!s->tmp_frame)
2830         goto fail;
2831
2832     s->output_frame = av_frame_alloc();
2833     if (!s->output_frame)
2834         goto fail;
2835
2836     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2837         s->DPB[i].frame = av_frame_alloc();
2838         if (!s->DPB[i].frame)
2839             goto fail;
2840         s->DPB[i].tf.f = s->DPB[i].frame;
2841     }
2842
2843     s->max_ra = INT_MAX;
2844
2845     s->md5_ctx = av_md5_alloc();
2846     if (!s->md5_ctx)
2847         goto fail;
2848
2849     ff_bswapdsp_init(&s->bdsp);
2850
2851     s->context_initialized = 1;
2852
2853     return 0;
2854
2855 fail:
2856     hevc_decode_free(avctx);
2857     return AVERROR(ENOMEM);
2858 }
2859
2860 static int hevc_update_thread_context(AVCodecContext *dst,
2861                                       const AVCodecContext *src)
2862 {
2863     HEVCContext *s  = dst->priv_data;
2864     HEVCContext *s0 = src->priv_data;
2865     int i, ret;
2866
2867     if (!s->context_initialized) {
2868         ret = hevc_init_context(dst);
2869         if (ret < 0)
2870             return ret;
2871     }
2872
2873     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2874         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2875         if (s0->DPB[i].frame->buf[0]) {
2876             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
2877             if (ret < 0)
2878                 return ret;
2879         }
2880     }
2881
2882     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
2883         av_buffer_unref(&s->ps.vps_list[i]);
2884         if (s0->ps.vps_list[i]) {
2885             s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
2886             if (!s->ps.vps_list[i])
2887                 return AVERROR(ENOMEM);
2888         }
2889     }
2890
2891     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2892         av_buffer_unref(&s->ps.sps_list[i]);
2893         if (s0->ps.sps_list[i]) {
2894             s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
2895             if (!s->ps.sps_list[i])
2896                 return AVERROR(ENOMEM);
2897         }
2898     }
2899
2900     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
2901         av_buffer_unref(&s->ps.pps_list[i]);
2902         if (s0->ps.pps_list[i]) {
2903             s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
2904             if (!s->ps.pps_list[i])
2905                 return AVERROR(ENOMEM);
2906         }
2907     }
2908
2909     if (s->ps.sps != s0->ps.sps)
2910         ret = set_sps(s, s0->ps.sps);
2911
2912     s->seq_decode = s0->seq_decode;
2913     s->seq_output = s0->seq_output;
2914     s->pocTid0    = s0->pocTid0;
2915     s->max_ra     = s0->max_ra;
2916
2917     s->is_nalff        = s0->is_nalff;
2918     s->nal_length_size = s0->nal_length_size;
2919
2920     if (s0->eos) {
2921         s->seq_decode = (s->seq_decode + 1) & 0xff;
2922         s->max_ra = INT_MAX;
2923     }
2924
2925     return 0;
2926 }
2927
2928 static int hevc_decode_extradata(HEVCContext *s)
2929 {
2930     AVCodecContext *avctx = s->avctx;
2931     GetByteContext gb;
2932     int ret, i;
2933
2934     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
2935
2936     if (avctx->extradata_size > 3 &&
2937         (avctx->extradata[0] || avctx->extradata[1] ||
2938          avctx->extradata[2] > 1)) {
2939         /* It seems the extradata is encoded as hvcC format.
2940          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
2941          * is finalized. When finalized, configurationVersion will be 1 and we
2942          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
2943         int i, j, num_arrays, nal_len_size;
2944
2945         s->is_nalff = 1;
2946
2947         bytestream2_skip(&gb, 21);
2948         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
2949         num_arrays   = bytestream2_get_byte(&gb);
2950
2951         /* nal units in the hvcC always have length coded with 2 bytes,
2952          * so put a fake nal_length_size = 2 while parsing them */
2953         s->nal_length_size = 2;
2954
2955         /* Decode nal units from hvcC. */
2956         for (i = 0; i < num_arrays; i++) {
2957             int type = bytestream2_get_byte(&gb) & 0x3f;
2958             int cnt  = bytestream2_get_be16(&gb);
2959
2960             for (j = 0; j < cnt; j++) {
2961                 // +2 for the nal size field
2962                 int nalsize = bytestream2_peek_be16(&gb) + 2;
2963                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
2964                     av_log(s->avctx, AV_LOG_ERROR,
2965                            "Invalid NAL unit size in extradata.\n");
2966                     return AVERROR_INVALIDDATA;
2967                 }
2968
2969                 ret = decode_nal_units(s, gb.buffer, nalsize);
2970                 if (ret < 0) {
2971                     av_log(avctx, AV_LOG_ERROR,
2972                            "Decoding nal unit %d %d from hvcC failed\n",
2973                            type, i);
2974                     return ret;
2975                 }
2976                 bytestream2_skip(&gb, nalsize);
2977             }
2978         }
2979
2980         /* Now store right nal length size, that will be used to parse
2981          * all other nals */
2982         s->nal_length_size = nal_len_size;
2983     } else {
2984         s->is_nalff = 0;
2985         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
2986         if (ret < 0)
2987             return ret;
2988     }
2989
2990     /* export stream parameters from the first SPS */
2991     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2992         if (s->ps.sps_list[i]) {
2993             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
2994             export_stream_params(s->avctx, &s->ps, sps);
2995             break;
2996         }
2997     }
2998
2999     return 0;
3000 }
3001
3002 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3003 {
3004     HEVCContext *s = avctx->priv_data;
3005     int ret;
3006
3007     avctx->internal->allocate_progress = 1;
3008
3009     ret = hevc_init_context(avctx);
3010     if (ret < 0)
3011         return ret;
3012
3013     if (avctx->extradata_size > 0 && avctx->extradata) {
3014         ret = hevc_decode_extradata(s);
3015         if (ret < 0) {
3016             hevc_decode_free(avctx);
3017             return ret;
3018         }
3019     }
3020
3021     return 0;
3022 }
3023
3024 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3025 {
3026     HEVCContext *s = avctx->priv_data;
3027     int ret;
3028
3029     memset(s, 0, sizeof(*s));
3030
3031     ret = hevc_init_context(avctx);
3032     if (ret < 0)
3033         return ret;
3034
3035     return 0;
3036 }
3037
3038 static void hevc_decode_flush(AVCodecContext *avctx)
3039 {
3040     HEVCContext *s = avctx->priv_data;
3041     ff_hevc_flush_dpb(s);
3042     s->max_ra = INT_MAX;
3043 }
3044
3045 #define OFFSET(x) offsetof(HEVCContext, x)
3046 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3047
3048 static const AVOption options[] = {
3049     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3050         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3051     { NULL },
3052 };
3053
3054 static const AVClass hevc_decoder_class = {
3055     .class_name = "HEVC decoder",
3056     .item_name  = av_default_item_name,
3057     .option     = options,
3058     .version    = LIBAVUTIL_VERSION_INT,
3059 };
3060
3061 AVCodec ff_hevc_decoder = {
3062     .name                  = "hevc",
3063     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3064     .type                  = AVMEDIA_TYPE_VIDEO,
3065     .id                    = AV_CODEC_ID_HEVC,
3066     .priv_data_size        = sizeof(HEVCContext),
3067     .priv_class            = &hevc_decoder_class,
3068     .init                  = hevc_decode_init,
3069     .close                 = hevc_decode_free,
3070     .decode                = hevc_decode_frame,
3071     .flush                 = hevc_decode_flush,
3072     .update_thread_context = hevc_update_thread_context,
3073     .init_thread_copy      = hevc_init_thread_copy,
3074     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3075                              AV_CODEC_CAP_FRAME_THREADS,
3076     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3077 };