]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevcdec.c
utvideodec: Convert to the new bitstream reader
[ffmpeg] / libavcodec / hevcdec.c
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/md5.h"
31 #include "libavutil/opt.h"
32 #include "libavutil/pixdesc.h"
33 #include "libavutil/stereo3d.h"
34
35 #include "bswapdsp.h"
36 #include "bytestream.h"
37 #include "cabac_functions.h"
38 #include "golomb.h"
39 #include "hevc.h"
40 #include "hevc_data.h"
41 #include "hevcdec.h"
42 #include "profiles.h"
43
/*
 * Per-index extra sample counts shared with other HEVC files (non-static).
 * Index 0 needs nothing; indices 1..3 need 3 samples before and 4 after,
 * and ff_hevc_qpel_extra[i] is always before[i] + after[i].
 * NOTE(review): presumably indexed by the luma quarter-pel fraction of a
 * motion vector -- confirm against the motion compensation code.
 */
const uint8_t ff_hevc_qpel_extra_before[4] = {
    0, 3, 3, 3,
};
const uint8_t ff_hevc_qpel_extra_after[4] = {
    0, 4, 4, 4,
};
const uint8_t ff_hevc_qpel_extra[4] = {
    0, 7, 7, 7,
};
47
/*
 * Coefficient scan tables.
 *
 * The *_x / *_y tables map a scan position to the x / y coordinate visited
 * at that position.  The *_inv tables go the other way: they are indexed
 * [y][x] and yield the scan position (see the 2x2 diagonal pair, where
 * diag_scan2x2_inv[diag_scan2x2_y[i]][diag_scan2x2_x[i]] == i).
 */

/* Trivial scan for a single position. */
static const uint8_t scan_1x1[1] = { 0 };

/* Horizontal (row by row) scan, 2x2. */
static const uint8_t horiz_scan2x2_x[4] = { 0, 1,   0, 1 };
static const uint8_t horiz_scan2x2_y[4] = { 0, 0,   1, 1 };

/* Horizontal (row by row) scan, 4x4. */
static const uint8_t horiz_scan4x4_x[16] = {
    0, 1, 2, 3,   0, 1, 2, 3,   0, 1, 2, 3,   0, 1, 2, 3,
};
static const uint8_t horiz_scan4x4_y[16] = {
    0, 0, 0, 0,   1, 1, 1, 1,   2, 2, 2, 2,   3, 3, 3, 3,
};

/*
 * Inverse horizontal scan, 8x8: positions are numbered row by row inside
 * each 4x4 quadrant, quadrants being numbered left-to-right, top-to-bottom.
 */
static const uint8_t horiz_scan8x8_inv[8][8] = {
    {  0,  1,  2,  3,   16, 17, 18, 19 },
    {  4,  5,  6,  7,   20, 21, 22, 23 },
    {  8,  9, 10, 11,   24, 25, 26, 27 },
    { 12, 13, 14, 15,   28, 29, 30, 31 },

    { 32, 33, 34, 35,   48, 49, 50, 51 },
    { 36, 37, 38, 39,   52, 53, 54, 55 },
    { 40, 41, 42, 43,   56, 57, 58, 59 },
    { 44, 45, 46, 47,   60, 61, 62, 63 },
};

/* Diagonal scan, 2x2: coordinates per scan position and the inverse map. */
static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
static const uint8_t diag_scan2x2_inv[2][2] = {
    { 0, 2 },
    { 1, 3 },
};

/* Inverse diagonal scan, 4x4. */
static const uint8_t diag_scan4x4_inv[4][4] = {
    { 0,  2,  5,  9 },
    { 1,  4,  8, 12 },
    { 3,  7, 11, 14 },
    { 6, 10, 13, 15 },
};

/* Inverse diagonal scan, 8x8. */
static const uint8_t diag_scan8x8_inv[8][8] = {
    {  0,  2,  5,  9, 14, 20, 27, 35 },
    {  1,  4,  8, 13, 19, 26, 34, 42 },
    {  3,  7, 12, 18, 25, 33, 41, 48 },
    {  6, 11, 17, 24, 32, 40, 47, 53 },
    { 10, 16, 23, 31, 39, 46, 52, 57 },
    { 15, 22, 30, 38, 45, 51, 56, 60 },
    { 21, 29, 37, 44, 50, 55, 59, 62 },
    { 28, 36, 43, 49, 54, 58, 61, 63 },
};
105
106 /**
107  * NOTE: Each function hls_foo corresponds to the function foo in the
108  * specification (HLS stands for High Level Syntax).
109  */
110
111 /**
112  * Section 5.7
113  */
114
115 /* free everything allocated  by pic_arrays_init() */
116 static void pic_arrays_free(HEVCContext *s)
117 {
118     av_freep(&s->sao);
119     av_freep(&s->deblock);
120
121     av_freep(&s->skip_flag);
122     av_freep(&s->tab_ct_depth);
123
124     av_freep(&s->tab_ipm);
125     av_freep(&s->cbf_luma);
126     av_freep(&s->is_pcm);
127
128     av_freep(&s->qp_y_tab);
129     av_freep(&s->tab_slice_address);
130     av_freep(&s->filter_slice_edges);
131
132     av_freep(&s->horizontal_bs);
133     av_freep(&s->vertical_bs);
134
135     av_buffer_pool_uninit(&s->tab_mvf_pool);
136     av_buffer_pool_uninit(&s->rpl_tab_pool);
137 }
138
139 /* allocate arrays that depend on frame dimensions */
140 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
141 {
142     int log2_min_cb_size = sps->log2_min_cb_size;
143     int width            = sps->width;
144     int height           = sps->height;
145     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
146                            ((height >> log2_min_cb_size) + 1);
147     int ctb_count        = sps->ctb_width * sps->ctb_height;
148     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
149
150     s->bs_width  = width  >> 3;
151     s->bs_height = height >> 3;
152
153     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
154     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
155     if (!s->sao || !s->deblock)
156         goto fail;
157
158     s->skip_flag    = av_malloc(pic_size_in_ctb);
159     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
160     if (!s->skip_flag || !s->tab_ct_depth)
161         goto fail;
162
163     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
164     s->tab_ipm  = av_mallocz(min_pu_size);
165     s->is_pcm   = av_malloc(min_pu_size);
166     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
167         goto fail;
168
169     s->filter_slice_edges = av_malloc(ctb_count);
170     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
171                                       sizeof(*s->tab_slice_address));
172     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
173                                       sizeof(*s->qp_y_tab));
174     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
175         goto fail;
176
177     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
178     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
179     if (!s->horizontal_bs || !s->vertical_bs)
180         goto fail;
181
182     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
183                                           av_buffer_alloc);
184     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
185                                           av_buffer_allocz);
186     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
187         goto fail;
188
189     return 0;
190
191 fail:
192     pic_arrays_free(s);
193     return AVERROR(ENOMEM);
194 }
195
196 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
197 {
198     int i = 0;
199     int j = 0;
200     uint8_t luma_weight_l0_flag[16];
201     uint8_t chroma_weight_l0_flag[16];
202     uint8_t luma_weight_l1_flag[16];
203     uint8_t chroma_weight_l1_flag[16];
204
205     s->sh.luma_log2_weight_denom = av_clip(get_ue_golomb_long(gb), 0, 7);
206     if (s->ps.sps->chroma_format_idc != 0) {
207         int delta = get_se_golomb(gb);
208         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
209     }
210
211     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
212         luma_weight_l0_flag[i] = get_bits1(gb);
213         if (!luma_weight_l0_flag[i]) {
214             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
215             s->sh.luma_offset_l0[i] = 0;
216         }
217     }
218     if (s->ps.sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
219         for (i = 0; i < s->sh.nb_refs[L0]; i++)
220             chroma_weight_l0_flag[i] = get_bits1(gb);
221     } else {
222         for (i = 0; i < s->sh.nb_refs[L0]; i++)
223             chroma_weight_l0_flag[i] = 0;
224     }
225     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
226         if (luma_weight_l0_flag[i]) {
227             int delta_luma_weight_l0 = get_se_golomb(gb);
228             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
229             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
230         }
231         if (chroma_weight_l0_flag[i]) {
232             for (j = 0; j < 2; j++) {
233                 int delta_chroma_weight_l0 = get_se_golomb(gb);
234                 int delta_chroma_offset_l0 = get_se_golomb(gb);
235                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
236                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
237                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
238             }
239         } else {
240             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
241             s->sh.chroma_offset_l0[i][0] = 0;
242             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
243             s->sh.chroma_offset_l0[i][1] = 0;
244         }
245     }
246     if (s->sh.slice_type == HEVC_SLICE_B) {
247         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
248             luma_weight_l1_flag[i] = get_bits1(gb);
249             if (!luma_weight_l1_flag[i]) {
250                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
251                 s->sh.luma_offset_l1[i] = 0;
252             }
253         }
254         if (s->ps.sps->chroma_format_idc != 0) {
255             for (i = 0; i < s->sh.nb_refs[L1]; i++)
256                 chroma_weight_l1_flag[i] = get_bits1(gb);
257         } else {
258             for (i = 0; i < s->sh.nb_refs[L1]; i++)
259                 chroma_weight_l1_flag[i] = 0;
260         }
261         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
262             if (luma_weight_l1_flag[i]) {
263                 int delta_luma_weight_l1 = get_se_golomb(gb);
264                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
265                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
266             }
267             if (chroma_weight_l1_flag[i]) {
268                 for (j = 0; j < 2; j++) {
269                     int delta_chroma_weight_l1 = get_se_golomb(gb);
270                     int delta_chroma_offset_l1 = get_se_golomb(gb);
271                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
272                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
273                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
274                 }
275             } else {
276                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
277                 s->sh.chroma_offset_l1[i][0] = 0;
278                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
279                 s->sh.chroma_offset_l1[i][1] = 0;
280             }
281         }
282     }
283 }
284
285 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
286 {
287     const HEVCSPS *sps = s->ps.sps;
288     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
289     int prev_delta_msb = 0;
290     unsigned int nb_sps = 0, nb_sh;
291     int i;
292
293     rps->nb_refs = 0;
294     if (!sps->long_term_ref_pics_present_flag)
295         return 0;
296
297     if (sps->num_long_term_ref_pics_sps > 0)
298         nb_sps = get_ue_golomb_long(gb);
299     nb_sh = get_ue_golomb_long(gb);
300
301     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
302         return AVERROR_INVALIDDATA;
303
304     rps->nb_refs = nb_sh + nb_sps;
305
306     for (i = 0; i < rps->nb_refs; i++) {
307         uint8_t delta_poc_msb_present;
308
309         if (i < nb_sps) {
310             uint8_t lt_idx_sps = 0;
311
312             if (sps->num_long_term_ref_pics_sps > 1)
313                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
314
315             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
316             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
317         } else {
318             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
319             rps->used[i] = get_bits1(gb);
320         }
321
322         delta_poc_msb_present = get_bits1(gb);
323         if (delta_poc_msb_present) {
324             int delta = get_ue_golomb_long(gb);
325
326             if (i && i != nb_sps)
327                 delta += prev_delta_msb;
328
329             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
330             prev_delta_msb = delta;
331         }
332     }
333
334     return 0;
335 }
336
337 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
338                                  const HEVCSPS *sps)
339 {
340     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
341     unsigned int num = 0, den = 0;
342
343     avctx->pix_fmt             = sps->pix_fmt;
344     avctx->coded_width         = sps->width;
345     avctx->coded_height        = sps->height;
346     avctx->width               = sps->output_width;
347     avctx->height              = sps->output_height;
348     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
349     avctx->profile             = sps->ptl.general_ptl.profile_idc;
350     avctx->level               = sps->ptl.general_ptl.level_idc;
351
352     ff_set_sar(avctx, sps->vui.sar);
353
354     if (sps->vui.video_signal_type_present_flag)
355         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
356                                                             : AVCOL_RANGE_MPEG;
357     else
358         avctx->color_range = AVCOL_RANGE_MPEG;
359
360     if (sps->vui.colour_description_present_flag) {
361         avctx->color_primaries = sps->vui.colour_primaries;
362         avctx->color_trc       = sps->vui.transfer_characteristic;
363         avctx->colorspace      = sps->vui.matrix_coeffs;
364     } else {
365         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
366         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
367         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
368     }
369
370     if (vps->vps_timing_info_present_flag) {
371         num = vps->vps_num_units_in_tick;
372         den = vps->vps_time_scale;
373     } else if (sps->vui.vui_timing_info_present_flag) {
374         num = sps->vui.vui_num_units_in_tick;
375         den = sps->vui.vui_time_scale;
376     }
377
378     if (num != 0 && den != 0)
379         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
380                   num, den, 1 << 30);
381 }
382
383 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
384 {
385     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
386     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
387     int ret;
388
389     pic_arrays_free(s);
390     s->ps.sps = NULL;
391     s->ps.vps = NULL;
392
393     if (!sps)
394         return 0;
395
396     ret = pic_arrays_init(s, sps);
397     if (ret < 0)
398         goto fail;
399
400     export_stream_params(s->avctx, &s->ps, sps);
401
402     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P ||
403         sps->pix_fmt == AV_PIX_FMT_YUV420P10) {
404 #if CONFIG_HEVC_DXVA2_HWACCEL
405         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
406 #endif
407     }
408     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
409 #if CONFIG_HEVC_D3D11VA_HWACCEL
410         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
411 #endif
412 #if CONFIG_HEVC_VDPAU_HWACCEL
413         *fmt++ = AV_PIX_FMT_VDPAU;
414 #endif
415     }
416
417     *fmt++ = sps->pix_fmt;
418     *fmt = AV_PIX_FMT_NONE;
419
420     ret = ff_get_format(s->avctx, pix_fmts);
421     if (ret < 0)
422         goto fail;
423     s->avctx->pix_fmt = ret;
424
425     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
426     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
427     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
428
429     if (sps->sao_enabled && !s->avctx->hwaccel) {
430         av_frame_unref(s->tmp_frame);
431         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
432         if (ret < 0)
433             goto fail;
434         s->frame = s->tmp_frame;
435     }
436
437     s->ps.sps = sps;
438     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
439
440     return 0;
441
442 fail:
443     pic_arrays_free(s);
444     s->ps.sps = NULL;
445     return ret;
446 }
447
448 static int hls_slice_header(HEVCContext *s)
449 {
450     GetBitContext *gb = &s->HEVClc.gb;
451     SliceHeader *sh   = &s->sh;
452     int i, ret;
453
454     // Coded parameters
455     sh->first_slice_in_pic_flag = get_bits1(gb);
456     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
457         s->seq_decode = (s->seq_decode + 1) & 0xff;
458         s->max_ra     = INT_MAX;
459         if (IS_IDR(s))
460             ff_hevc_clear_refs(s);
461     }
462     if (IS_IRAP(s))
463         sh->no_output_of_prior_pics_flag = get_bits1(gb);
464
465     sh->pps_id = get_ue_golomb_long(gb);
466     if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
467         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
468         return AVERROR_INVALIDDATA;
469     }
470     if (!sh->first_slice_in_pic_flag &&
471         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
472         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
473         return AVERROR_INVALIDDATA;
474     }
475     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
476
477     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
478         s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
479
480         ff_hevc_clear_refs(s);
481         ret = set_sps(s, s->ps.sps);
482         if (ret < 0)
483             return ret;
484
485         s->seq_decode = (s->seq_decode + 1) & 0xff;
486         s->max_ra     = INT_MAX;
487     }
488
489     sh->dependent_slice_segment_flag = 0;
490     if (!sh->first_slice_in_pic_flag) {
491         int slice_address_length;
492
493         if (s->ps.pps->dependent_slice_segments_enabled_flag)
494             sh->dependent_slice_segment_flag = get_bits1(gb);
495
496         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
497                                             s->ps.sps->ctb_height);
498         sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
499         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
500             av_log(s->avctx, AV_LOG_ERROR,
501                    "Invalid slice segment address: %u.\n",
502                    sh->slice_segment_addr);
503             return AVERROR_INVALIDDATA;
504         }
505
506         if (!sh->dependent_slice_segment_flag) {
507             sh->slice_addr = sh->slice_segment_addr;
508             s->slice_idx++;
509         }
510     } else {
511         sh->slice_segment_addr = sh->slice_addr = 0;
512         s->slice_idx           = 0;
513         s->slice_initialized   = 0;
514     }
515
516     if (!sh->dependent_slice_segment_flag) {
517         s->slice_initialized = 0;
518
519         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
520             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
521
522         sh->slice_type = get_ue_golomb_long(gb);
523         if (!(sh->slice_type == HEVC_SLICE_I ||
524               sh->slice_type == HEVC_SLICE_P ||
525               sh->slice_type == HEVC_SLICE_B)) {
526             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
527                    sh->slice_type);
528             return AVERROR_INVALIDDATA;
529         }
530         if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I) {
531             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
532             return AVERROR_INVALIDDATA;
533         }
534
535         // when flag is not present, picture is inferred to be output
536         sh->pic_output_flag = 1;
537         if (s->ps.pps->output_flag_present_flag)
538             sh->pic_output_flag = get_bits1(gb);
539
540         if (s->ps.sps->separate_colour_plane_flag)
541             sh->colour_plane_id = get_bits(gb, 2);
542
543         if (!IS_IDR(s)) {
544             int poc, pos;
545
546             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
547             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
548             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
549                 av_log(s->avctx, AV_LOG_WARNING,
550                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
551                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
552                     return AVERROR_INVALIDDATA;
553                 poc = s->poc;
554             }
555             s->poc = poc;
556
557             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
558             pos = get_bits_left(gb);
559             if (!sh->short_term_ref_pic_set_sps_flag) {
560                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
561                 if (ret < 0)
562                     return ret;
563
564                 sh->short_term_rps = &sh->slice_rps;
565             } else {
566                 int numbits, rps_idx;
567
568                 if (!s->ps.sps->nb_st_rps) {
569                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
570                     return AVERROR_INVALIDDATA;
571                 }
572
573                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
574                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
575                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
576             }
577             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
578
579             pos = get_bits_left(gb);
580             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
581             if (ret < 0) {
582                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
583                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
584                     return AVERROR_INVALIDDATA;
585             }
586             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
587
588             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
589                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
590             else
591                 sh->slice_temporal_mvp_enabled_flag = 0;
592         } else {
593             s->sh.short_term_rps = NULL;
594             s->poc               = 0;
595         }
596
597         /* 8.3.1 */
598         if (s->temporal_id == 0 &&
599             s->nal_unit_type != HEVC_NAL_TRAIL_N &&
600             s->nal_unit_type != HEVC_NAL_TSA_N   &&
601             s->nal_unit_type != HEVC_NAL_STSA_N  &&
602             s->nal_unit_type != HEVC_NAL_RADL_N  &&
603             s->nal_unit_type != HEVC_NAL_RADL_R  &&
604             s->nal_unit_type != HEVC_NAL_RASL_N  &&
605             s->nal_unit_type != HEVC_NAL_RASL_R)
606             s->pocTid0 = s->poc;
607
608         if (s->ps.sps->sao_enabled) {
609             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
610             sh->slice_sample_adaptive_offset_flag[1] =
611             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
612         } else {
613             sh->slice_sample_adaptive_offset_flag[0] = 0;
614             sh->slice_sample_adaptive_offset_flag[1] = 0;
615             sh->slice_sample_adaptive_offset_flag[2] = 0;
616         }
617
618         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
619         if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
620             int nb_refs;
621
622             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
623             if (sh->slice_type == HEVC_SLICE_B)
624                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
625
626             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
627                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
628                 if (sh->slice_type == HEVC_SLICE_B)
629                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
630             }
631             if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
632                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
633                        sh->nb_refs[L0], sh->nb_refs[L1]);
634                 return AVERROR_INVALIDDATA;
635             }
636
637             sh->rpl_modification_flag[0] = 0;
638             sh->rpl_modification_flag[1] = 0;
639             nb_refs = ff_hevc_frame_nb_refs(s);
640             if (!nb_refs) {
641                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
642                 return AVERROR_INVALIDDATA;
643             }
644
645             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
646                 sh->rpl_modification_flag[0] = get_bits1(gb);
647                 if (sh->rpl_modification_flag[0]) {
648                     for (i = 0; i < sh->nb_refs[L0]; i++)
649                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
650                 }
651
652                 if (sh->slice_type == HEVC_SLICE_B) {
653                     sh->rpl_modification_flag[1] = get_bits1(gb);
654                     if (sh->rpl_modification_flag[1] == 1)
655                         for (i = 0; i < sh->nb_refs[L1]; i++)
656                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
657                 }
658             }
659
660             if (sh->slice_type == HEVC_SLICE_B)
661                 sh->mvd_l1_zero_flag = get_bits1(gb);
662
663             if (s->ps.pps->cabac_init_present_flag)
664                 sh->cabac_init_flag = get_bits1(gb);
665             else
666                 sh->cabac_init_flag = 0;
667
668             sh->collocated_ref_idx = 0;
669             if (sh->slice_temporal_mvp_enabled_flag) {
670                 sh->collocated_list = L0;
671                 if (sh->slice_type == HEVC_SLICE_B)
672                     sh->collocated_list = !get_bits1(gb);
673
674                 if (sh->nb_refs[sh->collocated_list] > 1) {
675                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
676                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
677                         av_log(s->avctx, AV_LOG_ERROR,
678                                "Invalid collocated_ref_idx: %d.\n",
679                                sh->collocated_ref_idx);
680                         return AVERROR_INVALIDDATA;
681                     }
682                 }
683             }
684
685             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == HEVC_SLICE_P) ||
686                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) {
687                 pred_weight_table(s, gb);
688             }
689
690             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
691             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
692                 av_log(s->avctx, AV_LOG_ERROR,
693                        "Invalid number of merging MVP candidates: %d.\n",
694                        sh->max_num_merge_cand);
695                 return AVERROR_INVALIDDATA;
696             }
697         }
698
699         sh->slice_qp_delta = get_se_golomb(gb);
700
701         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
702             sh->slice_cb_qp_offset = get_se_golomb(gb);
703             sh->slice_cr_qp_offset = get_se_golomb(gb);
704         } else {
705             sh->slice_cb_qp_offset = 0;
706             sh->slice_cr_qp_offset = 0;
707         }
708
709         if (s->ps.pps->deblocking_filter_control_present_flag) {
710             int deblocking_filter_override_flag = 0;
711
712             if (s->ps.pps->deblocking_filter_override_enabled_flag)
713                 deblocking_filter_override_flag = get_bits1(gb);
714
715             if (deblocking_filter_override_flag) {
716                 sh->disable_deblocking_filter_flag = get_bits1(gb);
717                 if (!sh->disable_deblocking_filter_flag) {
718                     sh->beta_offset = get_se_golomb(gb) * 2;
719                     sh->tc_offset   = get_se_golomb(gb) * 2;
720                 }
721             } else {
722                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
723                 sh->beta_offset                    = s->ps.pps->beta_offset;
724                 sh->tc_offset                      = s->ps.pps->tc_offset;
725             }
726         } else {
727             sh->disable_deblocking_filter_flag = 0;
728             sh->beta_offset                    = 0;
729             sh->tc_offset                      = 0;
730         }
731
732         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
733             (sh->slice_sample_adaptive_offset_flag[0] ||
734              sh->slice_sample_adaptive_offset_flag[1] ||
735              !sh->disable_deblocking_filter_flag)) {
736             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
737         } else {
738             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
739         }
740     } else if (!s->slice_initialized) {
741         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
742         return AVERROR_INVALIDDATA;
743     }
744
745     sh->num_entry_point_offsets = 0;
746     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
747         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
748         if (sh->num_entry_point_offsets > 0) {
749             int offset_len = get_ue_golomb_long(gb) + 1;
750
751             for (i = 0; i < sh->num_entry_point_offsets; i++)
752                 skip_bits(gb, offset_len);
753         }
754     }
755
756     if (s->ps.pps->slice_header_extension_present_flag) {
757         unsigned int length = get_ue_golomb_long(gb);
758         for (i = 0; i < length; i++)
759             skip_bits(gb, 8);  // slice_header_extension_data_byte
760     }
761
762     // Inferred parameters
763     sh->slice_qp = 26 + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
764     if (sh->slice_qp > 51 ||
765         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
766         av_log(s->avctx, AV_LOG_ERROR,
767                "The slice_qp %d is outside the valid range "
768                "[%d, 51].\n",
769                sh->slice_qp,
770                -s->ps.sps->qp_bd_offset);
771         return AVERROR_INVALIDDATA;
772     }
773
774     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
775
776     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
777         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
778         return AVERROR_INVALIDDATA;
779     }
780
781     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
782
783     if (!s->ps.pps->cu_qp_delta_enabled_flag)
784         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->ps.sps->qp_bd_offset,
785                                 52 + s->ps.sps->qp_bd_offset) - s->ps.sps->qp_bd_offset;
786
787     s->slice_initialized = 1;
788
789     return 0;
790 }
791
/*
 * Access the entry of a per-CTB table at CTB coordinates (x, y).
 * Relies on a variable `s` (with s->ps.sps->ctb_width) in the calling scope.
 */
#define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])

/*
 * Set one SAO parameter of the current CTB.
 *
 * When the parameters are merged from a neighbour, the field is copied from
 * the CTB to the left (which takes precedence) or from the CTB above;
 * otherwise it is set to `value`, which is only evaluated in that case.
 *
 * Relies on `s`, `sao`, `rx`, `ry`, `sao_merge_left_flag` and
 * `sao_merge_up_flag` in the calling scope.
 */
#define SET_SAO(elem, value)                            \
do {                                                    \
    if (sao_merge_left_flag)                            \
        sao->elem = CTB(s->sao, rx-1, ry).elem;         \
    else if (sao_merge_up_flag)                         \
        sao->elem = CTB(s->sao, rx, ry-1).elem;         \
    else                                                \
        sao->elem = (value);                            \
} while (0)
805
806 static void hls_sao_param(HEVCContext *s, int rx, int ry)
807 {
808     HEVCLocalContext *lc    = &s->HEVClc;
809     int sao_merge_left_flag = 0;
810     int sao_merge_up_flag   = 0;
811     int shift               = s->ps.sps->bit_depth - FFMIN(s->ps.sps->bit_depth, 10);
812     SAOParams *sao          = &CTB(s->sao, rx, ry);
813     int c_idx, i;
814
815     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
816         s->sh.slice_sample_adaptive_offset_flag[1]) {
817         if (rx > 0) {
818             if (lc->ctb_left_flag)
819                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
820         }
821         if (ry > 0 && !sao_merge_left_flag) {
822             if (lc->ctb_up_flag)
823                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
824         }
825     }
826
827     for (c_idx = 0; c_idx < 3; c_idx++) {
828         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
829             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
830             continue;
831         }
832
833         if (c_idx == 2) {
834             sao->type_idx[2] = sao->type_idx[1];
835             sao->eo_class[2] = sao->eo_class[1];
836         } else {
837             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
838         }
839
840         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
841             continue;
842
843         for (i = 0; i < 4; i++)
844             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
845
846         if (sao->type_idx[c_idx] == SAO_BAND) {
847             for (i = 0; i < 4; i++) {
848                 if (sao->offset_abs[c_idx][i]) {
849                     SET_SAO(offset_sign[c_idx][i],
850                             ff_hevc_sao_offset_sign_decode(s));
851                 } else {
852                     sao->offset_sign[c_idx][i] = 0;
853                 }
854             }
855             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
856         } else if (c_idx != 2) {
857             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
858         }
859
860         // Inferred parameters
861         sao->offset_val[c_idx][0] = 0;
862         for (i = 0; i < 4; i++) {
863             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
864             if (sao->type_idx[c_idx] == SAO_EDGE) {
865                 if (i > 1)
866                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
867             } else if (sao->offset_sign[c_idx][i]) {
868                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
869             }
870         }
871     }
872 }
873
874 #undef SET_SAO
875 #undef CTB
876
877 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
878                                 int log2_trafo_size, enum ScanType scan_idx,
879                                 int c_idx)
880 {
881 #define GET_COORD(offset, n)                                    \
882     do {                                                        \
883         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
884         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
885     } while (0)
886     HEVCLocalContext *lc    = &s->HEVClc;
887     int transform_skip_flag = 0;
888
889     int last_significant_coeff_x, last_significant_coeff_y;
890     int last_scan_pos;
891     int n_end;
892     int num_coeff    = 0;
893     int greater1_ctx = 1;
894
895     int num_last_subset;
896     int x_cg_last_sig, y_cg_last_sig;
897
898     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
899
900     ptrdiff_t stride = s->frame->linesize[c_idx];
901     int hshift       = s->ps.sps->hshift[c_idx];
902     int vshift       = s->ps.sps->vshift[c_idx];
903     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
904                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
905     DECLARE_ALIGNED(32, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
906     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
907
908     int trafo_size = 1 << log2_trafo_size;
909     int i, qp, shift, add, scale, scale_m;
910     static const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
911     const uint8_t *scale_matrix;
912     uint8_t dc_scale;
913
914     // Derive QP for dequant
915     if (!lc->cu.cu_transquant_bypass_flag) {
916         static const int qp_c[] = {
917             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
918         };
919
920         static const uint8_t rem6[51 + 2 * 6 + 1] = {
921             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
922             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
923             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
924         };
925
926         static const uint8_t div6[51 + 2 * 6 + 1] = {
927             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
928             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
929             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
930         };
931         int qp_y = lc->qp_y;
932
933         if (c_idx == 0) {
934             qp = qp_y + s->ps.sps->qp_bd_offset;
935         } else {
936             int qp_i, offset;
937
938             if (c_idx == 1)
939                 offset = s->ps.pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
940             else
941                 offset = s->ps.pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
942
943             qp_i = av_clip(qp_y + offset, -s->ps.sps->qp_bd_offset, 57);
944             if (qp_i < 30)
945                 qp = qp_i;
946             else if (qp_i > 43)
947                 qp = qp_i - 6;
948             else
949                 qp = qp_c[qp_i - 30];
950
951             qp += s->ps.sps->qp_bd_offset;
952         }
953
954         shift    = s->ps.sps->bit_depth + log2_trafo_size - 5;
955         add      = 1 << (shift - 1);
956         scale    = level_scale[rem6[qp]] << (div6[qp]);
957         scale_m  = 16; // default when no custom scaling lists.
958         dc_scale = 16;
959
960         if (s->ps.sps->scaling_list_enable_flag) {
961             const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ?
962                                     &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
963             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
964
965             if (log2_trafo_size != 5)
966                 matrix_id = 3 * matrix_id + c_idx;
967
968             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
969             if (log2_trafo_size >= 4)
970                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
971         }
972     }
973
974     if (s->ps.pps->transform_skip_enabled_flag &&
975         !lc->cu.cu_transquant_bypass_flag   &&
976         log2_trafo_size == 2) {
977         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
978     }
979
980     last_significant_coeff_x =
981         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
982     last_significant_coeff_y =
983         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
984
985     if (last_significant_coeff_x > 3) {
986         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
987         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
988                                    (2 + (last_significant_coeff_x & 1)) +
989                                    suffix;
990     }
991
992     if (last_significant_coeff_y > 3) {
993         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
994         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
995                                    (2 + (last_significant_coeff_y & 1)) +
996                                    suffix;
997     }
998
999     if (scan_idx == SCAN_VERT)
1000         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1001
1002     x_cg_last_sig = last_significant_coeff_x >> 2;
1003     y_cg_last_sig = last_significant_coeff_y >> 2;
1004
1005     switch (scan_idx) {
1006     case SCAN_DIAG: {
1007         int last_x_c = last_significant_coeff_x & 3;
1008         int last_y_c = last_significant_coeff_y & 3;
1009
1010         scan_x_off = ff_hevc_diag_scan4x4_x;
1011         scan_y_off = ff_hevc_diag_scan4x4_y;
1012         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1013         if (trafo_size == 4) {
1014             scan_x_cg = scan_1x1;
1015             scan_y_cg = scan_1x1;
1016         } else if (trafo_size == 8) {
1017             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1018             scan_x_cg  = diag_scan2x2_x;
1019             scan_y_cg  = diag_scan2x2_y;
1020         } else if (trafo_size == 16) {
1021             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1022             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1023             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1024         } else { // trafo_size == 32
1025             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1026             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1027             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1028         }
1029         break;
1030     }
1031     case SCAN_HORIZ:
1032         scan_x_cg  = horiz_scan2x2_x;
1033         scan_y_cg  = horiz_scan2x2_y;
1034         scan_x_off = horiz_scan4x4_x;
1035         scan_y_off = horiz_scan4x4_y;
1036         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1037         break;
1038     default: //SCAN_VERT
1039         scan_x_cg  = horiz_scan2x2_y;
1040         scan_y_cg  = horiz_scan2x2_x;
1041         scan_x_off = horiz_scan4x4_y;
1042         scan_y_off = horiz_scan4x4_x;
1043         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1044         break;
1045     }
1046     num_coeff++;
1047     num_last_subset = (num_coeff - 1) >> 4;
1048
1049     for (i = num_last_subset; i >= 0; i--) {
1050         int n, m;
1051         int x_cg, y_cg, x_c, y_c;
1052         int implicit_non_zero_coeff = 0;
1053         int64_t trans_coeff_level;
1054         int prev_sig = 0;
1055         int offset   = i << 4;
1056
1057         uint8_t significant_coeff_flag_idx[16];
1058         uint8_t nb_significant_coeff_flag = 0;
1059
1060         x_cg = scan_x_cg[i];
1061         y_cg = scan_y_cg[i];
1062
1063         if (i < num_last_subset && i > 0) {
1064             int ctx_cg = 0;
1065             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1066                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1067             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1068                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1069
1070             significant_coeff_group_flag[x_cg][y_cg] =
1071                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1072             implicit_non_zero_coeff = 1;
1073         } else {
1074             significant_coeff_group_flag[x_cg][y_cg] =
1075                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1076                  (x_cg == 0 && y_cg == 0));
1077         }
1078
1079         last_scan_pos = num_coeff - offset - 1;
1080
1081         if (i == num_last_subset) {
1082             n_end                         = last_scan_pos - 1;
1083             significant_coeff_flag_idx[0] = last_scan_pos;
1084             nb_significant_coeff_flag     = 1;
1085         } else {
1086             n_end = 15;
1087         }
1088
1089         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1090             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1091         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1092             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1093
1094         for (n = n_end; n >= 0; n--) {
1095             GET_COORD(offset, n);
1096
1097             if (significant_coeff_group_flag[x_cg][y_cg] &&
1098                 (n > 0 || implicit_non_zero_coeff == 0)) {
1099                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1100                                                           log2_trafo_size,
1101                                                           scan_idx,
1102                                                           prev_sig) == 1) {
1103                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1104                     nb_significant_coeff_flag++;
1105                     implicit_non_zero_coeff = 0;
1106                 }
1107             } else {
1108                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1109                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1110                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1111                     nb_significant_coeff_flag++;
1112                 }
1113             }
1114         }
1115
1116         n_end = nb_significant_coeff_flag;
1117
1118         if (n_end) {
1119             int first_nz_pos_in_cg = 16;
1120             int last_nz_pos_in_cg = -1;
1121             int c_rice_param = 0;
1122             int first_greater1_coeff_idx = -1;
1123             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1124             uint16_t coeff_sign_flag;
1125             int sum_abs = 0;
1126             int sign_hidden = 0;
1127
1128             // initialize first elem of coeff_bas_level_greater1_flag
1129             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1130
1131             if (!(i == num_last_subset) && greater1_ctx == 0)
1132                 ctx_set++;
1133             greater1_ctx      = 1;
1134             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1135
1136             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1137                 int n_idx = significant_coeff_flag_idx[m];
1138                 int inc   = (ctx_set << 2) + greater1_ctx;
1139                 coeff_abs_level_greater1_flag[n_idx] =
1140                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1141                 if (coeff_abs_level_greater1_flag[n_idx]) {
1142                     greater1_ctx = 0;
1143                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1144                     greater1_ctx++;
1145                 }
1146
1147                 if (coeff_abs_level_greater1_flag[n_idx] &&
1148                     first_greater1_coeff_idx == -1)
1149                     first_greater1_coeff_idx = n_idx;
1150             }
1151             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1152             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1153                                  !lc->cu.cu_transquant_bypass_flag;
1154
1155             if (first_greater1_coeff_idx != -1) {
1156                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1157             }
1158             if (!s->ps.pps->sign_data_hiding_flag || !sign_hidden) {
1159                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1160             } else {
1161                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1162             }
1163
1164             for (m = 0; m < n_end; m++) {
1165                 n = significant_coeff_flag_idx[m];
1166                 GET_COORD(offset, n);
1167                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1168                 if (trans_coeff_level == ((m < 8) ?
1169                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1170                     int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1171
1172                     trans_coeff_level += last_coeff_abs_level_remaining;
1173                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1174                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1175                 }
1176                 if (s->ps.pps->sign_data_hiding_flag && sign_hidden) {
1177                     sum_abs += trans_coeff_level;
1178                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1179                         trans_coeff_level = -trans_coeff_level;
1180                 }
1181                 if (coeff_sign_flag >> 15)
1182                     trans_coeff_level = -trans_coeff_level;
1183                 coeff_sign_flag <<= 1;
1184                 if (!lc->cu.cu_transquant_bypass_flag) {
1185                     if (s->ps.sps->scaling_list_enable_flag) {
1186                         if (y_c || x_c || log2_trafo_size < 4) {
1187                             int pos;
1188                             switch (log2_trafo_size) {
1189                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1190                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1191                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1192                             default: pos = (y_c        << 2) +  x_c;
1193                             }
1194                             scale_m = scale_matrix[pos];
1195                         } else {
1196                             scale_m = dc_scale;
1197                         }
1198                     }
1199                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1200                     if(trans_coeff_level < 0) {
1201                         if((~trans_coeff_level) & 0xFffffffffff8000)
1202                             trans_coeff_level = -32768;
1203                     } else {
1204                         if (trans_coeff_level & 0xffffffffffff8000)
1205                             trans_coeff_level = 32767;
1206                     }
1207                 }
1208                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1209             }
1210         }
1211     }
1212
1213     if (!lc->cu.cu_transquant_bypass_flag) {
1214         if (transform_skip_flag)
1215             s->hevcdsp.dequant(coeffs);
1216         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1217                  log2_trafo_size == 2)
1218             s->hevcdsp.transform_4x4_luma(coeffs);
1219         else {
1220             int max_xy = FFMAX(last_significant_coeff_x, last_significant_coeff_y);
1221             if (max_xy == 0)
1222                 s->hevcdsp.idct_dc[log2_trafo_size - 2](coeffs);
1223             else {
1224                 int col_limit = last_significant_coeff_x + last_significant_coeff_y + 4;
1225                 if (max_xy < 4)
1226                     col_limit = FFMIN(4, col_limit);
1227                 else if (max_xy < 8)
1228                     col_limit = FFMIN(8, col_limit);
1229                 else if (max_xy < 12)
1230                     col_limit = FFMIN(24, col_limit);
1231                 s->hevcdsp.idct[log2_trafo_size - 2](coeffs, col_limit);
1232             }
1233         }
1234     }
1235     s->hevcdsp.add_residual[log2_trafo_size - 2](dst, coeffs, stride);
1236 }
1237
1238 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1239                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1240                               int log2_cb_size, int log2_trafo_size,
1241                               int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr)
1242 {
1243     HEVCLocalContext *lc = &s->HEVClc;
1244
1245     if (lc->cu.pred_mode == MODE_INTRA) {
1246         int trafo_size = 1 << log2_trafo_size;
1247         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1248
1249         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1250         if (log2_trafo_size > 2) {
1251             trafo_size = trafo_size << (s->ps.sps->hshift[1] - 1);
1252             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1253             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1254             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1255         } else if (blk_idx == 3) {
1256             trafo_size = trafo_size << s->ps.sps->hshift[1];
1257             ff_hevc_set_neighbour_available(s, xBase, yBase,
1258                                             trafo_size, trafo_size);
1259             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1260             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1261         }
1262     }
1263
1264     if (cbf_luma || cbf_cb || cbf_cr) {
1265         int scan_idx   = SCAN_DIAG;
1266         int scan_idx_c = SCAN_DIAG;
1267
1268         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1269             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1270             if (lc->tu.cu_qp_delta != 0)
1271                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1272                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1273             lc->tu.is_cu_qp_delta_coded = 1;
1274
1275             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1276                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1277                 av_log(s->avctx, AV_LOG_ERROR,
1278                        "The cu_qp_delta %d is outside the valid range "
1279                        "[%d, %d].\n",
1280                        lc->tu.cu_qp_delta,
1281                        -(26 + s->ps.sps->qp_bd_offset / 2),
1282                         (25 + s->ps.sps->qp_bd_offset / 2));
1283                 return AVERROR_INVALIDDATA;
1284             }
1285
1286             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1287         }
1288
1289         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1290             if (lc->tu.cur_intra_pred_mode >= 6 &&
1291                 lc->tu.cur_intra_pred_mode <= 14) {
1292                 scan_idx = SCAN_VERT;
1293             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1294                        lc->tu.cur_intra_pred_mode <= 30) {
1295                 scan_idx = SCAN_HORIZ;
1296             }
1297
1298             if (lc->pu.intra_pred_mode_c >=  6 &&
1299                 lc->pu.intra_pred_mode_c <= 14) {
1300                 scan_idx_c = SCAN_VERT;
1301             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1302                        lc->pu.intra_pred_mode_c <= 30) {
1303                 scan_idx_c = SCAN_HORIZ;
1304             }
1305         }
1306
1307         if (cbf_luma)
1308             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1309         if (log2_trafo_size > 2) {
1310             if (cbf_cb)
1311                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1312             if (cbf_cr)
1313                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1314         } else if (blk_idx == 3) {
1315             if (cbf_cb)
1316                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1317             if (cbf_cr)
1318                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1319         }
1320     }
1321     return 0;
1322 }
1323
1324 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1325 {
1326     int cb_size          = 1 << log2_cb_size;
1327     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1328
1329     int min_pu_width     = s->ps.sps->min_pu_width;
1330     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1331     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1332     int i, j;
1333
1334     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1335         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1336             s->is_pcm[i + j * min_pu_width] = 2;
1337 }
1338
1339 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1340                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1341                               int log2_cb_size, int log2_trafo_size,
1342                               int trafo_depth, int blk_idx,
1343                               int cbf_cb, int cbf_cr)
1344 {
1345     HEVCLocalContext *lc = &s->HEVClc;
1346     uint8_t split_transform_flag;
1347     int ret;
1348
1349     if (lc->cu.intra_split_flag) {
1350         if (trafo_depth == 1)
1351             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1352     } else {
1353         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1354     }
1355
1356     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1357         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1358         trafo_depth     < lc->cu.max_trafo_depth       &&
1359         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1360         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1361     } else {
1362         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1363                           lc->cu.pred_mode == MODE_INTER &&
1364                           lc->cu.part_mode != PART_2Nx2N &&
1365                           trafo_depth == 0;
1366
1367         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1368                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1369                                inter_split;
1370     }
1371
1372     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1373         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1374     else if (log2_trafo_size > 2 || trafo_depth == 0)
1375         cbf_cb = 0;
1376     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1377         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1378     else if (log2_trafo_size > 2 || trafo_depth == 0)
1379         cbf_cr = 0;
1380
1381     if (split_transform_flag) {
1382         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1383         const int x1 = x0 + trafo_size_split;
1384         const int y1 = y0 + trafo_size_split;
1385
1386 #define SUBDIVIDE(x, y, idx)                                                    \
1387 do {                                                                            \
1388     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1389                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1390                              cbf_cb, cbf_cr);                                   \
1391     if (ret < 0)                                                                \
1392         return ret;                                                             \
1393 } while (0)
1394
1395         SUBDIVIDE(x0, y0, 0);
1396         SUBDIVIDE(x1, y0, 1);
1397         SUBDIVIDE(x0, y1, 2);
1398         SUBDIVIDE(x1, y1, 3);
1399
1400 #undef SUBDIVIDE
1401     } else {
1402         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1403         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1404         int min_tu_width     = s->ps.sps->min_tb_width;
1405         int cbf_luma         = 1;
1406
1407         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1408             cbf_cb || cbf_cr)
1409             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1410
1411         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1412                                  log2_cb_size, log2_trafo_size,
1413                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1414         if (ret < 0)
1415             return ret;
1416         // TODO: store cbf_luma somewhere else
1417         if (cbf_luma) {
1418             int i, j;
1419             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1420                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1421                     int x_tu = (x0 + j) >> log2_min_tu_size;
1422                     int y_tu = (y0 + i) >> log2_min_tu_size;
1423                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1424                 }
1425         }
1426         if (!s->sh.disable_deblocking_filter_flag) {
1427             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1428             if (s->ps.pps->transquant_bypass_enable_flag &&
1429                 lc->cu.cu_transquant_bypass_flag)
1430                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1431         }
1432     }
1433     return 0;
1434 }
1435
1436 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1437 {
1438     //TODO: non-4:2:0 support
1439     HEVCLocalContext *lc = &s->HEVClc;
1440     GetBitContext gb;
1441     int cb_size   = 1 << log2_cb_size;
1442     ptrdiff_t stride0 = s->frame->linesize[0];
1443     ptrdiff_t stride1 = s->frame->linesize[1];
1444     ptrdiff_t stride2 = s->frame->linesize[2];
1445     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1446     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1447     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1448
1449     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->ps.sps->pcm.bit_depth_chroma;
1450     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1451     int ret;
1452
1453     if (!s->sh.disable_deblocking_filter_flag)
1454         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1455
1456     ret = init_get_bits(&gb, pcm, length);
1457     if (ret < 0)
1458         return ret;
1459
1460     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1461     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1462     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1463     return 0;
1464 }
1465
1466 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1467 {
1468     HEVCLocalContext *lc = &s->HEVClc;
1469     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1470     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1471
1472     if (x)
1473         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1474     if (y)
1475         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1476
1477     switch (x) {
1478     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1479     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1480     case 0: lc->pu.mvd.x = 0;                               break;
1481     }
1482
1483     switch (y) {
1484     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1485     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1486     case 0: lc->pu.mvd.y = 0;                               break;
1487     }
1488 }
1489
1490 /**
1491  * 8.5.3.2.2.1 Luma sample interpolation process
1492  *
1493  * @param s HEVC decoding context
1494  * @param dst target buffer for block data at block position
1495  * @param dststride stride of the dst buffer
1496  * @param ref reference picture buffer at origin (0, 0)
1497  * @param mv motion vector (relative to block position) to get pixel data from
1498  * @param x_off horizontal position of block from origin (0, 0)
1499  * @param y_off vertical position of block from origin (0, 0)
1500  * @param block_w width of block
1501  * @param block_h height of block
1502  */
1503 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1504                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1505                     int block_w, int block_h, int pred_idx)
1506 {
1507     HEVCLocalContext *lc = &s->HEVClc;
1508     uint8_t *src         = ref->data[0];
1509     ptrdiff_t srcstride  = ref->linesize[0];
1510     int pic_width        = s->ps.sps->width;
1511     int pic_height       = s->ps.sps->height;
1512
1513     int mx         = mv->x & 3;
1514     int my         = mv->y & 3;
1515     int extra_left = ff_hevc_qpel_extra_before[mx];
1516     int extra_top  = ff_hevc_qpel_extra_before[my];
1517
1518     x_off += mv->x >> 2;
1519     y_off += mv->y >> 2;
1520     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1521
1522     if (x_off < extra_left || y_off < extra_top ||
1523         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1524         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1525         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1526         int offset = extra_top * srcstride + (extra_left << s->ps.sps->pixel_shift);
1527         int buf_offset = extra_top *
1528                          edge_emu_stride + (extra_left << s->ps.sps->pixel_shift);
1529
1530         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1531                                  edge_emu_stride, srcstride,
1532                                  block_w + ff_hevc_qpel_extra[mx],
1533                                  block_h + ff_hevc_qpel_extra[my],
1534                                  x_off - extra_left, y_off - extra_top,
1535                                  pic_width, pic_height);
1536         src = lc->edge_emu_buffer + buf_offset;
1537         srcstride = edge_emu_stride;
1538     }
1539     s->hevcdsp.put_hevc_qpel[!!my][!!mx][pred_idx](dst, dststride, src, srcstride,
1540                                                    block_h, mx, my, lc->mc_buffer);
1541 }
1542
1543 /**
1544  * 8.5.3.2.2.2 Chroma sample interpolation process
1545  *
1546  * @param s HEVC decoding context
1547  * @param dst1 target buffer for block data at block position (U plane)
1548  * @param dst2 target buffer for block data at block position (V plane)
1549  * @param dststride stride of the dst1 and dst2 buffers
1550  * @param ref reference picture buffer at origin (0, 0)
1551  * @param mv motion vector (relative to block position) to get pixel data from
1552  * @param x_off horizontal position of block from origin (0, 0)
1553  * @param y_off vertical position of block from origin (0, 0)
1554  * @param block_w width of block
1555  * @param block_h height of block
1556  */
1557 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1558                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1559                       int x_off, int y_off, int block_w, int block_h, int pred_idx)
1560 {
1561     HEVCLocalContext *lc = &s->HEVClc;
1562     uint8_t *src1        = ref->data[1];
1563     uint8_t *src2        = ref->data[2];
1564     ptrdiff_t src1stride = ref->linesize[1];
1565     ptrdiff_t src2stride = ref->linesize[2];
1566     int pic_width        = s->ps.sps->width >> 1;
1567     int pic_height       = s->ps.sps->height >> 1;
1568
1569     int mx = mv->x & 7;
1570     int my = mv->y & 7;
1571
1572     x_off += mv->x >> 3;
1573     y_off += mv->y >> 3;
1574     src1  += y_off * src1stride + (x_off * (1 << s->ps.sps->pixel_shift));
1575     src2  += y_off * src2stride + (x_off * (1 << s->ps.sps->pixel_shift));
1576
1577     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1578         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1579         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1580         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1581         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1582         int buf_offset1 = EPEL_EXTRA_BEFORE *
1583                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1584         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1585         int buf_offset2 = EPEL_EXTRA_BEFORE *
1586                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1587
1588         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1589                                  edge_emu_stride, src1stride,
1590                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1591                                  x_off - EPEL_EXTRA_BEFORE,
1592                                  y_off - EPEL_EXTRA_BEFORE,
1593                                  pic_width, pic_height);
1594
1595         src1 = lc->edge_emu_buffer + buf_offset1;
1596         src1stride = edge_emu_stride;
1597         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
1598                                                        block_h, mx, my, lc->mc_buffer);
1599
1600         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1601                                  edge_emu_stride, src2stride,
1602                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1603                                  x_off - EPEL_EXTRA_BEFORE,
1604                                  y_off - EPEL_EXTRA_BEFORE,
1605                                  pic_width, pic_height);
1606         src2 = lc->edge_emu_buffer + buf_offset2;
1607         src2stride = edge_emu_stride;
1608
1609         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
1610                                                        block_h, mx, my, lc->mc_buffer);
1611     } else {
1612         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
1613                                                        block_h, mx, my, lc->mc_buffer);
1614         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
1615                                                        block_h, mx, my, lc->mc_buffer);
1616     }
1617 }
1618
1619 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1620                                 const Mv *mv, int y0, int height)
1621 {
1622     int y = (mv->y >> 2) + y0 + height + 9;
1623     ff_thread_await_progress(&ref->tf, y, 0);
1624 }
1625
1626 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1627                                   int nPbH, int log2_cb_size, int part_idx,
1628                                   int merge_idx, MvField *mv)
1629 {
1630     HEVCLocalContext *lc             = &s->HEVClc;
1631     enum InterPredIdc inter_pred_idc = PRED_L0;
1632     int mvp_flag;
1633
1634     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1635     if (s->sh.slice_type == HEVC_SLICE_B)
1636         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1637
1638     if (inter_pred_idc != PRED_L1) {
1639         if (s->sh.nb_refs[L0])
1640             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1641
1642         mv->pred_flag[0] = 1;
1643         hls_mvd_coding(s, x0, y0, 0);
1644         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1645         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1646                                  part_idx, merge_idx, mv, mvp_flag, 0);
1647         mv->mv[0].x += lc->pu.mvd.x;
1648         mv->mv[0].y += lc->pu.mvd.y;
1649     }
1650
1651     if (inter_pred_idc != PRED_L0) {
1652         if (s->sh.nb_refs[L1])
1653             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1654
1655         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1656             AV_ZERO32(&lc->pu.mvd);
1657         } else {
1658             hls_mvd_coding(s, x0, y0, 1);
1659         }
1660
1661         mv->pred_flag[1] = 1;
1662         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1663         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1664                                  part_idx, merge_idx, mv, mvp_flag, 1);
1665         mv->mv[1].x += lc->pu.mvd.x;
1666         mv->mv[1].y += lc->pu.mvd.y;
1667     }
1668 }
1669
1670 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1671                                 int nPbW, int nPbH,
1672                                 int log2_cb_size, int partIdx)
1673 {
1674     static const int pred_indices[] = {
1675         [4] = 0, [8] = 1, [12] = 2, [16] = 3, [24] = 4, [32] = 5, [48] = 6, [64] = 7,
1676     };
1677     const int pred_idx = pred_indices[nPbW];
1678
1679 #define POS(c_idx, x, y)                                                              \
1680     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1681                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1682     HEVCLocalContext *lc = &s->HEVClc;
1683     int merge_idx = 0;
1684     struct MvField current_mv = {{{ 0 }}};
1685
1686     int min_pu_width = s->ps.sps->min_pu_width;
1687     int weighted_pred = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1688                         (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1689
1690     MvField *tab_mvf = s->ref->tab_mvf;
1691     RefPicList  *refPicList = s->ref->refPicList;
1692     HEVCFrame *ref0, *ref1;
1693
1694     ptrdiff_t tmpstride = MAX_PB_SIZE * sizeof(int16_t);
1695
1696     uint8_t *dst0 = POS(0, x0, y0);
1697     uint8_t *dst1 = POS(1, x0, y0);
1698     uint8_t *dst2 = POS(2, x0, y0);
1699     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1700     int min_cb_width     = s->ps.sps->min_cb_width;
1701     int x_cb             = x0 >> log2_min_cb_size;
1702     int y_cb             = y0 >> log2_min_cb_size;
1703     int x_pu, y_pu;
1704     int i, j;
1705
1706     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1707
1708     if (!skip_flag)
1709         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1710
1711     if (skip_flag || lc->pu.merge_flag) {
1712         if (s->sh.max_num_merge_cand > 1)
1713             merge_idx = ff_hevc_merge_idx_decode(s);
1714         else
1715             merge_idx = 0;
1716
1717         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1718                                    partIdx, merge_idx, &current_mv);
1719     } else {
1720         hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1721                               partIdx, merge_idx, &current_mv);
1722     }
1723
1724     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1725     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1726
1727     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1728         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1729             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1730
1731     if (current_mv.pred_flag[0]) {
1732         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1733         if (!ref0)
1734             return;
1735         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1736     }
1737     if (current_mv.pred_flag[1]) {
1738         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1739         if (!ref1)
1740             return;
1741         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1742     }
1743
1744     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1745         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1746         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1747
1748         luma_mc(s, tmp, tmpstride, ref0->frame,
1749                 &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
1750
1751         if (weighted_pred) {
1752             s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
1753                                                s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1754                                                s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1755                                                dst0, s->frame->linesize[0], tmp,
1756                                                tmpstride, nPbH);
1757         } else {
1758             s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
1759         }
1760         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1761                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1762
1763         if (weighted_pred) {
1764             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1765                                                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1766                                                       s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1767                                                       dst1, s->frame->linesize[1], tmp, tmpstride,
1768                                                       nPbH / 2);
1769             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1770                                                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1771                                                       s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1772                                                       dst2, s->frame->linesize[2], tmp2, tmpstride,
1773                                                       nPbH / 2);
1774         } else {
1775             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp,  tmpstride, nPbH / 2);
1776             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
1777         }
1778     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1779         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1780         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1781
1782         luma_mc(s, tmp, tmpstride, ref1->frame,
1783                 &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
1784
1785         if (weighted_pred) {
1786             s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
1787                                                s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1788                                                s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1789                                                dst0, s->frame->linesize[0], tmp, tmpstride,
1790                                                nPbH);
1791         } else {
1792             s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
1793         }
1794
1795         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1796                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1797
1798         if (weighted_pred) {
1799             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1800                                                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1801                                                       s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1802                                                       dst1, s->frame->linesize[1], tmp, tmpstride, nPbH/2);
1803             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1804                                                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1805                                                       s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1806                                                       dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH/2);
1807         } else {
1808             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp,  tmpstride, nPbH / 2);
1809             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
1810         }
1811     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1812         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1813         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1814         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1815         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1816
1817         luma_mc(s, tmp, tmpstride, ref0->frame,
1818                 &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
1819         luma_mc(s, tmp2, tmpstride, ref1->frame,
1820                 &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
1821
1822         if (weighted_pred) {
1823             s->hevcdsp.weighted_pred_avg[pred_idx](s->sh.luma_log2_weight_denom,
1824                                                    s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1825                                                    s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1826                                                    s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1827                                                    s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1828                                                    dst0, s->frame->linesize[0],
1829                                                    tmp, tmp2, tmpstride, nPbH);
1830         } else {
1831             s->hevcdsp.put_unweighted_pred_avg[pred_idx](dst0, s->frame->linesize[0],
1832                                                          tmp, tmp2, tmpstride, nPbH);
1833         }
1834
1835         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1836                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1837         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1838                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1839
1840         if (weighted_pred) {
1841             s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1842                                                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1843                                                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1844                                                           s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1845                                                           s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1846                                                           dst1, s->frame->linesize[1], tmp, tmp3,
1847                                                           tmpstride, nPbH / 2);
1848             s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1849                                                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1850                                                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1851                                                           s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1852                                                           s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1853                                                           dst2, s->frame->linesize[2], tmp2, tmp4,
1854                                                           tmpstride, nPbH / 2);
1855         } else {
1856             s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst1, s->frame->linesize[1], tmp, tmp3,  tmpstride, nPbH/2);
1857             s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbH/2);
1858         }
1859     }
1860 }
1861
1862 /**
1863  * 8.4.1
1864  */
1865 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1866                                 int prev_intra_luma_pred_flag)
1867 {
1868     HEVCLocalContext *lc = &s->HEVClc;
1869     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1870     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1871     int min_pu_width     = s->ps.sps->min_pu_width;
1872     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1873     int x0b              = x0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1874     int y0b              = y0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1875
1876     int cand_up   = (lc->ctb_up_flag || y0b) ?
1877                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1878     int cand_left = (lc->ctb_left_flag || x0b) ?
1879                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1880
1881     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1882
1883     MvField *tab_mvf = s->ref->tab_mvf;
1884     int intra_pred_mode;
1885     int candidate[3];
1886     int i, j;
1887
1888     // intra_pred_mode prediction does not cross vertical CTB boundaries
1889     if ((y0 - 1) < y_ctb)
1890         cand_up = INTRA_DC;
1891
1892     if (cand_left == cand_up) {
1893         if (cand_left < 2) {
1894             candidate[0] = INTRA_PLANAR;
1895             candidate[1] = INTRA_DC;
1896             candidate[2] = INTRA_ANGULAR_26;
1897         } else {
1898             candidate[0] = cand_left;
1899             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1900             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1901         }
1902     } else {
1903         candidate[0] = cand_left;
1904         candidate[1] = cand_up;
1905         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1906             candidate[2] = INTRA_PLANAR;
1907         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1908             candidate[2] = INTRA_DC;
1909         } else {
1910             candidate[2] = INTRA_ANGULAR_26;
1911         }
1912     }
1913
1914     if (prev_intra_luma_pred_flag) {
1915         intra_pred_mode = candidate[lc->pu.mpm_idx];
1916     } else {
1917         if (candidate[0] > candidate[1])
1918             FFSWAP(uint8_t, candidate[0], candidate[1]);
1919         if (candidate[0] > candidate[2])
1920             FFSWAP(uint8_t, candidate[0], candidate[2]);
1921         if (candidate[1] > candidate[2])
1922             FFSWAP(uint8_t, candidate[1], candidate[2]);
1923
1924         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1925         for (i = 0; i < 3; i++)
1926             if (intra_pred_mode >= candidate[i])
1927                 intra_pred_mode++;
1928     }
1929
1930     /* write the intra prediction units into the mv array */
1931     if (!size_in_pus)
1932         size_in_pus = 1;
1933     for (i = 0; i < size_in_pus; i++) {
1934         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1935                intra_pred_mode, size_in_pus);
1936
1937         for (j = 0; j < size_in_pus; j++) {
1938             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1939             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1940             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1941             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1942             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1943             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1944             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1945             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1946             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1947         }
1948     }
1949
1950     return intra_pred_mode;
1951 }
1952
1953 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1954                                           int log2_cb_size, int ct_depth)
1955 {
1956     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1957     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
1958     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
1959     int y;
1960
1961     for (y = 0; y < length; y++)
1962         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
1963                ct_depth, length);
1964 }
1965
1966 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1967                                   int log2_cb_size)
1968 {
1969     HEVCLocalContext *lc = &s->HEVClc;
1970     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1971     uint8_t prev_intra_luma_pred_flag[4];
1972     int split   = lc->cu.part_mode == PART_NxN;
1973     int pb_size = (1 << log2_cb_size) >> split;
1974     int side    = split + 1;
1975     int chroma_mode;
1976     int i, j;
1977
1978     for (i = 0; i < side; i++)
1979         for (j = 0; j < side; j++)
1980             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1981
1982     for (i = 0; i < side; i++) {
1983         for (j = 0; j < side; j++) {
1984             if (prev_intra_luma_pred_flag[2 * i + j])
1985                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1986             else
1987                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1988
1989             lc->pu.intra_pred_mode[2 * i + j] =
1990                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1991                                      prev_intra_luma_pred_flag[2 * i + j]);
1992         }
1993     }
1994
1995     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1996     if (chroma_mode != 4) {
1997         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1998             lc->pu.intra_pred_mode_c = 34;
1999         else
2000             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
2001     } else {
2002         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2003     }
2004 }
2005
2006 static void intra_prediction_unit_default_value(HEVCContext *s,
2007                                                 int x0, int y0,
2008                                                 int log2_cb_size)
2009 {
2010     HEVCLocalContext *lc = &s->HEVClc;
2011     int pb_size          = 1 << log2_cb_size;
2012     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2013     int min_pu_width     = s->ps.sps->min_pu_width;
2014     MvField *tab_mvf     = s->ref->tab_mvf;
2015     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2016     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2017     int j, k;
2018
2019     if (size_in_pus == 0)
2020         size_in_pus = 1;
2021     for (j = 0; j < size_in_pus; j++) {
2022         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2023         for (k = 0; k < size_in_pus; k++)
2024             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2025     }
2026 }
2027
2028 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2029 {
2030     int cb_size          = 1 << log2_cb_size;
2031     HEVCLocalContext *lc = &s->HEVClc;
2032     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2033     int length           = cb_size >> log2_min_cb_size;
2034     int min_cb_width     = s->ps.sps->min_cb_width;
2035     int x_cb             = x0 >> log2_min_cb_size;
2036     int y_cb             = y0 >> log2_min_cb_size;
2037     int x, y, ret;
2038
2039     lc->cu.x                = x0;
2040     lc->cu.y                = y0;
2041     lc->cu.pred_mode        = MODE_INTRA;
2042     lc->cu.part_mode        = PART_2Nx2N;
2043     lc->cu.intra_split_flag = 0;
2044
2045     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2046     for (x = 0; x < 4; x++)
2047         lc->pu.intra_pred_mode[x] = 1;
2048     if (s->ps.pps->transquant_bypass_enable_flag) {
2049         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2050         if (lc->cu.cu_transquant_bypass_flag)
2051             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2052     } else
2053         lc->cu.cu_transquant_bypass_flag = 0;
2054
2055     if (s->sh.slice_type != HEVC_SLICE_I) {
2056         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2057
2058         x = y_cb * min_cb_width + x_cb;
2059         for (y = 0; y < length; y++) {
2060             memset(&s->skip_flag[x], skip_flag, length);
2061             x += min_cb_width;
2062         }
2063         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2064     }
2065
2066     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2067         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2068         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2069
2070         if (!s->sh.disable_deblocking_filter_flag)
2071             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2072     } else {
2073         int pcm_flag = 0;
2074
2075         if (s->sh.slice_type != HEVC_SLICE_I)
2076             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2077         if (lc->cu.pred_mode != MODE_INTRA ||
2078             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2079             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2080             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2081                                       lc->cu.pred_mode == MODE_INTRA;
2082         }
2083
2084         if (lc->cu.pred_mode == MODE_INTRA) {
2085             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2086                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2087                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2088                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2089             }
2090             if (pcm_flag) {
2091                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2092                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2093                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2094                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2095
2096                 if (ret < 0)
2097                     return ret;
2098             } else {
2099                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2100             }
2101         } else {
2102             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2103             switch (lc->cu.part_mode) {
2104             case PART_2Nx2N:
2105                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2106                 break;
2107             case PART_2NxN:
2108                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2109                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2110                 break;
2111             case PART_Nx2N:
2112                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2113                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2114                 break;
2115             case PART_2NxnU:
2116                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2117                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2118                 break;
2119             case PART_2NxnD:
2120                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2121                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2122                 break;
2123             case PART_nLx2N:
2124                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2125                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2126                 break;
2127             case PART_nRx2N:
2128                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2129                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2130                 break;
2131             case PART_NxN:
2132                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2133                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2134                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2135                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2136                 break;
2137             }
2138         }
2139
2140         if (!pcm_flag) {
2141             int rqt_root_cbf = 1;
2142
2143             if (lc->cu.pred_mode != MODE_INTRA &&
2144                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2145                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2146             }
2147             if (rqt_root_cbf) {
2148                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2149                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2150                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2151                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2152                                          log2_cb_size,
2153                                          log2_cb_size, 0, 0, 0, 0);
2154                 if (ret < 0)
2155                     return ret;
2156             } else {
2157                 if (!s->sh.disable_deblocking_filter_flag)
2158                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2159             }
2160         }
2161     }
2162
2163     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2164         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2165
2166     x = y_cb * min_cb_width + x_cb;
2167     for (y = 0; y < length; y++) {
2168         memset(&s->qp_y_tab[x], lc->qp_y, length);
2169         x += min_cb_width;
2170     }
2171
2172     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2173
2174     return 0;
2175 }
2176
2177 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2178                                int log2_cb_size, int cb_depth)
2179 {
2180     HEVCLocalContext *lc = &s->HEVClc;
2181     const int cb_size    = 1 << log2_cb_size;
2182     int split_cu;
2183
2184     lc->ct.depth = cb_depth;
2185     if (x0 + cb_size <= s->ps.sps->width  &&
2186         y0 + cb_size <= s->ps.sps->height &&
2187         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2188         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2189     } else {
2190         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2191     }
2192     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2193         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2194         lc->tu.is_cu_qp_delta_coded = 0;
2195         lc->tu.cu_qp_delta          = 0;
2196     }
2197
2198     if (split_cu) {
2199         const int cb_size_split = cb_size >> 1;
2200         const int x1 = x0 + cb_size_split;
2201         const int y1 = y0 + cb_size_split;
2202
2203         log2_cb_size--;
2204         cb_depth++;
2205
2206 #define SUBDIVIDE(x, y)                                                \
2207 do {                                                                   \
2208     if (x < s->ps.sps->width && y < s->ps.sps->height) {                     \
2209         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2210         if (ret < 0)                                                   \
2211             return ret;                                                \
2212     }                                                                  \
2213 } while (0)
2214
2215         SUBDIVIDE(x0, y0);
2216         SUBDIVIDE(x1, y0);
2217         SUBDIVIDE(x0, y1);
2218         SUBDIVIDE(x1, y1);
2219     } else {
2220         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2221         if (ret < 0)
2222             return ret;
2223     }
2224
2225     return 0;
2226 }
2227
2228 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2229                                  int ctb_addr_ts)
2230 {
2231     HEVCLocalContext *lc  = &s->HEVClc;
2232     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2233     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2234     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2235
2236     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2237
2238     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2239         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2240             lc->first_qp_group = 1;
2241         lc->end_of_tiles_x = s->ps.sps->width;
2242     } else if (s->ps.pps->tiles_enabled_flag) {
2243         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2244             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2245             lc->start_of_tiles_x = x_ctb;
2246             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2247             lc->first_qp_group   = 1;
2248         }
2249     } else {
2250         lc->end_of_tiles_x = s->ps.sps->width;
2251     }
2252
2253     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2254
2255     lc->boundary_flags = 0;
2256     if (s->ps.pps->tiles_enabled_flag) {
2257         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2258             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2259         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2260             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2261         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2262             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2263         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2264             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2265     } else {
2266         if (!ctb_addr_in_slice)
2267             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2268         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2269             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2270     }
2271
2272     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2273     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2274     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2275     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2276 }
2277
2278 static int hls_slice_data(HEVCContext *s)
2279 {
2280     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2281     int more_data   = 1;
2282     int x_ctb       = 0;
2283     int y_ctb       = 0;
2284     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2285     int ret;
2286
2287     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2288         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2289
2290         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2291         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2292         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2293
2294         ff_hevc_cabac_init(s, ctb_addr_ts);
2295
2296         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2297
2298         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2299         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2300         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2301
2302         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2303         if (ret < 0)
2304             return ret;
2305         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2306
2307         ctb_addr_ts++;
2308         ff_hevc_save_states(s, ctb_addr_ts);
2309         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2310     }
2311
2312     if (x_ctb + ctb_size >= s->ps.sps->width &&
2313         y_ctb + ctb_size >= s->ps.sps->height)
2314         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2315
2316     return ctb_addr_ts;
2317 }
2318
2319 static void restore_tqb_pixels(HEVCContext *s)
2320 {
2321     int min_pu_size = 1 << s->ps.sps->log2_min_pu_size;
2322     int x, y, c_idx;
2323
2324     for (c_idx = 0; c_idx < 3; c_idx++) {
2325         ptrdiff_t stride = s->frame->linesize[c_idx];
2326         int hshift       = s->ps.sps->hshift[c_idx];
2327         int vshift       = s->ps.sps->vshift[c_idx];
2328         for (y = 0; y < s->ps.sps->min_pu_height; y++) {
2329             for (x = 0; x < s->ps.sps->min_pu_width; x++) {
2330                 if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
2331                     int n;
2332                     int len      = min_pu_size >> hshift;
2333                     uint8_t *src = &s->frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2334                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2335                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2336                         memcpy(dst, src, len);
2337                         src += stride;
2338                         dst += stride;
2339                     }
2340                 }
2341             }
2342         }
2343     }
2344 }
2345
2346 static int set_side_data(HEVCContext *s)
2347 {
2348     AVFrame *out = s->ref->frame;
2349
2350     if (s->sei_frame_packing_present &&
2351         s->frame_packing_arrangement_type >= 3 &&
2352         s->frame_packing_arrangement_type <= 5 &&
2353         s->content_interpretation_type > 0 &&
2354         s->content_interpretation_type < 3) {
2355         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2356         if (!stereo)
2357             return AVERROR(ENOMEM);
2358
2359         switch (s->frame_packing_arrangement_type) {
2360         case 3:
2361             if (s->quincunx_subsampling)
2362                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2363             else
2364                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2365             break;
2366         case 4:
2367             stereo->type = AV_STEREO3D_TOPBOTTOM;
2368             break;
2369         case 5:
2370             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2371             break;
2372         }
2373
2374         if (s->content_interpretation_type == 2)
2375             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2376     }
2377
2378     if (s->sei_display_orientation_present &&
2379         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2380         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2381         AVFrameSideData *rotation = av_frame_new_side_data(out,
2382                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2383                                                            sizeof(int32_t) * 9);
2384         if (!rotation)
2385             return AVERROR(ENOMEM);
2386
2387         av_display_rotation_set((int32_t *)rotation->data, angle);
2388         av_display_matrix_flip((int32_t *)rotation->data,
2389                                s->sei_hflip, s->sei_vflip);
2390     }
2391
2392     return 0;
2393 }
2394
2395 static int hevc_frame_start(HEVCContext *s)
2396 {
2397     HEVCLocalContext *lc = &s->HEVClc;
2398     int ret;
2399
2400     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2401     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2402     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2403     memset(s->is_pcm,        0, s->ps.sps->min_pu_width * s->ps.sps->min_pu_height);
2404
2405     lc->start_of_tiles_x = 0;
2406     s->is_decoded        = 0;
2407     s->first_nal_type    = s->nal_unit_type;
2408
2409     if (s->ps.pps->tiles_enabled_flag)
2410         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2411
2412     ret = ff_hevc_set_new_ref(s, s->ps.sps->sao_enabled ? &s->sao_frame : &s->frame,
2413                               s->poc);
2414     if (ret < 0)
2415         goto fail;
2416
2417     ret = ff_hevc_frame_rps(s);
2418     if (ret < 0) {
2419         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2420         goto fail;
2421     }
2422
2423     s->ref->frame->key_frame = IS_IRAP(s);
2424
2425     ret = set_side_data(s);
2426     if (ret < 0)
2427         goto fail;
2428
2429     av_frame_unref(s->output_frame);
2430     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2431     if (ret < 0)
2432         goto fail;
2433
2434     ff_thread_finish_setup(s->avctx);
2435
2436     return 0;
2437
2438 fail:
2439     if (s->ref)
2440         ff_hevc_unref_frame(s, s->ref, ~0);
2441     s->ref = NULL;
2442     return ret;
2443 }
2444
2445 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2446 {
2447     HEVCLocalContext *lc = &s->HEVClc;
2448     GetBitContext *gb    = &lc->gb;
2449     int ctb_addr_ts, ret;
2450
2451     *gb              = nal->gb;
2452     s->nal_unit_type = nal->type;
2453     s->temporal_id   = nal->temporal_id;
2454
2455     switch (s->nal_unit_type) {
2456     case HEVC_NAL_VPS:
2457         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2458         if (ret < 0)
2459             goto fail;
2460         break;
2461     case HEVC_NAL_SPS:
2462         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2463                                      s->apply_defdispwin);
2464         if (ret < 0)
2465             goto fail;
2466         break;
2467     case HEVC_NAL_PPS:
2468         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2469         if (ret < 0)
2470             goto fail;
2471         break;
2472     case HEVC_NAL_SEI_PREFIX:
2473     case HEVC_NAL_SEI_SUFFIX:
2474         ret = ff_hevc_decode_nal_sei(s);
2475         if (ret < 0)
2476             goto fail;
2477         break;
2478     case HEVC_NAL_TRAIL_R:
2479     case HEVC_NAL_TRAIL_N:
2480     case HEVC_NAL_TSA_N:
2481     case HEVC_NAL_TSA_R:
2482     case HEVC_NAL_STSA_N:
2483     case HEVC_NAL_STSA_R:
2484     case HEVC_NAL_BLA_W_LP:
2485     case HEVC_NAL_BLA_W_RADL:
2486     case HEVC_NAL_BLA_N_LP:
2487     case HEVC_NAL_IDR_W_RADL:
2488     case HEVC_NAL_IDR_N_LP:
2489     case HEVC_NAL_CRA_NUT:
2490     case HEVC_NAL_RADL_N:
2491     case HEVC_NAL_RADL_R:
2492     case HEVC_NAL_RASL_N:
2493     case HEVC_NAL_RASL_R:
2494         ret = hls_slice_header(s);
2495         if (ret < 0)
2496             return ret;
2497
2498         if (s->max_ra == INT_MAX) {
2499             if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
2500                 s->max_ra = s->poc;
2501             } else {
2502                 if (IS_IDR(s))
2503                     s->max_ra = INT_MIN;
2504             }
2505         }
2506
2507         if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
2508             s->poc <= s->max_ra) {
2509             s->is_decoded = 0;
2510             break;
2511         } else {
2512             if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
2513                 s->max_ra = INT_MIN;
2514         }
2515
2516         if (s->sh.first_slice_in_pic_flag) {
2517             ret = hevc_frame_start(s);
2518             if (ret < 0)
2519                 return ret;
2520         } else if (!s->ref) {
2521             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2522             goto fail;
2523         }
2524
2525         if (s->nal_unit_type != s->first_nal_type) {
2526             av_log(s->avctx, AV_LOG_ERROR,
2527                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2528                    s->first_nal_type, s->nal_unit_type);
2529             return AVERROR_INVALIDDATA;
2530         }
2531
2532         if (!s->sh.dependent_slice_segment_flag &&
2533             s->sh.slice_type != HEVC_SLICE_I) {
2534             ret = ff_hevc_slice_rpl(s);
2535             if (ret < 0) {
2536                 av_log(s->avctx, AV_LOG_WARNING,
2537                        "Error constructing the reference lists for the current slice.\n");
2538                 goto fail;
2539             }
2540         }
2541
2542         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2543             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2544             if (ret < 0)
2545                 goto fail;
2546         }
2547
2548         if (s->avctx->hwaccel) {
2549             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2550             if (ret < 0)
2551                 goto fail;
2552         } else {
2553             ctb_addr_ts = hls_slice_data(s);
2554             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2555                 s->is_decoded = 1;
2556                 if ((s->ps.pps->transquant_bypass_enable_flag ||
2557                      (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) &&
2558                     s->ps.sps->sao_enabled)
2559                     restore_tqb_pixels(s);
2560             }
2561
2562             if (ctb_addr_ts < 0) {
2563                 ret = ctb_addr_ts;
2564                 goto fail;
2565             }
2566         }
2567         break;
2568     case HEVC_NAL_EOS_NUT:
2569     case HEVC_NAL_EOB_NUT:
2570         s->seq_decode = (s->seq_decode + 1) & 0xff;
2571         s->max_ra     = INT_MAX;
2572         break;
2573     case HEVC_NAL_AUD:
2574     case HEVC_NAL_FD_NUT:
2575         break;
2576     default:
2577         av_log(s->avctx, AV_LOG_INFO,
2578                "Skipping NAL unit %d\n", s->nal_unit_type);
2579     }
2580
2581     return 0;
2582 fail:
2583     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2584         return ret;
2585     return 0;
2586 }
2587
2588 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2589 {
2590     int i, ret = 0;
2591
2592     s->ref = NULL;
2593     s->eos = 0;
2594
2595     /* split the input packet into NAL units, so we know the upper bound on the
2596      * number of slices in the frame */
2597     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
2598                                 s->nal_length_size, s->avctx->codec_id);
2599     if (ret < 0) {
2600         av_log(s->avctx, AV_LOG_ERROR,
2601                "Error splitting the input into NAL units.\n");
2602         return ret;
2603     }
2604
2605     for (i = 0; i < s->pkt.nb_nals; i++) {
2606         if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
2607             s->pkt.nals[i].type == HEVC_NAL_EOS_NUT)
2608             s->eos = 1;
2609     }
2610
2611     /* decode the NAL units */
2612     for (i = 0; i < s->pkt.nb_nals; i++) {
2613         ret = decode_nal_unit(s, &s->pkt.nals[i]);
2614         if (ret < 0) {
2615             av_log(s->avctx, AV_LOG_WARNING,
2616                    "Error parsing NAL unit #%d.\n", i);
2617             goto fail;
2618         }
2619     }
2620
2621 fail:
2622     if (s->ref)
2623         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2624
2625     return ret;
2626 }
2627
/* Log the 16 bytes of an MD5 digest as two hex digits each, without any
 * separator or trailing newline. */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *cur = md5;
    const uint8_t *end = md5 + 16;

    while (cur < end)
        av_log(log_ctx, level, "%02"PRIx8, *cur++);
}
2634
2635 static int verify_md5(HEVCContext *s, AVFrame *frame)
2636 {
2637     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2638     int pixel_shift;
2639     int i, j;
2640
2641     if (!desc)
2642         return AVERROR(EINVAL);
2643
2644     pixel_shift = desc->comp[0].depth > 8;
2645
2646     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2647            s->poc);
2648
2649     /* the checksums are LE, so we have to byteswap for >8bpp formats
2650      * on BE arches */
2651 #if HAVE_BIGENDIAN
2652     if (pixel_shift && !s->checksum_buf) {
2653         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2654                        FFMAX3(frame->linesize[0], frame->linesize[1],
2655                               frame->linesize[2]));
2656         if (!s->checksum_buf)
2657             return AVERROR(ENOMEM);
2658     }
2659 #endif
2660
2661     for (i = 0; frame->data[i]; i++) {
2662         int width  = s->avctx->coded_width;
2663         int height = s->avctx->coded_height;
2664         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2665         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2666         uint8_t md5[16];
2667
2668         av_md5_init(s->md5_ctx);
2669         for (j = 0; j < h; j++) {
2670             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2671 #if HAVE_BIGENDIAN
2672             if (pixel_shift) {
2673                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2674                                     (const uint16_t *) src, w);
2675                 src = s->checksum_buf;
2676             }
2677 #endif
2678             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2679         }
2680         av_md5_final(s->md5_ctx, md5);
2681
2682         if (!memcmp(md5, s->md5[i], 16)) {
2683             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2684             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2685             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2686         } else {
2687             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2688             print_md5(s->avctx, AV_LOG_ERROR, md5);
2689             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2690             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2691             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2692             return AVERROR_INVALIDDATA;
2693         }
2694     }
2695
2696     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2697
2698     return 0;
2699 }
2700
2701 static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length)
2702 {
2703     AVCodecContext *avctx = s->avctx;
2704     GetByteContext gb;
2705     int ret, i;
2706
2707     bytestream2_init(&gb, buf, length);
2708
2709     if (length > 3 && (buf[0] || buf[1] || buf[2] > 1)) {
2710         /* It seems the extradata is encoded as hvcC format.
2711          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
2712          * is finalized. When finalized, configurationVersion will be 1 and we
2713          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
2714         int i, j, num_arrays, nal_len_size;
2715
2716         s->is_nalff = 1;
2717
2718         bytestream2_skip(&gb, 21);
2719         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
2720         num_arrays   = bytestream2_get_byte(&gb);
2721
2722         /* nal units in the hvcC always have length coded with 2 bytes,
2723          * so put a fake nal_length_size = 2 while parsing them */
2724         s->nal_length_size = 2;
2725
2726         /* Decode nal units from hvcC. */
2727         for (i = 0; i < num_arrays; i++) {
2728             int type = bytestream2_get_byte(&gb) & 0x3f;
2729             int cnt  = bytestream2_get_be16(&gb);
2730
2731             for (j = 0; j < cnt; j++) {
2732                 // +2 for the nal size field
2733                 int nalsize = bytestream2_peek_be16(&gb) + 2;
2734                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
2735                     av_log(s->avctx, AV_LOG_ERROR,
2736                            "Invalid NAL unit size in extradata.\n");
2737                     return AVERROR_INVALIDDATA;
2738                 }
2739
2740                 ret = decode_nal_units(s, gb.buffer, nalsize);
2741                 if (ret < 0) {
2742                     av_log(avctx, AV_LOG_ERROR,
2743                            "Decoding nal unit %d %d from hvcC failed\n",
2744                            type, i);
2745                     return ret;
2746                 }
2747                 bytestream2_skip(&gb, nalsize);
2748             }
2749         }
2750
2751         /* Now store right nal length size, that will be used to parse
2752          * all other nals */
2753         s->nal_length_size = nal_len_size;
2754     } else {
2755         s->is_nalff = 0;
2756         ret = decode_nal_units(s, buf, length);
2757         if (ret < 0)
2758             return ret;
2759     }
2760
2761     /* export stream parameters from the first SPS */
2762     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2763         if (s->ps.sps_list[i]) {
2764             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
2765             export_stream_params(s->avctx, &s->ps, sps);
2766             break;
2767         }
2768     }
2769
2770     return 0;
2771 }
2772
2773 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2774                              AVPacket *avpkt)
2775 {
2776     int ret;
2777     int new_extradata_size;
2778     uint8_t *new_extradata;
2779     HEVCContext *s = avctx->priv_data;
2780
2781     if (!avpkt->size) {
2782         ret = ff_hevc_output_frame(s, data, 1);
2783         if (ret < 0)
2784             return ret;
2785
2786         *got_output = ret;
2787         return 0;
2788     }
2789
2790     new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
2791                                             &new_extradata_size);
2792     if (new_extradata && new_extradata_size > 0) {
2793         ret = hevc_decode_extradata(s, new_extradata, new_extradata_size);
2794         if (ret < 0)
2795             return ret;
2796     }
2797
2798     s->ref = NULL;
2799     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2800     if (ret < 0)
2801         return ret;
2802
2803     if (avctx->hwaccel) {
2804         if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2805             av_log(avctx, AV_LOG_ERROR,
2806                    "hardware accelerator failed to decode picture\n");
2807     } else {
2808         /* verify the SEI checksum */
2809         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2810             s->is_md5) {
2811             ret = verify_md5(s, s->ref->frame);
2812             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2813                 ff_hevc_unref_frame(s, s->ref, ~0);
2814                 return ret;
2815             }
2816         }
2817     }
2818     s->is_md5 = 0;
2819
2820     if (s->is_decoded) {
2821         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2822         s->is_decoded = 0;
2823     }
2824
2825     if (s->output_frame->buf[0]) {
2826         av_frame_move_ref(data, s->output_frame);
2827         *got_output = 1;
2828     }
2829
2830     return avpkt->size;
2831 }
2832
2833 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2834 {
2835     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2836     if (ret < 0)
2837         return ret;
2838
2839     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2840     if (!dst->tab_mvf_buf)
2841         goto fail;
2842     dst->tab_mvf = src->tab_mvf;
2843
2844     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2845     if (!dst->rpl_tab_buf)
2846         goto fail;
2847     dst->rpl_tab = src->rpl_tab;
2848
2849     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2850     if (!dst->rpl_buf)
2851         goto fail;
2852
2853     dst->poc        = src->poc;
2854     dst->ctb_count  = src->ctb_count;
2855     dst->window     = src->window;
2856     dst->flags      = src->flags;
2857     dst->sequence   = src->sequence;
2858
2859     if (src->hwaccel_picture_private) {
2860         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2861         if (!dst->hwaccel_priv_buf)
2862             goto fail;
2863         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
2864     }
2865
2866     return 0;
2867 fail:
2868     ff_hevc_unref_frame(s, dst, ~0);
2869     return AVERROR(ENOMEM);
2870 }
2871
2872 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2873 {
2874     HEVCContext       *s = avctx->priv_data;
2875     int i;
2876
2877     pic_arrays_free(s);
2878
2879     av_freep(&s->md5_ctx);
2880
2881     av_frame_free(&s->tmp_frame);
2882     av_frame_free(&s->output_frame);
2883
2884     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2885         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2886         av_frame_free(&s->DPB[i].frame);
2887     }
2888
2889     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
2890         av_buffer_unref(&s->ps.vps_list[i]);
2891     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
2892         av_buffer_unref(&s->ps.sps_list[i]);
2893     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
2894         av_buffer_unref(&s->ps.pps_list[i]);
2895
2896     ff_h2645_packet_uninit(&s->pkt);
2897
2898     return 0;
2899 }
2900
2901 static av_cold int hevc_init_context(AVCodecContext *avctx)
2902 {
2903     HEVCContext *s = avctx->priv_data;
2904     int i;
2905
2906     s->avctx = avctx;
2907
2908     s->tmp_frame = av_frame_alloc();
2909     if (!s->tmp_frame)
2910         goto fail;
2911
2912     s->output_frame = av_frame_alloc();
2913     if (!s->output_frame)
2914         goto fail;
2915
2916     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2917         s->DPB[i].frame = av_frame_alloc();
2918         if (!s->DPB[i].frame)
2919             goto fail;
2920         s->DPB[i].tf.f = s->DPB[i].frame;
2921     }
2922
2923     s->max_ra = INT_MAX;
2924
2925     s->md5_ctx = av_md5_alloc();
2926     if (!s->md5_ctx)
2927         goto fail;
2928
2929     ff_bswapdsp_init(&s->bdsp);
2930
2931     s->context_initialized = 1;
2932
2933     return 0;
2934
2935 fail:
2936     hevc_decode_free(avctx);
2937     return AVERROR(ENOMEM);
2938 }
2939
2940 static int hevc_update_thread_context(AVCodecContext *dst,
2941                                       const AVCodecContext *src)
2942 {
2943     HEVCContext *s  = dst->priv_data;
2944     HEVCContext *s0 = src->priv_data;
2945     int i, ret;
2946
2947     if (!s->context_initialized) {
2948         ret = hevc_init_context(dst);
2949         if (ret < 0)
2950             return ret;
2951     }
2952
2953     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2954         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2955         if (s0->DPB[i].frame->buf[0]) {
2956             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
2957             if (ret < 0)
2958                 return ret;
2959         }
2960     }
2961
2962     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
2963         av_buffer_unref(&s->ps.vps_list[i]);
2964         if (s0->ps.vps_list[i]) {
2965             s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
2966             if (!s->ps.vps_list[i])
2967                 return AVERROR(ENOMEM);
2968         }
2969     }
2970
2971     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2972         av_buffer_unref(&s->ps.sps_list[i]);
2973         if (s0->ps.sps_list[i]) {
2974             s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
2975             if (!s->ps.sps_list[i])
2976                 return AVERROR(ENOMEM);
2977         }
2978     }
2979
2980     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
2981         av_buffer_unref(&s->ps.pps_list[i]);
2982         if (s0->ps.pps_list[i]) {
2983             s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
2984             if (!s->ps.pps_list[i])
2985                 return AVERROR(ENOMEM);
2986         }
2987     }
2988
2989     if (s->ps.sps != s0->ps.sps)
2990         ret = set_sps(s, s0->ps.sps);
2991
2992     s->seq_decode = s0->seq_decode;
2993     s->seq_output = s0->seq_output;
2994     s->pocTid0    = s0->pocTid0;
2995     s->max_ra     = s0->max_ra;
2996
2997     s->is_nalff        = s0->is_nalff;
2998     s->nal_length_size = s0->nal_length_size;
2999
3000     if (s0->eos) {
3001         s->seq_decode = (s->seq_decode + 1) & 0xff;
3002         s->max_ra = INT_MAX;
3003     }
3004
3005     return 0;
3006 }
3007
3008 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3009 {
3010     HEVCContext *s = avctx->priv_data;
3011     int ret;
3012
3013     avctx->internal->allocate_progress = 1;
3014
3015     ret = hevc_init_context(avctx);
3016     if (ret < 0)
3017         return ret;
3018
3019     if (avctx->extradata_size > 0 && avctx->extradata) {
3020         ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size);
3021         if (ret < 0) {
3022             hevc_decode_free(avctx);
3023             return ret;
3024         }
3025     }
3026
3027     return 0;
3028 }
3029
3030 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3031 {
3032     HEVCContext *s = avctx->priv_data;
3033     int ret;
3034
3035     memset(s, 0, sizeof(*s));
3036
3037     ret = hevc_init_context(avctx);
3038     if (ret < 0)
3039         return ret;
3040
3041     return 0;
3042 }
3043
3044 static void hevc_decode_flush(AVCodecContext *avctx)
3045 {
3046     HEVCContext *s = avctx->priv_data;
3047     ff_hevc_flush_dpb(s);
3048     s->max_ra = INT_MAX;
3049 }
3050
3051 #define OFFSET(x) offsetof(HEVCContext, x)
3052 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3053
3054 static const AVOption options[] = {
3055     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3056         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3057     { NULL },
3058 };
3059
3060 static const AVClass hevc_decoder_class = {
3061     .class_name = "HEVC decoder",
3062     .item_name  = av_default_item_name,
3063     .option     = options,
3064     .version    = LIBAVUTIL_VERSION_INT,
3065 };
3066
3067 AVCodec ff_hevc_decoder = {
3068     .name                  = "hevc",
3069     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3070     .type                  = AVMEDIA_TYPE_VIDEO,
3071     .id                    = AV_CODEC_ID_HEVC,
3072     .priv_data_size        = sizeof(HEVCContext),
3073     .priv_class            = &hevc_decoder_class,
3074     .init                  = hevc_decode_init,
3075     .close                 = hevc_decode_free,
3076     .decode                = hevc_decode_frame,
3077     .flush                 = hevc_decode_flush,
3078     .update_thread_context = hevc_update_thread_context,
3079     .init_thread_copy      = hevc_init_thread_copy,
3080     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3081                              AV_CODEC_CAP_FRAME_THREADS,
3082     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3083 };