]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevcdec.c
libopusdec: fix out-of-bounds read
[ffmpeg] / libavcodec / hevcdec.c
1 /*
2  * HEVC video decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/md5.h"
31 #include "libavutil/opt.h"
32 #include "libavutil/pixdesc.h"
33 #include "libavutil/stereo3d.h"
34
35 #include "bswapdsp.h"
36 #include "bytestream.h"
37 #include "cabac_functions.h"
38 #include "golomb.h"
39 #include "hevc.h"
40 #include "hevc_data.h"
41 #include "hevcdec.h"
42 #include "profiles.h"
43
/*
 * Per-index constants used by the quarter-pel (qpel) interpolation code.
 * Entry 0 is zero; for entries 1..3 the values are 3 "before", 4 "after"
 * and 7 in total (before + after).
 * NOTE(review): presumably indexed by the fractional part of the motion
 * vector -- confirm against the users of these tables.
 */
const uint8_t ff_hevc_qpel_extra_before[4] = {
    0, 3, 3, 3,
};
const uint8_t ff_hevc_qpel_extra_after[4] = {
    0, 4, 4, 4,
};
const uint8_t ff_hevc_qpel_extra[4] = {
    0, 7, 7, 7,
};
47
/* Trivial scan of a single position. */
static const uint8_t scan_1x1[1] = { 0 };

/*
 * Horizontal (row by row) scans.  The _x/_y tables give the coordinates of
 * the n-th scanned position; the _inv table is indexed [y][x] and gives the
 * scan index of that position.
 */
static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };

static const uint8_t horiz_scan4x4_x[16] = {
    0, 1, 2, 3,    0, 1, 2, 3,
    0, 1, 2, 3,    0, 1, 2, 3,
};

static const uint8_t horiz_scan4x4_y[16] = {
    0, 0, 0, 0,    1, 1, 1, 1,
    2, 2, 2, 2,    3, 3, 3, 3,
};

/* The 8x8 block is covered as four 4x4 sub-blocks (top-left, top-right,
 * bottom-left, bottom-right), each one scanned row by row. */
static const uint8_t horiz_scan8x8_inv[8][8] = {
    {  0,  1,  2,  3, 16, 17, 18, 19 },
    {  4,  5,  6,  7, 20, 21, 22, 23 },
    {  8,  9, 10, 11, 24, 25, 26, 27 },
    { 12, 13, 14, 15, 28, 29, 30, 31 },
    { 32, 33, 34, 35, 48, 49, 50, 51 },
    { 36, 37, 38, 39, 52, 53, 54, 55 },
    { 40, 41, 42, 43, 56, 57, 58, 59 },
    { 44, 45, 46, 47, 60, 61, 62, 63 },
};

/*
 * Diagonal scans.  Same conventions as above: _x/_y hold the coordinates of
 * the n-th scanned position, _inv[y][x] holds the scan index of (x, y).
 */
static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };

static const uint8_t diag_scan2x2_inv[2][2] = {
    { 0, 2 },
    { 1, 3 },
};

static const uint8_t diag_scan4x4_inv[4][4] = {
    { 0,  2,  5,  9 },
    { 1,  4,  8, 12 },
    { 3,  7, 11, 14 },
    { 6, 10, 13, 15 },
};

static const uint8_t diag_scan8x8_inv[8][8] = {
    {  0,  2,  5,  9, 14, 20, 27, 35 },
    {  1,  4,  8, 13, 19, 26, 34, 42 },
    {  3,  7, 12, 18, 25, 33, 41, 48 },
    {  6, 11, 17, 24, 32, 40, 47, 53 },
    { 10, 16, 23, 31, 39, 46, 52, 57 },
    { 15, 22, 30, 38, 45, 51, 56, 60 },
    { 21, 29, 37, 44, 50, 55, 59, 62 },
    { 28, 36, 43, 49, 54, 58, 61, 63 },
};
105
106 /**
107  * NOTE: Each function hls_foo corresponds to the function foo in the
108  * specification (HLS stands for High Level Syntax).
109  */
110
111 /**
112  * Section 5.7
113  */
114
115 /* free everything allocated  by pic_arrays_init() */
116 static void pic_arrays_free(HEVCContext *s)
117 {
118     av_freep(&s->sao);
119     av_freep(&s->deblock);
120
121     av_freep(&s->skip_flag);
122     av_freep(&s->tab_ct_depth);
123
124     av_freep(&s->tab_ipm);
125     av_freep(&s->cbf_luma);
126     av_freep(&s->is_pcm);
127
128     av_freep(&s->qp_y_tab);
129     av_freep(&s->tab_slice_address);
130     av_freep(&s->filter_slice_edges);
131
132     av_freep(&s->horizontal_bs);
133     av_freep(&s->vertical_bs);
134
135     av_buffer_pool_uninit(&s->tab_mvf_pool);
136     av_buffer_pool_uninit(&s->rpl_tab_pool);
137 }
138
139 /* allocate arrays that depend on frame dimensions */
140 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
141 {
142     int log2_min_cb_size = sps->log2_min_cb_size;
143     int width            = sps->width;
144     int height           = sps->height;
145     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
146                            ((height >> log2_min_cb_size) + 1);
147     int ctb_count        = sps->ctb_width * sps->ctb_height;
148     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
149
150     s->bs_width  = width  >> 3;
151     s->bs_height = height >> 3;
152
153     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
154     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
155     if (!s->sao || !s->deblock)
156         goto fail;
157
158     s->skip_flag    = av_malloc(pic_size_in_ctb);
159     s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
160     if (!s->skip_flag || !s->tab_ct_depth)
161         goto fail;
162
163     s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
164     s->tab_ipm  = av_mallocz(min_pu_size);
165     s->is_pcm   = av_malloc(min_pu_size);
166     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
167         goto fail;
168
169     s->filter_slice_edges = av_malloc(ctb_count);
170     s->tab_slice_address  = av_malloc(pic_size_in_ctb *
171                                       sizeof(*s->tab_slice_address));
172     s->qp_y_tab           = av_malloc(pic_size_in_ctb *
173                                       sizeof(*s->qp_y_tab));
174     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
175         goto fail;
176
177     s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
178     s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
179     if (!s->horizontal_bs || !s->vertical_bs)
180         goto fail;
181
182     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
183                                           av_buffer_alloc);
184     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
185                                           av_buffer_allocz);
186     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
187         goto fail;
188
189     return 0;
190
191 fail:
192     pic_arrays_free(s);
193     return AVERROR(ENOMEM);
194 }
195
196 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
197 {
198     int i = 0;
199     int j = 0;
200     uint8_t luma_weight_l0_flag[16];
201     uint8_t chroma_weight_l0_flag[16];
202     uint8_t luma_weight_l1_flag[16];
203     uint8_t chroma_weight_l1_flag[16];
204
205     s->sh.luma_log2_weight_denom = av_clip(get_ue_golomb_long(gb), 0, 7);
206     if (s->ps.sps->chroma_format_idc != 0) {
207         int delta = get_se_golomb(gb);
208         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
209     }
210
211     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
212         luma_weight_l0_flag[i] = get_bits1(gb);
213         if (!luma_weight_l0_flag[i]) {
214             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
215             s->sh.luma_offset_l0[i] = 0;
216         }
217     }
218     if (s->ps.sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
219         for (i = 0; i < s->sh.nb_refs[L0]; i++)
220             chroma_weight_l0_flag[i] = get_bits1(gb);
221     } else {
222         for (i = 0; i < s->sh.nb_refs[L0]; i++)
223             chroma_weight_l0_flag[i] = 0;
224     }
225     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
226         if (luma_weight_l0_flag[i]) {
227             int delta_luma_weight_l0 = get_se_golomb(gb);
228             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
229             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
230         }
231         if (chroma_weight_l0_flag[i]) {
232             for (j = 0; j < 2; j++) {
233                 int delta_chroma_weight_l0 = get_se_golomb(gb);
234                 int delta_chroma_offset_l0 = get_se_golomb(gb);
235                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
236                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
237                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
238             }
239         } else {
240             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
241             s->sh.chroma_offset_l0[i][0] = 0;
242             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
243             s->sh.chroma_offset_l0[i][1] = 0;
244         }
245     }
246     if (s->sh.slice_type == HEVC_SLICE_B) {
247         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
248             luma_weight_l1_flag[i] = get_bits1(gb);
249             if (!luma_weight_l1_flag[i]) {
250                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
251                 s->sh.luma_offset_l1[i] = 0;
252             }
253         }
254         if (s->ps.sps->chroma_format_idc != 0) {
255             for (i = 0; i < s->sh.nb_refs[L1]; i++)
256                 chroma_weight_l1_flag[i] = get_bits1(gb);
257         } else {
258             for (i = 0; i < s->sh.nb_refs[L1]; i++)
259                 chroma_weight_l1_flag[i] = 0;
260         }
261         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
262             if (luma_weight_l1_flag[i]) {
263                 int delta_luma_weight_l1 = get_se_golomb(gb);
264                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
265                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
266             }
267             if (chroma_weight_l1_flag[i]) {
268                 for (j = 0; j < 2; j++) {
269                     int delta_chroma_weight_l1 = get_se_golomb(gb);
270                     int delta_chroma_offset_l1 = get_se_golomb(gb);
271                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
272                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
273                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
274                 }
275             } else {
276                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
277                 s->sh.chroma_offset_l1[i][0] = 0;
278                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
279                 s->sh.chroma_offset_l1[i][1] = 0;
280             }
281         }
282     }
283 }
284
285 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
286 {
287     const HEVCSPS *sps = s->ps.sps;
288     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
289     int prev_delta_msb = 0;
290     unsigned int nb_sps = 0, nb_sh;
291     int i;
292
293     rps->nb_refs = 0;
294     if (!sps->long_term_ref_pics_present_flag)
295         return 0;
296
297     if (sps->num_long_term_ref_pics_sps > 0)
298         nb_sps = get_ue_golomb_long(gb);
299     nb_sh = get_ue_golomb_long(gb);
300
301     if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
302         return AVERROR_INVALIDDATA;
303
304     rps->nb_refs = nb_sh + nb_sps;
305
306     for (i = 0; i < rps->nb_refs; i++) {
307         uint8_t delta_poc_msb_present;
308
309         if (i < nb_sps) {
310             uint8_t lt_idx_sps = 0;
311
312             if (sps->num_long_term_ref_pics_sps > 1)
313                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
314
315             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
316             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
317         } else {
318             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
319             rps->used[i] = get_bits1(gb);
320         }
321
322         delta_poc_msb_present = get_bits1(gb);
323         if (delta_poc_msb_present) {
324             int delta = get_ue_golomb_long(gb);
325
326             if (i && i != nb_sps)
327                 delta += prev_delta_msb;
328
329             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
330             prev_delta_msb = delta;
331         }
332     }
333
334     return 0;
335 }
336
337 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
338                                  const HEVCSPS *sps)
339 {
340     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
341     unsigned int num = 0, den = 0;
342
343     avctx->pix_fmt             = sps->pix_fmt;
344     avctx->coded_width         = sps->width;
345     avctx->coded_height        = sps->height;
346     avctx->width               = sps->output_width;
347     avctx->height              = sps->output_height;
348     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
349     avctx->profile             = sps->ptl.general_ptl.profile_idc;
350     avctx->level               = sps->ptl.general_ptl.level_idc;
351
352     ff_set_sar(avctx, sps->vui.sar);
353
354     if (sps->vui.video_signal_type_present_flag)
355         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
356                                                             : AVCOL_RANGE_MPEG;
357     else
358         avctx->color_range = AVCOL_RANGE_MPEG;
359
360     if (sps->vui.colour_description_present_flag) {
361         avctx->color_primaries = sps->vui.colour_primaries;
362         avctx->color_trc       = sps->vui.transfer_characteristic;
363         avctx->colorspace      = sps->vui.matrix_coeffs;
364     } else {
365         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
366         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
367         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
368     }
369
370     if (vps->vps_timing_info_present_flag) {
371         num = vps->vps_num_units_in_tick;
372         den = vps->vps_time_scale;
373     } else if (sps->vui.vui_timing_info_present_flag) {
374         num = sps->vui.vui_num_units_in_tick;
375         den = sps->vui.vui_time_scale;
376     }
377
378     if (num != 0 && den != 0)
379         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
380                   num, den, 1 << 30);
381 }
382
383 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
384 {
385     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
386     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
387     int ret;
388
389     pic_arrays_free(s);
390     s->ps.sps = NULL;
391     s->ps.vps = NULL;
392
393     if (!sps)
394         return 0;
395
396     ret = pic_arrays_init(s, sps);
397     if (ret < 0)
398         goto fail;
399
400     export_stream_params(s->avctx, &s->ps, sps);
401
402     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P ||
403         sps->pix_fmt == AV_PIX_FMT_YUV420P10) {
404 #if CONFIG_HEVC_DXVA2_HWACCEL
405         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
406 #endif
407     }
408     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
409 #if CONFIG_HEVC_D3D11VA_HWACCEL
410         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
411 #endif
412 #if CONFIG_HEVC_VDPAU_HWACCEL
413         *fmt++ = AV_PIX_FMT_VDPAU;
414 #endif
415     }
416
417     *fmt++ = sps->pix_fmt;
418     *fmt = AV_PIX_FMT_NONE;
419
420     ret = ff_get_format(s->avctx, pix_fmts);
421     if (ret < 0)
422         goto fail;
423     s->avctx->pix_fmt = ret;
424
425     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
426     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
427     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
428
429     if (sps->sao_enabled && !s->avctx->hwaccel) {
430         av_frame_unref(s->tmp_frame);
431         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
432         if (ret < 0)
433             goto fail;
434         s->frame = s->tmp_frame;
435     }
436
437     s->ps.sps = sps;
438     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
439
440     return 0;
441
442 fail:
443     pic_arrays_free(s);
444     s->ps.sps = NULL;
445     return ret;
446 }
447
448 static int hls_slice_header(HEVCContext *s)
449 {
450     GetBitContext *gb = &s->HEVClc.gb;
451     SliceHeader *sh   = &s->sh;
452     int i, ret;
453
454     // Coded parameters
455     sh->first_slice_in_pic_flag = get_bits1(gb);
456     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
457         s->seq_decode = (s->seq_decode + 1) & 0xff;
458         s->max_ra     = INT_MAX;
459         if (IS_IDR(s))
460             ff_hevc_clear_refs(s);
461     }
462     if (IS_IRAP(s))
463         sh->no_output_of_prior_pics_flag = get_bits1(gb);
464
465     sh->pps_id = get_ue_golomb_long(gb);
466     if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
467         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
468         return AVERROR_INVALIDDATA;
469     }
470     if (!sh->first_slice_in_pic_flag &&
471         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
472         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
473         return AVERROR_INVALIDDATA;
474     }
475     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
476
477     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
478         s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
479
480         ff_hevc_clear_refs(s);
481         ret = set_sps(s, s->ps.sps);
482         if (ret < 0)
483             return ret;
484
485         s->seq_decode = (s->seq_decode + 1) & 0xff;
486         s->max_ra     = INT_MAX;
487     }
488
489     sh->dependent_slice_segment_flag = 0;
490     if (!sh->first_slice_in_pic_flag) {
491         int slice_address_length;
492
493         if (s->ps.pps->dependent_slice_segments_enabled_flag)
494             sh->dependent_slice_segment_flag = get_bits1(gb);
495
496         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
497                                             s->ps.sps->ctb_height);
498         sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
499         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
500             av_log(s->avctx, AV_LOG_ERROR,
501                    "Invalid slice segment address: %u.\n",
502                    sh->slice_segment_addr);
503             return AVERROR_INVALIDDATA;
504         }
505
506         if (!sh->dependent_slice_segment_flag) {
507             sh->slice_addr = sh->slice_segment_addr;
508             s->slice_idx++;
509         }
510     } else {
511         sh->slice_segment_addr = sh->slice_addr = 0;
512         s->slice_idx           = 0;
513         s->slice_initialized   = 0;
514     }
515
516     if (!sh->dependent_slice_segment_flag) {
517         s->slice_initialized = 0;
518
519         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
520             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
521
522         sh->slice_type = get_ue_golomb_long(gb);
523         if (!(sh->slice_type == HEVC_SLICE_I ||
524               sh->slice_type == HEVC_SLICE_P ||
525               sh->slice_type == HEVC_SLICE_B)) {
526             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
527                    sh->slice_type);
528             return AVERROR_INVALIDDATA;
529         }
530         if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I) {
531             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
532             return AVERROR_INVALIDDATA;
533         }
534
535         // when flag is not present, picture is inferred to be output
536         sh->pic_output_flag = 1;
537         if (s->ps.pps->output_flag_present_flag)
538             sh->pic_output_flag = get_bits1(gb);
539
540         if (s->ps.sps->separate_colour_plane_flag)
541             sh->colour_plane_id = get_bits(gb, 2);
542
543         if (!IS_IDR(s)) {
544             int poc, pos;
545
546             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
547             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
548             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
549                 av_log(s->avctx, AV_LOG_WARNING,
550                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
551                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
552                     return AVERROR_INVALIDDATA;
553                 poc = s->poc;
554             }
555             s->poc = poc;
556
557             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
558             pos = get_bits_left(gb);
559             if (!sh->short_term_ref_pic_set_sps_flag) {
560                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
561                 if (ret < 0)
562                     return ret;
563
564                 sh->short_term_rps = &sh->slice_rps;
565             } else {
566                 int numbits, rps_idx;
567
568                 if (!s->ps.sps->nb_st_rps) {
569                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
570                     return AVERROR_INVALIDDATA;
571                 }
572
573                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
574                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
575                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
576             }
577             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
578
579             pos = get_bits_left(gb);
580             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
581             if (ret < 0) {
582                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
583                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
584                     return AVERROR_INVALIDDATA;
585             }
586             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
587
588             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
589                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
590             else
591                 sh->slice_temporal_mvp_enabled_flag = 0;
592         } else {
593             s->sh.short_term_rps = NULL;
594             s->poc               = 0;
595         }
596
597         /* 8.3.1 */
598         if (s->temporal_id == 0 &&
599             s->nal_unit_type != HEVC_NAL_TRAIL_N &&
600             s->nal_unit_type != HEVC_NAL_TSA_N   &&
601             s->nal_unit_type != HEVC_NAL_STSA_N  &&
602             s->nal_unit_type != HEVC_NAL_RADL_N  &&
603             s->nal_unit_type != HEVC_NAL_RADL_R  &&
604             s->nal_unit_type != HEVC_NAL_RASL_N  &&
605             s->nal_unit_type != HEVC_NAL_RASL_R)
606             s->pocTid0 = s->poc;
607
608         if (s->ps.sps->sao_enabled) {
609             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
610             sh->slice_sample_adaptive_offset_flag[1] =
611             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
612         } else {
613             sh->slice_sample_adaptive_offset_flag[0] = 0;
614             sh->slice_sample_adaptive_offset_flag[1] = 0;
615             sh->slice_sample_adaptive_offset_flag[2] = 0;
616         }
617
618         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
619         if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
620             int nb_refs;
621
622             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
623             if (sh->slice_type == HEVC_SLICE_B)
624                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
625
626             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
627                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
628                 if (sh->slice_type == HEVC_SLICE_B)
629                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
630             }
631             if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
632                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
633                        sh->nb_refs[L0], sh->nb_refs[L1]);
634                 return AVERROR_INVALIDDATA;
635             }
636
637             sh->rpl_modification_flag[0] = 0;
638             sh->rpl_modification_flag[1] = 0;
639             nb_refs = ff_hevc_frame_nb_refs(s);
640             if (!nb_refs) {
641                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
642                 return AVERROR_INVALIDDATA;
643             }
644
645             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
646                 sh->rpl_modification_flag[0] = get_bits1(gb);
647                 if (sh->rpl_modification_flag[0]) {
648                     for (i = 0; i < sh->nb_refs[L0]; i++)
649                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
650                 }
651
652                 if (sh->slice_type == HEVC_SLICE_B) {
653                     sh->rpl_modification_flag[1] = get_bits1(gb);
654                     if (sh->rpl_modification_flag[1] == 1)
655                         for (i = 0; i < sh->nb_refs[L1]; i++)
656                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
657                 }
658             }
659
660             if (sh->slice_type == HEVC_SLICE_B)
661                 sh->mvd_l1_zero_flag = get_bits1(gb);
662
663             if (s->ps.pps->cabac_init_present_flag)
664                 sh->cabac_init_flag = get_bits1(gb);
665             else
666                 sh->cabac_init_flag = 0;
667
668             sh->collocated_ref_idx = 0;
669             if (sh->slice_temporal_mvp_enabled_flag) {
670                 sh->collocated_list = L0;
671                 if (sh->slice_type == HEVC_SLICE_B)
672                     sh->collocated_list = !get_bits1(gb);
673
674                 if (sh->nb_refs[sh->collocated_list] > 1) {
675                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
676                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
677                         av_log(s->avctx, AV_LOG_ERROR,
678                                "Invalid collocated_ref_idx: %d.\n",
679                                sh->collocated_ref_idx);
680                         return AVERROR_INVALIDDATA;
681                     }
682                 }
683             }
684
685             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == HEVC_SLICE_P) ||
686                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) {
687                 pred_weight_table(s, gb);
688             }
689
690             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
691             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
692                 av_log(s->avctx, AV_LOG_ERROR,
693                        "Invalid number of merging MVP candidates: %d.\n",
694                        sh->max_num_merge_cand);
695                 return AVERROR_INVALIDDATA;
696             }
697         }
698
699         sh->slice_qp_delta = get_se_golomb(gb);
700
701         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
702             sh->slice_cb_qp_offset = get_se_golomb(gb);
703             sh->slice_cr_qp_offset = get_se_golomb(gb);
704         } else {
705             sh->slice_cb_qp_offset = 0;
706             sh->slice_cr_qp_offset = 0;
707         }
708
709         if (s->ps.pps->deblocking_filter_control_present_flag) {
710             int deblocking_filter_override_flag = 0;
711
712             if (s->ps.pps->deblocking_filter_override_enabled_flag)
713                 deblocking_filter_override_flag = get_bits1(gb);
714
715             if (deblocking_filter_override_flag) {
716                 sh->disable_deblocking_filter_flag = get_bits1(gb);
717                 if (!sh->disable_deblocking_filter_flag) {
718                     sh->beta_offset = get_se_golomb(gb) * 2;
719                     sh->tc_offset   = get_se_golomb(gb) * 2;
720                 }
721             } else {
722                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
723                 sh->beta_offset                    = s->ps.pps->beta_offset;
724                 sh->tc_offset                      = s->ps.pps->tc_offset;
725             }
726         } else {
727             sh->disable_deblocking_filter_flag = 0;
728             sh->beta_offset                    = 0;
729             sh->tc_offset                      = 0;
730         }
731
732         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
733             (sh->slice_sample_adaptive_offset_flag[0] ||
734              sh->slice_sample_adaptive_offset_flag[1] ||
735              !sh->disable_deblocking_filter_flag)) {
736             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
737         } else {
738             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
739         }
740     } else if (!s->slice_initialized) {
741         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
742         return AVERROR_INVALIDDATA;
743     }
744
745     sh->num_entry_point_offsets = 0;
746     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
747         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
748         if (sh->num_entry_point_offsets > 0) {
749             int offset_len = get_ue_golomb_long(gb) + 1;
750
751             for (i = 0; i < sh->num_entry_point_offsets; i++)
752                 skip_bits(gb, offset_len);
753         }
754     }
755
756     if (s->ps.pps->slice_header_extension_present_flag) {
757         unsigned int length = get_ue_golomb_long(gb);
758         for (i = 0; i < length; i++)
759             skip_bits(gb, 8);  // slice_header_extension_data_byte
760     }
761
762     // Inferred parameters
763     sh->slice_qp = 26 + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
764     if (sh->slice_qp > 51 ||
765         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
766         av_log(s->avctx, AV_LOG_ERROR,
767                "The slice_qp %d is outside the valid range "
768                "[%d, 51].\n",
769                sh->slice_qp,
770                -s->ps.sps->qp_bd_offset);
771         return AVERROR_INVALIDDATA;
772     }
773
774     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
775
776     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
777         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
778         return AVERROR_INVALIDDATA;
779     }
780
781     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
782
783     if (!s->ps.pps->cu_qp_delta_enabled_flag)
784         s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->ps.sps->qp_bd_offset,
785                                 52 + s->ps.sps->qp_bd_offset) - s->ps.sps->qp_bd_offset;
786
787     s->slice_initialized = 1;
788
789     return 0;
790 }
791
/* Element (x, y) of a per-CTB table laid out row by row; expects a variable
 * named s with s->ps.sps->ctb_width in scope. */
#define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])

/*
 * Set sao->elem for the current CTB.  Expects sao, s, rx, ry,
 * sao_merge_left_flag and sao_merge_up_flag in scope.
 *
 *  - merge-left set: copy the field from the CTB at (rx - 1, ry);
 *  - otherwise merge-up set: copy the field from the CTB at (rx, ry - 1);
 *  - neither set: assign value.
 *
 * value is evaluated only in the last case, so an expression with side
 * effects passed as value runs only when no merge applies.
 */
#define SET_SAO(elem, value)                            \
do {                                                    \
    if (sao_merge_left_flag)                            \
        sao->elem = CTB(s->sao, rx-1, ry).elem;         \
    else if (sao_merge_up_flag)                         \
        sao->elem = CTB(s->sao, rx, ry-1).elem;         \
    else                                                \
        sao->elem = value;                              \
} while (0)
805
806 static void hls_sao_param(HEVCContext *s, int rx, int ry)
807 {
808     HEVCLocalContext *lc    = &s->HEVClc;
809     int sao_merge_left_flag = 0;
810     int sao_merge_up_flag   = 0;
811     int shift               = s->ps.sps->bit_depth - FFMIN(s->ps.sps->bit_depth, 10);
812     SAOParams *sao          = &CTB(s->sao, rx, ry);
813     int c_idx, i;
814
815     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
816         s->sh.slice_sample_adaptive_offset_flag[1]) {
817         if (rx > 0) {
818             if (lc->ctb_left_flag)
819                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
820         }
821         if (ry > 0 && !sao_merge_left_flag) {
822             if (lc->ctb_up_flag)
823                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
824         }
825     }
826
827     for (c_idx = 0; c_idx < 3; c_idx++) {
828         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
829             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
830             continue;
831         }
832
833         if (c_idx == 2) {
834             sao->type_idx[2] = sao->type_idx[1];
835             sao->eo_class[2] = sao->eo_class[1];
836         } else {
837             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
838         }
839
840         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
841             continue;
842
843         for (i = 0; i < 4; i++)
844             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
845
846         if (sao->type_idx[c_idx] == SAO_BAND) {
847             for (i = 0; i < 4; i++) {
848                 if (sao->offset_abs[c_idx][i]) {
849                     SET_SAO(offset_sign[c_idx][i],
850                             ff_hevc_sao_offset_sign_decode(s));
851                 } else {
852                     sao->offset_sign[c_idx][i] = 0;
853                 }
854             }
855             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
856         } else if (c_idx != 2) {
857             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
858         }
859
860         // Inferred parameters
861         sao->offset_val[c_idx][0] = 0;
862         for (i = 0; i < 4; i++) {
863             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
864             if (sao->type_idx[c_idx] == SAO_EDGE) {
865                 if (i > 1)
866                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
867             } else if (sao->offset_sign[c_idx][i]) {
868                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
869             }
870         }
871     }
872 }
873
874 #undef SET_SAO
875 #undef CTB
876
877 static void hls_residual_coding(HEVCContext *s, int x0, int y0,
878                                 int log2_trafo_size, enum ScanType scan_idx,
879                                 int c_idx)
880 {
881 #define GET_COORD(offset, n)                                    \
882     do {                                                        \
883         x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
884         y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
885     } while (0)
886     HEVCLocalContext *lc    = &s->HEVClc;
887     int transform_skip_flag = 0;
888
889     int last_significant_coeff_x, last_significant_coeff_y;
890     int last_scan_pos;
891     int n_end;
892     int num_coeff    = 0;
893     int greater1_ctx = 1;
894
895     int num_last_subset;
896     int x_cg_last_sig, y_cg_last_sig;
897
898     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
899
900     ptrdiff_t stride = s->frame->linesize[c_idx];
901     int hshift       = s->ps.sps->hshift[c_idx];
902     int vshift       = s->ps.sps->vshift[c_idx];
903     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
904                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
905     DECLARE_ALIGNED(32, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
906     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
907
908     int trafo_size = 1 << log2_trafo_size;
909     int i, qp, shift, add, scale, scale_m;
910     static const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
911     const uint8_t *scale_matrix;
912     uint8_t dc_scale;
913
914     // Derive QP for dequant
915     if (!lc->cu.cu_transquant_bypass_flag) {
916         static const int qp_c[] = {
917             29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
918         };
919
920         static const uint8_t rem6[51 + 2 * 6 + 1] = {
921             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
922             3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
923             0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
924         };
925
926         static const uint8_t div6[51 + 2 * 6 + 1] = {
927             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
928             3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
929             7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
930         };
931         int qp_y = lc->qp_y;
932
933         if (c_idx == 0) {
934             qp = qp_y + s->ps.sps->qp_bd_offset;
935         } else {
936             int qp_i, offset;
937
938             if (c_idx == 1)
939                 offset = s->ps.pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
940             else
941                 offset = s->ps.pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
942
943             qp_i = av_clip(qp_y + offset, -s->ps.sps->qp_bd_offset, 57);
944             if (qp_i < 30)
945                 qp = qp_i;
946             else if (qp_i > 43)
947                 qp = qp_i - 6;
948             else
949                 qp = qp_c[qp_i - 30];
950
951             qp += s->ps.sps->qp_bd_offset;
952         }
953
954         shift    = s->ps.sps->bit_depth + log2_trafo_size - 5;
955         add      = 1 << (shift - 1);
956         scale    = level_scale[rem6[qp]] << (div6[qp]);
957         scale_m  = 16; // default when no custom scaling lists.
958         dc_scale = 16;
959
960         if (s->ps.sps->scaling_list_enable_flag) {
961             const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ?
962                                     &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
963             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
964
965             if (log2_trafo_size != 5)
966                 matrix_id = 3 * matrix_id + c_idx;
967
968             scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
969             if (log2_trafo_size >= 4)
970                 dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
971         }
972     }
973
974     if (s->ps.pps->transform_skip_enabled_flag &&
975         !lc->cu.cu_transquant_bypass_flag   &&
976         log2_trafo_size == 2) {
977         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
978     }
979
980     last_significant_coeff_x =
981         ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
982     last_significant_coeff_y =
983         ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
984
985     if (last_significant_coeff_x > 3) {
986         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
987         last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
988                                    (2 + (last_significant_coeff_x & 1)) +
989                                    suffix;
990     }
991
992     if (last_significant_coeff_y > 3) {
993         int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
994         last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
995                                    (2 + (last_significant_coeff_y & 1)) +
996                                    suffix;
997     }
998
999     if (scan_idx == SCAN_VERT)
1000         FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
1001
1002     x_cg_last_sig = last_significant_coeff_x >> 2;
1003     y_cg_last_sig = last_significant_coeff_y >> 2;
1004
1005     switch (scan_idx) {
1006     case SCAN_DIAG: {
1007         int last_x_c = last_significant_coeff_x & 3;
1008         int last_y_c = last_significant_coeff_y & 3;
1009
1010         scan_x_off = ff_hevc_diag_scan4x4_x;
1011         scan_y_off = ff_hevc_diag_scan4x4_y;
1012         num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
1013         if (trafo_size == 4) {
1014             scan_x_cg = scan_1x1;
1015             scan_y_cg = scan_1x1;
1016         } else if (trafo_size == 8) {
1017             num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1018             scan_x_cg  = diag_scan2x2_x;
1019             scan_y_cg  = diag_scan2x2_y;
1020         } else if (trafo_size == 16) {
1021             num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1022             scan_x_cg  = ff_hevc_diag_scan4x4_x;
1023             scan_y_cg  = ff_hevc_diag_scan4x4_y;
1024         } else { // trafo_size == 32
1025             num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
1026             scan_x_cg  = ff_hevc_diag_scan8x8_x;
1027             scan_y_cg  = ff_hevc_diag_scan8x8_y;
1028         }
1029         break;
1030     }
1031     case SCAN_HORIZ:
1032         scan_x_cg  = horiz_scan2x2_x;
1033         scan_y_cg  = horiz_scan2x2_y;
1034         scan_x_off = horiz_scan4x4_x;
1035         scan_y_off = horiz_scan4x4_y;
1036         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
1037         break;
1038     default: //SCAN_VERT
1039         scan_x_cg  = horiz_scan2x2_y;
1040         scan_y_cg  = horiz_scan2x2_x;
1041         scan_x_off = horiz_scan4x4_y;
1042         scan_y_off = horiz_scan4x4_x;
1043         num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
1044         break;
1045     }
1046     num_coeff++;
1047     num_last_subset = (num_coeff - 1) >> 4;
1048
1049     for (i = num_last_subset; i >= 0; i--) {
1050         int n, m;
1051         int x_cg, y_cg, x_c, y_c;
1052         int implicit_non_zero_coeff = 0;
1053         int64_t trans_coeff_level;
1054         int prev_sig = 0;
1055         int offset   = i << 4;
1056
1057         uint8_t significant_coeff_flag_idx[16];
1058         uint8_t nb_significant_coeff_flag = 0;
1059
1060         x_cg = scan_x_cg[i];
1061         y_cg = scan_y_cg[i];
1062
1063         if (i < num_last_subset && i > 0) {
1064             int ctx_cg = 0;
1065             if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
1066                 ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
1067             if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
1068                 ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
1069
1070             significant_coeff_group_flag[x_cg][y_cg] =
1071                 ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
1072             implicit_non_zero_coeff = 1;
1073         } else {
1074             significant_coeff_group_flag[x_cg][y_cg] =
1075                 ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
1076                  (x_cg == 0 && y_cg == 0));
1077         }
1078
1079         last_scan_pos = num_coeff - offset - 1;
1080
1081         if (i == num_last_subset) {
1082             n_end                         = last_scan_pos - 1;
1083             significant_coeff_flag_idx[0] = last_scan_pos;
1084             nb_significant_coeff_flag     = 1;
1085         } else {
1086             n_end = 15;
1087         }
1088
1089         if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
1090             prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
1091         if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
1092             prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
1093
1094         for (n = n_end; n >= 0; n--) {
1095             GET_COORD(offset, n);
1096
1097             if (significant_coeff_group_flag[x_cg][y_cg] &&
1098                 (n > 0 || implicit_non_zero_coeff == 0)) {
1099                 if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
1100                                                           log2_trafo_size,
1101                                                           scan_idx,
1102                                                           prev_sig) == 1) {
1103                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1104                     nb_significant_coeff_flag++;
1105                     implicit_non_zero_coeff = 0;
1106                 }
1107             } else {
1108                 int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
1109                 if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
1110                     significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
1111                     nb_significant_coeff_flag++;
1112                 }
1113             }
1114         }
1115
1116         n_end = nb_significant_coeff_flag;
1117
1118         if (n_end) {
1119             int first_nz_pos_in_cg = 16;
1120             int last_nz_pos_in_cg = -1;
1121             int c_rice_param = 0;
1122             int first_greater1_coeff_idx = -1;
1123             uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
1124             uint16_t coeff_sign_flag;
1125             int sum_abs = 0;
1126             int sign_hidden = 0;
1127
1128             // initialize first elem of coeff_bas_level_greater1_flag
1129             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
1130
1131             if (!(i == num_last_subset) && greater1_ctx == 0)
1132                 ctx_set++;
1133             greater1_ctx      = 1;
1134             last_nz_pos_in_cg = significant_coeff_flag_idx[0];
1135
1136             for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
1137                 int n_idx = significant_coeff_flag_idx[m];
1138                 int inc   = (ctx_set << 2) + greater1_ctx;
1139                 coeff_abs_level_greater1_flag[n_idx] =
1140                     ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
1141                 if (coeff_abs_level_greater1_flag[n_idx]) {
1142                     greater1_ctx = 0;
1143                 } else if (greater1_ctx > 0 && greater1_ctx < 3) {
1144                     greater1_ctx++;
1145                 }
1146
1147                 if (coeff_abs_level_greater1_flag[n_idx] &&
1148                     first_greater1_coeff_idx == -1)
1149                     first_greater1_coeff_idx = n_idx;
1150             }
1151             first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
1152             sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
1153                                  !lc->cu.cu_transquant_bypass_flag;
1154
1155             if (first_greater1_coeff_idx != -1) {
1156                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
1157             }
1158             if (!s->ps.pps->sign_data_hiding_flag || !sign_hidden) {
1159                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
1160             } else {
1161                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
1162             }
1163
1164             for (m = 0; m < n_end; m++) {
1165                 n = significant_coeff_flag_idx[m];
1166                 GET_COORD(offset, n);
1167                 trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
1168                 if (trans_coeff_level == ((m < 8) ?
1169                                           ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
1170                     trans_coeff_level += ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
1171                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
1172                         c_rice_param = FFMIN(c_rice_param + 1, 4);
1173                 }
1174                 if (s->ps.pps->sign_data_hiding_flag && sign_hidden) {
1175                     sum_abs += trans_coeff_level;
1176                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
1177                         trans_coeff_level = -trans_coeff_level;
1178                 }
1179                 if (coeff_sign_flag >> 15)
1180                     trans_coeff_level = -trans_coeff_level;
1181                 coeff_sign_flag <<= 1;
1182                 if (!lc->cu.cu_transquant_bypass_flag) {
1183                     if (s->ps.sps->scaling_list_enable_flag) {
1184                         if (y_c || x_c || log2_trafo_size < 4) {
1185                             int pos;
1186                             switch (log2_trafo_size) {
1187                             case 3:  pos = (y_c        << 3) +  x_c;       break;
1188                             case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
1189                             case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
1190                             default: pos = (y_c        << 2) +  x_c;
1191                             }
1192                             scale_m = scale_matrix[pos];
1193                         } else {
1194                             scale_m = dc_scale;
1195                         }
1196                     }
1197                     trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
1198                     if(trans_coeff_level < 0) {
1199                         if((~trans_coeff_level) & 0xFffffffffff8000)
1200                             trans_coeff_level = -32768;
1201                     } else {
1202                         if (trans_coeff_level & 0xffffffffffff8000)
1203                             trans_coeff_level = 32767;
1204                     }
1205                 }
1206                 coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
1207             }
1208         }
1209     }
1210
1211     if (!lc->cu.cu_transquant_bypass_flag) {
1212         if (transform_skip_flag)
1213             s->hevcdsp.dequant(coeffs);
1214         else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
1215                  log2_trafo_size == 2)
1216             s->hevcdsp.transform_4x4_luma(coeffs);
1217         else {
1218             int max_xy = FFMAX(last_significant_coeff_x, last_significant_coeff_y);
1219             if (max_xy == 0)
1220                 s->hevcdsp.idct_dc[log2_trafo_size - 2](coeffs);
1221             else {
1222                 int col_limit = last_significant_coeff_x + last_significant_coeff_y + 4;
1223                 if (max_xy < 4)
1224                     col_limit = FFMIN(4, col_limit);
1225                 else if (max_xy < 8)
1226                     col_limit = FFMIN(8, col_limit);
1227                 else if (max_xy < 12)
1228                     col_limit = FFMIN(24, col_limit);
1229                 s->hevcdsp.idct[log2_trafo_size - 2](coeffs, col_limit);
1230             }
1231         }
1232     }
1233     s->hevcdsp.add_residual[log2_trafo_size - 2](dst, coeffs, stride);
1234 }
1235
1236 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1237                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1238                               int log2_cb_size, int log2_trafo_size,
1239                               int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr)
1240 {
1241     HEVCLocalContext *lc = &s->HEVClc;
1242
1243     if (lc->cu.pred_mode == MODE_INTRA) {
1244         int trafo_size = 1 << log2_trafo_size;
1245         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1246
1247         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1248         if (log2_trafo_size > 2) {
1249             trafo_size = trafo_size << (s->ps.sps->hshift[1] - 1);
1250             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1251             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
1252             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
1253         } else if (blk_idx == 3) {
1254             trafo_size = trafo_size << s->ps.sps->hshift[1];
1255             ff_hevc_set_neighbour_available(s, xBase, yBase,
1256                                             trafo_size, trafo_size);
1257             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1258             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1259         }
1260     }
1261
1262     if (cbf_luma || cbf_cb || cbf_cr) {
1263         int scan_idx   = SCAN_DIAG;
1264         int scan_idx_c = SCAN_DIAG;
1265
1266         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1267             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1268             if (lc->tu.cu_qp_delta != 0)
1269                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1270                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1271             lc->tu.is_cu_qp_delta_coded = 1;
1272
1273             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1274                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1275                 av_log(s->avctx, AV_LOG_ERROR,
1276                        "The cu_qp_delta %d is outside the valid range "
1277                        "[%d, %d].\n",
1278                        lc->tu.cu_qp_delta,
1279                        -(26 + s->ps.sps->qp_bd_offset / 2),
1280                         (25 + s->ps.sps->qp_bd_offset / 2));
1281                 return AVERROR_INVALIDDATA;
1282             }
1283
1284             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
1285         }
1286
1287         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1288             if (lc->tu.cur_intra_pred_mode >= 6 &&
1289                 lc->tu.cur_intra_pred_mode <= 14) {
1290                 scan_idx = SCAN_VERT;
1291             } else if (lc->tu.cur_intra_pred_mode >= 22 &&
1292                        lc->tu.cur_intra_pred_mode <= 30) {
1293                 scan_idx = SCAN_HORIZ;
1294             }
1295
1296             if (lc->pu.intra_pred_mode_c >=  6 &&
1297                 lc->pu.intra_pred_mode_c <= 14) {
1298                 scan_idx_c = SCAN_VERT;
1299             } else if (lc->pu.intra_pred_mode_c >= 22 &&
1300                        lc->pu.intra_pred_mode_c <= 30) {
1301                 scan_idx_c = SCAN_HORIZ;
1302             }
1303         }
1304
1305         if (cbf_luma)
1306             hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1307         if (log2_trafo_size > 2) {
1308             if (cbf_cb)
1309                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
1310             if (cbf_cr)
1311                 hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
1312         } else if (blk_idx == 3) {
1313             if (cbf_cb)
1314                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
1315             if (cbf_cr)
1316                 hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
1317         }
1318     }
1319     return 0;
1320 }
1321
1322 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1323 {
1324     int cb_size          = 1 << log2_cb_size;
1325     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1326
1327     int min_pu_width     = s->ps.sps->min_pu_width;
1328     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1329     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1330     int i, j;
1331
1332     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1333         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1334             s->is_pcm[i + j * min_pu_width] = 2;
1335 }
1336
1337 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1338                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1339                               int log2_cb_size, int log2_trafo_size,
1340                               int trafo_depth, int blk_idx,
1341                               int cbf_cb, int cbf_cr)
1342 {
1343     HEVCLocalContext *lc = &s->HEVClc;
1344     uint8_t split_transform_flag;
1345     int ret;
1346
1347     if (lc->cu.intra_split_flag) {
1348         if (trafo_depth == 1)
1349             lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1350     } else {
1351         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
1352     }
1353
1354     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1355         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1356         trafo_depth     < lc->cu.max_trafo_depth       &&
1357         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1358         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1359     } else {
1360         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1361                           lc->cu.pred_mode == MODE_INTER &&
1362                           lc->cu.part_mode != PART_2Nx2N &&
1363                           trafo_depth == 0;
1364
1365         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1366                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1367                                inter_split;
1368     }
1369
1370     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
1371         cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1372     else if (log2_trafo_size > 2 || trafo_depth == 0)
1373         cbf_cb = 0;
1374     if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
1375         cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1376     else if (log2_trafo_size > 2 || trafo_depth == 0)
1377         cbf_cr = 0;
1378
1379     if (split_transform_flag) {
1380         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1381         const int x1 = x0 + trafo_size_split;
1382         const int y1 = y0 + trafo_size_split;
1383
1384 #define SUBDIVIDE(x, y, idx)                                                    \
1385 do {                                                                            \
1386     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1387                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1388                              cbf_cb, cbf_cr);                                   \
1389     if (ret < 0)                                                                \
1390         return ret;                                                             \
1391 } while (0)
1392
1393         SUBDIVIDE(x0, y0, 0);
1394         SUBDIVIDE(x1, y0, 1);
1395         SUBDIVIDE(x0, y1, 2);
1396         SUBDIVIDE(x1, y1, 3);
1397
1398 #undef SUBDIVIDE
1399     } else {
1400         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1401         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1402         int min_tu_width     = s->ps.sps->min_tb_width;
1403         int cbf_luma         = 1;
1404
1405         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1406             cbf_cb || cbf_cr)
1407             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1408
1409         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1410                                  log2_cb_size, log2_trafo_size,
1411                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1412         if (ret < 0)
1413             return ret;
1414         // TODO: store cbf_luma somewhere else
1415         if (cbf_luma) {
1416             int i, j;
1417             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1418                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1419                     int x_tu = (x0 + j) >> log2_min_tu_size;
1420                     int y_tu = (y0 + i) >> log2_min_tu_size;
1421                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1422                 }
1423         }
1424         if (!s->sh.disable_deblocking_filter_flag) {
1425             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1426             if (s->ps.pps->transquant_bypass_enable_flag &&
1427                 lc->cu.cu_transquant_bypass_flag)
1428                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1429         }
1430     }
1431     return 0;
1432 }
1433
1434 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1435 {
1436     //TODO: non-4:2:0 support
1437     HEVCLocalContext *lc = &s->HEVClc;
1438     GetBitContext gb;
1439     int cb_size   = 1 << log2_cb_size;
1440     ptrdiff_t stride0 = s->frame->linesize[0];
1441     ptrdiff_t stride1 = s->frame->linesize[1];
1442     ptrdiff_t stride2 = s->frame->linesize[2];
1443     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1444     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1445     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1446
1447     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->ps.sps->pcm.bit_depth_chroma;
1448     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1449     int ret;
1450
1451     if (!s->sh.disable_deblocking_filter_flag)
1452         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1453
1454     ret = init_get_bits(&gb, pcm, length);
1455     if (ret < 0)
1456         return ret;
1457
1458     s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1459     s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1460     s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
1461     return 0;
1462 }
1463
1464 static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
1465 {
1466     HEVCLocalContext *lc = &s->HEVClc;
1467     int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
1468     int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
1469
1470     if (x)
1471         x += ff_hevc_abs_mvd_greater1_flag_decode(s);
1472     if (y)
1473         y += ff_hevc_abs_mvd_greater1_flag_decode(s);
1474
1475     switch (x) {
1476     case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
1477     case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
1478     case 0: lc->pu.mvd.x = 0;                               break;
1479     }
1480
1481     switch (y) {
1482     case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
1483     case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
1484     case 0: lc->pu.mvd.y = 0;                               break;
1485     }
1486 }
1487
1488 /**
1489  * 8.5.3.2.2.1 Luma sample interpolation process
1490  *
1491  * @param s HEVC decoding context
1492  * @param dst target buffer for block data at block position
1493  * @param dststride stride of the dst buffer
1494  * @param ref reference picture buffer at origin (0, 0)
1495  * @param mv motion vector (relative to block position) to get pixel data from
1496  * @param x_off horizontal position of block from origin (0, 0)
1497  * @param y_off vertical position of block from origin (0, 0)
1498  * @param block_w width of block
1499  * @param block_h height of block
1500  */
1501 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
1502                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
1503                     int block_w, int block_h, int pred_idx)
1504 {
1505     HEVCLocalContext *lc = &s->HEVClc;
1506     uint8_t *src         = ref->data[0];
1507     ptrdiff_t srcstride  = ref->linesize[0];
1508     int pic_width        = s->ps.sps->width;
1509     int pic_height       = s->ps.sps->height;
1510
1511     int mx         = mv->x & 3;
1512     int my         = mv->y & 3;
1513     int extra_left = ff_hevc_qpel_extra_before[mx];
1514     int extra_top  = ff_hevc_qpel_extra_before[my];
1515
1516     x_off += mv->x >> 2;
1517     y_off += mv->y >> 2;
1518     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1519
1520     if (x_off < extra_left || y_off < extra_top ||
1521         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
1522         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
1523         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1524         int offset = extra_top * srcstride + (extra_left << s->ps.sps->pixel_shift);
1525         int buf_offset = extra_top *
1526                          edge_emu_stride + (extra_left << s->ps.sps->pixel_shift);
1527
1528         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1529                                  edge_emu_stride, srcstride,
1530                                  block_w + ff_hevc_qpel_extra[mx],
1531                                  block_h + ff_hevc_qpel_extra[my],
1532                                  x_off - extra_left, y_off - extra_top,
1533                                  pic_width, pic_height);
1534         src = lc->edge_emu_buffer + buf_offset;
1535         srcstride = edge_emu_stride;
1536     }
1537     s->hevcdsp.put_hevc_qpel[!!my][!!mx][pred_idx](dst, dststride, src, srcstride,
1538                                                    block_h, mx, my, lc->mc_buffer);
1539 }
1540
1541 /**
1542  * 8.5.3.2.2.2 Chroma sample interpolation process
1543  *
1544  * @param s HEVC decoding context
1545  * @param dst1 target buffer for block data at block position (U plane)
1546  * @param dst2 target buffer for block data at block position (V plane)
1547  * @param dststride stride of the dst1 and dst2 buffers
1548  * @param ref reference picture buffer at origin (0, 0)
1549  * @param mv motion vector (relative to block position) to get pixel data from
1550  * @param x_off horizontal position of block from origin (0, 0)
1551  * @param y_off vertical position of block from origin (0, 0)
1552  * @param block_w width of block
1553  * @param block_h height of block
1554  */
1555 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
1556                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
1557                       int x_off, int y_off, int block_w, int block_h, int pred_idx)
1558 {
1559     HEVCLocalContext *lc = &s->HEVClc;
1560     uint8_t *src1        = ref->data[1];
1561     uint8_t *src2        = ref->data[2];
1562     ptrdiff_t src1stride = ref->linesize[1];
1563     ptrdiff_t src2stride = ref->linesize[2];
1564     int pic_width        = s->ps.sps->width >> 1;
1565     int pic_height       = s->ps.sps->height >> 1;
1566
1567     int mx = mv->x & 7;
1568     int my = mv->y & 7;
1569
1570     x_off += mv->x >> 3;
1571     y_off += mv->y >> 3;
1572     src1  += y_off * src1stride + (x_off * (1 << s->ps.sps->pixel_shift));
1573     src2  += y_off * src2stride + (x_off * (1 << s->ps.sps->pixel_shift));
1574
1575     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1576         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1577         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1578         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1579         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1580         int buf_offset1 = EPEL_EXTRA_BEFORE *
1581                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1582         int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1583         int buf_offset2 = EPEL_EXTRA_BEFORE *
1584                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1585
1586         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1587                                  edge_emu_stride, src1stride,
1588                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1589                                  x_off - EPEL_EXTRA_BEFORE,
1590                                  y_off - EPEL_EXTRA_BEFORE,
1591                                  pic_width, pic_height);
1592
1593         src1 = lc->edge_emu_buffer + buf_offset1;
1594         src1stride = edge_emu_stride;
1595         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
1596                                                        block_h, mx, my, lc->mc_buffer);
1597
1598         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
1599                                  edge_emu_stride, src2stride,
1600                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1601                                  x_off - EPEL_EXTRA_BEFORE,
1602                                  y_off - EPEL_EXTRA_BEFORE,
1603                                  pic_width, pic_height);
1604         src2 = lc->edge_emu_buffer + buf_offset2;
1605         src2stride = edge_emu_stride;
1606
1607         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
1608                                                        block_h, mx, my, lc->mc_buffer);
1609     } else {
1610         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
1611                                                        block_h, mx, my, lc->mc_buffer);
1612         s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
1613                                                        block_h, mx, my, lc->mc_buffer);
1614     }
1615 }
1616
1617 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1618                                 const Mv *mv, int y0, int height)
1619 {
1620     int y = (mv->y >> 2) + y0 + height + 9;
1621     ff_thread_await_progress(&ref->tf, y, 0);
1622 }
1623
1624 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1625                                   int nPbH, int log2_cb_size, int part_idx,
1626                                   int merge_idx, MvField *mv)
1627 {
1628     HEVCLocalContext *lc             = &s->HEVClc;
1629     enum InterPredIdc inter_pred_idc = PRED_L0;
1630     int mvp_flag;
1631
1632     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1633     if (s->sh.slice_type == HEVC_SLICE_B)
1634         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1635
1636     if (inter_pred_idc != PRED_L1) {
1637         if (s->sh.nb_refs[L0])
1638             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1639
1640         mv->pred_flag[0] = 1;
1641         hls_mvd_coding(s, x0, y0, 0);
1642         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1643         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1644                                  part_idx, merge_idx, mv, mvp_flag, 0);
1645         mv->mv[0].x += lc->pu.mvd.x;
1646         mv->mv[0].y += lc->pu.mvd.y;
1647     }
1648
1649     if (inter_pred_idc != PRED_L0) {
1650         if (s->sh.nb_refs[L1])
1651             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1652
1653         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1654             AV_ZERO32(&lc->pu.mvd);
1655         } else {
1656             hls_mvd_coding(s, x0, y0, 1);
1657         }
1658
1659         mv->pred_flag[1] = 1;
1660         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1661         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1662                                  part_idx, merge_idx, mv, mvp_flag, 1);
1663         mv->mv[1].x += lc->pu.mvd.x;
1664         mv->mv[1].y += lc->pu.mvd.y;
1665     }
1666 }
1667
1668 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1669                                 int nPbW, int nPbH,
1670                                 int log2_cb_size, int partIdx)
1671 {
1672     static const int pred_indices[] = {
1673         [4] = 0, [8] = 1, [12] = 2, [16] = 3, [24] = 4, [32] = 5, [48] = 6, [64] = 7,
1674     };
1675     const int pred_idx = pred_indices[nPbW];
1676
1677 #define POS(c_idx, x, y)                                                              \
1678     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1679                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1680     HEVCLocalContext *lc = &s->HEVClc;
1681     int merge_idx = 0;
1682     struct MvField current_mv = {{{ 0 }}};
1683
1684     int min_pu_width = s->ps.sps->min_pu_width;
1685     int weighted_pred = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1686                         (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1687
1688     MvField *tab_mvf = s->ref->tab_mvf;
1689     RefPicList  *refPicList = s->ref->refPicList;
1690     HEVCFrame *ref0, *ref1;
1691
1692     ptrdiff_t tmpstride = MAX_PB_SIZE * sizeof(int16_t);
1693
1694     uint8_t *dst0 = POS(0, x0, y0);
1695     uint8_t *dst1 = POS(1, x0, y0);
1696     uint8_t *dst2 = POS(2, x0, y0);
1697     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1698     int min_cb_width     = s->ps.sps->min_cb_width;
1699     int x_cb             = x0 >> log2_min_cb_size;
1700     int y_cb             = y0 >> log2_min_cb_size;
1701     int x_pu, y_pu;
1702     int i, j;
1703
1704     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1705
1706     if (!skip_flag)
1707         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1708
1709     if (skip_flag || lc->pu.merge_flag) {
1710         if (s->sh.max_num_merge_cand > 1)
1711             merge_idx = ff_hevc_merge_idx_decode(s);
1712         else
1713             merge_idx = 0;
1714
1715         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1716                                    partIdx, merge_idx, &current_mv);
1717     } else {
1718         hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1719                               partIdx, merge_idx, &current_mv);
1720     }
1721
1722     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1723     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1724
1725     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1726         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1727             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1728
1729     if (current_mv.pred_flag[0]) {
1730         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1731         if (!ref0)
1732             return;
1733         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1734     }
1735     if (current_mv.pred_flag[1]) {
1736         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1737         if (!ref1)
1738             return;
1739         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1740     }
1741
1742     if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
1743         DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1744         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1745
1746         luma_mc(s, tmp, tmpstride, ref0->frame,
1747                 &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
1748
1749         if (weighted_pred) {
1750             s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
1751                                                s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1752                                                s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1753                                                dst0, s->frame->linesize[0], tmp,
1754                                                tmpstride, nPbH);
1755         } else {
1756             s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
1757         }
1758         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1759                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1760
1761         if (weighted_pred) {
1762             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1763                                                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1764                                                       s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1765                                                       dst1, s->frame->linesize[1], tmp, tmpstride,
1766                                                       nPbH / 2);
1767             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1768                                                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1769                                                       s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1770                                                       dst2, s->frame->linesize[2], tmp2, tmpstride,
1771                                                       nPbH / 2);
1772         } else {
1773             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp,  tmpstride, nPbH / 2);
1774             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
1775         }
1776     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1777         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1778         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1779
1780         luma_mc(s, tmp, tmpstride, ref1->frame,
1781                 &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
1782
1783         if (weighted_pred) {
1784             s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
1785                                                s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1786                                                s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1787                                                dst0, s->frame->linesize[0], tmp, tmpstride,
1788                                                nPbH);
1789         } else {
1790             s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
1791         }
1792
1793         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
1794                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1795
1796         if (weighted_pred) {
1797             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1798                                                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1799                                                       s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1800                                                       dst1, s->frame->linesize[1], tmp, tmpstride, nPbH/2);
1801             s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1802                                                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1803                                                       s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1804                                                       dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH/2);
1805         } else {
1806             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp,  tmpstride, nPbH / 2);
1807             s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
1808         }
1809     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
1810         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1811         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
1812         DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
1813         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
1814
1815         luma_mc(s, tmp, tmpstride, ref0->frame,
1816                 &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
1817         luma_mc(s, tmp2, tmpstride, ref1->frame,
1818                 &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
1819
1820         if (weighted_pred) {
1821             s->hevcdsp.weighted_pred_avg[pred_idx](s->sh.luma_log2_weight_denom,
1822                                                    s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1823                                                    s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1824                                                    s->sh.luma_offset_l0[current_mv.ref_idx[0]],
1825                                                    s->sh.luma_offset_l1[current_mv.ref_idx[1]],
1826                                                    dst0, s->frame->linesize[0],
1827                                                    tmp, tmp2, tmpstride, nPbH);
1828         } else {
1829             s->hevcdsp.put_unweighted_pred_avg[pred_idx](dst0, s->frame->linesize[0],
1830                                                          tmp, tmp2, tmpstride, nPbH);
1831         }
1832
1833         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
1834                   &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1835         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
1836                   &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
1837
1838         if (weighted_pred) {
1839             s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1840                                                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
1841                                                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
1842                                                           s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
1843                                                           s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
1844                                                           dst1, s->frame->linesize[1], tmp, tmp3,
1845                                                           tmpstride, nPbH / 2);
1846             s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
1847                                                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
1848                                                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
1849                                                           s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
1850                                                           s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
1851                                                           dst2, s->frame->linesize[2], tmp2, tmp4,
1852                                                           tmpstride, nPbH / 2);
1853         } else {
1854             s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst1, s->frame->linesize[1], tmp, tmp3,  tmpstride, nPbH/2);
1855             s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbH/2);
1856         }
1857     }
1858 }
1859
1860 /**
1861  * 8.4.1
1862  */
1863 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1864                                 int prev_intra_luma_pred_flag)
1865 {
1866     HEVCLocalContext *lc = &s->HEVClc;
1867     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1868     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1869     int min_pu_width     = s->ps.sps->min_pu_width;
1870     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1871     int x0b              = x0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1872     int y0b              = y0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
1873
1874     int cand_up   = (lc->ctb_up_flag || y0b) ?
1875                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1876     int cand_left = (lc->ctb_left_flag || x0b) ?
1877                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1878
1879     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1880
1881     MvField *tab_mvf = s->ref->tab_mvf;
1882     int intra_pred_mode;
1883     int candidate[3];
1884     int i, j;
1885
1886     // intra_pred_mode prediction does not cross vertical CTB boundaries
1887     if ((y0 - 1) < y_ctb)
1888         cand_up = INTRA_DC;
1889
1890     if (cand_left == cand_up) {
1891         if (cand_left < 2) {
1892             candidate[0] = INTRA_PLANAR;
1893             candidate[1] = INTRA_DC;
1894             candidate[2] = INTRA_ANGULAR_26;
1895         } else {
1896             candidate[0] = cand_left;
1897             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1898             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1899         }
1900     } else {
1901         candidate[0] = cand_left;
1902         candidate[1] = cand_up;
1903         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1904             candidate[2] = INTRA_PLANAR;
1905         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1906             candidate[2] = INTRA_DC;
1907         } else {
1908             candidate[2] = INTRA_ANGULAR_26;
1909         }
1910     }
1911
1912     if (prev_intra_luma_pred_flag) {
1913         intra_pred_mode = candidate[lc->pu.mpm_idx];
1914     } else {
1915         if (candidate[0] > candidate[1])
1916             FFSWAP(uint8_t, candidate[0], candidate[1]);
1917         if (candidate[0] > candidate[2])
1918             FFSWAP(uint8_t, candidate[0], candidate[2]);
1919         if (candidate[1] > candidate[2])
1920             FFSWAP(uint8_t, candidate[1], candidate[2]);
1921
1922         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1923         for (i = 0; i < 3; i++)
1924             if (intra_pred_mode >= candidate[i])
1925                 intra_pred_mode++;
1926     }
1927
1928     /* write the intra prediction units into the mv array */
1929     if (!size_in_pus)
1930         size_in_pus = 1;
1931     for (i = 0; i < size_in_pus; i++) {
1932         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1933                intra_pred_mode, size_in_pus);
1934
1935         for (j = 0; j < size_in_pus; j++) {
1936             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
1937             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
1938             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
1939             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
1940             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
1941             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
1942             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
1943             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
1944             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
1945         }
1946     }
1947
1948     return intra_pred_mode;
1949 }
1950
1951 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1952                                           int log2_cb_size, int ct_depth)
1953 {
1954     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1955     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
1956     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
1957     int y;
1958
1959     for (y = 0; y < length; y++)
1960         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
1961                ct_depth, length);
1962 }
1963
1964 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1965                                   int log2_cb_size)
1966 {
1967     HEVCLocalContext *lc = &s->HEVClc;
1968     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1969     uint8_t prev_intra_luma_pred_flag[4];
1970     int split   = lc->cu.part_mode == PART_NxN;
1971     int pb_size = (1 << log2_cb_size) >> split;
1972     int side    = split + 1;
1973     int chroma_mode;
1974     int i, j;
1975
1976     for (i = 0; i < side; i++)
1977         for (j = 0; j < side; j++)
1978             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1979
1980     for (i = 0; i < side; i++) {
1981         for (j = 0; j < side; j++) {
1982             if (prev_intra_luma_pred_flag[2 * i + j])
1983                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1984             else
1985                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1986
1987             lc->pu.intra_pred_mode[2 * i + j] =
1988                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1989                                      prev_intra_luma_pred_flag[2 * i + j]);
1990         }
1991     }
1992
1993     chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1994     if (chroma_mode != 4) {
1995         if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1996             lc->pu.intra_pred_mode_c = 34;
1997         else
1998             lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
1999     } else {
2000         lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
2001     }
2002 }
2003
2004 static void intra_prediction_unit_default_value(HEVCContext *s,
2005                                                 int x0, int y0,
2006                                                 int log2_cb_size)
2007 {
2008     HEVCLocalContext *lc = &s->HEVClc;
2009     int pb_size          = 1 << log2_cb_size;
2010     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2011     int min_pu_width     = s->ps.sps->min_pu_width;
2012     MvField *tab_mvf     = s->ref->tab_mvf;
2013     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2014     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2015     int j, k;
2016
2017     if (size_in_pus == 0)
2018         size_in_pus = 1;
2019     for (j = 0; j < size_in_pus; j++) {
2020         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2021         for (k = 0; k < size_in_pus; k++)
2022             tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
2023     }
2024 }
2025
2026 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2027 {
2028     int cb_size          = 1 << log2_cb_size;
2029     HEVCLocalContext *lc = &s->HEVClc;
2030     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2031     int length           = cb_size >> log2_min_cb_size;
2032     int min_cb_width     = s->ps.sps->min_cb_width;
2033     int x_cb             = x0 >> log2_min_cb_size;
2034     int y_cb             = y0 >> log2_min_cb_size;
2035     int x, y, ret;
2036
2037     lc->cu.x                = x0;
2038     lc->cu.y                = y0;
2039     lc->cu.pred_mode        = MODE_INTRA;
2040     lc->cu.part_mode        = PART_2Nx2N;
2041     lc->cu.intra_split_flag = 0;
2042
2043     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2044     for (x = 0; x < 4; x++)
2045         lc->pu.intra_pred_mode[x] = 1;
2046     if (s->ps.pps->transquant_bypass_enable_flag) {
2047         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2048         if (lc->cu.cu_transquant_bypass_flag)
2049             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2050     } else
2051         lc->cu.cu_transquant_bypass_flag = 0;
2052
2053     if (s->sh.slice_type != HEVC_SLICE_I) {
2054         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2055
2056         x = y_cb * min_cb_width + x_cb;
2057         for (y = 0; y < length; y++) {
2058             memset(&s->skip_flag[x], skip_flag, length);
2059             x += min_cb_width;
2060         }
2061         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2062     }
2063
2064     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2065         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2066         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2067
2068         if (!s->sh.disable_deblocking_filter_flag)
2069             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2070     } else {
2071         int pcm_flag = 0;
2072
2073         if (s->sh.slice_type != HEVC_SLICE_I)
2074             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2075         if (lc->cu.pred_mode != MODE_INTRA ||
2076             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2077             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2078             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2079                                       lc->cu.pred_mode == MODE_INTRA;
2080         }
2081
2082         if (lc->cu.pred_mode == MODE_INTRA) {
2083             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2084                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2085                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2086                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2087             }
2088             if (pcm_flag) {
2089                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2090                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2091                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2092                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2093
2094                 if (ret < 0)
2095                     return ret;
2096             } else {
2097                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2098             }
2099         } else {
2100             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2101             switch (lc->cu.part_mode) {
2102             case PART_2Nx2N:
2103                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
2104                 break;
2105             case PART_2NxN:
2106                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
2107                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
2108                 break;
2109             case PART_Nx2N:
2110                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
2111                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
2112                 break;
2113             case PART_2NxnU:
2114                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
2115                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
2116                 break;
2117             case PART_2NxnD:
2118                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
2119                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
2120                 break;
2121             case PART_nLx2N:
2122                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
2123                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
2124                 break;
2125             case PART_nRx2N:
2126                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
2127                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
2128                 break;
2129             case PART_NxN:
2130                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
2131                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
2132                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
2133                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
2134                 break;
2135             }
2136         }
2137
2138         if (!pcm_flag) {
2139             int rqt_root_cbf = 1;
2140
2141             if (lc->cu.pred_mode != MODE_INTRA &&
2142                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2143                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2144             }
2145             if (rqt_root_cbf) {
2146                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2147                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2148                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2149                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2150                                          log2_cb_size,
2151                                          log2_cb_size, 0, 0, 0, 0);
2152                 if (ret < 0)
2153                     return ret;
2154             } else {
2155                 if (!s->sh.disable_deblocking_filter_flag)
2156                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2157             }
2158         }
2159     }
2160
2161     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2162         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2163
2164     x = y_cb * min_cb_width + x_cb;
2165     for (y = 0; y < length; y++) {
2166         memset(&s->qp_y_tab[x], lc->qp_y, length);
2167         x += min_cb_width;
2168     }
2169
2170     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2171
2172     return 0;
2173 }
2174
2175 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2176                                int log2_cb_size, int cb_depth)
2177 {
2178     HEVCLocalContext *lc = &s->HEVClc;
2179     const int cb_size    = 1 << log2_cb_size;
2180     int split_cu;
2181
2182     lc->ct.depth = cb_depth;
2183     if (x0 + cb_size <= s->ps.sps->width  &&
2184         y0 + cb_size <= s->ps.sps->height &&
2185         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2186         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2187     } else {
2188         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2189     }
2190     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2191         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2192         lc->tu.is_cu_qp_delta_coded = 0;
2193         lc->tu.cu_qp_delta          = 0;
2194     }
2195
2196     if (split_cu) {
2197         const int cb_size_split = cb_size >> 1;
2198         const int x1 = x0 + cb_size_split;
2199         const int y1 = y0 + cb_size_split;
2200
2201         log2_cb_size--;
2202         cb_depth++;
2203
2204 #define SUBDIVIDE(x, y)                                                \
2205 do {                                                                   \
2206     if (x < s->ps.sps->width && y < s->ps.sps->height) {                     \
2207         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
2208         if (ret < 0)                                                   \
2209             return ret;                                                \
2210     }                                                                  \
2211 } while (0)
2212
2213         SUBDIVIDE(x0, y0);
2214         SUBDIVIDE(x1, y0);
2215         SUBDIVIDE(x0, y1);
2216         SUBDIVIDE(x1, y1);
2217     } else {
2218         int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2219         if (ret < 0)
2220             return ret;
2221     }
2222
2223     return 0;
2224 }
2225
2226 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2227                                  int ctb_addr_ts)
2228 {
2229     HEVCLocalContext *lc  = &s->HEVClc;
2230     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2231     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2232     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2233
2234     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2235
2236     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2237         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2238             lc->first_qp_group = 1;
2239         lc->end_of_tiles_x = s->ps.sps->width;
2240     } else if (s->ps.pps->tiles_enabled_flag) {
2241         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2242             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2243             lc->start_of_tiles_x = x_ctb;
2244             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2245             lc->first_qp_group   = 1;
2246         }
2247     } else {
2248         lc->end_of_tiles_x = s->ps.sps->width;
2249     }
2250
2251     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2252
2253     lc->boundary_flags = 0;
2254     if (s->ps.pps->tiles_enabled_flag) {
2255         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2256             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2257         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2258             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2259         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2260             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2261         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2262             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2263     } else {
2264         if (!ctb_addr_in_slice)
2265             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2266         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2267             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2268     }
2269
2270     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2271     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2272     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2273     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2274 }
2275
2276 static int hls_slice_data(HEVCContext *s)
2277 {
2278     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2279     int more_data   = 1;
2280     int x_ctb       = 0;
2281     int y_ctb       = 0;
2282     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2283     int ret;
2284
2285     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2286         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2287
2288         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2289         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2290         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2291
2292         ff_hevc_cabac_init(s, ctb_addr_ts);
2293
2294         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2295
2296         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2297         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2298         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2299
2300         ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2301         if (ret < 0)
2302             return ret;
2303         more_data = !ff_hevc_end_of_slice_flag_decode(s);
2304
2305         ctb_addr_ts++;
2306         ff_hevc_save_states(s, ctb_addr_ts);
2307         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2308     }
2309
2310     if (x_ctb + ctb_size >= s->ps.sps->width &&
2311         y_ctb + ctb_size >= s->ps.sps->height)
2312         ff_hevc_hls_filter(s, x_ctb, y_ctb);
2313
2314     return ctb_addr_ts;
2315 }
2316
2317 static void restore_tqb_pixels(HEVCContext *s)
2318 {
2319     int min_pu_size = 1 << s->ps.sps->log2_min_pu_size;
2320     int x, y, c_idx;
2321
2322     for (c_idx = 0; c_idx < 3; c_idx++) {
2323         ptrdiff_t stride = s->frame->linesize[c_idx];
2324         int hshift       = s->ps.sps->hshift[c_idx];
2325         int vshift       = s->ps.sps->vshift[c_idx];
2326         for (y = 0; y < s->ps.sps->min_pu_height; y++) {
2327             for (x = 0; x < s->ps.sps->min_pu_width; x++) {
2328                 if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
2329                     int n;
2330                     int len      = min_pu_size >> hshift;
2331                     uint8_t *src = &s->frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2332                     uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
2333                     for (n = 0; n < (min_pu_size >> vshift); n++) {
2334                         memcpy(dst, src, len);
2335                         src += stride;
2336                         dst += stride;
2337                     }
2338                 }
2339             }
2340         }
2341     }
2342 }
2343
2344 static int set_side_data(HEVCContext *s)
2345 {
2346     AVFrame *out = s->ref->frame;
2347
2348     if (s->sei_frame_packing_present &&
2349         s->frame_packing_arrangement_type >= 3 &&
2350         s->frame_packing_arrangement_type <= 5 &&
2351         s->content_interpretation_type > 0 &&
2352         s->content_interpretation_type < 3) {
2353         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2354         if (!stereo)
2355             return AVERROR(ENOMEM);
2356
2357         switch (s->frame_packing_arrangement_type) {
2358         case 3:
2359             if (s->quincunx_subsampling)
2360                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2361             else
2362                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2363             break;
2364         case 4:
2365             stereo->type = AV_STEREO3D_TOPBOTTOM;
2366             break;
2367         case 5:
2368             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2369             break;
2370         }
2371
2372         if (s->content_interpretation_type == 2)
2373             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2374     }
2375
2376     if (s->sei_display_orientation_present &&
2377         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2378         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2379         AVFrameSideData *rotation = av_frame_new_side_data(out,
2380                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2381                                                            sizeof(int32_t) * 9);
2382         if (!rotation)
2383             return AVERROR(ENOMEM);
2384
2385         av_display_rotation_set((int32_t *)rotation->data, angle);
2386         av_display_matrix_flip((int32_t *)rotation->data,
2387                                s->sei_hflip, s->sei_vflip);
2388     }
2389
2390     return 0;
2391 }
2392
2393 static int hevc_frame_start(HEVCContext *s)
2394 {
2395     HEVCLocalContext *lc = &s->HEVClc;
2396     int ret;
2397
2398     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2399     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2400     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2401     memset(s->is_pcm,        0, s->ps.sps->min_pu_width * s->ps.sps->min_pu_height);
2402
2403     lc->start_of_tiles_x = 0;
2404     s->is_decoded        = 0;
2405     s->first_nal_type    = s->nal_unit_type;
2406
2407     if (s->ps.pps->tiles_enabled_flag)
2408         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2409
2410     ret = ff_hevc_set_new_ref(s, s->ps.sps->sao_enabled ? &s->sao_frame : &s->frame,
2411                               s->poc);
2412     if (ret < 0)
2413         goto fail;
2414
2415     ret = ff_hevc_frame_rps(s);
2416     if (ret < 0) {
2417         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2418         goto fail;
2419     }
2420
2421     s->ref->frame->key_frame = IS_IRAP(s);
2422
2423     ret = set_side_data(s);
2424     if (ret < 0)
2425         goto fail;
2426
2427     av_frame_unref(s->output_frame);
2428     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2429     if (ret < 0)
2430         goto fail;
2431
2432     ff_thread_finish_setup(s->avctx);
2433
2434     return 0;
2435
2436 fail:
2437     if (s->ref)
2438         ff_hevc_unref_frame(s, s->ref, ~0);
2439     s->ref = NULL;
2440     return ret;
2441 }
2442
2443 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2444 {
2445     HEVCLocalContext *lc = &s->HEVClc;
2446     GetBitContext *gb    = &lc->gb;
2447     int ctb_addr_ts, ret;
2448
2449     *gb              = nal->gb;
2450     s->nal_unit_type = nal->type;
2451     s->temporal_id   = nal->temporal_id;
2452
2453     switch (s->nal_unit_type) {
2454     case HEVC_NAL_VPS:
2455         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2456         if (ret < 0)
2457             goto fail;
2458         break;
2459     case HEVC_NAL_SPS:
2460         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2461                                      s->apply_defdispwin);
2462         if (ret < 0)
2463             goto fail;
2464         break;
2465     case HEVC_NAL_PPS:
2466         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2467         if (ret < 0)
2468             goto fail;
2469         break;
2470     case HEVC_NAL_SEI_PREFIX:
2471     case HEVC_NAL_SEI_SUFFIX:
2472         ret = ff_hevc_decode_nal_sei(s);
2473         if (ret < 0)
2474             goto fail;
2475         break;
2476     case HEVC_NAL_TRAIL_R:
2477     case HEVC_NAL_TRAIL_N:
2478     case HEVC_NAL_TSA_N:
2479     case HEVC_NAL_TSA_R:
2480     case HEVC_NAL_STSA_N:
2481     case HEVC_NAL_STSA_R:
2482     case HEVC_NAL_BLA_W_LP:
2483     case HEVC_NAL_BLA_W_RADL:
2484     case HEVC_NAL_BLA_N_LP:
2485     case HEVC_NAL_IDR_W_RADL:
2486     case HEVC_NAL_IDR_N_LP:
2487     case HEVC_NAL_CRA_NUT:
2488     case HEVC_NAL_RADL_N:
2489     case HEVC_NAL_RADL_R:
2490     case HEVC_NAL_RASL_N:
2491     case HEVC_NAL_RASL_R:
2492         ret = hls_slice_header(s);
2493         if (ret < 0)
2494             return ret;
2495
2496         if (s->max_ra == INT_MAX) {
2497             if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
2498                 s->max_ra = s->poc;
2499             } else {
2500                 if (IS_IDR(s))
2501                     s->max_ra = INT_MIN;
2502             }
2503         }
2504
2505         if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
2506             s->poc <= s->max_ra) {
2507             s->is_decoded = 0;
2508             break;
2509         } else {
2510             if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
2511                 s->max_ra = INT_MIN;
2512         }
2513
2514         if (s->sh.first_slice_in_pic_flag) {
2515             ret = hevc_frame_start(s);
2516             if (ret < 0)
2517                 return ret;
2518         } else if (!s->ref) {
2519             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2520             goto fail;
2521         }
2522
2523         if (s->nal_unit_type != s->first_nal_type) {
2524             av_log(s->avctx, AV_LOG_ERROR,
2525                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2526                    s->first_nal_type, s->nal_unit_type);
2527             return AVERROR_INVALIDDATA;
2528         }
2529
2530         if (!s->sh.dependent_slice_segment_flag &&
2531             s->sh.slice_type != HEVC_SLICE_I) {
2532             ret = ff_hevc_slice_rpl(s);
2533             if (ret < 0) {
2534                 av_log(s->avctx, AV_LOG_WARNING,
2535                        "Error constructing the reference lists for the current slice.\n");
2536                 goto fail;
2537             }
2538         }
2539
2540         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2541             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2542             if (ret < 0)
2543                 goto fail;
2544         }
2545
2546         if (s->avctx->hwaccel) {
2547             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2548             if (ret < 0)
2549                 goto fail;
2550         } else {
2551             ctb_addr_ts = hls_slice_data(s);
2552             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2553                 s->is_decoded = 1;
2554                 if ((s->ps.pps->transquant_bypass_enable_flag ||
2555                      (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) &&
2556                     s->ps.sps->sao_enabled)
2557                     restore_tqb_pixels(s);
2558             }
2559
2560             if (ctb_addr_ts < 0) {
2561                 ret = ctb_addr_ts;
2562                 goto fail;
2563             }
2564         }
2565         break;
2566     case HEVC_NAL_EOS_NUT:
2567     case HEVC_NAL_EOB_NUT:
2568         s->seq_decode = (s->seq_decode + 1) & 0xff;
2569         s->max_ra     = INT_MAX;
2570         break;
2571     case HEVC_NAL_AUD:
2572     case HEVC_NAL_FD_NUT:
2573         break;
2574     default:
2575         av_log(s->avctx, AV_LOG_INFO,
2576                "Skipping NAL unit %d\n", s->nal_unit_type);
2577     }
2578
2579     return 0;
2580 fail:
2581     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2582         return ret;
2583     return 0;
2584 }
2585
2586 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2587 {
2588     int i, ret = 0;
2589
2590     s->ref = NULL;
2591     s->eos = 0;
2592
2593     /* split the input packet into NAL units, so we know the upper bound on the
2594      * number of slices in the frame */
2595     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
2596                                 s->nal_length_size, s->avctx->codec_id);
2597     if (ret < 0) {
2598         av_log(s->avctx, AV_LOG_ERROR,
2599                "Error splitting the input into NAL units.\n");
2600         return ret;
2601     }
2602
2603     for (i = 0; i < s->pkt.nb_nals; i++) {
2604         if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
2605             s->pkt.nals[i].type == HEVC_NAL_EOS_NUT)
2606             s->eos = 1;
2607     }
2608
2609     /* decode the NAL units */
2610     for (i = 0; i < s->pkt.nb_nals; i++) {
2611         ret = decode_nal_unit(s, &s->pkt.nals[i]);
2612         if (ret < 0) {
2613             av_log(s->avctx, AV_LOG_WARNING,
2614                    "Error parsing NAL unit #%d.\n", i);
2615             goto fail;
2616         }
2617     }
2618
2619 fail:
2620     if (s->ref)
2621         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2622
2623     return ret;
2624 }
2625
/* Log the 16 bytes of an MD5 digest as lowercase hex at the given level. */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
2632
2633 static int verify_md5(HEVCContext *s, AVFrame *frame)
2634 {
2635     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2636     int pixel_shift;
2637     int i, j;
2638
2639     if (!desc)
2640         return AVERROR(EINVAL);
2641
2642     pixel_shift = desc->comp[0].depth > 8;
2643
2644     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2645            s->poc);
2646
2647     /* the checksums are LE, so we have to byteswap for >8bpp formats
2648      * on BE arches */
2649 #if HAVE_BIGENDIAN
2650     if (pixel_shift && !s->checksum_buf) {
2651         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2652                        FFMAX3(frame->linesize[0], frame->linesize[1],
2653                               frame->linesize[2]));
2654         if (!s->checksum_buf)
2655             return AVERROR(ENOMEM);
2656     }
2657 #endif
2658
2659     for (i = 0; frame->data[i]; i++) {
2660         int width  = s->avctx->coded_width;
2661         int height = s->avctx->coded_height;
2662         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2663         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2664         uint8_t md5[16];
2665
2666         av_md5_init(s->md5_ctx);
2667         for (j = 0; j < h; j++) {
2668             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2669 #if HAVE_BIGENDIAN
2670             if (pixel_shift) {
2671                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2672                                     (const uint16_t *) src, w);
2673                 src = s->checksum_buf;
2674             }
2675 #endif
2676             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2677         }
2678         av_md5_final(s->md5_ctx, md5);
2679
2680         if (!memcmp(md5, s->md5[i], 16)) {
2681             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2682             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2683             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2684         } else {
2685             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2686             print_md5(s->avctx, AV_LOG_ERROR, md5);
2687             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2688             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2689             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2690             return AVERROR_INVALIDDATA;
2691         }
2692     }
2693
2694     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2695
2696     return 0;
2697 }
2698
2699 static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length)
2700 {
2701     AVCodecContext *avctx = s->avctx;
2702     GetByteContext gb;
2703     int ret, i;
2704
2705     bytestream2_init(&gb, buf, length);
2706
2707     if (length > 3 && (buf[0] || buf[1] || buf[2] > 1)) {
2708         /* It seems the extradata is encoded as hvcC format.
2709          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
2710          * is finalized. When finalized, configurationVersion will be 1 and we
2711          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
2712         int i, j, num_arrays, nal_len_size;
2713
2714         s->is_nalff = 1;
2715
2716         bytestream2_skip(&gb, 21);
2717         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
2718         num_arrays   = bytestream2_get_byte(&gb);
2719
2720         /* nal units in the hvcC always have length coded with 2 bytes,
2721          * so put a fake nal_length_size = 2 while parsing them */
2722         s->nal_length_size = 2;
2723
2724         /* Decode nal units from hvcC. */
2725         for (i = 0; i < num_arrays; i++) {
2726             int type = bytestream2_get_byte(&gb) & 0x3f;
2727             int cnt  = bytestream2_get_be16(&gb);
2728
2729             for (j = 0; j < cnt; j++) {
2730                 // +2 for the nal size field
2731                 int nalsize = bytestream2_peek_be16(&gb) + 2;
2732                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
2733                     av_log(s->avctx, AV_LOG_ERROR,
2734                            "Invalid NAL unit size in extradata.\n");
2735                     return AVERROR_INVALIDDATA;
2736                 }
2737
2738                 ret = decode_nal_units(s, gb.buffer, nalsize);
2739                 if (ret < 0) {
2740                     av_log(avctx, AV_LOG_ERROR,
2741                            "Decoding nal unit %d %d from hvcC failed\n",
2742                            type, i);
2743                     return ret;
2744                 }
2745                 bytestream2_skip(&gb, nalsize);
2746             }
2747         }
2748
2749         /* Now store right nal length size, that will be used to parse
2750          * all other nals */
2751         s->nal_length_size = nal_len_size;
2752     } else {
2753         s->is_nalff = 0;
2754         ret = decode_nal_units(s, buf, length);
2755         if (ret < 0)
2756             return ret;
2757     }
2758
2759     /* export stream parameters from the first SPS */
2760     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2761         if (s->ps.sps_list[i]) {
2762             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
2763             export_stream_params(s->avctx, &s->ps, sps);
2764             break;
2765         }
2766     }
2767
2768     return 0;
2769 }
2770
2771 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2772                              AVPacket *avpkt)
2773 {
2774     int ret;
2775     int new_extradata_size;
2776     uint8_t *new_extradata;
2777     HEVCContext *s = avctx->priv_data;
2778
2779     if (!avpkt->size) {
2780         ret = ff_hevc_output_frame(s, data, 1);
2781         if (ret < 0)
2782             return ret;
2783
2784         *got_output = ret;
2785         return 0;
2786     }
2787
2788     new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
2789                                             &new_extradata_size);
2790     if (new_extradata && new_extradata_size > 0) {
2791         ret = hevc_decode_extradata(s, new_extradata, new_extradata_size);
2792         if (ret < 0)
2793             return ret;
2794     }
2795
2796     s->ref = NULL;
2797     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2798     if (ret < 0)
2799         return ret;
2800
2801     if (avctx->hwaccel) {
2802         if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2803             av_log(avctx, AV_LOG_ERROR,
2804                    "hardware accelerator failed to decode picture\n");
2805     } else {
2806         /* verify the SEI checksum */
2807         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2808             s->is_md5) {
2809             ret = verify_md5(s, s->ref->frame);
2810             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2811                 ff_hevc_unref_frame(s, s->ref, ~0);
2812                 return ret;
2813             }
2814         }
2815     }
2816     s->is_md5 = 0;
2817
2818     if (s->is_decoded) {
2819         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2820         s->is_decoded = 0;
2821     }
2822
2823     if (s->output_frame->buf[0]) {
2824         av_frame_move_ref(data, s->output_frame);
2825         *got_output = 1;
2826     }
2827
2828     return avpkt->size;
2829 }
2830
2831 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2832 {
2833     int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2834     if (ret < 0)
2835         return ret;
2836
2837     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2838     if (!dst->tab_mvf_buf)
2839         goto fail;
2840     dst->tab_mvf = src->tab_mvf;
2841
2842     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2843     if (!dst->rpl_tab_buf)
2844         goto fail;
2845     dst->rpl_tab = src->rpl_tab;
2846
2847     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2848     if (!dst->rpl_buf)
2849         goto fail;
2850
2851     dst->poc        = src->poc;
2852     dst->ctb_count  = src->ctb_count;
2853     dst->window     = src->window;
2854     dst->flags      = src->flags;
2855     dst->sequence   = src->sequence;
2856
2857     if (src->hwaccel_picture_private) {
2858         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2859         if (!dst->hwaccel_priv_buf)
2860             goto fail;
2861         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
2862     }
2863
2864     return 0;
2865 fail:
2866     ff_hevc_unref_frame(s, dst, ~0);
2867     return AVERROR(ENOMEM);
2868 }
2869
2870 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2871 {
2872     HEVCContext       *s = avctx->priv_data;
2873     int i;
2874
2875     pic_arrays_free(s);
2876
2877     av_freep(&s->md5_ctx);
2878
2879     av_frame_free(&s->tmp_frame);
2880     av_frame_free(&s->output_frame);
2881
2882     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2883         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2884         av_frame_free(&s->DPB[i].frame);
2885     }
2886
2887     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
2888         av_buffer_unref(&s->ps.vps_list[i]);
2889     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
2890         av_buffer_unref(&s->ps.sps_list[i]);
2891     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
2892         av_buffer_unref(&s->ps.pps_list[i]);
2893
2894     ff_h2645_packet_uninit(&s->pkt);
2895
2896     return 0;
2897 }
2898
2899 static av_cold int hevc_init_context(AVCodecContext *avctx)
2900 {
2901     HEVCContext *s = avctx->priv_data;
2902     int i;
2903
2904     s->avctx = avctx;
2905
2906     s->tmp_frame = av_frame_alloc();
2907     if (!s->tmp_frame)
2908         goto fail;
2909
2910     s->output_frame = av_frame_alloc();
2911     if (!s->output_frame)
2912         goto fail;
2913
2914     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2915         s->DPB[i].frame = av_frame_alloc();
2916         if (!s->DPB[i].frame)
2917             goto fail;
2918         s->DPB[i].tf.f = s->DPB[i].frame;
2919     }
2920
2921     s->max_ra = INT_MAX;
2922
2923     s->md5_ctx = av_md5_alloc();
2924     if (!s->md5_ctx)
2925         goto fail;
2926
2927     ff_bswapdsp_init(&s->bdsp);
2928
2929     s->context_initialized = 1;
2930
2931     return 0;
2932
2933 fail:
2934     hevc_decode_free(avctx);
2935     return AVERROR(ENOMEM);
2936 }
2937
2938 static int hevc_update_thread_context(AVCodecContext *dst,
2939                                       const AVCodecContext *src)
2940 {
2941     HEVCContext *s  = dst->priv_data;
2942     HEVCContext *s0 = src->priv_data;
2943     int i, ret;
2944
2945     if (!s->context_initialized) {
2946         ret = hevc_init_context(dst);
2947         if (ret < 0)
2948             return ret;
2949     }
2950
2951     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2952         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2953         if (s0->DPB[i].frame->buf[0]) {
2954             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
2955             if (ret < 0)
2956                 return ret;
2957         }
2958     }
2959
2960     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
2961         av_buffer_unref(&s->ps.vps_list[i]);
2962         if (s0->ps.vps_list[i]) {
2963             s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
2964             if (!s->ps.vps_list[i])
2965                 return AVERROR(ENOMEM);
2966         }
2967     }
2968
2969     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
2970         av_buffer_unref(&s->ps.sps_list[i]);
2971         if (s0->ps.sps_list[i]) {
2972             s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
2973             if (!s->ps.sps_list[i])
2974                 return AVERROR(ENOMEM);
2975         }
2976     }
2977
2978     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
2979         av_buffer_unref(&s->ps.pps_list[i]);
2980         if (s0->ps.pps_list[i]) {
2981             s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
2982             if (!s->ps.pps_list[i])
2983                 return AVERROR(ENOMEM);
2984         }
2985     }
2986
2987     if (s->ps.sps != s0->ps.sps)
2988         ret = set_sps(s, s0->ps.sps);
2989
2990     s->seq_decode = s0->seq_decode;
2991     s->seq_output = s0->seq_output;
2992     s->pocTid0    = s0->pocTid0;
2993     s->max_ra     = s0->max_ra;
2994
2995     s->is_nalff        = s0->is_nalff;
2996     s->nal_length_size = s0->nal_length_size;
2997
2998     if (s0->eos) {
2999         s->seq_decode = (s->seq_decode + 1) & 0xff;
3000         s->max_ra = INT_MAX;
3001     }
3002
3003     return 0;
3004 }
3005
3006 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3007 {
3008     HEVCContext *s = avctx->priv_data;
3009     int ret;
3010
3011     avctx->internal->allocate_progress = 1;
3012
3013     ret = hevc_init_context(avctx);
3014     if (ret < 0)
3015         return ret;
3016
3017     if (avctx->extradata_size > 0 && avctx->extradata) {
3018         ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size);
3019         if (ret < 0) {
3020             hevc_decode_free(avctx);
3021             return ret;
3022         }
3023     }
3024
3025     return 0;
3026 }
3027
3028 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3029 {
3030     HEVCContext *s = avctx->priv_data;
3031     int ret;
3032
3033     memset(s, 0, sizeof(*s));
3034
3035     ret = hevc_init_context(avctx);
3036     if (ret < 0)
3037         return ret;
3038
3039     return 0;
3040 }
3041
3042 static void hevc_decode_flush(AVCodecContext *avctx)
3043 {
3044     HEVCContext *s = avctx->priv_data;
3045     ff_hevc_flush_dpb(s);
3046     s->max_ra = INT_MAX;
3047 }
3048
3049 #define OFFSET(x) offsetof(HEVCContext, x)
3050 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3051
3052 static const AVOption options[] = {
3053     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3054         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3055     { NULL },
3056 };
3057
3058 static const AVClass hevc_decoder_class = {
3059     .class_name = "HEVC decoder",
3060     .item_name  = av_default_item_name,
3061     .option     = options,
3062     .version    = LIBAVUTIL_VERSION_INT,
3063 };
3064
3065 AVCodec ff_hevc_decoder = {
3066     .name                  = "hevc",
3067     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3068     .type                  = AVMEDIA_TYPE_VIDEO,
3069     .id                    = AV_CODEC_ID_HEVC,
3070     .priv_data_size        = sizeof(HEVCContext),
3071     .priv_class            = &hevc_decoder_class,
3072     .init                  = hevc_decode_init,
3073     .close                 = hevc_decode_free,
3074     .decode                = hevc_decode_frame,
3075     .flush                 = hevc_decode_flush,
3076     .update_thread_context = hevc_update_thread_context,
3077     .init_thread_copy      = hevc_init_thread_copy,
3078     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3079                              AV_CODEC_CAP_FRAME_THREADS,
3080     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3081 };