]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevcdec.c
avcodec/hevcdsp_template: Fix undefined shift in put_hevc_qpel_bi_w_hv()
[ffmpeg] / libavcodec / hevcdec.c
1 /*
2  * HEVC video Decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/mastering_display_metadata.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
35
36 #include "bswapdsp.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
39 #include "golomb.h"
40 #include "hevc.h"
41 #include "hevc_data.h"
42 #include "hevc_parse.h"
43 #include "hevcdec.h"
44 #include "hwaccel.h"
45 #include "profiles.h"
46
/*
 * Lookup table with 65 entries (indices 0..64). Only the entries listed
 * below are set explicitly; every other index is implicitly zero.
 * NOTE(review): presumably indexed by prediction block width in pixels --
 * confirm against the users of this table.
 */
const uint8_t ff_hevc_pel_weight[65] = {
    [2]  = 0,
    [4]  = 1,
    [6]  = 2,
    [8]  = 3,
    [12] = 4,
    [16] = 5,
    [24] = 6,
    [32] = 7,
    [48] = 8,
    [64] = 9,
};
48
49 /**
50  * NOTE: Each function hls_foo corresponds to the function foo in the
51  * specification (HLS stands for High Level Syntax).
52  */
53
54 /**
55  * Section 5.7
56  */
57
58 /* free everything allocated  by pic_arrays_init() */
59 static void pic_arrays_free(HEVCContext *s)
60 {
61     av_freep(&s->sao);
62     av_freep(&s->deblock);
63
64     av_freep(&s->skip_flag);
65     av_freep(&s->tab_ct_depth);
66
67     av_freep(&s->tab_ipm);
68     av_freep(&s->cbf_luma);
69     av_freep(&s->is_pcm);
70
71     av_freep(&s->qp_y_tab);
72     av_freep(&s->tab_slice_address);
73     av_freep(&s->filter_slice_edges);
74
75     av_freep(&s->horizontal_bs);
76     av_freep(&s->vertical_bs);
77
78     av_freep(&s->sh.entry_point_offset);
79     av_freep(&s->sh.size);
80     av_freep(&s->sh.offset);
81
82     av_buffer_pool_uninit(&s->tab_mvf_pool);
83     av_buffer_pool_uninit(&s->rpl_tab_pool);
84 }
85
86 /* allocate arrays that depend on frame dimensions */
87 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
88 {
89     int log2_min_cb_size = sps->log2_min_cb_size;
90     int width            = sps->width;
91     int height           = sps->height;
92     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
93                            ((height >> log2_min_cb_size) + 1);
94     int ctb_count        = sps->ctb_width * sps->ctb_height;
95     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
96
97     s->bs_width  = (width  >> 2) + 1;
98     s->bs_height = (height >> 2) + 1;
99
100     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
101     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
102     if (!s->sao || !s->deblock)
103         goto fail;
104
105     s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
106     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
107     if (!s->skip_flag || !s->tab_ct_depth)
108         goto fail;
109
110     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
111     s->tab_ipm  = av_mallocz(min_pu_size);
112     s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
113     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
114         goto fail;
115
116     s->filter_slice_edges = av_mallocz(ctb_count);
117     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
118                                       sizeof(*s->tab_slice_address));
119     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
120                                       sizeof(*s->qp_y_tab));
121     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
122         goto fail;
123
124     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
125     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
126     if (!s->horizontal_bs || !s->vertical_bs)
127         goto fail;
128
129     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
130                                           av_buffer_allocz);
131     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
132                                           av_buffer_allocz);
133     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
134         goto fail;
135
136     return 0;
137
138 fail:
139     pic_arrays_free(s);
140     return AVERROR(ENOMEM);
141 }
142
143 static int pred_weight_table(HEVCContext *s, GetBitContext *gb)
144 {
145     int i = 0;
146     int j = 0;
147     uint8_t luma_weight_l0_flag[16];
148     uint8_t chroma_weight_l0_flag[16];
149     uint8_t luma_weight_l1_flag[16];
150     uint8_t chroma_weight_l1_flag[16];
151     int luma_log2_weight_denom;
152
153     luma_log2_weight_denom = get_ue_golomb_long(gb);
154     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
155         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
156     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
157     if (s->ps.sps->chroma_format_idc != 0) {
158         int delta = get_se_golomb(gb);
159         s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
160     }
161
162     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
163         luma_weight_l0_flag[i] = get_bits1(gb);
164         if (!luma_weight_l0_flag[i]) {
165             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
166             s->sh.luma_offset_l0[i] = 0;
167         }
168     }
169     if (s->ps.sps->chroma_format_idc != 0) {
170         for (i = 0; i < s->sh.nb_refs[L0]; i++)
171             chroma_weight_l0_flag[i] = get_bits1(gb);
172     } else {
173         for (i = 0; i < s->sh.nb_refs[L0]; i++)
174             chroma_weight_l0_flag[i] = 0;
175     }
176     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
177         if (luma_weight_l0_flag[i]) {
178             int delta_luma_weight_l0 = get_se_golomb(gb);
179             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
180             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
181         }
182         if (chroma_weight_l0_flag[i]) {
183             for (j = 0; j < 2; j++) {
184                 int delta_chroma_weight_l0 = get_se_golomb(gb);
185                 int delta_chroma_offset_l0 = get_se_golomb(gb);
186
187                 if (   (int8_t)delta_chroma_weight_l0 != delta_chroma_weight_l0
188                     || delta_chroma_offset_l0 < -(1<<17) || delta_chroma_offset_l0 > (1<<17)) {
189                     return AVERROR_INVALIDDATA;
190                 }
191
192                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
193                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
194                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
195             }
196         } else {
197             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
198             s->sh.chroma_offset_l0[i][0] = 0;
199             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
200             s->sh.chroma_offset_l0[i][1] = 0;
201         }
202     }
203     if (s->sh.slice_type == HEVC_SLICE_B) {
204         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
205             luma_weight_l1_flag[i] = get_bits1(gb);
206             if (!luma_weight_l1_flag[i]) {
207                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
208                 s->sh.luma_offset_l1[i] = 0;
209             }
210         }
211         if (s->ps.sps->chroma_format_idc != 0) {
212             for (i = 0; i < s->sh.nb_refs[L1]; i++)
213                 chroma_weight_l1_flag[i] = get_bits1(gb);
214         } else {
215             for (i = 0; i < s->sh.nb_refs[L1]; i++)
216                 chroma_weight_l1_flag[i] = 0;
217         }
218         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
219             if (luma_weight_l1_flag[i]) {
220                 int delta_luma_weight_l1 = get_se_golomb(gb);
221                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
222                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
223             }
224             if (chroma_weight_l1_flag[i]) {
225                 for (j = 0; j < 2; j++) {
226                     int delta_chroma_weight_l1 = get_se_golomb(gb);
227                     int delta_chroma_offset_l1 = get_se_golomb(gb);
228
229                     if (   (int8_t)delta_chroma_weight_l1 != delta_chroma_weight_l1
230                         || delta_chroma_offset_l1 < -(1<<17) || delta_chroma_offset_l1 > (1<<17)) {
231                         return AVERROR_INVALIDDATA;
232                     }
233
234                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
235                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
236                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
237                 }
238             } else {
239                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
240                 s->sh.chroma_offset_l1[i][0] = 0;
241                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
242                 s->sh.chroma_offset_l1[i][1] = 0;
243             }
244         }
245     }
246     return 0;
247 }
248
249 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
250 {
251     const HEVCSPS *sps = s->ps.sps;
252     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
253     int prev_delta_msb = 0;
254     unsigned int nb_sps = 0, nb_sh;
255     int i;
256
257     rps->nb_refs = 0;
258     if (!sps->long_term_ref_pics_present_flag)
259         return 0;
260
261     if (sps->num_long_term_ref_pics_sps > 0)
262         nb_sps = get_ue_golomb_long(gb);
263     nb_sh = get_ue_golomb_long(gb);
264
265     if (nb_sps > sps->num_long_term_ref_pics_sps)
266         return AVERROR_INVALIDDATA;
267     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
268         return AVERROR_INVALIDDATA;
269
270     rps->nb_refs = nb_sh + nb_sps;
271
272     for (i = 0; i < rps->nb_refs; i++) {
273         uint8_t delta_poc_msb_present;
274
275         if (i < nb_sps) {
276             uint8_t lt_idx_sps = 0;
277
278             if (sps->num_long_term_ref_pics_sps > 1)
279                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
280
281             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
282             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
283         } else {
284             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
285             rps->used[i] = get_bits1(gb);
286         }
287
288         delta_poc_msb_present = get_bits1(gb);
289         if (delta_poc_msb_present) {
290             int64_t delta = get_ue_golomb_long(gb);
291             int64_t poc;
292
293             if (i && i != nb_sps)
294                 delta += prev_delta_msb;
295
296             poc = rps->poc[i] + s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
297             if (poc != (int32_t)poc)
298                 return AVERROR_INVALIDDATA;
299             rps->poc[i] = poc;
300             prev_delta_msb = delta;
301         }
302     }
303
304     return 0;
305 }
306
307 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
308                                  const HEVCSPS *sps)
309 {
310     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
311     const HEVCWindow *ow = &sps->output_window;
312     unsigned int num = 0, den = 0;
313
314     avctx->pix_fmt             = sps->pix_fmt;
315     avctx->coded_width         = sps->width;
316     avctx->coded_height        = sps->height;
317     avctx->width               = sps->width  - ow->left_offset - ow->right_offset;
318     avctx->height              = sps->height - ow->top_offset  - ow->bottom_offset;
319     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
320     avctx->profile             = sps->ptl.general_ptl.profile_idc;
321     avctx->level               = sps->ptl.general_ptl.level_idc;
322
323     ff_set_sar(avctx, sps->vui.sar);
324
325     if (sps->vui.video_signal_type_present_flag)
326         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
327                                                             : AVCOL_RANGE_MPEG;
328     else
329         avctx->color_range = AVCOL_RANGE_MPEG;
330
331     if (sps->vui.colour_description_present_flag) {
332         avctx->color_primaries = sps->vui.colour_primaries;
333         avctx->color_trc       = sps->vui.transfer_characteristic;
334         avctx->colorspace      = sps->vui.matrix_coeffs;
335     } else {
336         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
337         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
338         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
339     }
340
341     if (vps->vps_timing_info_present_flag) {
342         num = vps->vps_num_units_in_tick;
343         den = vps->vps_time_scale;
344     } else if (sps->vui.vui_timing_info_present_flag) {
345         num = sps->vui.vui_num_units_in_tick;
346         den = sps->vui.vui_time_scale;
347     }
348
349     if (num != 0 && den != 0)
350         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
351                   num, den, 1 << 30);
352 }
353
354 static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
355 {
356 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
357                      CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
358                      CONFIG_HEVC_NVDEC_HWACCEL + \
359                      CONFIG_HEVC_VAAPI_HWACCEL + \
360                      CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
361                      CONFIG_HEVC_VDPAU_HWACCEL)
362     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
363
364     switch (sps->pix_fmt) {
365     case AV_PIX_FMT_YUV420P:
366     case AV_PIX_FMT_YUVJ420P:
367 #if CONFIG_HEVC_DXVA2_HWACCEL
368         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
369 #endif
370 #if CONFIG_HEVC_D3D11VA_HWACCEL
371         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
372         *fmt++ = AV_PIX_FMT_D3D11;
373 #endif
374 #if CONFIG_HEVC_VAAPI_HWACCEL
375         *fmt++ = AV_PIX_FMT_VAAPI;
376 #endif
377 #if CONFIG_HEVC_VDPAU_HWACCEL
378         *fmt++ = AV_PIX_FMT_VDPAU;
379 #endif
380 #if CONFIG_HEVC_NVDEC_HWACCEL
381         *fmt++ = AV_PIX_FMT_CUDA;
382 #endif
383 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
384         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
385 #endif
386         break;
387     case AV_PIX_FMT_YUV420P10:
388 #if CONFIG_HEVC_DXVA2_HWACCEL
389         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
390 #endif
391 #if CONFIG_HEVC_D3D11VA_HWACCEL
392         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
393         *fmt++ = AV_PIX_FMT_D3D11;
394 #endif
395 #if CONFIG_HEVC_VAAPI_HWACCEL
396         *fmt++ = AV_PIX_FMT_VAAPI;
397 #endif
398 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
399         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
400 #endif
401 #if CONFIG_HEVC_NVDEC_HWACCEL
402         *fmt++ = AV_PIX_FMT_CUDA;
403 #endif
404         break;
405     }
406
407     *fmt++ = sps->pix_fmt;
408     *fmt = AV_PIX_FMT_NONE;
409
410     return ff_thread_get_format(s->avctx, pix_fmts);
411 }
412
413 static int set_sps(HEVCContext *s, const HEVCSPS *sps,
414                    enum AVPixelFormat pix_fmt)
415 {
416     int ret, i;
417
418     pic_arrays_free(s);
419     s->ps.sps = NULL;
420     s->ps.vps = NULL;
421
422     if (!sps)
423         return 0;
424
425     ret = pic_arrays_init(s, sps);
426     if (ret < 0)
427         goto fail;
428
429     export_stream_params(s->avctx, &s->ps, sps);
430
431     s->avctx->pix_fmt = pix_fmt;
432
433     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
434     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
435     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
436
437     for (i = 0; i < 3; i++) {
438         av_freep(&s->sao_pixel_buffer_h[i]);
439         av_freep(&s->sao_pixel_buffer_v[i]);
440     }
441
442     if (sps->sao_enabled && !s->avctx->hwaccel) {
443         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
444         int c_idx;
445
446         for(c_idx = 0; c_idx < c_count; c_idx++) {
447             int w = sps->width >> sps->hshift[c_idx];
448             int h = sps->height >> sps->vshift[c_idx];
449             s->sao_pixel_buffer_h[c_idx] =
450                 av_malloc((w * 2 * sps->ctb_height) <<
451                           sps->pixel_shift);
452             s->sao_pixel_buffer_v[c_idx] =
453                 av_malloc((h * 2 * sps->ctb_width) <<
454                           sps->pixel_shift);
455         }
456     }
457
458     s->ps.sps = sps;
459     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
460
461     return 0;
462
463 fail:
464     pic_arrays_free(s);
465     s->ps.sps = NULL;
466     return ret;
467 }
468
469 static int hls_slice_header(HEVCContext *s)
470 {
471     GetBitContext *gb = &s->HEVClc->gb;
472     SliceHeader *sh   = &s->sh;
473     int i, ret;
474
475     // Coded parameters
476     sh->first_slice_in_pic_flag = get_bits1(gb);
477     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
478         s->seq_decode = (s->seq_decode + 1) & 0xff;
479         s->max_ra     = INT_MAX;
480         if (IS_IDR(s))
481             ff_hevc_clear_refs(s);
482     }
483     sh->no_output_of_prior_pics_flag = 0;
484     if (IS_IRAP(s))
485         sh->no_output_of_prior_pics_flag = get_bits1(gb);
486
487     sh->pps_id = get_ue_golomb_long(gb);
488     if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
489         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
490         return AVERROR_INVALIDDATA;
491     }
492     if (!sh->first_slice_in_pic_flag &&
493         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
494         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
495         return AVERROR_INVALIDDATA;
496     }
497     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
498     if (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos == 1)
499         sh->no_output_of_prior_pics_flag = 1;
500
501     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
502         const HEVCSPS *sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
503         const HEVCSPS *last_sps = s->ps.sps;
504         enum AVPixelFormat pix_fmt;
505
506         if (last_sps && IS_IRAP(s) && s->nal_unit_type != HEVC_NAL_CRA_NUT) {
507             if (sps->width != last_sps->width || sps->height != last_sps->height ||
508                 sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering !=
509                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
510                 sh->no_output_of_prior_pics_flag = 0;
511         }
512         ff_hevc_clear_refs(s);
513
514         ret = set_sps(s, sps, sps->pix_fmt);
515         if (ret < 0)
516             return ret;
517
518         pix_fmt = get_format(s, sps);
519         if (pix_fmt < 0)
520             return pix_fmt;
521         s->avctx->pix_fmt = pix_fmt;
522
523         s->seq_decode = (s->seq_decode + 1) & 0xff;
524         s->max_ra     = INT_MAX;
525     }
526
527     sh->dependent_slice_segment_flag = 0;
528     if (!sh->first_slice_in_pic_flag) {
529         int slice_address_length;
530
531         if (s->ps.pps->dependent_slice_segments_enabled_flag)
532             sh->dependent_slice_segment_flag = get_bits1(gb);
533
534         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
535                                             s->ps.sps->ctb_height);
536         sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
537         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
538             av_log(s->avctx, AV_LOG_ERROR,
539                    "Invalid slice segment address: %u.\n",
540                    sh->slice_segment_addr);
541             return AVERROR_INVALIDDATA;
542         }
543
544         if (!sh->dependent_slice_segment_flag) {
545             sh->slice_addr = sh->slice_segment_addr;
546             s->slice_idx++;
547         }
548     } else {
549         sh->slice_segment_addr = sh->slice_addr = 0;
550         s->slice_idx           = 0;
551         s->slice_initialized   = 0;
552     }
553
554     if (!sh->dependent_slice_segment_flag) {
555         s->slice_initialized = 0;
556
557         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
558             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
559
560         sh->slice_type = get_ue_golomb_long(gb);
561         if (!(sh->slice_type == HEVC_SLICE_I ||
562               sh->slice_type == HEVC_SLICE_P ||
563               sh->slice_type == HEVC_SLICE_B)) {
564             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
565                    sh->slice_type);
566             return AVERROR_INVALIDDATA;
567         }
568         if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I) {
569             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
570             return AVERROR_INVALIDDATA;
571         }
572
573         // when flag is not present, picture is inferred to be output
574         sh->pic_output_flag = 1;
575         if (s->ps.pps->output_flag_present_flag)
576             sh->pic_output_flag = get_bits1(gb);
577
578         if (s->ps.sps->separate_colour_plane_flag)
579             sh->colour_plane_id = get_bits(gb, 2);
580
581         if (!IS_IDR(s)) {
582             int poc, pos;
583
584             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
585             poc = ff_hevc_compute_poc(s->ps.sps, s->pocTid0, sh->pic_order_cnt_lsb, s->nal_unit_type);
586             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
587                 av_log(s->avctx, AV_LOG_WARNING,
588                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
589                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
590                     return AVERROR_INVALIDDATA;
591                 poc = s->poc;
592             }
593             s->poc = poc;
594
595             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
596             pos = get_bits_left(gb);
597             if (!sh->short_term_ref_pic_set_sps_flag) {
598                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
599                 if (ret < 0)
600                     return ret;
601
602                 sh->short_term_rps = &sh->slice_rps;
603             } else {
604                 int numbits, rps_idx;
605
606                 if (!s->ps.sps->nb_st_rps) {
607                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
608                     return AVERROR_INVALIDDATA;
609                 }
610
611                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
612                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
613                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
614             }
615             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
616
617             pos = get_bits_left(gb);
618             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
619             if (ret < 0) {
620                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
621                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
622                     return AVERROR_INVALIDDATA;
623             }
624             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
625
626             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
627                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
628             else
629                 sh->slice_temporal_mvp_enabled_flag = 0;
630         } else {
631             s->sh.short_term_rps = NULL;
632             s->poc               = 0;
633         }
634
635         /* 8.3.1 */
636         if (sh->first_slice_in_pic_flag && s->temporal_id == 0 &&
637             s->nal_unit_type != HEVC_NAL_TRAIL_N &&
638             s->nal_unit_type != HEVC_NAL_TSA_N   &&
639             s->nal_unit_type != HEVC_NAL_STSA_N  &&
640             s->nal_unit_type != HEVC_NAL_RADL_N  &&
641             s->nal_unit_type != HEVC_NAL_RADL_R  &&
642             s->nal_unit_type != HEVC_NAL_RASL_N  &&
643             s->nal_unit_type != HEVC_NAL_RASL_R)
644             s->pocTid0 = s->poc;
645
646         if (s->ps.sps->sao_enabled) {
647             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
648             if (s->ps.sps->chroma_format_idc) {
649                 sh->slice_sample_adaptive_offset_flag[1] =
650                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
651             }
652         } else {
653             sh->slice_sample_adaptive_offset_flag[0] = 0;
654             sh->slice_sample_adaptive_offset_flag[1] = 0;
655             sh->slice_sample_adaptive_offset_flag[2] = 0;
656         }
657
658         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
659         if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
660             int nb_refs;
661
662             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
663             if (sh->slice_type == HEVC_SLICE_B)
664                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
665
666             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
667                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
668                 if (sh->slice_type == HEVC_SLICE_B)
669                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
670             }
671             if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
672                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
673                        sh->nb_refs[L0], sh->nb_refs[L1]);
674                 return AVERROR_INVALIDDATA;
675             }
676
677             sh->rpl_modification_flag[0] = 0;
678             sh->rpl_modification_flag[1] = 0;
679             nb_refs = ff_hevc_frame_nb_refs(s);
680             if (!nb_refs) {
681                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
682                 return AVERROR_INVALIDDATA;
683             }
684
685             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
686                 sh->rpl_modification_flag[0] = get_bits1(gb);
687                 if (sh->rpl_modification_flag[0]) {
688                     for (i = 0; i < sh->nb_refs[L0]; i++)
689                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
690                 }
691
692                 if (sh->slice_type == HEVC_SLICE_B) {
693                     sh->rpl_modification_flag[1] = get_bits1(gb);
694                     if (sh->rpl_modification_flag[1] == 1)
695                         for (i = 0; i < sh->nb_refs[L1]; i++)
696                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
697                 }
698             }
699
700             if (sh->slice_type == HEVC_SLICE_B)
701                 sh->mvd_l1_zero_flag = get_bits1(gb);
702
703             if (s->ps.pps->cabac_init_present_flag)
704                 sh->cabac_init_flag = get_bits1(gb);
705             else
706                 sh->cabac_init_flag = 0;
707
708             sh->collocated_ref_idx = 0;
709             if (sh->slice_temporal_mvp_enabled_flag) {
710                 sh->collocated_list = L0;
711                 if (sh->slice_type == HEVC_SLICE_B)
712                     sh->collocated_list = !get_bits1(gb);
713
714                 if (sh->nb_refs[sh->collocated_list] > 1) {
715                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
716                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
717                         av_log(s->avctx, AV_LOG_ERROR,
718                                "Invalid collocated_ref_idx: %d.\n",
719                                sh->collocated_ref_idx);
720                         return AVERROR_INVALIDDATA;
721                     }
722                 }
723             }
724
725             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == HEVC_SLICE_P) ||
726                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) {
727                 int ret = pred_weight_table(s, gb);
728                 if (ret < 0)
729                     return ret;
730             }
731
732             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
733             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
734                 av_log(s->avctx, AV_LOG_ERROR,
735                        "Invalid number of merging MVP candidates: %d.\n",
736                        sh->max_num_merge_cand);
737                 return AVERROR_INVALIDDATA;
738             }
739         }
740
741         sh->slice_qp_delta = get_se_golomb(gb);
742
743         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
744             sh->slice_cb_qp_offset = get_se_golomb(gb);
745             sh->slice_cr_qp_offset = get_se_golomb(gb);
746         } else {
747             sh->slice_cb_qp_offset = 0;
748             sh->slice_cr_qp_offset = 0;
749         }
750
751         if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
752             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
753         else
754             sh->cu_chroma_qp_offset_enabled_flag = 0;
755
756         if (s->ps.pps->deblocking_filter_control_present_flag) {
757             int deblocking_filter_override_flag = 0;
758
759             if (s->ps.pps->deblocking_filter_override_enabled_flag)
760                 deblocking_filter_override_flag = get_bits1(gb);
761
762             if (deblocking_filter_override_flag) {
763                 sh->disable_deblocking_filter_flag = get_bits1(gb);
764                 if (!sh->disable_deblocking_filter_flag) {
765                     int beta_offset_div2 = get_se_golomb(gb);
766                     int tc_offset_div2   = get_se_golomb(gb) ;
767                     if (beta_offset_div2 < -6 || beta_offset_div2 > 6 ||
768                         tc_offset_div2   < -6 || tc_offset_div2   > 6) {
769                         av_log(s->avctx, AV_LOG_ERROR,
770                             "Invalid deblock filter offsets: %d, %d\n",
771                             beta_offset_div2, tc_offset_div2);
772                         return AVERROR_INVALIDDATA;
773                     }
774                     sh->beta_offset = beta_offset_div2 * 2;
775                     sh->tc_offset   =   tc_offset_div2 * 2;
776                 }
777             } else {
778                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
779                 sh->beta_offset                    = s->ps.pps->beta_offset;
780                 sh->tc_offset                      = s->ps.pps->tc_offset;
781             }
782         } else {
783             sh->disable_deblocking_filter_flag = 0;
784             sh->beta_offset                    = 0;
785             sh->tc_offset                      = 0;
786         }
787
788         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
789             (sh->slice_sample_adaptive_offset_flag[0] ||
790              sh->slice_sample_adaptive_offset_flag[1] ||
791              !sh->disable_deblocking_filter_flag)) {
792             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
793         } else {
794             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
795         }
796     } else if (!s->slice_initialized) {
797         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
798         return AVERROR_INVALIDDATA;
799     }
800
801     sh->num_entry_point_offsets = 0;
802     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
803         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
804         // It would be possible to bound this tighter but this here is simpler
805         if (num_entry_point_offsets > get_bits_left(gb)) {
806             av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
807             return AVERROR_INVALIDDATA;
808         }
809
810         sh->num_entry_point_offsets = num_entry_point_offsets;
811         if (sh->num_entry_point_offsets > 0) {
812             int offset_len = get_ue_golomb_long(gb) + 1;
813
814             if (offset_len < 1 || offset_len > 32) {
815                 sh->num_entry_point_offsets = 0;
816                 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
817                 return AVERROR_INVALIDDATA;
818             }
819
820             av_freep(&sh->entry_point_offset);
821             av_freep(&sh->offset);
822             av_freep(&sh->size);
823             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned));
824             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
825             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
826             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
827                 sh->num_entry_point_offsets = 0;
828                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
829                 return AVERROR(ENOMEM);
830             }
831             for (i = 0; i < sh->num_entry_point_offsets; i++) {
832                 unsigned val = get_bits_long(gb, offset_len);
833                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
834             }
835             if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
836                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
837                 s->threads_number = 1;
838             } else
839                 s->enable_parallel_tiles = 0;
840         } else
841             s->enable_parallel_tiles = 0;
842     }
843
844     if (s->ps.pps->slice_header_extension_present_flag) {
845         unsigned int length = get_ue_golomb_long(gb);
846         if (length*8LL > get_bits_left(gb)) {
847             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
848             return AVERROR_INVALIDDATA;
849         }
850         for (i = 0; i < length; i++)
851             skip_bits(gb, 8);  // slice_header_extension_data_byte
852     }
853
854     // Inferred parameters
855     sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
856     if (sh->slice_qp > 51 ||
857         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
858         av_log(s->avctx, AV_LOG_ERROR,
859                "The slice_qp %d is outside the valid range "
860                "[%d, 51].\n",
861                sh->slice_qp,
862                -s->ps.sps->qp_bd_offset);
863         return AVERROR_INVALIDDATA;
864     }
865
866     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
867
868     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
869         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
870         return AVERROR_INVALIDDATA;
871     }
872
873     if (get_bits_left(gb) < 0) {
874         av_log(s->avctx, AV_LOG_ERROR,
875                "Overread slice header by %d bits\n", -get_bits_left(gb));
876         return AVERROR_INVALIDDATA;
877     }
878
879     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
880
881     if (!s->ps.pps->cu_qp_delta_enabled_flag)
882         s->HEVClc->qp_y = s->sh.slice_qp;
883
884     s->slice_initialized = 1;
885     s->HEVClc->tu.cu_qp_offset_cb = 0;
886     s->HEVClc->tu.cu_qp_offset_cr = 0;
887
888     return 0;
889 }
890
/* Entry of the per-CTB table `tab` at CTB column x, row y.
 * Relies on a variable `s` in the enclosing scope for the CTB row width. */
#define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])

/*
 * Assign one SAO syntax element of the current CTB.
 *
 * Relies on the following names being visible in the enclosing scope:
 * s, sao, rx, ry, sao_merge_left_flag and sao_merge_up_flag.
 *
 * If the left merge flag is set the element is copied from the CTB at
 * (rx - 1, ry); otherwise, if the up merge flag is set, from the CTB at
 * (rx, ry - 1); otherwise `value` is evaluated and stored.
 * `value` is evaluated at most once, and only when neither flag is set.
 */
#define SET_SAO(elem, value)                            \
do {                                                    \
    if (sao_merge_left_flag)                            \
        sao->elem = CTB(s->sao, rx-1, ry).elem;         \
    else if (sao_merge_up_flag)                         \
        sao->elem = CTB(s->sao, rx, ry-1).elem;         \
    else                                                \
        sao->elem = (value);                            \
} while (0)
904
905 static void hls_sao_param(HEVCContext *s, int rx, int ry)
906 {
907     HEVCLocalContext *lc    = s->HEVClc;
908     int sao_merge_left_flag = 0;
909     int sao_merge_up_flag   = 0;
910     SAOParams *sao          = &CTB(s->sao, rx, ry);
911     int c_idx, i;
912
913     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
914         s->sh.slice_sample_adaptive_offset_flag[1]) {
915         if (rx > 0) {
916             if (lc->ctb_left_flag)
917                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
918         }
919         if (ry > 0 && !sao_merge_left_flag) {
920             if (lc->ctb_up_flag)
921                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
922         }
923     }
924
925     for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
926         int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
927                                                  s->ps.pps->log2_sao_offset_scale_chroma;
928
929         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
930             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
931             continue;
932         }
933
934         if (c_idx == 2) {
935             sao->type_idx[2] = sao->type_idx[1];
936             sao->eo_class[2] = sao->eo_class[1];
937         } else {
938             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
939         }
940
941         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
942             continue;
943
944         for (i = 0; i < 4; i++)
945             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
946
947         if (sao->type_idx[c_idx] == SAO_BAND) {
948             for (i = 0; i < 4; i++) {
949                 if (sao->offset_abs[c_idx][i]) {
950                     SET_SAO(offset_sign[c_idx][i],
951                             ff_hevc_sao_offset_sign_decode(s));
952                 } else {
953                     sao->offset_sign[c_idx][i] = 0;
954                 }
955             }
956             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
957         } else if (c_idx != 2) {
958             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
959         }
960
961         // Inferred parameters
962         sao->offset_val[c_idx][0] = 0;
963         for (i = 0; i < 4; i++) {
964             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
965             if (sao->type_idx[c_idx] == SAO_EDGE) {
966                 if (i > 1)
967                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
968             } else if (sao->offset_sign[c_idx][i]) {
969                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
970             }
971             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
972         }
973     }
974 }
975
976 #undef SET_SAO
977 #undef CTB
978
979 static int hls_cross_component_pred(HEVCContext *s, int idx) {
980     HEVCLocalContext *lc    = s->HEVClc;
981     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
982
983     if (log2_res_scale_abs_plus1 !=  0) {
984         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
985         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
986                                (1 - 2 * res_scale_sign_flag);
987     } else {
988         lc->tu.res_scale_val = 0;
989     }
990
991
992     return 0;
993 }
994
/**
 * Decode one transform unit: run intra prediction for intra-coded CUs and
 * parse the residual of every luma/chroma block whose coded-block flag is set.
 *
 * @param s                   HEVC decoding context
 * @param x0, y0              position of this transform block
 * @param xBase, yBase        position used for chroma when it is handled on
 *                            blk_idx == 3 instead of per block
 * @param cb_xBase, cb_yBase  forwarded, with log2_cb_size, to ff_hevc_set_qPy()
 * @param log2_cb_size        forwarded to ff_hevc_set_qPy()
 * @param log2_trafo_size     log2 of the luma transform block size
 * @param blk_idx             index of this block among its siblings
 * @param cbf_luma            non-zero if a luma residual is coded
 * @param cbf_cb, cbf_cr      chroma coded-block flags; element [1] is only
 *                            consulted when chroma_format_idc == 2
 * @return 0 on success, AVERROR_INVALIDDATA if the decoded cu_qp_delta is
 *         outside its valid range
 */
995 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
996                               int xBase, int yBase, int cb_xBase, int cb_yBase,
997                               int log2_cb_size, int log2_trafo_size,
998                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
999 {
1000     HEVCLocalContext *lc = s->HEVClc;
1001     const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
1002     int i;
1003
         /* Luma intra prediction runs whether or not any residual is coded. */
1004     if (lc->cu.pred_mode == MODE_INTRA) {
1005         int trafo_size = 1 << log2_trafo_size;
1006         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1007
1008         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1009     }
1010
         /* At least one coded-block flag is set: QP syntax and residuals follow. */
1011     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
1012         (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1013         int scan_idx   = SCAN_DIAG;
1014         int scan_idx_c = SCAN_DIAG;
1015         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
1016                          (s->ps.sps->chroma_format_idc == 2 &&
1017                          (cbf_cb[1] || cbf_cr[1]));
1018
             /* cu_qp_delta is parsed only while is_cu_qp_delta_coded is unset;
              * the flag is set here so later calls skip this branch until it is
              * cleared elsewhere. */
1019         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1020             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1021             if (lc->tu.cu_qp_delta != 0)
1022                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1023                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1024             lc->tu.is_cu_qp_delta_coded = 1;
1025
1026             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1027                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1028                 av_log(s->avctx, AV_LOG_ERROR,
1029                        "The cu_qp_delta %d is outside the valid range "
1030                        "[%d, %d].\n",
1031                        lc->tu.cu_qp_delta,
1032                        -(26 + s->ps.sps->qp_bd_offset / 2),
1033                         (25 + s->ps.sps->qp_bd_offset / 2));
1034                 return AVERROR_INVALIDDATA;
1035             }
1036
1037             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
1038         }
1039
             /* Chroma QP offsets: likewise parsed only while
              * is_cu_chroma_qp_offset_coded is unset, and only when a chroma
              * residual is coded and the CU is not in transquant bypass mode. */
1040         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
1041             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
1042             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
1043             if (cu_chroma_qp_offset_flag) {
1044                 int cu_chroma_qp_offset_idx  = 0;
1045                 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
1046                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
1047                     av_log(s->avctx, AV_LOG_ERROR,
1048                         "cu_chroma_qp_offset_idx not yet tested.\n");
1049                 }
1050                 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
1051                 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
1052             } else {
1053                 lc->tu.cu_qp_offset_cb = 0;
1054                 lc->tu.cu_qp_offset_cr = 0;
1055             }
1056             lc->tu.is_cu_chroma_qp_offset_coded = 1;
1057         }
1058
             /* Intra blocks with log2_trafo_size < 4 pick the coefficient scan
              * from the prediction mode; everything else keeps SCAN_DIAG. */
1059         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1060             if (lc->tu.intra_pred_mode >= 6 &&
1061                 lc->tu.intra_pred_mode <= 14) {
1062                 scan_idx = SCAN_VERT;
1063             } else if (lc->tu.intra_pred_mode >= 22 &&
1064                        lc->tu.intra_pred_mode <= 30) {
1065                 scan_idx = SCAN_HORIZ;
1066             }
1067
1068             if (lc->tu.intra_pred_mode_c >=  6 &&
1069                 lc->tu.intra_pred_mode_c <= 14) {
1070                 scan_idx_c = SCAN_VERT;
1071             } else if (lc->tu.intra_pred_mode_c >= 22 &&
1072                        lc->tu.intra_pred_mode_c <= 30) {
1073                 scan_idx_c = SCAN_HORIZ;
1074             }
1075         }
1076
1077         lc->tu.cross_pf = 0;
1078
1079         if (cbf_luma)
1080             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
             /* Chroma handled per transform block: blocks with
              * log2_trafo_size > 2, or any size when chroma_format_idc == 3. */
1081         if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1082             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1083             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
                 /* Cross-component prediction needs the PPS flag, a coded luma
                  * residual, and either an inter CU or chroma_mode_c == 4. */
1084             lc->tu.cross_pf  = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1085                                 (lc->cu.pred_mode == MODE_INTER ||
1086                                  (lc->tu.chroma_mode_c ==  4)));
1087
1088             if (lc->tu.cross_pf) {
1089                 hls_cross_component_pred(s, 0);
1090             }
                 /* Component 1: two vertically stacked blocks when
                  * chroma_format_idc == 2, one block otherwise. */
1091             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1092                 if (lc->cu.pred_mode == MODE_INTRA) {
1093                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1094                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1095                 }
1096                 if (cbf_cb[i])
1097                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1098                                                 log2_trafo_size_c, scan_idx_c, 1);
1099                 else
1100                     if (lc->tu.cross_pf) {
                             /* No coded residual for this block: build one from
                              * the coefficients in edge_emu_buffer scaled by
                              * res_scale_val / 8 and add it to the plane.
                              * Presumably edge_emu_buffer holds the luma residual
                              * left by ff_hevc_hls_residual_coding() - confirm. */
1101                         ptrdiff_t stride = s->frame->linesize[1];
1102                         int hshift = s->ps.sps->hshift[1];
1103                         int vshift = s->ps.sps->vshift[1];
1104                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1105                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1106                         int size = 1 << log2_trafo_size_c;
1107
1108                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1109                                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
                             /* NOTE(review): this loop reuses the outer loop
                              * counter i. It leaves i == size * size, so the
                              * outer loop always ends after this iteration. */
1110                         for (i = 0; i < (size * size); i++) {
1111                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1112                         }
1113                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1114                     }
1115             }
1116
1117             if (lc->tu.cross_pf) {
1118                 hls_cross_component_pred(s, 1);
1119             }
                 /* Component 2: same structure as the component 1 loop above. */
1120             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1121                 if (lc->cu.pred_mode == MODE_INTRA) {
1122                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1123                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1124                 }
1125                 if (cbf_cr[i])
1126                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1127                                                 log2_trafo_size_c, scan_idx_c, 2);
1128                 else
1129                     if (lc->tu.cross_pf) {
1130                         ptrdiff_t stride = s->frame->linesize[2];
1131                         int hshift = s->ps.sps->hshift[2];
1132                         int vshift = s->ps.sps->vshift[2];
1133                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1134                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1135                         int size = 1 << log2_trafo_size_c;
1136
1137                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1138                                                           ((x0 >> hshift) << s->ps.sps->pixel_shift)];
                             /* NOTE(review): reuses the outer counter i, as in
                              * the component 1 loop. */
1139                         for (i = 0; i < (size * size); i++) {
1140                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1141                         }
1142                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1143                     }
1144             }
             /* Remaining case (log2_trafo_size <= 2, chroma_format_idc 1 or 2):
              * chroma is handled once, on the block with blk_idx == 3, at
              * (xBase, yBase) and with log2_trafo_size as the chroma block size. */
1145         } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1146             int trafo_size_h = 1 << (log2_trafo_size + 1);
1147             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1148             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1149                 if (lc->cu.pred_mode == MODE_INTRA) {
1150                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1151                                                     trafo_size_h, trafo_size_v);
1152                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1153                 }
1154                 if (cbf_cb[i])
1155                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1156                                                 log2_trafo_size, scan_idx_c, 1);
1157             }
1158             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1159                 if (lc->cu.pred_mode == MODE_INTRA) {
1160                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1161                                                 trafo_size_h, trafo_size_v);
1162                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1163                 }
1164                 if (cbf_cr[i])
1165                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1166                                                 log2_trafo_size, scan_idx_c, 2);
1167             }
1168         }
         /* No residual at all: intra CUs still need chroma prediction, using
          * the same block-size split as the residual path above. */
1169     } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1170         if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1171             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1172             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1173             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1174             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1175             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1176             if (s->ps.sps->chroma_format_idc == 2) {
1177                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1178                                                 trafo_size_h, trafo_size_v);
1179                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1180                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1181             }
1182         } else if (blk_idx == 3) {
1183             int trafo_size_h = 1 << (log2_trafo_size + 1);
1184             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1185             ff_hevc_set_neighbour_available(s, xBase, yBase,
1186                                             trafo_size_h, trafo_size_v);
1187             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1188             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1189             if (s->ps.sps->chroma_format_idc == 2) {
1190                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1191                                                 trafo_size_h, trafo_size_v);
1192                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1193                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1194             }
1195         }
1196     }
1197
1198     return 0;
1199 }
1200
1201 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1202 {
1203     int cb_size          = 1 << log2_cb_size;
1204     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1205
1206     int min_pu_width     = s->ps.sps->min_pu_width;
1207     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1208     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1209     int i, j;
1210
1211     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1212         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1213             s->is_pcm[i + j * min_pu_width] = 2;
1214 }
1215
1216 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1217                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1218                               int log2_cb_size, int log2_trafo_size,
1219                               int trafo_depth, int blk_idx,
1220                               const int *base_cbf_cb, const int *base_cbf_cr)
1221 {
1222     HEVCLocalContext *lc = s->HEVClc;
1223     uint8_t split_transform_flag;
1224     int cbf_cb[2];
1225     int cbf_cr[2];
1226     int ret;
1227
1228     cbf_cb[0] = base_cbf_cb[0];
1229     cbf_cb[1] = base_cbf_cb[1];
1230     cbf_cr[0] = base_cbf_cr[0];
1231     cbf_cr[1] = base_cbf_cr[1];
1232
1233     if (lc->cu.intra_split_flag) {
1234         if (trafo_depth == 1) {
1235             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1236             if (s->ps.sps->chroma_format_idc == 3) {
1237                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1238                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1239             } else {
1240                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1241                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1242             }
1243         }
1244     } else {
1245         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1246         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1247         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1248     }
1249
1250     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1251         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1252         trafo_depth     < lc->cu.max_trafo_depth       &&
1253         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1254         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1255     } else {
1256         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1257                           lc->cu.pred_mode == MODE_INTER &&
1258                           lc->cu.part_mode != PART_2Nx2N &&
1259                           trafo_depth == 0;
1260
1261         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1262                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1263                                inter_split;
1264     }
1265
1266     if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1267         if (trafo_depth == 0 || cbf_cb[0]) {
1268             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1269             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1270                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1271             }
1272         }
1273
1274         if (trafo_depth == 0 || cbf_cr[0]) {
1275             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1276             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1277                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1278             }
1279         }
1280     }
1281
1282     if (split_transform_flag) {
1283         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1284         const int x1 = x0 + trafo_size_split;
1285         const int y1 = y0 + trafo_size_split;
1286
1287 #define SUBDIVIDE(x, y, idx)                                                    \
1288 do {                                                                            \
1289     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1290                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1291                              cbf_cb, cbf_cr);                                   \
1292     if (ret < 0)                                                                \
1293         return ret;                                                             \
1294 } while (0)
1295
1296         SUBDIVIDE(x0, y0, 0);
1297         SUBDIVIDE(x1, y0, 1);
1298         SUBDIVIDE(x0, y1, 2);
1299         SUBDIVIDE(x1, y1, 3);
1300
1301 #undef SUBDIVIDE
1302     } else {
1303         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1304         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1305         int min_tu_width     = s->ps.sps->min_tb_width;
1306         int cbf_luma         = 1;
1307
1308         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1309             cbf_cb[0] || cbf_cr[0] ||
1310             (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1311             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1312         }
1313
1314         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1315                                  log2_cb_size, log2_trafo_size,
1316                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1317         if (ret < 0)
1318             return ret;
1319         // TODO: store cbf_luma somewhere else
1320         if (cbf_luma) {
1321             int i, j;
1322             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1323                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1324                     int x_tu = (x0 + j) >> log2_min_tu_size;
1325                     int y_tu = (y0 + i) >> log2_min_tu_size;
1326                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1327                 }
1328         }
1329         if (!s->sh.disable_deblocking_filter_flag) {
1330             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1331             if (s->ps.pps->transquant_bypass_enable_flag &&
1332                 lc->cu.cu_transquant_bypass_flag)
1333                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1334         }
1335     }
1336     return 0;
1337 }
1338
1339 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1340 {
1341     HEVCLocalContext *lc = s->HEVClc;
1342     GetBitContext gb;
1343     int cb_size   = 1 << log2_cb_size;
1344     ptrdiff_t stride0 = s->frame->linesize[0];
1345     ptrdiff_t stride1 = s->frame->linesize[1];
1346     ptrdiff_t stride2 = s->frame->linesize[2];
1347     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1348     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1349     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1350
1351     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1352                          (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1353                           ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1354                           s->ps.sps->pcm.bit_depth_chroma;
1355     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1356     int ret;
1357
1358     if (!s->sh.disable_deblocking_filter_flag)
1359         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1360
1361     ret = init_get_bits(&gb, pcm, length);
1362     if (ret < 0)
1363         return ret;
1364
1365     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1366     if (s->ps.sps->chroma_format_idc) {
1367         s->hevcdsp.put_pcm(dst1, stride1,
1368                            cb_size >> s->ps.sps->hshift[1],
1369                            cb_size >> s->ps.sps->vshift[1],
1370                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1371         s->hevcdsp.put_pcm(dst2, stride2,
1372                            cb_size >> s->ps.sps->hshift[2],
1373                            cb_size >> s->ps.sps->vshift[2],
1374                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1375     }
1376
1377     return 0;
1378 }
1379
1380 /**
1381  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1382  *
1383  * @param s HEVC decoding context
1384  * @param dst target buffer for block data at block position
1385  * @param dststride stride of the dst buffer
1386  * @param ref reference picture buffer at origin (0, 0)
1387  * @param mv motion vector (relative to block position) to get pixel data from
1388  * @param x_off horizontal position of block from origin (0, 0)
1389  * @param y_off vertical position of block from origin (0, 0)
1390  * @param block_w width of block
1391  * @param block_h height of block
1392  * @param luma_weight weighting factor applied to the luma prediction
1393  * @param luma_offset additive offset applied to the luma prediction value
1394  */
1395
1396 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1397                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1398                         int block_w, int block_h, int luma_weight, int luma_offset)
1399 {
1400     HEVCLocalContext *lc = s->HEVClc;
1401     uint8_t *src         = ref->data[0];
1402     ptrdiff_t srcstride  = ref->linesize[0];
1403     int pic_width        = s->ps.sps->width;
1404     int pic_height       = s->ps.sps->height;
1405     int mx               = mv->x & 3;
1406     int my               = mv->y & 3;
1407     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1408                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1409     int idx              = ff_hevc_pel_weight[block_w];
1410
1411     x_off += mv->x >> 2;
1412     y_off += mv->y >> 2;
1413     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1414
1415     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1416         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1417         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1418         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1419         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1420         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1421
1422         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1423                                  edge_emu_stride, srcstride,
1424                                  block_w + QPEL_EXTRA,
1425                                  block_h + QPEL_EXTRA,
1426                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1427                                  pic_width, pic_height);
1428         src = lc->edge_emu_buffer + buf_offset;
1429         srcstride = edge_emu_stride;
1430     }
1431
1432     if (!weight_flag)
1433         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1434                                                       block_h, mx, my, block_w);
1435     else
1436         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1437                                                         block_h, s->sh.luma_log2_weight_denom,
1438                                                         luma_weight, luma_offset, mx, my, block_w);
1439 }
1440
1441 /**
1442  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1443  *
1444  * @param s HEVC decoding context
1445  * @param dst target buffer for block data at block position
1446  * @param dststride stride of the dst buffer
1447  * @param ref0 reference picture0 buffer at origin (0, 0)
1448  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1449  * @param x_off horizontal position of block from origin (0, 0)
1450  * @param y_off vertical position of block from origin (0, 0)
1451  * @param block_w width of block
1452  * @param block_h height of block
1453  * @param ref1 reference picture1 buffer at origin (0, 0)
1454  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1455  * @param current_mv current motion vector structure
1456  */
1457  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1458                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1459                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1460 {
1461     HEVCLocalContext *lc = s->HEVClc;
1462     ptrdiff_t src0stride  = ref0->linesize[0];
1463     ptrdiff_t src1stride  = ref1->linesize[0];
1464     int pic_width        = s->ps.sps->width;
1465     int pic_height       = s->ps.sps->height;
1466     int mx0              = mv0->x & 3;
1467     int my0              = mv0->y & 3;
1468     int mx1              = mv1->x & 3;
1469     int my1              = mv1->y & 3;
1470     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1471                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1472     int x_off0           = x_off + (mv0->x >> 2);
1473     int y_off0           = y_off + (mv0->y >> 2);
1474     int x_off1           = x_off + (mv1->x >> 2);
1475     int y_off1           = y_off + (mv1->y >> 2);
1476     int idx              = ff_hevc_pel_weight[block_w];
1477
1478     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1479     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1480
1481     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1482         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1483         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1484         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1485         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1486         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1487
1488         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1489                                  edge_emu_stride, src0stride,
1490                                  block_w + QPEL_EXTRA,
1491                                  block_h + QPEL_EXTRA,
1492                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1493                                  pic_width, pic_height);
1494         src0 = lc->edge_emu_buffer + buf_offset;
1495         src0stride = edge_emu_stride;
1496     }
1497
1498     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1499         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1500         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1501         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1502         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1503         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1504
1505         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1506                                  edge_emu_stride, src1stride,
1507                                  block_w + QPEL_EXTRA,
1508                                  block_h + QPEL_EXTRA,
1509                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1510                                  pic_width, pic_height);
1511         src1 = lc->edge_emu_buffer2 + buf_offset;
1512         src1stride = edge_emu_stride;
1513     }
1514
1515     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1516                                                 block_h, mx0, my0, block_w);
1517     if (!weight_flag)
1518         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1519                                                        block_h, mx1, my1, block_w);
1520     else
1521         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1522                                                          block_h, s->sh.luma_log2_weight_denom,
1523                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1524                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1525                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1526                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1527                                                          mx1, my1, block_w);
1528
1529 }
1530
1531 /**
1532  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1533  *
1534  * @param s HEVC decoding context
1535  * @param dst1 target buffer for block data at block position (U plane)
1536  * @param dst2 target buffer for block data at block position (V plane)
1537  * @param dststride stride of the dst1 and dst2 buffers
1538  * @param ref reference picture buffer at origin (0, 0)
1539  * @param mv motion vector (relative to block position) to get pixel data from
1540  * @param x_off horizontal position of block from origin (0, 0)
1541  * @param y_off vertical position of block from origin (0, 0)
1542  * @param block_w width of block
1543  * @param block_h height of block
1544  * @param chroma_weight weighting factor applied to the chroma prediction
1545  * @param chroma_offset additive offset applied to the chroma prediction value
1546  */
1547
1548 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1549                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1550                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1551 {
1552     HEVCLocalContext *lc = s->HEVClc;
1553     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1554     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1555     const Mv *mv         = &current_mv->mv[reflist];
1556     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1557                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1558     int idx              = ff_hevc_pel_weight[block_w];
1559     int hshift           = s->ps.sps->hshift[1];
1560     int vshift           = s->ps.sps->vshift[1];
1561     intptr_t mx          = av_mod_uintp2(mv->x, 2 + hshift);
1562     intptr_t my          = av_mod_uintp2(mv->y, 2 + vshift);
1563     intptr_t _mx         = mx << (1 - hshift);
1564     intptr_t _my         = my << (1 - vshift);
1565
1566     x_off += mv->x >> (2 + hshift);
1567     y_off += mv->y >> (2 + vshift);
1568     src0  += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1569
1570     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1571         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1572         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1573         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1574         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1575         int buf_offset0 = EPEL_EXTRA_BEFORE *
1576                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1577         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1578                                  edge_emu_stride, srcstride,
1579                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1580                                  x_off - EPEL_EXTRA_BEFORE,
1581                                  y_off - EPEL_EXTRA_BEFORE,
1582                                  pic_width, pic_height);
1583
1584         src0 = lc->edge_emu_buffer + buf_offset0;
1585         srcstride = edge_emu_stride;
1586     }
1587     if (!weight_flag)
1588         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1589                                                   block_h, _mx, _my, block_w);
1590     else
1591         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1592                                                         block_h, s->sh.chroma_log2_weight_denom,
1593                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1594 }
1595
1596 /**
1597  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1598  *
1599  * @param s HEVC decoding context
1600  * @param dst target buffer for block data at block position
1601  * @param dststride stride of the dst buffer
1602  * @param ref0 reference picture0 buffer at origin (0, 0)
1603  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1604  * @param x_off horizontal position of block from origin (0, 0)
1605  * @param y_off vertical position of block from origin (0, 0)
1606  * @param block_w width of block
1607  * @param block_h height of block
1608  * @param ref1 reference picture1 buffer at origin (0, 0)
1609  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1610  * @param current_mv current motion vector structure
1611  * @param cidx chroma component(cb, cr)
1612  */
1613 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1614                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1615 {
1616     HEVCLocalContext *lc = s->HEVClc;
1617     uint8_t *src1        = ref0->data[cidx+1];
1618     uint8_t *src2        = ref1->data[cidx+1];
1619     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1620     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1621     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1622                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1623     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1624     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1625     Mv *mv0              = &current_mv->mv[0];
1626     Mv *mv1              = &current_mv->mv[1];
1627     int hshift = s->ps.sps->hshift[1];
1628     int vshift = s->ps.sps->vshift[1];
1629
1630     intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1631     intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1632     intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1633     intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1634     intptr_t _mx0 = mx0 << (1 - hshift);
1635     intptr_t _my0 = my0 << (1 - vshift);
1636     intptr_t _mx1 = mx1 << (1 - hshift);
1637     intptr_t _my1 = my1 << (1 - vshift);
1638
1639     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1640     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1641     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1642     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1643     int idx = ff_hevc_pel_weight[block_w];
1644     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1645     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1646
1647     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1648         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1649         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1650         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1651         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1652         int buf_offset1 = EPEL_EXTRA_BEFORE *
1653                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1654
1655         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1656                                  edge_emu_stride, src1stride,
1657                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1658                                  x_off0 - EPEL_EXTRA_BEFORE,
1659                                  y_off0 - EPEL_EXTRA_BEFORE,
1660                                  pic_width, pic_height);
1661
1662         src1 = lc->edge_emu_buffer + buf_offset1;
1663         src1stride = edge_emu_stride;
1664     }
1665
1666     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1667         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1668         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1669         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1670         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1671         int buf_offset1 = EPEL_EXTRA_BEFORE *
1672                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1673
1674         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1675                                  edge_emu_stride, src2stride,
1676                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1677                                  x_off1 - EPEL_EXTRA_BEFORE,
1678                                  y_off1 - EPEL_EXTRA_BEFORE,
1679                                  pic_width, pic_height);
1680
1681         src2 = lc->edge_emu_buffer2 + buf_offset1;
1682         src2stride = edge_emu_stride;
1683     }
1684
1685     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1686                                                 block_h, _mx0, _my0, block_w);
1687     if (!weight_flag)
1688         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1689                                                        src2, src2stride, lc->tmp,
1690                                                        block_h, _mx1, _my1, block_w);
1691     else
1692         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1693                                                          src2, src2stride, lc->tmp,
1694                                                          block_h,
1695                                                          s->sh.chroma_log2_weight_denom,
1696                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1697                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1698                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1699                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1700                                                          _mx1, _my1, block_w);
1701 }
1702
1703 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1704                                 const Mv *mv, int y0, int height)
1705 {
1706     if (s->threads_type == FF_THREAD_FRAME ) {
1707         int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1708
1709         ff_thread_await_progress(&ref->tf, y, 0);
1710     }
1711 }
1712
1713 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1714                                   int nPbH, int log2_cb_size, int part_idx,
1715                                   int merge_idx, MvField *mv)
1716 {
1717     HEVCLocalContext *lc = s->HEVClc;
1718     enum InterPredIdc inter_pred_idc = PRED_L0;
1719     int mvp_flag;
1720
1721     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1722     mv->pred_flag = 0;
1723     if (s->sh.slice_type == HEVC_SLICE_B)
1724         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1725
1726     if (inter_pred_idc != PRED_L1) {
1727         if (s->sh.nb_refs[L0])
1728             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1729
1730         mv->pred_flag = PF_L0;
1731         ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1732         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1733         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1734                                  part_idx, merge_idx, mv, mvp_flag, 0);
1735         mv->mv[0].x += lc->pu.mvd.x;
1736         mv->mv[0].y += lc->pu.mvd.y;
1737     }
1738
1739     if (inter_pred_idc != PRED_L0) {
1740         if (s->sh.nb_refs[L1])
1741             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1742
1743         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1744             AV_ZERO32(&lc->pu.mvd);
1745         } else {
1746             ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1747         }
1748
1749         mv->pred_flag += PF_L1;
1750         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1751         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1752                                  part_idx, merge_idx, mv, mvp_flag, 1);
1753         mv->mv[1].x += lc->pu.mvd.x;
1754         mv->mv[1].y += lc->pu.mvd.y;
1755     }
1756 }
1757
1758 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1759                                 int nPbW, int nPbH,
1760                                 int log2_cb_size, int partIdx, int idx)
1761 {
1762 #define POS(c_idx, x, y)                                                              \
1763     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1764                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1765     HEVCLocalContext *lc = s->HEVClc;
1766     int merge_idx = 0;
1767     struct MvField current_mv = {{{ 0 }}};
1768
1769     int min_pu_width = s->ps.sps->min_pu_width;
1770
1771     MvField *tab_mvf = s->ref->tab_mvf;
1772     RefPicList  *refPicList = s->ref->refPicList;
1773     HEVCFrame *ref0 = NULL, *ref1 = NULL;
1774     uint8_t *dst0 = POS(0, x0, y0);
1775     uint8_t *dst1 = POS(1, x0, y0);
1776     uint8_t *dst2 = POS(2, x0, y0);
1777     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1778     int min_cb_width     = s->ps.sps->min_cb_width;
1779     int x_cb             = x0 >> log2_min_cb_size;
1780     int y_cb             = y0 >> log2_min_cb_size;
1781     int x_pu, y_pu;
1782     int i, j;
1783
1784     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1785
1786     if (!skip_flag)
1787         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1788
1789     if (skip_flag || lc->pu.merge_flag) {
1790         if (s->sh.max_num_merge_cand > 1)
1791             merge_idx = ff_hevc_merge_idx_decode(s);
1792         else
1793             merge_idx = 0;
1794
1795         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1796                                    partIdx, merge_idx, &current_mv);
1797     } else {
1798         hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1799                               partIdx, merge_idx, &current_mv);
1800     }
1801
1802     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1803     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1804
1805     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1806         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1807             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1808
1809     if (current_mv.pred_flag & PF_L0) {
1810         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1811         if (!ref0)
1812             return;
1813         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1814     }
1815     if (current_mv.pred_flag & PF_L1) {
1816         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1817         if (!ref1)
1818             return;
1819         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1820     }
1821
1822     if (current_mv.pred_flag == PF_L0) {
1823         int x0_c = x0 >> s->ps.sps->hshift[1];
1824         int y0_c = y0 >> s->ps.sps->vshift[1];
1825         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1826         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1827
1828         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1829                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1830                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1831                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1832
1833         if (s->ps.sps->chroma_format_idc) {
1834             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1835                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1836                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1837             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1838                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1839                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1840         }
1841     } else if (current_mv.pred_flag == PF_L1) {
1842         int x0_c = x0 >> s->ps.sps->hshift[1];
1843         int y0_c = y0 >> s->ps.sps->vshift[1];
1844         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1845         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1846
1847         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1848                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1849                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1850                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1851
1852         if (s->ps.sps->chroma_format_idc) {
1853             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1854                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1855                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1856
1857             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1858                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1859                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1860         }
1861     } else if (current_mv.pred_flag == PF_BI) {
1862         int x0_c = x0 >> s->ps.sps->hshift[1];
1863         int y0_c = y0 >> s->ps.sps->vshift[1];
1864         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1865         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1866
1867         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1868                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1869                    ref1->frame, &current_mv.mv[1], &current_mv);
1870
1871         if (s->ps.sps->chroma_format_idc) {
1872             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1873                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1874
1875             chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1876                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1877         }
1878     }
1879 }
1880
1881 /**
1882  * 8.4.1
1883  */
1884 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1885                                 int prev_intra_luma_pred_flag)
1886 {
1887     HEVCLocalContext *lc = s->HEVClc;
1888     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1889     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1890     int min_pu_width     = s->ps.sps->min_pu_width;
1891     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1892     int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1893     int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
1894
1895     int cand_up   = (lc->ctb_up_flag || y0b) ?
1896                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1897     int cand_left = (lc->ctb_left_flag || x0b) ?
1898                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1899
1900     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1901
1902     MvField *tab_mvf = s->ref->tab_mvf;
1903     int intra_pred_mode;
1904     int candidate[3];
1905     int i, j;
1906
1907     // intra_pred_mode prediction does not cross vertical CTB boundaries
1908     if ((y0 - 1) < y_ctb)
1909         cand_up = INTRA_DC;
1910
1911     if (cand_left == cand_up) {
1912         if (cand_left < 2) {
1913             candidate[0] = INTRA_PLANAR;
1914             candidate[1] = INTRA_DC;
1915             candidate[2] = INTRA_ANGULAR_26;
1916         } else {
1917             candidate[0] = cand_left;
1918             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1919             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1920         }
1921     } else {
1922         candidate[0] = cand_left;
1923         candidate[1] = cand_up;
1924         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1925             candidate[2] = INTRA_PLANAR;
1926         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1927             candidate[2] = INTRA_DC;
1928         } else {
1929             candidate[2] = INTRA_ANGULAR_26;
1930         }
1931     }
1932
1933     if (prev_intra_luma_pred_flag) {
1934         intra_pred_mode = candidate[lc->pu.mpm_idx];
1935     } else {
1936         if (candidate[0] > candidate[1])
1937             FFSWAP(uint8_t, candidate[0], candidate[1]);
1938         if (candidate[0] > candidate[2])
1939             FFSWAP(uint8_t, candidate[0], candidate[2]);
1940         if (candidate[1] > candidate[2])
1941             FFSWAP(uint8_t, candidate[1], candidate[2]);
1942
1943         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1944         for (i = 0; i < 3; i++)
1945             if (intra_pred_mode >= candidate[i])
1946                 intra_pred_mode++;
1947     }
1948
1949     /* write the intra prediction units into the mv array */
1950     if (!size_in_pus)
1951         size_in_pus = 1;
1952     for (i = 0; i < size_in_pus; i++) {
1953         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1954                intra_pred_mode, size_in_pus);
1955
1956         for (j = 0; j < size_in_pus; j++) {
1957             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1958         }
1959     }
1960
1961     return intra_pred_mode;
1962 }
1963
1964 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1965                                           int log2_cb_size, int ct_depth)
1966 {
1967     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1968     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
1969     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
1970     int y;
1971
1972     for (y = 0; y < length; y++)
1973         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
1974                ct_depth, length);
1975 }
1976
/* Remapping table indexed by a chroma mode index in 0..34; used by
 * intra_prediction_unit() to obtain intra_pred_mode_c when
 * chroma_format_idc == 2. */
static const uint8_t tab_mode_idx[] = {
    /*  0.. 6 */  0,  1,  2,  2,  2,  2,  3,
    /*  7..13 */  5,  7,  8, 10, 12, 13, 15,
    /* 14..20 */ 17, 18, 19, 20, 21, 22, 23,
    /* 21..27 */ 23, 24, 24, 25, 25, 26, 27,
    /* 28..34 */ 27, 28, 28, 29, 29, 30, 31,
};
1980
1981 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1982                                   int log2_cb_size)
1983 {
1984     HEVCLocalContext *lc = s->HEVClc;
1985     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1986     uint8_t prev_intra_luma_pred_flag[4];
1987     int split   = lc->cu.part_mode == PART_NxN;
1988     int pb_size = (1 << log2_cb_size) >> split;
1989     int side    = split + 1;
1990     int chroma_mode;
1991     int i, j;
1992
1993     for (i = 0; i < side; i++)
1994         for (j = 0; j < side; j++)
1995             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1996
1997     for (i = 0; i < side; i++) {
1998         for (j = 0; j < side; j++) {
1999             if (prev_intra_luma_pred_flag[2 * i + j])
2000                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2001             else
2002                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2003
2004             lc->pu.intra_pred_mode[2 * i + j] =
2005                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2006                                      prev_intra_luma_pred_flag[2 * i + j]);
2007         }
2008     }
2009
2010     if (s->ps.sps->chroma_format_idc == 3) {
2011         for (i = 0; i < side; i++) {
2012             for (j = 0; j < side; j++) {
2013                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2014                 if (chroma_mode != 4) {
2015                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
2016                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
2017                     else
2018                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
2019                 } else {
2020                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
2021                 }
2022             }
2023         }
2024     } else if (s->ps.sps->chroma_format_idc == 2) {
2025         int mode_idx;
2026         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2027         if (chroma_mode != 4) {
2028             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2029                 mode_idx = 34;
2030             else
2031                 mode_idx = intra_chroma_table[chroma_mode];
2032         } else {
2033             mode_idx = lc->pu.intra_pred_mode[0];
2034         }
2035         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
2036     } else if (s->ps.sps->chroma_format_idc != 0) {
2037         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2038         if (chroma_mode != 4) {
2039             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2040                 lc->pu.intra_pred_mode_c[0] = 34;
2041             else
2042                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
2043         } else {
2044             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
2045         }
2046     }
2047 }
2048
2049 static void intra_prediction_unit_default_value(HEVCContext *s,
2050                                                 int x0, int y0,
2051                                                 int log2_cb_size)
2052 {
2053     HEVCLocalContext *lc = s->HEVClc;
2054     int pb_size          = 1 << log2_cb_size;
2055     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2056     int min_pu_width     = s->ps.sps->min_pu_width;
2057     MvField *tab_mvf     = s->ref->tab_mvf;
2058     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2059     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2060     int j, k;
2061
2062     if (size_in_pus == 0)
2063         size_in_pus = 1;
2064     for (j = 0; j < size_in_pus; j++)
2065         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2066     if (lc->cu.pred_mode == MODE_INTRA)
2067         for (j = 0; j < size_in_pus; j++)
2068             for (k = 0; k < size_in_pus; k++)
2069                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
2070 }
2071
2072 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2073 {
2074     int cb_size          = 1 << log2_cb_size;
2075     HEVCLocalContext *lc = s->HEVClc;
2076     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2077     int length           = cb_size >> log2_min_cb_size;
2078     int min_cb_width     = s->ps.sps->min_cb_width;
2079     int x_cb             = x0 >> log2_min_cb_size;
2080     int y_cb             = y0 >> log2_min_cb_size;
2081     int idx              = log2_cb_size - 2;
2082     int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2083     int x, y, ret;
2084
2085     lc->cu.x                = x0;
2086     lc->cu.y                = y0;
2087     lc->cu.pred_mode        = MODE_INTRA;
2088     lc->cu.part_mode        = PART_2Nx2N;
2089     lc->cu.intra_split_flag = 0;
2090
2091     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2092     for (x = 0; x < 4; x++)
2093         lc->pu.intra_pred_mode[x] = 1;
2094     if (s->ps.pps->transquant_bypass_enable_flag) {
2095         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2096         if (lc->cu.cu_transquant_bypass_flag)
2097             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2098     } else
2099         lc->cu.cu_transquant_bypass_flag = 0;
2100
2101     if (s->sh.slice_type != HEVC_SLICE_I) {
2102         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2103
2104         x = y_cb * min_cb_width + x_cb;
2105         for (y = 0; y < length; y++) {
2106             memset(&s->skip_flag[x], skip_flag, length);
2107             x += min_cb_width;
2108         }
2109         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2110     } else {
2111         x = y_cb * min_cb_width + x_cb;
2112         for (y = 0; y < length; y++) {
2113             memset(&s->skip_flag[x], 0, length);
2114             x += min_cb_width;
2115         }
2116     }
2117
2118     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2119         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2120         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2121
2122         if (!s->sh.disable_deblocking_filter_flag)
2123             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2124     } else {
2125         int pcm_flag = 0;
2126
2127         if (s->sh.slice_type != HEVC_SLICE_I)
2128             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2129         if (lc->cu.pred_mode != MODE_INTRA ||
2130             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2131             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2132             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2133                                       lc->cu.pred_mode == MODE_INTRA;
2134         }
2135
2136         if (lc->cu.pred_mode == MODE_INTRA) {
2137             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2138                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2139                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2140                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2141             }
2142             if (pcm_flag) {
2143                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2144                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2145                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2146                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2147
2148                 if (ret < 0)
2149                     return ret;
2150             } else {
2151                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2152             }
2153         } else {
2154             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2155             switch (lc->cu.part_mode) {
2156             case PART_2Nx2N:
2157                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2158                 break;
2159             case PART_2NxN:
2160                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2161                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2162                 break;
2163             case PART_Nx2N:
2164                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2165                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2166                 break;
2167             case PART_2NxnU:
2168                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2169                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2170                 break;
2171             case PART_2NxnD:
2172                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2173                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2174                 break;
2175             case PART_nLx2N:
2176                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2177                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2178                 break;
2179             case PART_nRx2N:
2180                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2181                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2182                 break;
2183             case PART_NxN:
2184                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2185                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2186                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2187                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2188                 break;
2189             }
2190         }
2191
2192         if (!pcm_flag) {
2193             int rqt_root_cbf = 1;
2194
2195             if (lc->cu.pred_mode != MODE_INTRA &&
2196                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2197                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2198             }
2199             if (rqt_root_cbf) {
2200                 const static int cbf[2] = { 0 };
2201                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2202                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2203                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2204                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2205                                          log2_cb_size,
2206                                          log2_cb_size, 0, 0, cbf, cbf);
2207                 if (ret < 0)
2208                     return ret;
2209             } else {
2210                 if (!s->sh.disable_deblocking_filter_flag)
2211                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2212             }
2213         }
2214     }
2215
2216     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2217         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2218
2219     x = y_cb * min_cb_width + x_cb;
2220     for (y = 0; y < length; y++) {
2221         memset(&s->qp_y_tab[x], lc->qp_y, length);
2222         x += min_cb_width;
2223     }
2224
2225     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2226        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2227         lc->qPy_pred = lc->qp_y;
2228     }
2229
2230     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
2231
2232     return 0;
2233 }
2234
2235 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2236                                int log2_cb_size, int cb_depth)
2237 {
2238     HEVCLocalContext *lc = s->HEVClc;
2239     const int cb_size    = 1 << log2_cb_size;
2240     int ret;
2241     int split_cu;
2242
2243     lc->ct_depth = cb_depth;
2244     if (x0 + cb_size <= s->ps.sps->width  &&
2245         y0 + cb_size <= s->ps.sps->height &&
2246         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2247         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2248     } else {
2249         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2250     }
2251     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2252         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2253         lc->tu.is_cu_qp_delta_coded = 0;
2254         lc->tu.cu_qp_delta          = 0;
2255     }
2256
2257     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2258         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2259         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2260     }
2261
2262     if (split_cu) {
2263         int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2264         const int cb_size_split = cb_size >> 1;
2265         const int x1 = x0 + cb_size_split;
2266         const int y1 = y0 + cb_size_split;
2267
2268         int more_data = 0;
2269
2270         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2271         if (more_data < 0)
2272             return more_data;
2273
2274         if (more_data && x1 < s->ps.sps->width) {
2275             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2276             if (more_data < 0)
2277                 return more_data;
2278         }
2279         if (more_data && y1 < s->ps.sps->height) {
2280             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2281             if (more_data < 0)
2282                 return more_data;
2283         }
2284         if (more_data && x1 < s->ps.sps->width &&
2285             y1 < s->ps.sps->height) {
2286             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2287             if (more_data < 0)
2288                 return more_data;
2289         }
2290
2291         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2292             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2293             lc->qPy_pred = lc->qp_y;
2294
2295         if (more_data)
2296             return ((x1 + cb_size_split) < s->ps.sps->width ||
2297                     (y1 + cb_size_split) < s->ps.sps->height);
2298         else
2299             return 0;
2300     } else {
2301         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2302         if (ret < 0)
2303             return ret;
2304         if ((!((x0 + cb_size) %
2305                (1 << (s->ps.sps->log2_ctb_size))) ||
2306              (x0 + cb_size >= s->ps.sps->width)) &&
2307             (!((y0 + cb_size) %
2308                (1 << (s->ps.sps->log2_ctb_size))) ||
2309              (y0 + cb_size >= s->ps.sps->height))) {
2310             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2311             return !end_of_slice_flag;
2312         } else {
2313             return 1;
2314         }
2315     }
2316
2317     return 0;
2318 }
2319
2320 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2321                                  int ctb_addr_ts)
2322 {
2323     HEVCLocalContext *lc  = s->HEVClc;
2324     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2325     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2326     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2327
2328     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2329
2330     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2331         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2332             lc->first_qp_group = 1;
2333         lc->end_of_tiles_x = s->ps.sps->width;
2334     } else if (s->ps.pps->tiles_enabled_flag) {
2335         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2336             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2337             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2338             lc->first_qp_group   = 1;
2339         }
2340     } else {
2341         lc->end_of_tiles_x = s->ps.sps->width;
2342     }
2343
2344     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2345
2346     lc->boundary_flags = 0;
2347     if (s->ps.pps->tiles_enabled_flag) {
2348         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2349             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2350         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2351             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2352         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2353             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2354         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2355             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2356     } else {
2357         if (ctb_addr_in_slice <= 0)
2358             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2359         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2360             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2361     }
2362
2363     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2364     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2365     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2366     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2367 }
2368
2369 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2370 {
2371     HEVCContext *s  = avctxt->priv_data;
2372     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2373     int more_data   = 1;
2374     int x_ctb       = 0;
2375     int y_ctb       = 0;
2376     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2377     int ret;
2378
2379     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2380         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2381         return AVERROR_INVALIDDATA;
2382     }
2383
2384     if (s->sh.dependent_slice_segment_flag) {
2385         int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2386         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2387             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2388             return AVERROR_INVALIDDATA;
2389         }
2390     }
2391
2392     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2393         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2394
2395         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2396         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2397         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2398
2399         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2400         if (ret < 0) {
2401             s->tab_slice_address[ctb_addr_rs] = -1;
2402             return ret;
2403         }
2404
2405         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2406
2407         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2408         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2409         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2410
2411         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2412         if (more_data < 0) {
2413             s->tab_slice_address[ctb_addr_rs] = -1;
2414             return more_data;
2415         }
2416
2417
2418         ctb_addr_ts++;
2419         ff_hevc_save_states(s, ctb_addr_ts);
2420         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2421     }
2422
2423     if (x_ctb + ctb_size >= s->ps.sps->width &&
2424         y_ctb + ctb_size >= s->ps.sps->height)
2425         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2426
2427     return ctb_addr_ts;
2428 }
2429
2430 static int hls_slice_data(HEVCContext *s)
2431 {
2432     int arg[2];
2433     int ret[2];
2434
2435     arg[0] = 0;
2436     arg[1] = 1;
2437
2438     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2439     return ret[0];
2440 }
2441 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2442 {
2443     HEVCContext *s1  = avctxt->priv_data, *s;
2444     HEVCLocalContext *lc;
2445     int ctb_size    = 1<< s1->ps.sps->log2_ctb_size;
2446     int more_data   = 1;
2447     int *ctb_row_p    = input_ctb_row;
2448     int ctb_row = ctb_row_p[job];
2449     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2450     int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2451     int thread = ctb_row % s1->threads_number;
2452     int ret;
2453
2454     s = s1->sList[self_id];
2455     lc = s->HEVClc;
2456
2457     if(ctb_row) {
2458         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2459         if (ret < 0)
2460             goto error;
2461         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2462     }
2463
2464     while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2465         int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2466         int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2467
2468         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2469
2470         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2471
2472         if (atomic_load(&s1->wpp_err)) {
2473             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2474             return 0;
2475         }
2476
2477         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2478         if (ret < 0)
2479             goto error;
2480         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2481         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2482
2483         if (more_data < 0) {
2484             ret = more_data;
2485             goto error;
2486         }
2487
2488         ctb_addr_ts++;
2489
2490         ff_hevc_save_states(s, ctb_addr_ts);
2491         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2492         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2493
2494         if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2495             atomic_store(&s1->wpp_err, 1);
2496             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2497             return 0;
2498         }
2499
2500         if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2501             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2502             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2503             return ctb_addr_ts;
2504         }
2505         ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2506         x_ctb+=ctb_size;
2507
2508         if(x_ctb >= s->ps.sps->width) {
2509             break;
2510         }
2511     }
2512     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2513
2514     return 0;
2515 error:
2516     s->tab_slice_address[ctb_addr_rs] = -1;
2517     atomic_store(&s1->wpp_err, 1);
2518     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2519     return ret;
2520 }
2521
2522 static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
2523 {
2524     const uint8_t *data = nal->data;
2525     int length          = nal->size;
2526     HEVCLocalContext *lc = s->HEVClc;
2527     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2528     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2529     int64_t offset;
2530     int64_t startheader, cmpt = 0;
2531     int i, j, res = 0;
2532
2533     if (!ret || !arg) {
2534         av_free(ret);
2535         av_free(arg);
2536         return AVERROR(ENOMEM);
2537     }
2538
2539     if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
2540         av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
2541             s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
2542             s->ps.sps->ctb_width, s->ps.sps->ctb_height
2543         );
2544         res = AVERROR_INVALIDDATA;
2545         goto error;
2546     }
2547
2548     ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2549
2550     if (!s->sList[1]) {
2551         for (i = 1; i < s->threads_number; i++) {
2552             s->sList[i] = av_malloc(sizeof(HEVCContext));
2553             memcpy(s->sList[i], s, sizeof(HEVCContext));
2554             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2555             s->sList[i]->HEVClc = s->HEVClcList[i];
2556         }
2557     }
2558
2559     offset = (lc->gb.index >> 3);
2560
2561     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2562         if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2563             startheader--;
2564             cmpt++;
2565         }
2566     }
2567
2568     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2569         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2570         for (j = 0, cmpt = 0, startheader = offset
2571              + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2572             if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2573                 startheader--;
2574                 cmpt++;
2575             }
2576         }
2577         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2578         s->sh.offset[i - 1] = offset;
2579
2580     }
2581     if (s->sh.num_entry_point_offsets != 0) {
2582         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2583         if (length < offset) {
2584             av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
2585             res = AVERROR_INVALIDDATA;
2586             goto error;
2587         }
2588         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2589         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2590
2591     }
2592     s->data = data;
2593
2594     for (i = 1; i < s->threads_number; i++) {
2595         s->sList[i]->HEVClc->first_qp_group = 1;
2596         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2597         memcpy(s->sList[i], s, sizeof(HEVCContext));
2598         s->sList[i]->HEVClc = s->HEVClcList[i];
2599     }
2600
2601     atomic_store(&s->wpp_err, 0);
2602     ff_reset_entries(s->avctx);
2603
2604     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2605         arg[i] = i;
2606         ret[i] = 0;
2607     }
2608
2609     if (s->ps.pps->entropy_coding_sync_enabled_flag)
2610         s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2611
2612     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2613         res += ret[i];
2614 error:
2615     av_free(ret);
2616     av_free(arg);
2617     return res;
2618 }
2619
2620 static int set_side_data(HEVCContext *s)
2621 {
2622     AVFrame *out = s->ref->frame;
2623
2624     if (s->sei.frame_packing.present &&
2625         s->sei.frame_packing.arrangement_type >= 3 &&
2626         s->sei.frame_packing.arrangement_type <= 5 &&
2627         s->sei.frame_packing.content_interpretation_type > 0 &&
2628         s->sei.frame_packing.content_interpretation_type < 3) {
2629         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2630         if (!stereo)
2631             return AVERROR(ENOMEM);
2632
2633         switch (s->sei.frame_packing.arrangement_type) {
2634         case 3:
2635             if (s->sei.frame_packing.quincunx_subsampling)
2636                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2637             else
2638                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2639             break;
2640         case 4:
2641             stereo->type = AV_STEREO3D_TOPBOTTOM;
2642             break;
2643         case 5:
2644             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2645             break;
2646         }
2647
2648         if (s->sei.frame_packing.content_interpretation_type == 2)
2649             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2650
2651         if (s->sei.frame_packing.arrangement_type == 5) {
2652             if (s->sei.frame_packing.current_frame_is_frame0_flag)
2653                 stereo->view = AV_STEREO3D_VIEW_LEFT;
2654             else
2655                 stereo->view = AV_STEREO3D_VIEW_RIGHT;
2656         }
2657     }
2658
2659     if (s->sei.display_orientation.present &&
2660         (s->sei.display_orientation.anticlockwise_rotation ||
2661          s->sei.display_orientation.hflip || s->sei.display_orientation.vflip)) {
2662         double angle = s->sei.display_orientation.anticlockwise_rotation * 360 / (double) (1 << 16);
2663         AVFrameSideData *rotation = av_frame_new_side_data(out,
2664                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2665                                                            sizeof(int32_t) * 9);
2666         if (!rotation)
2667             return AVERROR(ENOMEM);
2668
2669         av_display_rotation_set((int32_t *)rotation->data, angle);
2670         av_display_matrix_flip((int32_t *)rotation->data,
2671                                s->sei.display_orientation.hflip,
2672                                s->sei.display_orientation.vflip);
2673     }
2674
2675     // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2676     // so the side data persists for the entire coded video sequence.
2677     if (s->sei.mastering_display.present > 0 &&
2678         IS_IRAP(s) && s->no_rasl_output_flag) {
2679         s->sei.mastering_display.present--;
2680     }
2681     if (s->sei.mastering_display.present) {
2682         // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
2683         const int mapping[3] = {2, 0, 1};
2684         const int chroma_den = 50000;
2685         const int luma_den = 10000;
2686         int i;
2687         AVMasteringDisplayMetadata *metadata =
2688             av_mastering_display_metadata_create_side_data(out);
2689         if (!metadata)
2690             return AVERROR(ENOMEM);
2691
2692         for (i = 0; i < 3; i++) {
2693             const int j = mapping[i];
2694             metadata->display_primaries[i][0].num = s->sei.mastering_display.display_primaries[j][0];
2695             metadata->display_primaries[i][0].den = chroma_den;
2696             metadata->display_primaries[i][1].num = s->sei.mastering_display.display_primaries[j][1];
2697             metadata->display_primaries[i][1].den = chroma_den;
2698         }
2699         metadata->white_point[0].num = s->sei.mastering_display.white_point[0];
2700         metadata->white_point[0].den = chroma_den;
2701         metadata->white_point[1].num = s->sei.mastering_display.white_point[1];
2702         metadata->white_point[1].den = chroma_den;
2703
2704         metadata->max_luminance.num = s->sei.mastering_display.max_luminance;
2705         metadata->max_luminance.den = luma_den;
2706         metadata->min_luminance.num = s->sei.mastering_display.min_luminance;
2707         metadata->min_luminance.den = luma_den;
2708         metadata->has_luminance = 1;
2709         metadata->has_primaries = 1;
2710
2711         av_log(s->avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
2712         av_log(s->avctx, AV_LOG_DEBUG,
2713                "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
2714                av_q2d(metadata->display_primaries[0][0]),
2715                av_q2d(metadata->display_primaries[0][1]),
2716                av_q2d(metadata->display_primaries[1][0]),
2717                av_q2d(metadata->display_primaries[1][1]),
2718                av_q2d(metadata->display_primaries[2][0]),
2719                av_q2d(metadata->display_primaries[2][1]),
2720                av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
2721         av_log(s->avctx, AV_LOG_DEBUG,
2722                "min_luminance=%f, max_luminance=%f\n",
2723                av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
2724     }
2725     // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2726     // so the side data persists for the entire coded video sequence.
2727     if (s->sei.content_light.present > 0 &&
2728         IS_IRAP(s) && s->no_rasl_output_flag) {
2729         s->sei.content_light.present--;
2730     }
2731     if (s->sei.content_light.present) {
2732         AVContentLightMetadata *metadata =
2733             av_content_light_metadata_create_side_data(out);
2734         if (!metadata)
2735             return AVERROR(ENOMEM);
2736         metadata->MaxCLL  = s->sei.content_light.max_content_light_level;
2737         metadata->MaxFALL = s->sei.content_light.max_pic_average_light_level;
2738
2739         av_log(s->avctx, AV_LOG_DEBUG, "Content Light Level Metadata:\n");
2740         av_log(s->avctx, AV_LOG_DEBUG, "MaxCLL=%d, MaxFALL=%d\n",
2741                metadata->MaxCLL, metadata->MaxFALL);
2742     }
2743
2744     if (s->sei.a53_caption.a53_caption) {
2745         AVFrameSideData* sd = av_frame_new_side_data(out,
2746                                                      AV_FRAME_DATA_A53_CC,
2747                                                      s->sei.a53_caption.a53_caption_size);
2748         if (sd)
2749             memcpy(sd->data, s->sei.a53_caption.a53_caption, s->sei.a53_caption.a53_caption_size);
2750         av_freep(&s->sei.a53_caption.a53_caption);
2751         s->sei.a53_caption.a53_caption_size = 0;
2752         s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
2753     }
2754
2755     if (s->sei.alternative_transfer.present &&
2756         av_color_transfer_name(s->sei.alternative_transfer.preferred_transfer_characteristics) &&
2757         s->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) {
2758         s->avctx->color_trc = out->color_trc = s->sei.alternative_transfer.preferred_transfer_characteristics;
2759     }
2760
2761     return 0;
2762 }
2763
2764 static int hevc_frame_start(HEVCContext *s)
2765 {
2766     HEVCLocalContext *lc = s->HEVClc;
2767     int pic_size_in_ctb  = ((s->ps.sps->width  >> s->ps.sps->log2_min_cb_size) + 1) *
2768                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
2769     int ret;
2770
2771     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2772     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2773     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2774     memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2775     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2776
2777     s->is_decoded        = 0;
2778     s->first_nal_type    = s->nal_unit_type;
2779
2780     s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos);
2781
2782     if (s->ps.pps->tiles_enabled_flag)
2783         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2784
2785     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2786     if (ret < 0)
2787         goto fail;
2788
2789     ret = ff_hevc_frame_rps(s);
2790     if (ret < 0) {
2791         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2792         goto fail;
2793     }
2794
2795     s->ref->frame->key_frame = IS_IRAP(s);
2796
2797     ret = set_side_data(s);
2798     if (ret < 0)
2799         goto fail;
2800
2801     s->frame->pict_type = 3 - s->sh.slice_type;
2802
2803     if (!IS_IRAP(s))
2804         ff_hevc_bump_frame(s);
2805
2806     av_frame_unref(s->output_frame);
2807     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2808     if (ret < 0)
2809         goto fail;
2810
2811     if (!s->avctx->hwaccel)
2812         ff_thread_finish_setup(s->avctx);
2813
2814     return 0;
2815
2816 fail:
2817     if (s->ref)
2818         ff_hevc_unref_frame(s, s->ref, ~0);
2819     s->ref = NULL;
2820     return ret;
2821 }
2822
2823 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2824 {
2825     HEVCLocalContext *lc = s->HEVClc;
2826     GetBitContext *gb    = &lc->gb;
2827     int ctb_addr_ts, ret;
2828
2829     *gb              = nal->gb;
2830     s->nal_unit_type = nal->type;
2831     s->temporal_id   = nal->temporal_id;
2832
2833     switch (s->nal_unit_type) {
2834     case HEVC_NAL_VPS:
2835         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2836             ret = s->avctx->hwaccel->decode_params(s->avctx,
2837                                                    nal->type,
2838                                                    nal->raw_data,
2839                                                    nal->raw_size);
2840             if (ret < 0)
2841                 goto fail;
2842         }
2843         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2844         if (ret < 0)
2845             goto fail;
2846         break;
2847     case HEVC_NAL_SPS:
2848         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2849             ret = s->avctx->hwaccel->decode_params(s->avctx,
2850                                                    nal->type,
2851                                                    nal->raw_data,
2852                                                    nal->raw_size);
2853             if (ret < 0)
2854                 goto fail;
2855         }
2856         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2857                                      s->apply_defdispwin);
2858         if (ret < 0)
2859             goto fail;
2860         break;
2861     case HEVC_NAL_PPS:
2862         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2863             ret = s->avctx->hwaccel->decode_params(s->avctx,
2864                                                    nal->type,
2865                                                    nal->raw_data,
2866                                                    nal->raw_size);
2867             if (ret < 0)
2868                 goto fail;
2869         }
2870         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2871         if (ret < 0)
2872             goto fail;
2873         break;
2874     case HEVC_NAL_SEI_PREFIX:
2875     case HEVC_NAL_SEI_SUFFIX:
2876         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2877             ret = s->avctx->hwaccel->decode_params(s->avctx,
2878                                                    nal->type,
2879                                                    nal->raw_data,
2880                                                    nal->raw_size);
2881             if (ret < 0)
2882                 goto fail;
2883         }
2884         ret = ff_hevc_decode_nal_sei(gb, s->avctx, &s->sei, &s->ps, s->nal_unit_type);
2885         if (ret < 0)
2886             goto fail;
2887         break;
2888     case HEVC_NAL_TRAIL_R:
2889     case HEVC_NAL_TRAIL_N:
2890     case HEVC_NAL_TSA_N:
2891     case HEVC_NAL_TSA_R:
2892     case HEVC_NAL_STSA_N:
2893     case HEVC_NAL_STSA_R:
2894     case HEVC_NAL_BLA_W_LP:
2895     case HEVC_NAL_BLA_W_RADL:
2896     case HEVC_NAL_BLA_N_LP:
2897     case HEVC_NAL_IDR_W_RADL:
2898     case HEVC_NAL_IDR_N_LP:
2899     case HEVC_NAL_CRA_NUT:
2900     case HEVC_NAL_RADL_N:
2901     case HEVC_NAL_RADL_R:
2902     case HEVC_NAL_RASL_N:
2903     case HEVC_NAL_RASL_R:
2904         ret = hls_slice_header(s);
2905         if (ret < 0)
2906             return ret;
2907
2908         if (s->sh.first_slice_in_pic_flag) {
2909             if (s->max_ra == INT_MAX) {
2910                 if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
2911                     s->max_ra = s->poc;
2912                 } else {
2913                     if (IS_IDR(s))
2914                         s->max_ra = INT_MIN;
2915                 }
2916             }
2917
2918             if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
2919                 s->poc <= s->max_ra) {
2920                 s->is_decoded = 0;
2921                 break;
2922             } else {
2923                 if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
2924                     s->max_ra = INT_MIN;
2925             }
2926
2927             ret = hevc_frame_start(s);
2928             if (ret < 0)
2929                 return ret;
2930         } else if (!s->ref) {
2931             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2932             goto fail;
2933         }
2934
2935         if (s->nal_unit_type != s->first_nal_type) {
2936             av_log(s->avctx, AV_LOG_ERROR,
2937                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2938                    s->first_nal_type, s->nal_unit_type);
2939             return AVERROR_INVALIDDATA;
2940         }
2941
2942         if (!s->sh.dependent_slice_segment_flag &&
2943             s->sh.slice_type != HEVC_SLICE_I) {
2944             ret = ff_hevc_slice_rpl(s);
2945             if (ret < 0) {
2946                 av_log(s->avctx, AV_LOG_WARNING,
2947                        "Error constructing the reference lists for the current slice.\n");
2948                 goto fail;
2949             }
2950         }
2951
2952         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2953             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2954             if (ret < 0)
2955                 goto fail;
2956         }
2957
2958         if (s->avctx->hwaccel) {
2959             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2960             if (ret < 0)
2961                 goto fail;
2962         } else {
2963             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2964                 ctb_addr_ts = hls_slice_data_wpp(s, nal);
2965             else
2966                 ctb_addr_ts = hls_slice_data(s);
2967             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2968                 s->is_decoded = 1;
2969             }
2970
2971             if (ctb_addr_ts < 0) {
2972                 ret = ctb_addr_ts;
2973                 goto fail;
2974             }
2975         }
2976         break;
2977     case HEVC_NAL_EOS_NUT:
2978     case HEVC_NAL_EOB_NUT:
2979         s->seq_decode = (s->seq_decode + 1) & 0xff;
2980         s->max_ra     = INT_MAX;
2981         break;
2982     case HEVC_NAL_AUD:
2983     case HEVC_NAL_FD_NUT:
2984         break;
2985     default:
2986         av_log(s->avctx, AV_LOG_INFO,
2987                "Skipping NAL unit %d\n", s->nal_unit_type);
2988     }
2989
2990     return 0;
2991 fail:
2992     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2993         return ret;
2994     return 0;
2995 }
2996
2997 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2998 {
2999     int i, ret = 0;
3000     int eos_at_start = 1;
3001
3002     s->ref = NULL;
3003     s->last_eos = s->eos;
3004     s->eos = 0;
3005
3006     /* split the input packet into NAL units, so we know the upper bound on the
3007      * number of slices in the frame */
3008     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
3009                                 s->nal_length_size, s->avctx->codec_id, 1);
3010     if (ret < 0) {
3011         av_log(s->avctx, AV_LOG_ERROR,
3012                "Error splitting the input into NAL units.\n");
3013         return ret;
3014     }
3015
3016     for (i = 0; i < s->pkt.nb_nals; i++) {
3017         if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
3018             s->pkt.nals[i].type == HEVC_NAL_EOS_NUT) {
3019             if (eos_at_start) {
3020                 s->last_eos = 1;
3021             } else {
3022                 s->eos = 1;
3023             }
3024         } else {
3025             eos_at_start = 0;
3026         }
3027     }
3028
3029     /* decode the NAL units */
3030     for (i = 0; i < s->pkt.nb_nals; i++) {
3031         ret = decode_nal_unit(s, &s->pkt.nals[i]);
3032         if (ret < 0) {
3033             av_log(s->avctx, AV_LOG_WARNING,
3034                    "Error parsing NAL unit #%d.\n", i);
3035             goto fail;
3036         }
3037     }
3038
3039 fail:
3040     if (s->ref && s->threads_type == FF_THREAD_FRAME)
3041         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
3042
3043     return ret;
3044 }
3045
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal characters.
 *
 * @param log_ctx context passed through to av_log()
 * @param level   log level passed through to av_log()
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
3052
3053 static int verify_md5(HEVCContext *s, AVFrame *frame)
3054 {
3055     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3056     int pixel_shift;
3057     int i, j;
3058
3059     if (!desc)
3060         return AVERROR(EINVAL);
3061
3062     pixel_shift = desc->comp[0].depth > 8;
3063
3064     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3065            s->poc);
3066
3067     /* the checksums are LE, so we have to byteswap for >8bpp formats
3068      * on BE arches */
3069 #if HAVE_BIGENDIAN
3070     if (pixel_shift && !s->checksum_buf) {
3071         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3072                        FFMAX3(frame->linesize[0], frame->linesize[1],
3073                               frame->linesize[2]));
3074         if (!s->checksum_buf)
3075             return AVERROR(ENOMEM);
3076     }
3077 #endif
3078
3079     for (i = 0; frame->data[i]; i++) {
3080         int width  = s->avctx->coded_width;
3081         int height = s->avctx->coded_height;
3082         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
3083         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3084         uint8_t md5[16];
3085
3086         av_md5_init(s->md5_ctx);
3087         for (j = 0; j < h; j++) {
3088             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3089 #if HAVE_BIGENDIAN
3090             if (pixel_shift) {
3091                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3092                                     (const uint16_t *) src, w);
3093                 src = s->checksum_buf;
3094             }
3095 #endif
3096             av_md5_update(s->md5_ctx, src, w << pixel_shift);
3097         }
3098         av_md5_final(s->md5_ctx, md5);
3099
3100         if (!memcmp(md5, s->sei.picture_hash.md5[i], 16)) {
3101             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3102             print_md5(s->avctx, AV_LOG_DEBUG, md5);
3103             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
3104         } else {
3105             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3106             print_md5(s->avctx, AV_LOG_ERROR, md5);
3107             av_log   (s->avctx, AV_LOG_ERROR, " != ");
3108             print_md5(s->avctx, AV_LOG_ERROR, s->sei.picture_hash.md5[i]);
3109             av_log   (s->avctx, AV_LOG_ERROR, "\n");
3110             return AVERROR_INVALIDDATA;
3111         }
3112     }
3113
3114     av_log(s->avctx, AV_LOG_DEBUG, "\n");
3115
3116     return 0;
3117 }
3118
3119 static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length, int first)
3120 {
3121     int ret, i;
3122
3123     ret = ff_hevc_decode_extradata(buf, length, &s->ps, &s->sei, &s->is_nalff,
3124                                    &s->nal_length_size, s->avctx->err_recognition,
3125                                    s->apply_defdispwin, s->avctx);
3126     if (ret < 0)
3127         return ret;
3128
3129     /* export stream parameters from the first SPS */
3130     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3131         if (first && s->ps.sps_list[i]) {
3132             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3133             export_stream_params(s->avctx, &s->ps, sps);
3134             break;
3135         }
3136     }
3137
3138     return 0;
3139 }
3140
3141 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3142                              AVPacket *avpkt)
3143 {
3144     int ret;
3145     int new_extradata_size;
3146     uint8_t *new_extradata;
3147     HEVCContext *s = avctx->priv_data;
3148
3149     if (!avpkt->size) {
3150         ret = ff_hevc_output_frame(s, data, 1);
3151         if (ret < 0)
3152             return ret;
3153
3154         *got_output = ret;
3155         return 0;
3156     }
3157
3158     new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
3159                                             &new_extradata_size);
3160     if (new_extradata && new_extradata_size > 0) {
3161         ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0);
3162         if (ret < 0)
3163             return ret;
3164     }
3165
3166     s->ref = NULL;
3167     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
3168     if (ret < 0)
3169         return ret;
3170
3171     if (avctx->hwaccel) {
3172         if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
3173             av_log(avctx, AV_LOG_ERROR,
3174                    "hardware accelerator failed to decode picture\n");
3175             ff_hevc_unref_frame(s, s->ref, ~0);
3176             return ret;
3177         }
3178     } else {
3179         /* verify the SEI checksum */
3180         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3181             s->sei.picture_hash.is_md5) {
3182             ret = verify_md5(s, s->ref->frame);
3183             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3184                 ff_hevc_unref_frame(s, s->ref, ~0);
3185                 return ret;
3186             }
3187         }
3188     }
3189     s->sei.picture_hash.is_md5 = 0;
3190
3191     if (s->is_decoded) {
3192         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3193         s->is_decoded = 0;
3194     }
3195
3196     if (s->output_frame->buf[0]) {
3197         av_frame_move_ref(data, s->output_frame);
3198         *got_output = 1;
3199     }
3200
3201     return avpkt->size;
3202 }
3203
3204 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3205 {
3206     int ret;
3207
3208     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3209     if (ret < 0)
3210         return ret;
3211
3212     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3213     if (!dst->tab_mvf_buf)
3214         goto fail;
3215     dst->tab_mvf = src->tab_mvf;
3216
3217     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3218     if (!dst->rpl_tab_buf)
3219         goto fail;
3220     dst->rpl_tab = src->rpl_tab;
3221
3222     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3223     if (!dst->rpl_buf)
3224         goto fail;
3225
3226     dst->poc        = src->poc;
3227     dst->ctb_count  = src->ctb_count;
3228     dst->flags      = src->flags;
3229     dst->sequence   = src->sequence;
3230
3231     if (src->hwaccel_picture_private) {
3232         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3233         if (!dst->hwaccel_priv_buf)
3234             goto fail;
3235         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
3236     }
3237
3238     return 0;
3239 fail:
3240     ff_hevc_unref_frame(s, dst, ~0);
3241     return AVERROR(ENOMEM);
3242 }
3243
3244 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3245 {
3246     HEVCContext       *s = avctx->priv_data;
3247     int i;
3248
3249     pic_arrays_free(s);
3250
3251     av_freep(&s->md5_ctx);
3252
3253     av_freep(&s->cabac_state);
3254
3255     for (i = 0; i < 3; i++) {
3256         av_freep(&s->sao_pixel_buffer_h[i]);
3257         av_freep(&s->sao_pixel_buffer_v[i]);
3258     }
3259     av_frame_free(&s->output_frame);
3260
3261     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3262         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3263         av_frame_free(&s->DPB[i].frame);
3264     }
3265
3266     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
3267         av_buffer_unref(&s->ps.vps_list[i]);
3268     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
3269         av_buffer_unref(&s->ps.sps_list[i]);
3270     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
3271         av_buffer_unref(&s->ps.pps_list[i]);
3272     s->ps.sps = NULL;
3273     s->ps.pps = NULL;
3274     s->ps.vps = NULL;
3275
3276     av_freep(&s->sh.entry_point_offset);
3277     av_freep(&s->sh.offset);
3278     av_freep(&s->sh.size);
3279
3280     for (i = 1; i < s->threads_number; i++) {
3281         HEVCLocalContext *lc = s->HEVClcList[i];
3282         if (lc) {
3283             av_freep(&s->HEVClcList[i]);
3284             av_freep(&s->sList[i]);
3285         }
3286     }
3287     if (s->HEVClc == s->HEVClcList[0])
3288         s->HEVClc = NULL;
3289     av_freep(&s->HEVClcList[0]);
3290
3291     ff_h2645_packet_uninit(&s->pkt);
3292
3293     return 0;
3294 }
3295
3296 static av_cold int hevc_init_context(AVCodecContext *avctx)
3297 {
3298     HEVCContext *s = avctx->priv_data;
3299     int i;
3300
3301     s->avctx = avctx;
3302
3303     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3304     if (!s->HEVClc)
3305         goto fail;
3306     s->HEVClcList[0] = s->HEVClc;
3307     s->sList[0] = s;
3308
3309     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3310     if (!s->cabac_state)
3311         goto fail;
3312
3313     s->output_frame = av_frame_alloc();
3314     if (!s->output_frame)
3315         goto fail;
3316
3317     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3318         s->DPB[i].frame = av_frame_alloc();
3319         if (!s->DPB[i].frame)
3320             goto fail;
3321         s->DPB[i].tf.f = s->DPB[i].frame;
3322     }
3323
3324     s->max_ra = INT_MAX;
3325
3326     s->md5_ctx = av_md5_alloc();
3327     if (!s->md5_ctx)
3328         goto fail;
3329
3330     ff_bswapdsp_init(&s->bdsp);
3331
3332     s->context_initialized = 1;
3333     s->eos = 0;
3334
3335     ff_hevc_reset_sei(&s->sei);
3336
3337     return 0;
3338
3339 fail:
3340     hevc_decode_free(avctx);
3341     return AVERROR(ENOMEM);
3342 }
3343
3344 static int hevc_update_thread_context(AVCodecContext *dst,
3345                                       const AVCodecContext *src)
3346 {
3347     HEVCContext *s  = dst->priv_data;
3348     HEVCContext *s0 = src->priv_data;
3349     int i, ret;
3350
3351     if (!s->context_initialized) {
3352         ret = hevc_init_context(dst);
3353         if (ret < 0)
3354             return ret;
3355     }
3356
3357     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3358         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3359         if (s0->DPB[i].frame->buf[0]) {
3360             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3361             if (ret < 0)
3362                 return ret;
3363         }
3364     }
3365
3366     if (s->ps.sps != s0->ps.sps)
3367         s->ps.sps = NULL;
3368     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
3369         av_buffer_unref(&s->ps.vps_list[i]);
3370         if (s0->ps.vps_list[i]) {
3371             s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
3372             if (!s->ps.vps_list[i])
3373                 return AVERROR(ENOMEM);
3374         }
3375     }
3376
3377     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3378         av_buffer_unref(&s->ps.sps_list[i]);
3379         if (s0->ps.sps_list[i]) {
3380             s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
3381             if (!s->ps.sps_list[i])
3382                 return AVERROR(ENOMEM);
3383         }
3384     }
3385
3386     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
3387         av_buffer_unref(&s->ps.pps_list[i]);
3388         if (s0->ps.pps_list[i]) {
3389             s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
3390             if (!s->ps.pps_list[i])
3391                 return AVERROR(ENOMEM);
3392         }
3393     }
3394
3395     if (s->ps.sps != s0->ps.sps)
3396         if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
3397             return ret;
3398
3399     s->seq_decode = s0->seq_decode;
3400     s->seq_output = s0->seq_output;
3401     s->pocTid0    = s0->pocTid0;
3402     s->max_ra     = s0->max_ra;
3403     s->eos        = s0->eos;
3404     s->no_rasl_output_flag = s0->no_rasl_output_flag;
3405
3406     s->is_nalff        = s0->is_nalff;
3407     s->nal_length_size = s0->nal_length_size;
3408
3409     s->threads_number      = s0->threads_number;
3410     s->threads_type        = s0->threads_type;
3411
3412     if (s0->eos) {
3413         s->seq_decode = (s->seq_decode + 1) & 0xff;
3414         s->max_ra = INT_MAX;
3415     }
3416
3417     s->sei.frame_packing        = s0->sei.frame_packing;
3418     s->sei.display_orientation  = s0->sei.display_orientation;
3419     s->sei.mastering_display    = s0->sei.mastering_display;
3420     s->sei.content_light        = s0->sei.content_light;
3421     s->sei.alternative_transfer = s0->sei.alternative_transfer;
3422
3423     return 0;
3424 }
3425
3426 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3427 {
3428     HEVCContext *s = avctx->priv_data;
3429     int ret;
3430
3431     avctx->internal->allocate_progress = 1;
3432
3433     ret = hevc_init_context(avctx);
3434     if (ret < 0)
3435         return ret;
3436
3437     s->enable_parallel_tiles = 0;
3438     s->sei.picture_timing.picture_struct = 0;
3439     s->eos = 1;
3440
3441     atomic_init(&s->wpp_err, 0);
3442
3443     if(avctx->active_thread_type & FF_THREAD_SLICE)
3444         s->threads_number = avctx->thread_count;
3445     else
3446         s->threads_number = 1;
3447
3448     if (avctx->extradata_size > 0 && avctx->extradata) {
3449         ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size, 1);
3450         if (ret < 0) {
3451             hevc_decode_free(avctx);
3452             return ret;
3453         }
3454     }
3455
3456     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3457             s->threads_type = FF_THREAD_FRAME;
3458         else
3459             s->threads_type = FF_THREAD_SLICE;
3460
3461     return 0;
3462 }
3463
3464 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3465 {
3466     HEVCContext *s = avctx->priv_data;
3467     int ret;
3468
3469     memset(s, 0, sizeof(*s));
3470
3471     ret = hevc_init_context(avctx);
3472     if (ret < 0)
3473         return ret;
3474
3475     return 0;
3476 }
3477
3478 static void hevc_decode_flush(AVCodecContext *avctx)
3479 {
3480     HEVCContext *s = avctx->priv_data;
3481     ff_hevc_flush_dpb(s);
3482     s->max_ra = INT_MAX;
3483     s->eos = 1;
3484 }
3485
3486 #define OFFSET(x) offsetof(HEVCContext, x)
3487 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3488
3489 static const AVOption options[] = {
3490     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3491         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3492     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3493         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3494     { NULL },
3495 };
3496
3497 static const AVClass hevc_decoder_class = {
3498     .class_name = "HEVC decoder",
3499     .item_name  = av_default_item_name,
3500     .option     = options,
3501     .version    = LIBAVUTIL_VERSION_INT,
3502 };
3503
3504 AVCodec ff_hevc_decoder = {
3505     .name                  = "hevc",
3506     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3507     .type                  = AVMEDIA_TYPE_VIDEO,
3508     .id                    = AV_CODEC_ID_HEVC,
3509     .priv_data_size        = sizeof(HEVCContext),
3510     .priv_class            = &hevc_decoder_class,
3511     .init                  = hevc_decode_init,
3512     .close                 = hevc_decode_free,
3513     .decode                = hevc_decode_frame,
3514     .flush                 = hevc_decode_flush,
3515     .update_thread_context = hevc_update_thread_context,
3516     .init_thread_copy      = hevc_init_thread_copy,
3517     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3518                              AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3519     .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING,
3520     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3521     .hw_configs            = (const AVCodecHWConfigInternal*[]) {
3522 #if CONFIG_HEVC_DXVA2_HWACCEL
3523                                HWACCEL_DXVA2(hevc),
3524 #endif
3525 #if CONFIG_HEVC_D3D11VA_HWACCEL
3526                                HWACCEL_D3D11VA(hevc),
3527 #endif
3528 #if CONFIG_HEVC_D3D11VA2_HWACCEL
3529                                HWACCEL_D3D11VA2(hevc),
3530 #endif
3531 #if CONFIG_HEVC_NVDEC_HWACCEL
3532                                HWACCEL_NVDEC(hevc),
3533 #endif
3534 #if CONFIG_HEVC_VAAPI_HWACCEL
3535                                HWACCEL_VAAPI(hevc),
3536 #endif
3537 #if CONFIG_HEVC_VDPAU_HWACCEL
3538                                HWACCEL_VDPAU(hevc),
3539 #endif
3540 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
3541                                HWACCEL_VIDEOTOOLBOX(hevc),
3542 #endif
3543                                NULL
3544                            },
3545 };