]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevcdec.c
avformat/argo_asf: initialise file header inline
[ffmpeg] / libavcodec / hevcdec.c
1 /*
2  * HEVC video Decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/mastering_display_metadata.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
35 #include "libavutil/timecode.h"
36
37 #include "bswapdsp.h"
38 #include "bytestream.h"
39 #include "cabac_functions.h"
40 #include "golomb.h"
41 #include "hevc.h"
42 #include "hevc_data.h"
43 #include "hevc_parse.h"
44 #include "hevcdec.h"
45 #include "hwconfig.h"
46 #include "profiles.h"
47
/*
 * Lookup table with 65 entries (indices 0..64). Only the indices listed
 * below carry an explicit value; every other entry is zero-initialised.
 * NOTE(review): presumably indexed by a block width in samples -- confirm
 * against the users of this table.
 */
const uint8_t ff_hevc_pel_weight[65] = {
    [2]  = 0,
    [4]  = 1,
    [6]  = 2,
    [8]  = 3,
    [12] = 4,
    [16] = 5,
    [24] = 6,
    [32] = 7,
    [48] = 8,
    [64] = 9,
};
49
50 /**
51  * NOTE: Each function hls_foo corresponds to the function foo in the
52  * specification (HLS stands for High Level Syntax).
53  */
54
55 /**
56  * Section 5.7
57  */
58
59 /* free everything allocated  by pic_arrays_init() */
60 static void pic_arrays_free(HEVCContext *s)
61 {
62     av_freep(&s->sao);
63     av_freep(&s->deblock);
64
65     av_freep(&s->skip_flag);
66     av_freep(&s->tab_ct_depth);
67
68     av_freep(&s->tab_ipm);
69     av_freep(&s->cbf_luma);
70     av_freep(&s->is_pcm);
71
72     av_freep(&s->qp_y_tab);
73     av_freep(&s->tab_slice_address);
74     av_freep(&s->filter_slice_edges);
75
76     av_freep(&s->horizontal_bs);
77     av_freep(&s->vertical_bs);
78
79     av_freep(&s->sh.entry_point_offset);
80     av_freep(&s->sh.size);
81     av_freep(&s->sh.offset);
82
83     av_buffer_pool_uninit(&s->tab_mvf_pool);
84     av_buffer_pool_uninit(&s->rpl_tab_pool);
85 }
86
87 /* allocate arrays that depend on frame dimensions */
88 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
89 {
90     int log2_min_cb_size = sps->log2_min_cb_size;
91     int width            = sps->width;
92     int height           = sps->height;
93     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
94                            ((height >> log2_min_cb_size) + 1);
95     int ctb_count        = sps->ctb_width * sps->ctb_height;
96     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
97
98     s->bs_width  = (width  >> 2) + 1;
99     s->bs_height = (height >> 2) + 1;
100
101     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
102     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
103     if (!s->sao || !s->deblock)
104         goto fail;
105
106     s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
107     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
108     if (!s->skip_flag || !s->tab_ct_depth)
109         goto fail;
110
111     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
112     s->tab_ipm  = av_mallocz(min_pu_size);
113     s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
114     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
115         goto fail;
116
117     s->filter_slice_edges = av_mallocz(ctb_count);
118     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
119                                       sizeof(*s->tab_slice_address));
120     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
121                                       sizeof(*s->qp_y_tab));
122     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
123         goto fail;
124
125     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
126     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
127     if (!s->horizontal_bs || !s->vertical_bs)
128         goto fail;
129
130     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
131                                           av_buffer_allocz);
132     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
133                                           av_buffer_allocz);
134     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
135         goto fail;
136
137     return 0;
138
139 fail:
140     pic_arrays_free(s);
141     return AVERROR(ENOMEM);
142 }
143
144 static int pred_weight_table(HEVCContext *s, GetBitContext *gb)
145 {
146     int i = 0;
147     int j = 0;
148     uint8_t luma_weight_l0_flag[16];
149     uint8_t chroma_weight_l0_flag[16];
150     uint8_t luma_weight_l1_flag[16];
151     uint8_t chroma_weight_l1_flag[16];
152     int luma_log2_weight_denom;
153
154     luma_log2_weight_denom = get_ue_golomb_long(gb);
155     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7) {
156         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
157         return AVERROR_INVALIDDATA;
158     }
159     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
160     if (s->ps.sps->chroma_format_idc != 0) {
161         int64_t chroma_log2_weight_denom = luma_log2_weight_denom + (int64_t)get_se_golomb(gb);
162         if (chroma_log2_weight_denom < 0 || chroma_log2_weight_denom > 7) {
163             av_log(s->avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %"PRId64" is invalid\n", chroma_log2_weight_denom);
164             return AVERROR_INVALIDDATA;
165         }
166         s->sh.chroma_log2_weight_denom = chroma_log2_weight_denom;
167     }
168
169     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
170         luma_weight_l0_flag[i] = get_bits1(gb);
171         if (!luma_weight_l0_flag[i]) {
172             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
173             s->sh.luma_offset_l0[i] = 0;
174         }
175     }
176     if (s->ps.sps->chroma_format_idc != 0) {
177         for (i = 0; i < s->sh.nb_refs[L0]; i++)
178             chroma_weight_l0_flag[i] = get_bits1(gb);
179     } else {
180         for (i = 0; i < s->sh.nb_refs[L0]; i++)
181             chroma_weight_l0_flag[i] = 0;
182     }
183     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
184         if (luma_weight_l0_flag[i]) {
185             int delta_luma_weight_l0 = get_se_golomb(gb);
186             if ((int8_t)delta_luma_weight_l0 != delta_luma_weight_l0)
187                 return AVERROR_INVALIDDATA;
188             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
189             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
190         }
191         if (chroma_weight_l0_flag[i]) {
192             for (j = 0; j < 2; j++) {
193                 int delta_chroma_weight_l0 = get_se_golomb(gb);
194                 int delta_chroma_offset_l0 = get_se_golomb(gb);
195
196                 if (   (int8_t)delta_chroma_weight_l0 != delta_chroma_weight_l0
197                     || delta_chroma_offset_l0 < -(1<<17) || delta_chroma_offset_l0 > (1<<17)) {
198                     return AVERROR_INVALIDDATA;
199                 }
200
201                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
202                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
203                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
204             }
205         } else {
206             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
207             s->sh.chroma_offset_l0[i][0] = 0;
208             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
209             s->sh.chroma_offset_l0[i][1] = 0;
210         }
211     }
212     if (s->sh.slice_type == HEVC_SLICE_B) {
213         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
214             luma_weight_l1_flag[i] = get_bits1(gb);
215             if (!luma_weight_l1_flag[i]) {
216                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
217                 s->sh.luma_offset_l1[i] = 0;
218             }
219         }
220         if (s->ps.sps->chroma_format_idc != 0) {
221             for (i = 0; i < s->sh.nb_refs[L1]; i++)
222                 chroma_weight_l1_flag[i] = get_bits1(gb);
223         } else {
224             for (i = 0; i < s->sh.nb_refs[L1]; i++)
225                 chroma_weight_l1_flag[i] = 0;
226         }
227         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
228             if (luma_weight_l1_flag[i]) {
229                 int delta_luma_weight_l1 = get_se_golomb(gb);
230                 if ((int8_t)delta_luma_weight_l1 != delta_luma_weight_l1)
231                     return AVERROR_INVALIDDATA;
232                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
233                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
234             }
235             if (chroma_weight_l1_flag[i]) {
236                 for (j = 0; j < 2; j++) {
237                     int delta_chroma_weight_l1 = get_se_golomb(gb);
238                     int delta_chroma_offset_l1 = get_se_golomb(gb);
239
240                     if (   (int8_t)delta_chroma_weight_l1 != delta_chroma_weight_l1
241                         || delta_chroma_offset_l1 < -(1<<17) || delta_chroma_offset_l1 > (1<<17)) {
242                         return AVERROR_INVALIDDATA;
243                     }
244
245                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
246                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
247                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
248                 }
249             } else {
250                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
251                 s->sh.chroma_offset_l1[i][0] = 0;
252                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
253                 s->sh.chroma_offset_l1[i][1] = 0;
254             }
255         }
256     }
257     return 0;
258 }
259
260 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
261 {
262     const HEVCSPS *sps = s->ps.sps;
263     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
264     int prev_delta_msb = 0;
265     unsigned int nb_sps = 0, nb_sh;
266     int i;
267
268     rps->nb_refs = 0;
269     if (!sps->long_term_ref_pics_present_flag)
270         return 0;
271
272     if (sps->num_long_term_ref_pics_sps > 0)
273         nb_sps = get_ue_golomb_long(gb);
274     nb_sh = get_ue_golomb_long(gb);
275
276     if (nb_sps > sps->num_long_term_ref_pics_sps)
277         return AVERROR_INVALIDDATA;
278     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
279         return AVERROR_INVALIDDATA;
280
281     rps->nb_refs = nb_sh + nb_sps;
282
283     for (i = 0; i < rps->nb_refs; i++) {
284
285         if (i < nb_sps) {
286             uint8_t lt_idx_sps = 0;
287
288             if (sps->num_long_term_ref_pics_sps > 1)
289                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
290
291             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
292             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
293         } else {
294             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
295             rps->used[i] = get_bits1(gb);
296         }
297
298         rps->poc_msb_present[i] = get_bits1(gb);
299         if (rps->poc_msb_present[i]) {
300             int64_t delta = get_ue_golomb_long(gb);
301             int64_t poc;
302
303             if (i && i != nb_sps)
304                 delta += prev_delta_msb;
305
306             poc = rps->poc[i] + s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
307             if (poc != (int32_t)poc)
308                 return AVERROR_INVALIDDATA;
309             rps->poc[i] = poc;
310             prev_delta_msb = delta;
311         }
312     }
313
314     return 0;
315 }
316
317 static void export_stream_params(HEVCContext *s, const HEVCSPS *sps)
318 {
319     AVCodecContext *avctx = s->avctx;
320     const HEVCParamSets *ps = &s->ps;
321     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
322     const HEVCWindow *ow = &sps->output_window;
323     unsigned int num = 0, den = 0;
324
325     avctx->pix_fmt             = sps->pix_fmt;
326     avctx->coded_width         = sps->width;
327     avctx->coded_height        = sps->height;
328     avctx->width               = sps->width  - ow->left_offset - ow->right_offset;
329     avctx->height              = sps->height - ow->top_offset  - ow->bottom_offset;
330     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
331     avctx->profile             = sps->ptl.general_ptl.profile_idc;
332     avctx->level               = sps->ptl.general_ptl.level_idc;
333
334     ff_set_sar(avctx, sps->vui.sar);
335
336     if (sps->vui.video_signal_type_present_flag)
337         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
338                                                             : AVCOL_RANGE_MPEG;
339     else
340         avctx->color_range = AVCOL_RANGE_MPEG;
341
342     if (sps->vui.colour_description_present_flag) {
343         avctx->color_primaries = sps->vui.colour_primaries;
344         avctx->color_trc       = sps->vui.transfer_characteristic;
345         avctx->colorspace      = sps->vui.matrix_coeffs;
346     } else {
347         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
348         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
349         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
350     }
351
352     avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
353     if (sps->chroma_format_idc == 1) {
354         if (sps->vui.chroma_loc_info_present_flag) {
355             if (sps->vui.chroma_sample_loc_type_top_field <= 5)
356                 avctx->chroma_sample_location = sps->vui.chroma_sample_loc_type_top_field + 1;
357         } else
358             avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
359     }
360
361     if (vps->vps_timing_info_present_flag) {
362         num = vps->vps_num_units_in_tick;
363         den = vps->vps_time_scale;
364     } else if (sps->vui.vui_timing_info_present_flag) {
365         num = sps->vui.vui_num_units_in_tick;
366         den = sps->vui.vui_time_scale;
367     }
368
369     if (num != 0 && den != 0)
370         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
371                   num, den, 1 << 30);
372
373     if (s->sei.alternative_transfer.present &&
374         av_color_transfer_name(s->sei.alternative_transfer.preferred_transfer_characteristics) &&
375         s->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) {
376         avctx->color_trc = s->sei.alternative_transfer.preferred_transfer_characteristics;
377     }
378 }
379
380 static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
381 {
382 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
383                      CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
384                      CONFIG_HEVC_NVDEC_HWACCEL + \
385                      CONFIG_HEVC_VAAPI_HWACCEL + \
386                      CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
387                      CONFIG_HEVC_VDPAU_HWACCEL)
388     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
389
390     switch (sps->pix_fmt) {
391     case AV_PIX_FMT_YUV420P:
392     case AV_PIX_FMT_YUVJ420P:
393 #if CONFIG_HEVC_DXVA2_HWACCEL
394         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
395 #endif
396 #if CONFIG_HEVC_D3D11VA_HWACCEL
397         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
398         *fmt++ = AV_PIX_FMT_D3D11;
399 #endif
400 #if CONFIG_HEVC_VAAPI_HWACCEL
401         *fmt++ = AV_PIX_FMT_VAAPI;
402 #endif
403 #if CONFIG_HEVC_VDPAU_HWACCEL
404         *fmt++ = AV_PIX_FMT_VDPAU;
405 #endif
406 #if CONFIG_HEVC_NVDEC_HWACCEL
407         *fmt++ = AV_PIX_FMT_CUDA;
408 #endif
409 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
410         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
411 #endif
412         break;
413     case AV_PIX_FMT_YUV420P10:
414 #if CONFIG_HEVC_DXVA2_HWACCEL
415         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
416 #endif
417 #if CONFIG_HEVC_D3D11VA_HWACCEL
418         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
419         *fmt++ = AV_PIX_FMT_D3D11;
420 #endif
421 #if CONFIG_HEVC_VAAPI_HWACCEL
422         *fmt++ = AV_PIX_FMT_VAAPI;
423 #endif
424 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
425         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
426 #endif
427 #if CONFIG_HEVC_VDPAU_HWACCEL
428         *fmt++ = AV_PIX_FMT_VDPAU;
429 #endif
430 #if CONFIG_HEVC_NVDEC_HWACCEL
431         *fmt++ = AV_PIX_FMT_CUDA;
432 #endif
433         break;
434     case AV_PIX_FMT_YUV444P:
435 #if CONFIG_HEVC_VDPAU_HWACCEL
436         *fmt++ = AV_PIX_FMT_VDPAU;
437 #endif
438 #if CONFIG_HEVC_NVDEC_HWACCEL
439         *fmt++ = AV_PIX_FMT_CUDA;
440 #endif
441         break;
442     case AV_PIX_FMT_YUV422P:
443     case AV_PIX_FMT_YUV422P10LE:
444 #if CONFIG_HEVC_VAAPI_HWACCEL
445        *fmt++ = AV_PIX_FMT_VAAPI;
446 #endif
447         break;
448     case AV_PIX_FMT_YUV420P12:
449     case AV_PIX_FMT_YUV444P10:
450     case AV_PIX_FMT_YUV444P12:
451 #if CONFIG_HEVC_VDPAU_HWACCEL
452         *fmt++ = AV_PIX_FMT_VDPAU;
453 #endif
454 #if CONFIG_HEVC_NVDEC_HWACCEL
455         *fmt++ = AV_PIX_FMT_CUDA;
456 #endif
457         break;
458     }
459
460     *fmt++ = sps->pix_fmt;
461     *fmt = AV_PIX_FMT_NONE;
462
463     return ff_thread_get_format(s->avctx, pix_fmts);
464 }
465
466 static int set_sps(HEVCContext *s, const HEVCSPS *sps,
467                    enum AVPixelFormat pix_fmt)
468 {
469     int ret, i;
470
471     pic_arrays_free(s);
472     s->ps.sps = NULL;
473     s->ps.vps = NULL;
474
475     if (!sps)
476         return 0;
477
478     ret = pic_arrays_init(s, sps);
479     if (ret < 0)
480         goto fail;
481
482     export_stream_params(s, sps);
483
484     s->avctx->pix_fmt = pix_fmt;
485
486     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
487     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
488     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
489
490     for (i = 0; i < 3; i++) {
491         av_freep(&s->sao_pixel_buffer_h[i]);
492         av_freep(&s->sao_pixel_buffer_v[i]);
493     }
494
495     if (sps->sao_enabled && !s->avctx->hwaccel) {
496         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
497         int c_idx;
498
499         for(c_idx = 0; c_idx < c_count; c_idx++) {
500             int w = sps->width >> sps->hshift[c_idx];
501             int h = sps->height >> sps->vshift[c_idx];
502             s->sao_pixel_buffer_h[c_idx] =
503                 av_malloc((w * 2 * sps->ctb_height) <<
504                           sps->pixel_shift);
505             s->sao_pixel_buffer_v[c_idx] =
506                 av_malloc((h * 2 * sps->ctb_width) <<
507                           sps->pixel_shift);
508         }
509     }
510
511     s->ps.sps = sps;
512     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
513
514     return 0;
515
516 fail:
517     pic_arrays_free(s);
518     s->ps.sps = NULL;
519     return ret;
520 }
521
522 static int hls_slice_header(HEVCContext *s)
523 {
524     GetBitContext *gb = &s->HEVClc->gb;
525     SliceHeader *sh   = &s->sh;
526     int i, ret;
527
528     // Coded parameters
529     sh->first_slice_in_pic_flag = get_bits1(gb);
530     if (s->ref && sh->first_slice_in_pic_flag) {
531         av_log(s->avctx, AV_LOG_ERROR, "Two slices reporting being the first in the same frame.\n");
532         return 1; // This slice will be skipped later, do not corrupt state
533     }
534
535     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
536         s->seq_decode = (s->seq_decode + 1) & 0xff;
537         s->max_ra     = INT_MAX;
538         if (IS_IDR(s))
539             ff_hevc_clear_refs(s);
540     }
541     sh->no_output_of_prior_pics_flag = 0;
542     if (IS_IRAP(s))
543         sh->no_output_of_prior_pics_flag = get_bits1(gb);
544
545     sh->pps_id = get_ue_golomb_long(gb);
546     if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
547         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
548         return AVERROR_INVALIDDATA;
549     }
550     if (!sh->first_slice_in_pic_flag &&
551         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
552         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
553         return AVERROR_INVALIDDATA;
554     }
555     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
556     if (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos == 1)
557         sh->no_output_of_prior_pics_flag = 1;
558
559     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
560         const HEVCSPS *sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
561         const HEVCSPS *last_sps = s->ps.sps;
562         enum AVPixelFormat pix_fmt;
563
564         if (last_sps && IS_IRAP(s) && s->nal_unit_type != HEVC_NAL_CRA_NUT) {
565             if (sps->width != last_sps->width || sps->height != last_sps->height ||
566                 sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering !=
567                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
568                 sh->no_output_of_prior_pics_flag = 0;
569         }
570         ff_hevc_clear_refs(s);
571
572         ret = set_sps(s, sps, sps->pix_fmt);
573         if (ret < 0)
574             return ret;
575
576         pix_fmt = get_format(s, sps);
577         if (pix_fmt < 0)
578             return pix_fmt;
579         s->avctx->pix_fmt = pix_fmt;
580
581         s->seq_decode = (s->seq_decode + 1) & 0xff;
582         s->max_ra     = INT_MAX;
583     }
584
585     sh->dependent_slice_segment_flag = 0;
586     if (!sh->first_slice_in_pic_flag) {
587         int slice_address_length;
588
589         if (s->ps.pps->dependent_slice_segments_enabled_flag)
590             sh->dependent_slice_segment_flag = get_bits1(gb);
591
592         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
593                                             s->ps.sps->ctb_height);
594         sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
595         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
596             av_log(s->avctx, AV_LOG_ERROR,
597                    "Invalid slice segment address: %u.\n",
598                    sh->slice_segment_addr);
599             return AVERROR_INVALIDDATA;
600         }
601
602         if (!sh->dependent_slice_segment_flag) {
603             sh->slice_addr = sh->slice_segment_addr;
604             s->slice_idx++;
605         }
606     } else {
607         sh->slice_segment_addr = sh->slice_addr = 0;
608         s->slice_idx           = 0;
609         s->slice_initialized   = 0;
610     }
611
612     if (!sh->dependent_slice_segment_flag) {
613         s->slice_initialized = 0;
614
615         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
616             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
617
618         sh->slice_type = get_ue_golomb_long(gb);
619         if (!(sh->slice_type == HEVC_SLICE_I ||
620               sh->slice_type == HEVC_SLICE_P ||
621               sh->slice_type == HEVC_SLICE_B)) {
622             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
623                    sh->slice_type);
624             return AVERROR_INVALIDDATA;
625         }
626         if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I) {
627             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
628             return AVERROR_INVALIDDATA;
629         }
630
631         // when flag is not present, picture is inferred to be output
632         sh->pic_output_flag = 1;
633         if (s->ps.pps->output_flag_present_flag)
634             sh->pic_output_flag = get_bits1(gb);
635
636         if (s->ps.sps->separate_colour_plane_flag)
637             sh->colour_plane_id = get_bits(gb, 2);
638
639         if (!IS_IDR(s)) {
640             int poc, pos;
641
642             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
643             poc = ff_hevc_compute_poc(s->ps.sps, s->pocTid0, sh->pic_order_cnt_lsb, s->nal_unit_type);
644             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
645                 av_log(s->avctx, AV_LOG_WARNING,
646                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
647                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
648                     return AVERROR_INVALIDDATA;
649                 poc = s->poc;
650             }
651             s->poc = poc;
652
653             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
654             pos = get_bits_left(gb);
655             if (!sh->short_term_ref_pic_set_sps_flag) {
656                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
657                 if (ret < 0)
658                     return ret;
659
660                 sh->short_term_rps = &sh->slice_rps;
661             } else {
662                 int numbits, rps_idx;
663
664                 if (!s->ps.sps->nb_st_rps) {
665                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
666                     return AVERROR_INVALIDDATA;
667                 }
668
669                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
670                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
671                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
672             }
673             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
674
675             pos = get_bits_left(gb);
676             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
677             if (ret < 0) {
678                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
679                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
680                     return AVERROR_INVALIDDATA;
681             }
682             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
683
684             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
685                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
686             else
687                 sh->slice_temporal_mvp_enabled_flag = 0;
688         } else {
689             s->sh.short_term_rps = NULL;
690             s->poc               = 0;
691         }
692
693         /* 8.3.1 */
694         if (sh->first_slice_in_pic_flag && s->temporal_id == 0 &&
695             s->nal_unit_type != HEVC_NAL_TRAIL_N &&
696             s->nal_unit_type != HEVC_NAL_TSA_N   &&
697             s->nal_unit_type != HEVC_NAL_STSA_N  &&
698             s->nal_unit_type != HEVC_NAL_RADL_N  &&
699             s->nal_unit_type != HEVC_NAL_RADL_R  &&
700             s->nal_unit_type != HEVC_NAL_RASL_N  &&
701             s->nal_unit_type != HEVC_NAL_RASL_R)
702             s->pocTid0 = s->poc;
703
704         if (s->ps.sps->sao_enabled) {
705             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
706             if (s->ps.sps->chroma_format_idc) {
707                 sh->slice_sample_adaptive_offset_flag[1] =
708                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
709             }
710         } else {
711             sh->slice_sample_adaptive_offset_flag[0] = 0;
712             sh->slice_sample_adaptive_offset_flag[1] = 0;
713             sh->slice_sample_adaptive_offset_flag[2] = 0;
714         }
715
716         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
717         if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
718             int nb_refs;
719
720             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
721             if (sh->slice_type == HEVC_SLICE_B)
722                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
723
724             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
725                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
726                 if (sh->slice_type == HEVC_SLICE_B)
727                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
728             }
729             if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
730                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
731                        sh->nb_refs[L0], sh->nb_refs[L1]);
732                 return AVERROR_INVALIDDATA;
733             }
734
735             sh->rpl_modification_flag[0] = 0;
736             sh->rpl_modification_flag[1] = 0;
737             nb_refs = ff_hevc_frame_nb_refs(s);
738             if (!nb_refs) {
739                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
740                 return AVERROR_INVALIDDATA;
741             }
742
743             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
744                 sh->rpl_modification_flag[0] = get_bits1(gb);
745                 if (sh->rpl_modification_flag[0]) {
746                     for (i = 0; i < sh->nb_refs[L0]; i++)
747                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
748                 }
749
750                 if (sh->slice_type == HEVC_SLICE_B) {
751                     sh->rpl_modification_flag[1] = get_bits1(gb);
752                     if (sh->rpl_modification_flag[1] == 1)
753                         for (i = 0; i < sh->nb_refs[L1]; i++)
754                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
755                 }
756             }
757
758             if (sh->slice_type == HEVC_SLICE_B)
759                 sh->mvd_l1_zero_flag = get_bits1(gb);
760
761             if (s->ps.pps->cabac_init_present_flag)
762                 sh->cabac_init_flag = get_bits1(gb);
763             else
764                 sh->cabac_init_flag = 0;
765
766             sh->collocated_ref_idx = 0;
767             if (sh->slice_temporal_mvp_enabled_flag) {
768                 sh->collocated_list = L0;
769                 if (sh->slice_type == HEVC_SLICE_B)
770                     sh->collocated_list = !get_bits1(gb);
771
772                 if (sh->nb_refs[sh->collocated_list] > 1) {
773                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
774                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
775                         av_log(s->avctx, AV_LOG_ERROR,
776                                "Invalid collocated_ref_idx: %d.\n",
777                                sh->collocated_ref_idx);
778                         return AVERROR_INVALIDDATA;
779                     }
780                 }
781             }
782
783             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == HEVC_SLICE_P) ||
784                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) {
785                 int ret = pred_weight_table(s, gb);
786                 if (ret < 0)
787                     return ret;
788             }
789
790             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
791             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
792                 av_log(s->avctx, AV_LOG_ERROR,
793                        "Invalid number of merging MVP candidates: %d.\n",
794                        sh->max_num_merge_cand);
795                 return AVERROR_INVALIDDATA;
796             }
797         }
798
799         sh->slice_qp_delta = get_se_golomb(gb);
800
801         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
802             sh->slice_cb_qp_offset = get_se_golomb(gb);
803             sh->slice_cr_qp_offset = get_se_golomb(gb);
804         } else {
805             sh->slice_cb_qp_offset = 0;
806             sh->slice_cr_qp_offset = 0;
807         }
808
809         if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
810             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
811         else
812             sh->cu_chroma_qp_offset_enabled_flag = 0;
813
814         if (s->ps.pps->deblocking_filter_control_present_flag) {
815             int deblocking_filter_override_flag = 0;
816
817             if (s->ps.pps->deblocking_filter_override_enabled_flag)
818                 deblocking_filter_override_flag = get_bits1(gb);
819
820             if (deblocking_filter_override_flag) {
821                 sh->disable_deblocking_filter_flag = get_bits1(gb);
822                 if (!sh->disable_deblocking_filter_flag) {
823                     int beta_offset_div2 = get_se_golomb(gb);
824                     int tc_offset_div2   = get_se_golomb(gb) ;
825                     if (beta_offset_div2 < -6 || beta_offset_div2 > 6 ||
826                         tc_offset_div2   < -6 || tc_offset_div2   > 6) {
827                         av_log(s->avctx, AV_LOG_ERROR,
828                             "Invalid deblock filter offsets: %d, %d\n",
829                             beta_offset_div2, tc_offset_div2);
830                         return AVERROR_INVALIDDATA;
831                     }
832                     sh->beta_offset = beta_offset_div2 * 2;
833                     sh->tc_offset   =   tc_offset_div2 * 2;
834                 }
835             } else {
836                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
837                 sh->beta_offset                    = s->ps.pps->beta_offset;
838                 sh->tc_offset                      = s->ps.pps->tc_offset;
839             }
840         } else {
841             sh->disable_deblocking_filter_flag = 0;
842             sh->beta_offset                    = 0;
843             sh->tc_offset                      = 0;
844         }
845
846         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
847             (sh->slice_sample_adaptive_offset_flag[0] ||
848              sh->slice_sample_adaptive_offset_flag[1] ||
849              !sh->disable_deblocking_filter_flag)) {
850             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
851         } else {
852             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
853         }
854     } else if (!s->slice_initialized) {
855         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
856         return AVERROR_INVALIDDATA;
857     }
858
859     sh->num_entry_point_offsets = 0;
860     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
861         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
862         // It would be possible to bound this tighter but this here is simpler
863         if (num_entry_point_offsets > get_bits_left(gb)) {
864             av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
865             return AVERROR_INVALIDDATA;
866         }
867
868         sh->num_entry_point_offsets = num_entry_point_offsets;
869         if (sh->num_entry_point_offsets > 0) {
870             int offset_len = get_ue_golomb_long(gb) + 1;
871
872             if (offset_len < 1 || offset_len > 32) {
873                 sh->num_entry_point_offsets = 0;
874                 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
875                 return AVERROR_INVALIDDATA;
876             }
877
878             av_freep(&sh->entry_point_offset);
879             av_freep(&sh->offset);
880             av_freep(&sh->size);
881             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned));
882             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
883             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
884             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
885                 sh->num_entry_point_offsets = 0;
886                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
887                 return AVERROR(ENOMEM);
888             }
889             for (i = 0; i < sh->num_entry_point_offsets; i++) {
890                 unsigned val = get_bits_long(gb, offset_len);
891                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
892             }
893             if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
894                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
895                 s->threads_number = 1;
896             } else
897                 s->enable_parallel_tiles = 0;
898         } else
899             s->enable_parallel_tiles = 0;
900     }
901
902     if (s->ps.pps->slice_header_extension_present_flag) {
903         unsigned int length = get_ue_golomb_long(gb);
904         if (length*8LL > get_bits_left(gb)) {
905             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
906             return AVERROR_INVALIDDATA;
907         }
908         for (i = 0; i < length; i++)
909             skip_bits(gb, 8);  // slice_header_extension_data_byte
910     }
911
912     // Inferred parameters
913     sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
914     if (sh->slice_qp > 51 ||
915         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
916         av_log(s->avctx, AV_LOG_ERROR,
917                "The slice_qp %d is outside the valid range "
918                "[%d, 51].\n",
919                sh->slice_qp,
920                -s->ps.sps->qp_bd_offset);
921         return AVERROR_INVALIDDATA;
922     }
923
924     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
925
926     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
927         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
928         return AVERROR_INVALIDDATA;
929     }
930
931     if (get_bits_left(gb) < 0) {
932         av_log(s->avctx, AV_LOG_ERROR,
933                "Overread slice header by %d bits\n", -get_bits_left(gb));
934         return AVERROR_INVALIDDATA;
935     }
936
937     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
938
939     if (!s->ps.pps->cu_qp_delta_enabled_flag)
940         s->HEVClc->qp_y = s->sh.slice_qp;
941
942     s->slice_initialized = 1;
943     s->HEVClc->tu.cu_qp_offset_cb = 0;
944     s->HEVClc->tu.cu_qp_offset_cr = 0;
945
946     return 0;
947 }
948
949 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
950
951 #define SET_SAO(elem, value)                            \
952 do {                                                    \
953     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
954         sao->elem = value;                              \
955     else if (sao_merge_left_flag)                       \
956         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
957     else if (sao_merge_up_flag)                         \
958         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
959     else                                                \
960         sao->elem = 0;                                  \
961 } while (0)
962
963 static void hls_sao_param(HEVCContext *s, int rx, int ry)
964 {
965     HEVCLocalContext *lc    = s->HEVClc;
966     int sao_merge_left_flag = 0;
967     int sao_merge_up_flag   = 0;
968     SAOParams *sao          = &CTB(s->sao, rx, ry);
969     int c_idx, i;
970
971     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
972         s->sh.slice_sample_adaptive_offset_flag[1]) {
973         if (rx > 0) {
974             if (lc->ctb_left_flag)
975                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
976         }
977         if (ry > 0 && !sao_merge_left_flag) {
978             if (lc->ctb_up_flag)
979                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
980         }
981     }
982
983     for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
984         int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
985                                                  s->ps.pps->log2_sao_offset_scale_chroma;
986
987         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
988             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
989             continue;
990         }
991
992         if (c_idx == 2) {
993             sao->type_idx[2] = sao->type_idx[1];
994             sao->eo_class[2] = sao->eo_class[1];
995         } else {
996             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
997         }
998
999         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
1000             continue;
1001
1002         for (i = 0; i < 4; i++)
1003             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
1004
1005         if (sao->type_idx[c_idx] == SAO_BAND) {
1006             for (i = 0; i < 4; i++) {
1007                 if (sao->offset_abs[c_idx][i]) {
1008                     SET_SAO(offset_sign[c_idx][i],
1009                             ff_hevc_sao_offset_sign_decode(s));
1010                 } else {
1011                     sao->offset_sign[c_idx][i] = 0;
1012                 }
1013             }
1014             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
1015         } else if (c_idx != 2) {
1016             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
1017         }
1018
1019         // Inferred parameters
1020         sao->offset_val[c_idx][0] = 0;
1021         for (i = 0; i < 4; i++) {
1022             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
1023             if (sao->type_idx[c_idx] == SAO_EDGE) {
1024                 if (i > 1)
1025                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
1026             } else if (sao->offset_sign[c_idx][i]) {
1027                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
1028             }
1029             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
1030         }
1031     }
1032 }
1033
1034 #undef SET_SAO
1035 #undef CTB
1036
1037 static int hls_cross_component_pred(HEVCContext *s, int idx) {
1038     HEVCLocalContext *lc    = s->HEVClc;
1039     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
1040
1041     if (log2_res_scale_abs_plus1 !=  0) {
1042         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
1043         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
1044                                (1 - 2 * res_scale_sign_flag);
1045     } else {
1046         lc->tu.res_scale_val = 0;
1047     }
1048
1049
1050     return 0;
1051 }
1052
1053 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1054                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1055                               int log2_cb_size, int log2_trafo_size,
1056                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
1057 {
1058     HEVCLocalContext *lc = s->HEVClc;
1059     const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
1060     int i;
1061
1062     if (lc->cu.pred_mode == MODE_INTRA) {
1063         int trafo_size = 1 << log2_trafo_size;
1064         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1065
1066         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1067     }
1068
1069     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
1070         (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1071         int scan_idx   = SCAN_DIAG;
1072         int scan_idx_c = SCAN_DIAG;
1073         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
1074                          (s->ps.sps->chroma_format_idc == 2 &&
1075                          (cbf_cb[1] || cbf_cr[1]));
1076
1077         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1078             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1079             if (lc->tu.cu_qp_delta != 0)
1080                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1081                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1082             lc->tu.is_cu_qp_delta_coded = 1;
1083
1084             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1085                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1086                 av_log(s->avctx, AV_LOG_ERROR,
1087                        "The cu_qp_delta %d is outside the valid range "
1088                        "[%d, %d].\n",
1089                        lc->tu.cu_qp_delta,
1090                        -(26 + s->ps.sps->qp_bd_offset / 2),
1091                         (25 + s->ps.sps->qp_bd_offset / 2));
1092                 return AVERROR_INVALIDDATA;
1093             }
1094
1095             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
1096         }
1097
1098         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
1099             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
1100             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
1101             if (cu_chroma_qp_offset_flag) {
1102                 int cu_chroma_qp_offset_idx  = 0;
1103                 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
1104                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
1105                     av_log(s->avctx, AV_LOG_ERROR,
1106                         "cu_chroma_qp_offset_idx not yet tested.\n");
1107                 }
1108                 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
1109                 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
1110             } else {
1111                 lc->tu.cu_qp_offset_cb = 0;
1112                 lc->tu.cu_qp_offset_cr = 0;
1113             }
1114             lc->tu.is_cu_chroma_qp_offset_coded = 1;
1115         }
1116
1117         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1118             if (lc->tu.intra_pred_mode >= 6 &&
1119                 lc->tu.intra_pred_mode <= 14) {
1120                 scan_idx = SCAN_VERT;
1121             } else if (lc->tu.intra_pred_mode >= 22 &&
1122                        lc->tu.intra_pred_mode <= 30) {
1123                 scan_idx = SCAN_HORIZ;
1124             }
1125
1126             if (lc->tu.intra_pred_mode_c >=  6 &&
1127                 lc->tu.intra_pred_mode_c <= 14) {
1128                 scan_idx_c = SCAN_VERT;
1129             } else if (lc->tu.intra_pred_mode_c >= 22 &&
1130                        lc->tu.intra_pred_mode_c <= 30) {
1131                 scan_idx_c = SCAN_HORIZ;
1132             }
1133         }
1134
1135         lc->tu.cross_pf = 0;
1136
1137         if (cbf_luma)
1138             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1139         if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1140             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1141             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1142             lc->tu.cross_pf  = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1143                                 (lc->cu.pred_mode == MODE_INTER ||
1144                                  (lc->tu.chroma_mode_c ==  4)));
1145
1146             if (lc->tu.cross_pf) {
1147                 hls_cross_component_pred(s, 0);
1148             }
1149             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1150                 if (lc->cu.pred_mode == MODE_INTRA) {
1151                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1152                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1153                 }
1154                 if (cbf_cb[i])
1155                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1156                                                 log2_trafo_size_c, scan_idx_c, 1);
1157                 else
1158                     if (lc->tu.cross_pf) {
1159                         ptrdiff_t stride = s->frame->linesize[1];
1160                         int hshift = s->ps.sps->hshift[1];
1161                         int vshift = s->ps.sps->vshift[1];
1162                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1163                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1164                         int size = 1 << log2_trafo_size_c;
1165
1166                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1167                                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1168                         for (i = 0; i < (size * size); i++) {
1169                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1170                         }
1171                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1172                     }
1173             }
1174
1175             if (lc->tu.cross_pf) {
1176                 hls_cross_component_pred(s, 1);
1177             }
1178             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1179                 if (lc->cu.pred_mode == MODE_INTRA) {
1180                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1181                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1182                 }
1183                 if (cbf_cr[i])
1184                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1185                                                 log2_trafo_size_c, scan_idx_c, 2);
1186                 else
1187                     if (lc->tu.cross_pf) {
1188                         ptrdiff_t stride = s->frame->linesize[2];
1189                         int hshift = s->ps.sps->hshift[2];
1190                         int vshift = s->ps.sps->vshift[2];
1191                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1192                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1193                         int size = 1 << log2_trafo_size_c;
1194
1195                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1196                                                           ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1197                         for (i = 0; i < (size * size); i++) {
1198                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1199                         }
1200                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1201                     }
1202             }
1203         } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1204             int trafo_size_h = 1 << (log2_trafo_size + 1);
1205             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1206             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1207                 if (lc->cu.pred_mode == MODE_INTRA) {
1208                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1209                                                     trafo_size_h, trafo_size_v);
1210                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1211                 }
1212                 if (cbf_cb[i])
1213                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1214                                                 log2_trafo_size, scan_idx_c, 1);
1215             }
1216             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1217                 if (lc->cu.pred_mode == MODE_INTRA) {
1218                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1219                                                 trafo_size_h, trafo_size_v);
1220                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1221                 }
1222                 if (cbf_cr[i])
1223                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1224                                                 log2_trafo_size, scan_idx_c, 2);
1225             }
1226         }
1227     } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1228         if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1229             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1230             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1231             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1232             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1233             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1234             if (s->ps.sps->chroma_format_idc == 2) {
1235                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1236                                                 trafo_size_h, trafo_size_v);
1237                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1238                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1239             }
1240         } else if (blk_idx == 3) {
1241             int trafo_size_h = 1 << (log2_trafo_size + 1);
1242             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1243             ff_hevc_set_neighbour_available(s, xBase, yBase,
1244                                             trafo_size_h, trafo_size_v);
1245             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1246             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1247             if (s->ps.sps->chroma_format_idc == 2) {
1248                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1249                                                 trafo_size_h, trafo_size_v);
1250                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1251                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1252             }
1253         }
1254     }
1255
1256     return 0;
1257 }
1258
1259 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1260 {
1261     int cb_size          = 1 << log2_cb_size;
1262     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1263
1264     int min_pu_width     = s->ps.sps->min_pu_width;
1265     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1266     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1267     int i, j;
1268
1269     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1270         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1271             s->is_pcm[i + j * min_pu_width] = 2;
1272 }
1273
1274 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1275                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1276                               int log2_cb_size, int log2_trafo_size,
1277                               int trafo_depth, int blk_idx,
1278                               const int *base_cbf_cb, const int *base_cbf_cr)
1279 {
1280     HEVCLocalContext *lc = s->HEVClc;
1281     uint8_t split_transform_flag;
1282     int cbf_cb[2];
1283     int cbf_cr[2];
1284     int ret;
1285
1286     cbf_cb[0] = base_cbf_cb[0];
1287     cbf_cb[1] = base_cbf_cb[1];
1288     cbf_cr[0] = base_cbf_cr[0];
1289     cbf_cr[1] = base_cbf_cr[1];
1290
1291     if (lc->cu.intra_split_flag) {
1292         if (trafo_depth == 1) {
1293             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1294             if (s->ps.sps->chroma_format_idc == 3) {
1295                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1296                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1297             } else {
1298                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1299                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1300             }
1301         }
1302     } else {
1303         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1304         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1305         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1306     }
1307
1308     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1309         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1310         trafo_depth     < lc->cu.max_trafo_depth       &&
1311         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1312         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1313     } else {
1314         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1315                           lc->cu.pred_mode == MODE_INTER &&
1316                           lc->cu.part_mode != PART_2Nx2N &&
1317                           trafo_depth == 0;
1318
1319         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1320                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1321                                inter_split;
1322     }
1323
1324     if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1325         if (trafo_depth == 0 || cbf_cb[0]) {
1326             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1327             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1328                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1329             }
1330         }
1331
1332         if (trafo_depth == 0 || cbf_cr[0]) {
1333             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1334             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1335                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1336             }
1337         }
1338     }
1339
1340     if (split_transform_flag) {
1341         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1342         const int x1 = x0 + trafo_size_split;
1343         const int y1 = y0 + trafo_size_split;
1344
1345 #define SUBDIVIDE(x, y, idx)                                                    \
1346 do {                                                                            \
1347     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1348                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1349                              cbf_cb, cbf_cr);                                   \
1350     if (ret < 0)                                                                \
1351         return ret;                                                             \
1352 } while (0)
1353
1354         SUBDIVIDE(x0, y0, 0);
1355         SUBDIVIDE(x1, y0, 1);
1356         SUBDIVIDE(x0, y1, 2);
1357         SUBDIVIDE(x1, y1, 3);
1358
1359 #undef SUBDIVIDE
1360     } else {
1361         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1362         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1363         int min_tu_width     = s->ps.sps->min_tb_width;
1364         int cbf_luma         = 1;
1365
1366         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1367             cbf_cb[0] || cbf_cr[0] ||
1368             (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1369             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1370         }
1371
1372         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1373                                  log2_cb_size, log2_trafo_size,
1374                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1375         if (ret < 0)
1376             return ret;
1377         // TODO: store cbf_luma somewhere else
1378         if (cbf_luma) {
1379             int i, j;
1380             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1381                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1382                     int x_tu = (x0 + j) >> log2_min_tu_size;
1383                     int y_tu = (y0 + i) >> log2_min_tu_size;
1384                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1385                 }
1386         }
1387         if (!s->sh.disable_deblocking_filter_flag) {
1388             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1389             if (s->ps.pps->transquant_bypass_enable_flag &&
1390                 lc->cu.cu_transquant_bypass_flag)
1391                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1392         }
1393     }
1394     return 0;
1395 }
1396
1397 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1398 {
1399     HEVCLocalContext *lc = s->HEVClc;
1400     GetBitContext gb;
1401     int cb_size   = 1 << log2_cb_size;
1402     ptrdiff_t stride0 = s->frame->linesize[0];
1403     ptrdiff_t stride1 = s->frame->linesize[1];
1404     ptrdiff_t stride2 = s->frame->linesize[2];
1405     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1406     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1407     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1408
1409     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1410                          (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1411                           ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1412                           s->ps.sps->pcm.bit_depth_chroma;
1413     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1414     int ret;
1415
1416     if (!s->sh.disable_deblocking_filter_flag)
1417         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1418
1419     ret = init_get_bits(&gb, pcm, length);
1420     if (ret < 0)
1421         return ret;
1422
1423     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1424     if (s->ps.sps->chroma_format_idc) {
1425         s->hevcdsp.put_pcm(dst1, stride1,
1426                            cb_size >> s->ps.sps->hshift[1],
1427                            cb_size >> s->ps.sps->vshift[1],
1428                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1429         s->hevcdsp.put_pcm(dst2, stride2,
1430                            cb_size >> s->ps.sps->hshift[2],
1431                            cb_size >> s->ps.sps->vshift[2],
1432                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1433     }
1434
1435     return 0;
1436 }
1437
1438 /**
1439  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1440  *
1441  * @param s HEVC decoding context
1442  * @param dst target buffer for block data at block position
1443  * @param dststride stride of the dst buffer
1444  * @param ref reference picture buffer at origin (0, 0)
1445  * @param mv motion vector (relative to block position) to get pixel data from
1446  * @param x_off horizontal position of block from origin (0, 0)
1447  * @param y_off vertical position of block from origin (0, 0)
1448  * @param block_w width of block
1449  * @param block_h height of block
1450  * @param luma_weight weighting factor applied to the luma prediction
1451  * @param luma_offset additive offset applied to the luma prediction value
1452  */
1453
1454 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1455                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1456                         int block_w, int block_h, int luma_weight, int luma_offset)
1457 {
1458     HEVCLocalContext *lc = s->HEVClc;
1459     uint8_t *src         = ref->data[0];
1460     ptrdiff_t srcstride  = ref->linesize[0];
1461     int pic_width        = s->ps.sps->width;
1462     int pic_height       = s->ps.sps->height;
1463     int mx               = mv->x & 3;
1464     int my               = mv->y & 3;
1465     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1466                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1467     int idx              = ff_hevc_pel_weight[block_w];
1468
1469     x_off += mv->x >> 2;
1470     y_off += mv->y >> 2;
1471     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1472
1473     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1474         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1475         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1476         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1477         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1478         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1479
1480         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1481                                  edge_emu_stride, srcstride,
1482                                  block_w + QPEL_EXTRA,
1483                                  block_h + QPEL_EXTRA,
1484                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1485                                  pic_width, pic_height);
1486         src = lc->edge_emu_buffer + buf_offset;
1487         srcstride = edge_emu_stride;
1488     }
1489
1490     if (!weight_flag)
1491         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1492                                                       block_h, mx, my, block_w);
1493     else
1494         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1495                                                         block_h, s->sh.luma_log2_weight_denom,
1496                                                         luma_weight, luma_offset, mx, my, block_w);
1497 }
1498
1499 /**
1500  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1501  *
1502  * @param s HEVC decoding context
1503  * @param dst target buffer for block data at block position
1504  * @param dststride stride of the dst buffer
1505  * @param ref0 reference picture0 buffer at origin (0, 0)
1506  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1507  * @param x_off horizontal position of block from origin (0, 0)
1508  * @param y_off vertical position of block from origin (0, 0)
1509  * @param block_w width of block
1510  * @param block_h height of block
1511  * @param ref1 reference picture1 buffer at origin (0, 0)
1512  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1513  * @param current_mv current motion vector structure
1514  */
1515  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1516                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1517                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1518 {
1519     HEVCLocalContext *lc = s->HEVClc;
1520     ptrdiff_t src0stride  = ref0->linesize[0];
1521     ptrdiff_t src1stride  = ref1->linesize[0];
1522     int pic_width        = s->ps.sps->width;
1523     int pic_height       = s->ps.sps->height;
1524     int mx0              = mv0->x & 3;
1525     int my0              = mv0->y & 3;
1526     int mx1              = mv1->x & 3;
1527     int my1              = mv1->y & 3;
1528     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1529                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1530     int x_off0           = x_off + (mv0->x >> 2);
1531     int y_off0           = y_off + (mv0->y >> 2);
1532     int x_off1           = x_off + (mv1->x >> 2);
1533     int y_off1           = y_off + (mv1->y >> 2);
1534     int idx              = ff_hevc_pel_weight[block_w];
1535
1536     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1537     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1538
1539     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1540         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1541         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1542         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1543         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1544         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1545
1546         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1547                                  edge_emu_stride, src0stride,
1548                                  block_w + QPEL_EXTRA,
1549                                  block_h + QPEL_EXTRA,
1550                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1551                                  pic_width, pic_height);
1552         src0 = lc->edge_emu_buffer + buf_offset;
1553         src0stride = edge_emu_stride;
1554     }
1555
1556     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1557         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1558         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1559         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1560         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1561         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1562
1563         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1564                                  edge_emu_stride, src1stride,
1565                                  block_w + QPEL_EXTRA,
1566                                  block_h + QPEL_EXTRA,
1567                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1568                                  pic_width, pic_height);
1569         src1 = lc->edge_emu_buffer2 + buf_offset;
1570         src1stride = edge_emu_stride;
1571     }
1572
1573     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1574                                                 block_h, mx0, my0, block_w);
1575     if (!weight_flag)
1576         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1577                                                        block_h, mx1, my1, block_w);
1578     else
1579         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1580                                                          block_h, s->sh.luma_log2_weight_denom,
1581                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1582                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1583                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1584                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1585                                                          mx1, my1, block_w);
1586
1587 }
1588
1589 /**
1590  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1591  *
1592  * @param s HEVC decoding context
1593  * @param dst1 target buffer for block data at block position (U plane)
1594  * @param dst2 target buffer for block data at block position (V plane)
1595  * @param dststride stride of the dst1 and dst2 buffers
1596  * @param ref reference picture buffer at origin (0, 0)
1597  * @param mv motion vector (relative to block position) to get pixel data from
1598  * @param x_off horizontal position of block from origin (0, 0)
1599  * @param y_off vertical position of block from origin (0, 0)
1600  * @param block_w width of block
1601  * @param block_h height of block
1602  * @param chroma_weight weighting factor applied to the chroma prediction
1603  * @param chroma_offset additive offset applied to the chroma prediction value
1604  */
1605
1606 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1607                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1608                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1609 {
1610     HEVCLocalContext *lc = s->HEVClc;
1611     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1612     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1613     const Mv *mv         = &current_mv->mv[reflist];
1614     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1615                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1616     int idx              = ff_hevc_pel_weight[block_w];
1617     int hshift           = s->ps.sps->hshift[1];
1618     int vshift           = s->ps.sps->vshift[1];
1619     intptr_t mx          = av_mod_uintp2(mv->x, 2 + hshift);
1620     intptr_t my          = av_mod_uintp2(mv->y, 2 + vshift);
1621     intptr_t _mx         = mx << (1 - hshift);
1622     intptr_t _my         = my << (1 - vshift);
1623
1624     x_off += mv->x >> (2 + hshift);
1625     y_off += mv->y >> (2 + vshift);
1626     src0  += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1627
1628     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1629         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1630         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1631         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1632         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1633         int buf_offset0 = EPEL_EXTRA_BEFORE *
1634                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1635         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1636                                  edge_emu_stride, srcstride,
1637                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1638                                  x_off - EPEL_EXTRA_BEFORE,
1639                                  y_off - EPEL_EXTRA_BEFORE,
1640                                  pic_width, pic_height);
1641
1642         src0 = lc->edge_emu_buffer + buf_offset0;
1643         srcstride = edge_emu_stride;
1644     }
1645     if (!weight_flag)
1646         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1647                                                   block_h, _mx, _my, block_w);
1648     else
1649         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1650                                                         block_h, s->sh.chroma_log2_weight_denom,
1651                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1652 }
1653
1654 /**
1655  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1656  *
1657  * @param s HEVC decoding context
1658  * @param dst target buffer for block data at block position
1659  * @param dststride stride of the dst buffer
1660  * @param ref0 reference picture0 buffer at origin (0, 0)
1661  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1662  * @param x_off horizontal position of block from origin (0, 0)
1663  * @param y_off vertical position of block from origin (0, 0)
1664  * @param block_w width of block
1665  * @param block_h height of block
1666  * @param ref1 reference picture1 buffer at origin (0, 0)
1667  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1668  * @param current_mv current motion vector structure
1669  * @param cidx chroma component(cb, cr)
1670  */
1671 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1672                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1673 {
1674     HEVCLocalContext *lc = s->HEVClc;
1675     uint8_t *src1        = ref0->data[cidx+1];
1676     uint8_t *src2        = ref1->data[cidx+1];
1677     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1678     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1679     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1680                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1681     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1682     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1683     Mv *mv0              = &current_mv->mv[0];
1684     Mv *mv1              = &current_mv->mv[1];
1685     int hshift = s->ps.sps->hshift[1];
1686     int vshift = s->ps.sps->vshift[1];
1687
1688     intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1689     intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1690     intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1691     intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1692     intptr_t _mx0 = mx0 << (1 - hshift);
1693     intptr_t _my0 = my0 << (1 - vshift);
1694     intptr_t _mx1 = mx1 << (1 - hshift);
1695     intptr_t _my1 = my1 << (1 - vshift);
1696
1697     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1698     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1699     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1700     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1701     int idx = ff_hevc_pel_weight[block_w];
1702     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1703     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1704
1705     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1706         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1707         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1708         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1709         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1710         int buf_offset1 = EPEL_EXTRA_BEFORE *
1711                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1712
1713         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1714                                  edge_emu_stride, src1stride,
1715                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1716                                  x_off0 - EPEL_EXTRA_BEFORE,
1717                                  y_off0 - EPEL_EXTRA_BEFORE,
1718                                  pic_width, pic_height);
1719
1720         src1 = lc->edge_emu_buffer + buf_offset1;
1721         src1stride = edge_emu_stride;
1722     }
1723
1724     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1725         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1726         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1727         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1728         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1729         int buf_offset1 = EPEL_EXTRA_BEFORE *
1730                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1731
1732         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1733                                  edge_emu_stride, src2stride,
1734                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1735                                  x_off1 - EPEL_EXTRA_BEFORE,
1736                                  y_off1 - EPEL_EXTRA_BEFORE,
1737                                  pic_width, pic_height);
1738
1739         src2 = lc->edge_emu_buffer2 + buf_offset1;
1740         src2stride = edge_emu_stride;
1741     }
1742
1743     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1744                                                 block_h, _mx0, _my0, block_w);
1745     if (!weight_flag)
1746         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1747                                                        src2, src2stride, lc->tmp,
1748                                                        block_h, _mx1, _my1, block_w);
1749     else
1750         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1751                                                          src2, src2stride, lc->tmp,
1752                                                          block_h,
1753                                                          s->sh.chroma_log2_weight_denom,
1754                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1755                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1756                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1757                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1758                                                          _mx1, _my1, block_w);
1759 }
1760
1761 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1762                                 const Mv *mv, int y0, int height)
1763 {
1764     if (s->threads_type == FF_THREAD_FRAME ) {
1765         int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1766
1767         ff_thread_await_progress(&ref->tf, y, 0);
1768     }
1769 }
1770
1771 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1772                                   int nPbH, int log2_cb_size, int part_idx,
1773                                   int merge_idx, MvField *mv)
1774 {
1775     HEVCLocalContext *lc = s->HEVClc;
1776     enum InterPredIdc inter_pred_idc = PRED_L0;
1777     int mvp_flag;
1778
1779     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1780     mv->pred_flag = 0;
1781     if (s->sh.slice_type == HEVC_SLICE_B)
1782         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1783
1784     if (inter_pred_idc != PRED_L1) {
1785         if (s->sh.nb_refs[L0])
1786             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1787
1788         mv->pred_flag = PF_L0;
1789         ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1790         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1791         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1792                                  part_idx, merge_idx, mv, mvp_flag, 0);
1793         mv->mv[0].x += lc->pu.mvd.x;
1794         mv->mv[0].y += lc->pu.mvd.y;
1795     }
1796
1797     if (inter_pred_idc != PRED_L0) {
1798         if (s->sh.nb_refs[L1])
1799             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1800
1801         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1802             AV_ZERO32(&lc->pu.mvd);
1803         } else {
1804             ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1805         }
1806
1807         mv->pred_flag += PF_L1;
1808         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1809         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1810                                  part_idx, merge_idx, mv, mvp_flag, 1);
1811         mv->mv[1].x += lc->pu.mvd.x;
1812         mv->mv[1].y += lc->pu.mvd.y;
1813     }
1814 }
1815
1816 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1817                                 int nPbW, int nPbH,
1818                                 int log2_cb_size, int partIdx, int idx)
1819 {
1820 #define POS(c_idx, x, y)                                                              \
1821     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1822                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1823     HEVCLocalContext *lc = s->HEVClc;
1824     int merge_idx = 0;
1825     struct MvField current_mv = {{{ 0 }}};
1826
1827     int min_pu_width = s->ps.sps->min_pu_width;
1828
1829     MvField *tab_mvf = s->ref->tab_mvf;
1830     RefPicList  *refPicList = s->ref->refPicList;
1831     HEVCFrame *ref0 = NULL, *ref1 = NULL;
1832     uint8_t *dst0 = POS(0, x0, y0);
1833     uint8_t *dst1 = POS(1, x0, y0);
1834     uint8_t *dst2 = POS(2, x0, y0);
1835     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1836     int min_cb_width     = s->ps.sps->min_cb_width;
1837     int x_cb             = x0 >> log2_min_cb_size;
1838     int y_cb             = y0 >> log2_min_cb_size;
1839     int x_pu, y_pu;
1840     int i, j;
1841
1842     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1843
1844     if (!skip_flag)
1845         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1846
1847     if (skip_flag || lc->pu.merge_flag) {
1848         if (s->sh.max_num_merge_cand > 1)
1849             merge_idx = ff_hevc_merge_idx_decode(s);
1850         else
1851             merge_idx = 0;
1852
1853         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1854                                    partIdx, merge_idx, &current_mv);
1855     } else {
1856         hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1857                               partIdx, merge_idx, &current_mv);
1858     }
1859
1860     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1861     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1862
1863     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1864         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1865             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1866
1867     if (current_mv.pred_flag & PF_L0) {
1868         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1869         if (!ref0)
1870             return;
1871         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1872     }
1873     if (current_mv.pred_flag & PF_L1) {
1874         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1875         if (!ref1)
1876             return;
1877         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1878     }
1879
1880     if (current_mv.pred_flag == PF_L0) {
1881         int x0_c = x0 >> s->ps.sps->hshift[1];
1882         int y0_c = y0 >> s->ps.sps->vshift[1];
1883         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1884         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1885
1886         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1887                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1888                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1889                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1890
1891         if (s->ps.sps->chroma_format_idc) {
1892             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1893                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1894                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1895             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1896                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1897                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1898         }
1899     } else if (current_mv.pred_flag == PF_L1) {
1900         int x0_c = x0 >> s->ps.sps->hshift[1];
1901         int y0_c = y0 >> s->ps.sps->vshift[1];
1902         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1903         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1904
1905         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1906                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1907                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1908                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1909
1910         if (s->ps.sps->chroma_format_idc) {
1911             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1912                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1913                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1914
1915             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1916                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1917                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1918         }
1919     } else if (current_mv.pred_flag == PF_BI) {
1920         int x0_c = x0 >> s->ps.sps->hshift[1];
1921         int y0_c = y0 >> s->ps.sps->vshift[1];
1922         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1923         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1924
1925         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1926                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1927                    ref1->frame, &current_mv.mv[1], &current_mv);
1928
1929         if (s->ps.sps->chroma_format_idc) {
1930             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1931                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1932
1933             chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1934                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1935         }
1936     }
1937 }
1938
1939 /**
1940  * 8.4.1
1941  */
1942 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1943                                 int prev_intra_luma_pred_flag)
1944 {
1945     HEVCLocalContext *lc = s->HEVClc;
1946     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1947     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1948     int min_pu_width     = s->ps.sps->min_pu_width;
1949     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1950     int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1951     int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
1952
1953     int cand_up   = (lc->ctb_up_flag || y0b) ?
1954                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1955     int cand_left = (lc->ctb_left_flag || x0b) ?
1956                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1957
1958     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1959
1960     MvField *tab_mvf = s->ref->tab_mvf;
1961     int intra_pred_mode;
1962     int candidate[3];
1963     int i, j;
1964
1965     // intra_pred_mode prediction does not cross vertical CTB boundaries
1966     if ((y0 - 1) < y_ctb)
1967         cand_up = INTRA_DC;
1968
1969     if (cand_left == cand_up) {
1970         if (cand_left < 2) {
1971             candidate[0] = INTRA_PLANAR;
1972             candidate[1] = INTRA_DC;
1973             candidate[2] = INTRA_ANGULAR_26;
1974         } else {
1975             candidate[0] = cand_left;
1976             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1977             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1978         }
1979     } else {
1980         candidate[0] = cand_left;
1981         candidate[1] = cand_up;
1982         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1983             candidate[2] = INTRA_PLANAR;
1984         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1985             candidate[2] = INTRA_DC;
1986         } else {
1987             candidate[2] = INTRA_ANGULAR_26;
1988         }
1989     }
1990
1991     if (prev_intra_luma_pred_flag) {
1992         intra_pred_mode = candidate[lc->pu.mpm_idx];
1993     } else {
1994         if (candidate[0] > candidate[1])
1995             FFSWAP(uint8_t, candidate[0], candidate[1]);
1996         if (candidate[0] > candidate[2])
1997             FFSWAP(uint8_t, candidate[0], candidate[2]);
1998         if (candidate[1] > candidate[2])
1999             FFSWAP(uint8_t, candidate[1], candidate[2]);
2000
2001         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
2002         for (i = 0; i < 3; i++)
2003             if (intra_pred_mode >= candidate[i])
2004                 intra_pred_mode++;
2005     }
2006
2007     /* write the intra prediction units into the mv array */
2008     if (!size_in_pus)
2009         size_in_pus = 1;
2010     for (i = 0; i < size_in_pus; i++) {
2011         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
2012                intra_pred_mode, size_in_pus);
2013
2014         for (j = 0; j < size_in_pus; j++) {
2015             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
2016         }
2017     }
2018
2019     return intra_pred_mode;
2020 }
2021
2022 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
2023                                           int log2_cb_size, int ct_depth)
2024 {
2025     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
2026     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
2027     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
2028     int y;
2029
2030     for (y = 0; y < length; y++)
2031         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
2032                ct_depth, length);
2033 }
2034
/* Mode remapping table with 35 entries, one per index 0..34.
 * NOTE(review): presumably the intra mode remap applied to chroma in the
 * chroma_format_idc == 2 case — confirm against its use. */
static const uint8_t tab_mode_idx[] = {
    /*  0.. 6 */  0,  1,  2,  2,  2,  2,  3,
    /*  7..13 */  5,  7,  8, 10, 12, 13, 15,
    /* 14..20 */ 17, 18, 19, 20, 21, 22, 23,
    /* 21..27 */ 23, 24, 24, 25, 25, 26, 27,
    /* 28..34 */ 27, 28, 28, 29, 29, 30, 31,
};
2038
2039 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
2040                                   int log2_cb_size)
2041 {
2042     HEVCLocalContext *lc = s->HEVClc;
2043     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
2044     uint8_t prev_intra_luma_pred_flag[4];
2045     int split   = lc->cu.part_mode == PART_NxN;
2046     int pb_size = (1 << log2_cb_size) >> split;
2047     int side    = split + 1;
2048     int chroma_mode;
2049     int i, j;
2050
2051     for (i = 0; i < side; i++)
2052         for (j = 0; j < side; j++)
2053             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2054
2055     for (i = 0; i < side; i++) {
2056         for (j = 0; j < side; j++) {
2057             if (prev_intra_luma_pred_flag[2 * i + j])
2058                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2059             else
2060                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2061
2062             lc->pu.intra_pred_mode[2 * i + j] =
2063                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2064                                      prev_intra_luma_pred_flag[2 * i + j]);
2065         }
2066     }
2067
2068     if (s->ps.sps->chroma_format_idc == 3) {
2069         for (i = 0; i < side; i++) {
2070             for (j = 0; j < side; j++) {
2071                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2072                 if (chroma_mode != 4) {
2073                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
2074                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
2075                     else
2076                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
2077                 } else {
2078                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
2079                 }
2080             }
2081         }
2082     } else if (s->ps.sps->chroma_format_idc == 2) {
2083         int mode_idx;
2084         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2085         if (chroma_mode != 4) {
2086             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2087                 mode_idx = 34;
2088             else
2089                 mode_idx = intra_chroma_table[chroma_mode];
2090         } else {
2091             mode_idx = lc->pu.intra_pred_mode[0];
2092         }
2093         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
2094     } else if (s->ps.sps->chroma_format_idc != 0) {
2095         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2096         if (chroma_mode != 4) {
2097             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2098                 lc->pu.intra_pred_mode_c[0] = 34;
2099             else
2100                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
2101         } else {
2102             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
2103         }
2104     }
2105 }
2106
2107 static void intra_prediction_unit_default_value(HEVCContext *s,
2108                                                 int x0, int y0,
2109                                                 int log2_cb_size)
2110 {
2111     HEVCLocalContext *lc = s->HEVClc;
2112     int pb_size          = 1 << log2_cb_size;
2113     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2114     int min_pu_width     = s->ps.sps->min_pu_width;
2115     MvField *tab_mvf     = s->ref->tab_mvf;
2116     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2117     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2118     int j, k;
2119
2120     if (size_in_pus == 0)
2121         size_in_pus = 1;
2122     for (j = 0; j < size_in_pus; j++)
2123         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2124     if (lc->cu.pred_mode == MODE_INTRA)
2125         for (j = 0; j < size_in_pus; j++)
2126             for (k = 0; k < size_in_pus; k++)
2127                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
2128 }
2129
2130 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2131 {
2132     int cb_size          = 1 << log2_cb_size;
2133     HEVCLocalContext *lc = s->HEVClc;
2134     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2135     int length           = cb_size >> log2_min_cb_size;
2136     int min_cb_width     = s->ps.sps->min_cb_width;
2137     int x_cb             = x0 >> log2_min_cb_size;
2138     int y_cb             = y0 >> log2_min_cb_size;
2139     int idx              = log2_cb_size - 2;
2140     int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2141     int x, y, ret;
2142
2143     lc->cu.x                = x0;
2144     lc->cu.y                = y0;
2145     lc->cu.pred_mode        = MODE_INTRA;
2146     lc->cu.part_mode        = PART_2Nx2N;
2147     lc->cu.intra_split_flag = 0;
2148
2149     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2150     for (x = 0; x < 4; x++)
2151         lc->pu.intra_pred_mode[x] = 1;
2152     if (s->ps.pps->transquant_bypass_enable_flag) {
2153         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2154         if (lc->cu.cu_transquant_bypass_flag)
2155             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2156     } else
2157         lc->cu.cu_transquant_bypass_flag = 0;
2158
2159     if (s->sh.slice_type != HEVC_SLICE_I) {
2160         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2161
2162         x = y_cb * min_cb_width + x_cb;
2163         for (y = 0; y < length; y++) {
2164             memset(&s->skip_flag[x], skip_flag, length);
2165             x += min_cb_width;
2166         }
2167         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2168     } else {
2169         x = y_cb * min_cb_width + x_cb;
2170         for (y = 0; y < length; y++) {
2171             memset(&s->skip_flag[x], 0, length);
2172             x += min_cb_width;
2173         }
2174     }
2175
2176     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2177         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2178         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2179
2180         if (!s->sh.disable_deblocking_filter_flag)
2181             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2182     } else {
2183         int pcm_flag = 0;
2184
2185         if (s->sh.slice_type != HEVC_SLICE_I)
2186             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2187         if (lc->cu.pred_mode != MODE_INTRA ||
2188             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2189             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2190             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2191                                       lc->cu.pred_mode == MODE_INTRA;
2192         }
2193
2194         if (lc->cu.pred_mode == MODE_INTRA) {
2195             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2196                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2197                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2198                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2199             }
2200             if (pcm_flag) {
2201                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2202                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2203                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2204                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2205
2206                 if (ret < 0)
2207                     return ret;
2208             } else {
2209                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2210             }
2211         } else {
2212             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2213             switch (lc->cu.part_mode) {
2214             case PART_2Nx2N:
2215                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2216                 break;
2217             case PART_2NxN:
2218                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2219                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2220                 break;
2221             case PART_Nx2N:
2222                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2223                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2224                 break;
2225             case PART_2NxnU:
2226                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2227                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2228                 break;
2229             case PART_2NxnD:
2230                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2231                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2232                 break;
2233             case PART_nLx2N:
2234                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2235                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2236                 break;
2237             case PART_nRx2N:
2238                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2239                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2240                 break;
2241             case PART_NxN:
2242                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2243                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2244                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2245                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2246                 break;
2247             }
2248         }
2249
2250         if (!pcm_flag) {
2251             int rqt_root_cbf = 1;
2252
2253             if (lc->cu.pred_mode != MODE_INTRA &&
2254                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2255                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2256             }
2257             if (rqt_root_cbf) {
2258                 const static int cbf[2] = { 0 };
2259                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2260                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2261                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2262                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2263                                          log2_cb_size,
2264                                          log2_cb_size, 0, 0, cbf, cbf);
2265                 if (ret < 0)
2266                     return ret;
2267             } else {
2268                 if (!s->sh.disable_deblocking_filter_flag)
2269                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2270             }
2271         }
2272     }
2273
2274     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2275         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2276
2277     x = y_cb * min_cb_width + x_cb;
2278     for (y = 0; y < length; y++) {
2279         memset(&s->qp_y_tab[x], lc->qp_y, length);
2280         x += min_cb_width;
2281     }
2282
2283     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2284        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2285         lc->qPy_pred = lc->qp_y;
2286     }
2287
2288     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
2289
2290     return 0;
2291 }
2292
2293 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2294                                int log2_cb_size, int cb_depth)
2295 {
2296     HEVCLocalContext *lc = s->HEVClc;
2297     const int cb_size    = 1 << log2_cb_size;
2298     int ret;
2299     int split_cu;
2300
2301     lc->ct_depth = cb_depth;
2302     if (x0 + cb_size <= s->ps.sps->width  &&
2303         y0 + cb_size <= s->ps.sps->height &&
2304         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2305         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2306     } else {
2307         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2308     }
2309     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2310         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2311         lc->tu.is_cu_qp_delta_coded = 0;
2312         lc->tu.cu_qp_delta          = 0;
2313     }
2314
2315     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2316         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2317         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2318     }
2319
2320     if (split_cu) {
2321         int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2322         const int cb_size_split = cb_size >> 1;
2323         const int x1 = x0 + cb_size_split;
2324         const int y1 = y0 + cb_size_split;
2325
2326         int more_data = 0;
2327
2328         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2329         if (more_data < 0)
2330             return more_data;
2331
2332         if (more_data && x1 < s->ps.sps->width) {
2333             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2334             if (more_data < 0)
2335                 return more_data;
2336         }
2337         if (more_data && y1 < s->ps.sps->height) {
2338             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2339             if (more_data < 0)
2340                 return more_data;
2341         }
2342         if (more_data && x1 < s->ps.sps->width &&
2343             y1 < s->ps.sps->height) {
2344             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2345             if (more_data < 0)
2346                 return more_data;
2347         }
2348
2349         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2350             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2351             lc->qPy_pred = lc->qp_y;
2352
2353         if (more_data)
2354             return ((x1 + cb_size_split) < s->ps.sps->width ||
2355                     (y1 + cb_size_split) < s->ps.sps->height);
2356         else
2357             return 0;
2358     } else {
2359         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2360         if (ret < 0)
2361             return ret;
2362         if ((!((x0 + cb_size) %
2363                (1 << (s->ps.sps->log2_ctb_size))) ||
2364              (x0 + cb_size >= s->ps.sps->width)) &&
2365             (!((y0 + cb_size) %
2366                (1 << (s->ps.sps->log2_ctb_size))) ||
2367              (y0 + cb_size >= s->ps.sps->height))) {
2368             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2369             return !end_of_slice_flag;
2370         } else {
2371             return 1;
2372         }
2373     }
2374
2375     return 0;
2376 }
2377
2378 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2379                                  int ctb_addr_ts)
2380 {
2381     HEVCLocalContext *lc  = s->HEVClc;
2382     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2383     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2384     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2385
2386     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2387
2388     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2389         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2390             lc->first_qp_group = 1;
2391         lc->end_of_tiles_x = s->ps.sps->width;
2392     } else if (s->ps.pps->tiles_enabled_flag) {
2393         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2394             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2395             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2396             lc->first_qp_group   = 1;
2397         }
2398     } else {
2399         lc->end_of_tiles_x = s->ps.sps->width;
2400     }
2401
2402     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2403
2404     lc->boundary_flags = 0;
2405     if (s->ps.pps->tiles_enabled_flag) {
2406         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2407             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2408         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2409             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2410         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2411             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2412         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2413             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2414     } else {
2415         if (ctb_addr_in_slice <= 0)
2416             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2417         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2418             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2419     }
2420
2421     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2422     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2423     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2424     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2425 }
2426
2427 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2428 {
2429     HEVCContext *s  = avctxt->priv_data;
2430     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2431     int more_data   = 1;
2432     int x_ctb       = 0;
2433     int y_ctb       = 0;
2434     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2435     int ret;
2436
2437     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2438         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2439         return AVERROR_INVALIDDATA;
2440     }
2441
2442     if (s->sh.dependent_slice_segment_flag) {
2443         int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2444         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2445             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2446             return AVERROR_INVALIDDATA;
2447         }
2448     }
2449
2450     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2451         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2452
2453         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2454         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2455         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2456
2457         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2458         if (ret < 0) {
2459             s->tab_slice_address[ctb_addr_rs] = -1;
2460             return ret;
2461         }
2462
2463         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2464
2465         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2466         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2467         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2468
2469         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2470         if (more_data < 0) {
2471             s->tab_slice_address[ctb_addr_rs] = -1;
2472             return more_data;
2473         }
2474
2475
2476         ctb_addr_ts++;
2477         ff_hevc_save_states(s, ctb_addr_ts);
2478         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2479     }
2480
2481     if (x_ctb + ctb_size >= s->ps.sps->width &&
2482         y_ctb + ctb_size >= s->ps.sps->height)
2483         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2484
2485     return ctb_addr_ts;
2486 }
2487
2488 static int hls_slice_data(HEVCContext *s)
2489 {
2490     int arg[2];
2491     int ret[2];
2492
2493     arg[0] = 0;
2494     arg[1] = 1;
2495
2496     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2497     return ret[0];
2498 }
2499 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2500 {
2501     HEVCContext *s1  = avctxt->priv_data, *s;
2502     HEVCLocalContext *lc;
2503     int ctb_size    = 1<< s1->ps.sps->log2_ctb_size;
2504     int more_data   = 1;
2505     int *ctb_row_p    = input_ctb_row;
2506     int ctb_row = ctb_row_p[job];
2507     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2508     int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2509     int thread = ctb_row % s1->threads_number;
2510     int ret;
2511
2512     s = s1->sList[self_id];
2513     lc = s->HEVClc;
2514
2515     if(ctb_row) {
2516         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2517         if (ret < 0)
2518             goto error;
2519         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2520     }
2521
2522     while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2523         int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2524         int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2525
2526         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2527
2528         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2529
2530         if (atomic_load(&s1->wpp_err)) {
2531             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2532             return 0;
2533         }
2534
2535         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2536         if (ret < 0)
2537             goto error;
2538         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2539         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2540
2541         if (more_data < 0) {
2542             ret = more_data;
2543             goto error;
2544         }
2545
2546         ctb_addr_ts++;
2547
2548         ff_hevc_save_states(s, ctb_addr_ts);
2549         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2550         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2551
2552         if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2553             atomic_store(&s1->wpp_err, 1);
2554             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2555             return 0;
2556         }
2557
2558         if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2559             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2560             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2561             return ctb_addr_ts;
2562         }
2563         ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2564         x_ctb+=ctb_size;
2565
2566         if(x_ctb >= s->ps.sps->width) {
2567             break;
2568         }
2569     }
2570     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2571
2572     return 0;
2573 error:
2574     s->tab_slice_address[ctb_addr_rs] = -1;
2575     atomic_store(&s1->wpp_err, 1);
2576     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2577     return ret;
2578 }
2579
2580 static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
2581 {
2582     const uint8_t *data = nal->data;
2583     int length          = nal->size;
2584     HEVCLocalContext *lc = s->HEVClc;
2585     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2586     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2587     int64_t offset;
2588     int64_t startheader, cmpt = 0;
2589     int i, j, res = 0;
2590
2591     if (!ret || !arg) {
2592         av_free(ret);
2593         av_free(arg);
2594         return AVERROR(ENOMEM);
2595     }
2596
2597     if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
2598         av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
2599             s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
2600             s->ps.sps->ctb_width, s->ps.sps->ctb_height
2601         );
2602         res = AVERROR_INVALIDDATA;
2603         goto error;
2604     }
2605
2606     ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2607
2608     if (!s->sList[1]) {
2609         for (i = 1; i < s->threads_number; i++) {
2610             s->sList[i] = av_malloc(sizeof(HEVCContext));
2611             memcpy(s->sList[i], s, sizeof(HEVCContext));
2612             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2613             s->sList[i]->HEVClc = s->HEVClcList[i];
2614         }
2615     }
2616
2617     offset = (lc->gb.index >> 3);
2618
2619     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2620         if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2621             startheader--;
2622             cmpt++;
2623         }
2624     }
2625
2626     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2627         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2628         for (j = 0, cmpt = 0, startheader = offset
2629              + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2630             if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2631                 startheader--;
2632                 cmpt++;
2633             }
2634         }
2635         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2636         s->sh.offset[i - 1] = offset;
2637
2638     }
2639     if (s->sh.num_entry_point_offsets != 0) {
2640         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2641         if (length < offset) {
2642             av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
2643             res = AVERROR_INVALIDDATA;
2644             goto error;
2645         }
2646         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2647         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2648
2649     }
2650     s->data = data;
2651
2652     for (i = 1; i < s->threads_number; i++) {
2653         s->sList[i]->HEVClc->first_qp_group = 1;
2654         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2655         memcpy(s->sList[i], s, sizeof(HEVCContext));
2656         s->sList[i]->HEVClc = s->HEVClcList[i];
2657     }
2658
2659     atomic_store(&s->wpp_err, 0);
2660     ff_reset_entries(s->avctx);
2661
2662     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2663         arg[i] = i;
2664         ret[i] = 0;
2665     }
2666
2667     if (s->ps.pps->entropy_coding_sync_enabled_flag)
2668         s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2669
2670     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2671         res += ret[i];
2672 error:
2673     av_free(ret);
2674     av_free(arg);
2675     return res;
2676 }
2677
2678 static int set_side_data(HEVCContext *s)
2679 {
2680     AVFrame *out = s->ref->frame;
2681
2682     if (s->sei.frame_packing.present &&
2683         s->sei.frame_packing.arrangement_type >= 3 &&
2684         s->sei.frame_packing.arrangement_type <= 5 &&
2685         s->sei.frame_packing.content_interpretation_type > 0 &&
2686         s->sei.frame_packing.content_interpretation_type < 3) {
2687         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2688         if (!stereo)
2689             return AVERROR(ENOMEM);
2690
2691         switch (s->sei.frame_packing.arrangement_type) {
2692         case 3:
2693             if (s->sei.frame_packing.quincunx_subsampling)
2694                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2695             else
2696                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2697             break;
2698         case 4:
2699             stereo->type = AV_STEREO3D_TOPBOTTOM;
2700             break;
2701         case 5:
2702             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2703             break;
2704         }
2705
2706         if (s->sei.frame_packing.content_interpretation_type == 2)
2707             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2708
2709         if (s->sei.frame_packing.arrangement_type == 5) {
2710             if (s->sei.frame_packing.current_frame_is_frame0_flag)
2711                 stereo->view = AV_STEREO3D_VIEW_LEFT;
2712             else
2713                 stereo->view = AV_STEREO3D_VIEW_RIGHT;
2714         }
2715     }
2716
2717     if (s->sei.display_orientation.present &&
2718         (s->sei.display_orientation.anticlockwise_rotation ||
2719          s->sei.display_orientation.hflip || s->sei.display_orientation.vflip)) {
2720         double angle = s->sei.display_orientation.anticlockwise_rotation * 360 / (double) (1 << 16);
2721         AVFrameSideData *rotation = av_frame_new_side_data(out,
2722                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2723                                                            sizeof(int32_t) * 9);
2724         if (!rotation)
2725             return AVERROR(ENOMEM);
2726
2727         av_display_rotation_set((int32_t *)rotation->data, angle);
2728         av_display_matrix_flip((int32_t *)rotation->data,
2729                                s->sei.display_orientation.hflip,
2730                                s->sei.display_orientation.vflip);
2731     }
2732
2733     // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2734     // so the side data persists for the entire coded video sequence.
2735     if (s->sei.mastering_display.present > 0 &&
2736         IS_IRAP(s) && s->no_rasl_output_flag) {
2737         s->sei.mastering_display.present--;
2738     }
2739     if (s->sei.mastering_display.present) {
2740         // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
2741         const int mapping[3] = {2, 0, 1};
2742         const int chroma_den = 50000;
2743         const int luma_den = 10000;
2744         int i;
2745         AVMasteringDisplayMetadata *metadata =
2746             av_mastering_display_metadata_create_side_data(out);
2747         if (!metadata)
2748             return AVERROR(ENOMEM);
2749
2750         for (i = 0; i < 3; i++) {
2751             const int j = mapping[i];
2752             metadata->display_primaries[i][0].num = s->sei.mastering_display.display_primaries[j][0];
2753             metadata->display_primaries[i][0].den = chroma_den;
2754             metadata->display_primaries[i][1].num = s->sei.mastering_display.display_primaries[j][1];
2755             metadata->display_primaries[i][1].den = chroma_den;
2756         }
2757         metadata->white_point[0].num = s->sei.mastering_display.white_point[0];
2758         metadata->white_point[0].den = chroma_den;
2759         metadata->white_point[1].num = s->sei.mastering_display.white_point[1];
2760         metadata->white_point[1].den = chroma_den;
2761
2762         metadata->max_luminance.num = s->sei.mastering_display.max_luminance;
2763         metadata->max_luminance.den = luma_den;
2764         metadata->min_luminance.num = s->sei.mastering_display.min_luminance;
2765         metadata->min_luminance.den = luma_den;
2766         metadata->has_luminance = 1;
2767         metadata->has_primaries = 1;
2768
2769         av_log(s->avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
2770         av_log(s->avctx, AV_LOG_DEBUG,
2771                "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
2772                av_q2d(metadata->display_primaries[0][0]),
2773                av_q2d(metadata->display_primaries[0][1]),
2774                av_q2d(metadata->display_primaries[1][0]),
2775                av_q2d(metadata->display_primaries[1][1]),
2776                av_q2d(metadata->display_primaries[2][0]),
2777                av_q2d(metadata->display_primaries[2][1]),
2778                av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
2779         av_log(s->avctx, AV_LOG_DEBUG,
2780                "min_luminance=%f, max_luminance=%f\n",
2781                av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
2782     }
2783     // Decrement the content light flag when IRAP frame has no_rasl_output_flag=1
2784     // so the side data persists for the entire coded video sequence.
2785     if (s->sei.content_light.present > 0 &&
2786         IS_IRAP(s) && s->no_rasl_output_flag) {
2787         s->sei.content_light.present--;
2788     }
2789     if (s->sei.content_light.present) {
2790         AVContentLightMetadata *metadata =
2791             av_content_light_metadata_create_side_data(out);
2792         if (!metadata)
2793             return AVERROR(ENOMEM);
2794         metadata->MaxCLL  = s->sei.content_light.max_content_light_level;
2795         metadata->MaxFALL = s->sei.content_light.max_pic_average_light_level;
2796
2797         av_log(s->avctx, AV_LOG_DEBUG, "Content Light Level Metadata:\n");
2798         av_log(s->avctx, AV_LOG_DEBUG, "MaxCLL=%d, MaxFALL=%d\n",
2799                metadata->MaxCLL, metadata->MaxFALL);
2800     }
2801
2802     if (s->sei.a53_caption.buf_ref) {
2803         HEVCSEIA53Caption *a53 = &s->sei.a53_caption;
2804
2805         AVFrameSideData *sd = av_frame_new_side_data_from_buf(out, AV_FRAME_DATA_A53_CC, a53->buf_ref);
2806         if (!sd)
2807             av_buffer_unref(&a53->buf_ref);
2808         a53->buf_ref = NULL;
2809
2810         s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
2811     }
2812
2813     for (int i = 0; i < s->sei.unregistered.nb_buf_ref; i++) {
2814         HEVCSEIUnregistered *unreg = &s->sei.unregistered;
2815
2816         if (unreg->buf_ref[i]) {
2817             AVFrameSideData *sd = av_frame_new_side_data_from_buf(out,
2818                     AV_FRAME_DATA_SEI_UNREGISTERED,
2819                     unreg->buf_ref[i]);
2820             if (!sd)
2821                 av_buffer_unref(&unreg->buf_ref[i]);
2822             unreg->buf_ref[i] = NULL;
2823         }
2824     }
2825     s->sei.unregistered.nb_buf_ref = 0;
2826
2827     if (s->sei.timecode.present) {
2828         uint32_t *tc_sd;
2829         char tcbuf[AV_TIMECODE_STR_SIZE];
2830         AVFrameSideData *tcside = av_frame_new_side_data(out, AV_FRAME_DATA_S12M_TIMECODE,
2831                                                          sizeof(uint32_t) * 4);
2832         if (!tcside)
2833             return AVERROR(ENOMEM);
2834
2835         tc_sd = (uint32_t*)tcside->data;
2836         tc_sd[0] = s->sei.timecode.num_clock_ts;
2837
2838         for (int i = 0; i < tc_sd[0]; i++) {
2839             int drop = s->sei.timecode.cnt_dropped_flag[i];
2840             int   hh = s->sei.timecode.hours_value[i];
2841             int   mm = s->sei.timecode.minutes_value[i];
2842             int   ss = s->sei.timecode.seconds_value[i];
2843             int   ff = s->sei.timecode.n_frames[i];
2844
2845             tc_sd[i + 1] = av_timecode_get_smpte(s->avctx->framerate, drop, hh, mm, ss, ff);
2846             av_timecode_make_smpte_tc_string2(tcbuf, s->avctx->framerate, tc_sd[i + 1], 0, 0);
2847             av_dict_set(&out->metadata, "timecode", tcbuf, 0);
2848         }
2849
2850         s->sei.timecode.num_clock_ts = 0;
2851     }
2852
2853     return 0;
2854 }
2855
2856 static int hevc_frame_start(HEVCContext *s)
2857 {
2858     HEVCLocalContext *lc = s->HEVClc;
2859     int pic_size_in_ctb  = ((s->ps.sps->width  >> s->ps.sps->log2_min_cb_size) + 1) *
2860                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
2861     int ret;
2862
2863     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2864     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2865     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2866     memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2867     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2868
2869     s->is_decoded        = 0;
2870     s->first_nal_type    = s->nal_unit_type;
2871
2872     s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos);
2873
2874     if (s->ps.pps->tiles_enabled_flag)
2875         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2876
2877     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2878     if (ret < 0)
2879         goto fail;
2880
2881     ret = ff_hevc_frame_rps(s);
2882     if (ret < 0) {
2883         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2884         goto fail;
2885     }
2886
2887     s->ref->frame->key_frame = IS_IRAP(s);
2888
2889     ret = set_side_data(s);
2890     if (ret < 0)
2891         goto fail;
2892
2893     s->frame->pict_type = 3 - s->sh.slice_type;
2894
2895     if (!IS_IRAP(s))
2896         ff_hevc_bump_frame(s);
2897
2898     av_frame_unref(s->output_frame);
2899     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2900     if (ret < 0)
2901         goto fail;
2902
2903     if (!s->avctx->hwaccel)
2904         ff_thread_finish_setup(s->avctx);
2905
2906     return 0;
2907
2908 fail:
2909     if (s->ref)
2910         ff_hevc_unref_frame(s, s->ref, ~0);
2911     s->ref = NULL;
2912     return ret;
2913 }
2914
2915 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2916 {
2917     HEVCLocalContext *lc = s->HEVClc;
2918     GetBitContext *gb    = &lc->gb;
2919     int ctb_addr_ts, ret;
2920
2921     *gb              = nal->gb;
2922     s->nal_unit_type = nal->type;
2923     s->temporal_id   = nal->temporal_id;
2924
2925     switch (s->nal_unit_type) {
2926     case HEVC_NAL_VPS:
2927         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2928             ret = s->avctx->hwaccel->decode_params(s->avctx,
2929                                                    nal->type,
2930                                                    nal->raw_data,
2931                                                    nal->raw_size);
2932             if (ret < 0)
2933                 goto fail;
2934         }
2935         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2936         if (ret < 0)
2937             goto fail;
2938         break;
2939     case HEVC_NAL_SPS:
2940         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2941             ret = s->avctx->hwaccel->decode_params(s->avctx,
2942                                                    nal->type,
2943                                                    nal->raw_data,
2944                                                    nal->raw_size);
2945             if (ret < 0)
2946                 goto fail;
2947         }
2948         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2949                                      s->apply_defdispwin);
2950         if (ret < 0)
2951             goto fail;
2952         break;
2953     case HEVC_NAL_PPS:
2954         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2955             ret = s->avctx->hwaccel->decode_params(s->avctx,
2956                                                    nal->type,
2957                                                    nal->raw_data,
2958                                                    nal->raw_size);
2959             if (ret < 0)
2960                 goto fail;
2961         }
2962         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2963         if (ret < 0)
2964             goto fail;
2965         break;
2966     case HEVC_NAL_SEI_PREFIX:
2967     case HEVC_NAL_SEI_SUFFIX:
2968         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2969             ret = s->avctx->hwaccel->decode_params(s->avctx,
2970                                                    nal->type,
2971                                                    nal->raw_data,
2972                                                    nal->raw_size);
2973             if (ret < 0)
2974                 goto fail;
2975         }
2976         ret = ff_hevc_decode_nal_sei(gb, s->avctx, &s->sei, &s->ps, s->nal_unit_type);
2977         if (ret < 0)
2978             goto fail;
2979         break;
2980     case HEVC_NAL_TRAIL_R:
2981     case HEVC_NAL_TRAIL_N:
2982     case HEVC_NAL_TSA_N:
2983     case HEVC_NAL_TSA_R:
2984     case HEVC_NAL_STSA_N:
2985     case HEVC_NAL_STSA_R:
2986     case HEVC_NAL_BLA_W_LP:
2987     case HEVC_NAL_BLA_W_RADL:
2988     case HEVC_NAL_BLA_N_LP:
2989     case HEVC_NAL_IDR_W_RADL:
2990     case HEVC_NAL_IDR_N_LP:
2991     case HEVC_NAL_CRA_NUT:
2992     case HEVC_NAL_RADL_N:
2993     case HEVC_NAL_RADL_R:
2994     case HEVC_NAL_RASL_N:
2995     case HEVC_NAL_RASL_R:
2996         ret = hls_slice_header(s);
2997         if (ret < 0)
2998             return ret;
2999         if (ret == 1) {
3000             ret = AVERROR_INVALIDDATA;
3001             goto fail;
3002         }
3003
3004
3005         if (
3006             (s->avctx->skip_frame >= AVDISCARD_BIDIR && s->sh.slice_type == HEVC_SLICE_B) ||
3007             (s->avctx->skip_frame >= AVDISCARD_NONINTRA && s->sh.slice_type != HEVC_SLICE_I) ||
3008             (s->avctx->skip_frame >= AVDISCARD_NONKEY && !IS_IRAP(s))) {
3009             break;
3010         }
3011
3012         if (s->sh.first_slice_in_pic_flag) {
3013             if (s->max_ra == INT_MAX) {
3014                 if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
3015                     s->max_ra = s->poc;
3016                 } else {
3017                     if (IS_IDR(s))
3018                         s->max_ra = INT_MIN;
3019                 }
3020             }
3021
3022             if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
3023                 s->poc <= s->max_ra) {
3024                 s->is_decoded = 0;
3025                 break;
3026             } else {
3027                 if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
3028                     s->max_ra = INT_MIN;
3029             }
3030
3031             s->overlap ++;
3032             ret = hevc_frame_start(s);
3033             if (ret < 0)
3034                 return ret;
3035         } else if (!s->ref) {
3036             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
3037             goto fail;
3038         }
3039
3040         if (s->nal_unit_type != s->first_nal_type) {
3041             av_log(s->avctx, AV_LOG_ERROR,
3042                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
3043                    s->first_nal_type, s->nal_unit_type);
3044             return AVERROR_INVALIDDATA;
3045         }
3046
3047         if (!s->sh.dependent_slice_segment_flag &&
3048             s->sh.slice_type != HEVC_SLICE_I) {
3049             ret = ff_hevc_slice_rpl(s);
3050             if (ret < 0) {
3051                 av_log(s->avctx, AV_LOG_WARNING,
3052                        "Error constructing the reference lists for the current slice.\n");
3053                 goto fail;
3054             }
3055         }
3056
3057         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
3058             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
3059             if (ret < 0)
3060                 goto fail;
3061         }
3062
3063         if (s->avctx->hwaccel) {
3064             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
3065             if (ret < 0)
3066                 goto fail;
3067         } else {
3068             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
3069                 ctb_addr_ts = hls_slice_data_wpp(s, nal);
3070             else
3071                 ctb_addr_ts = hls_slice_data(s);
3072             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
3073                 s->is_decoded = 1;
3074             }
3075
3076             if (ctb_addr_ts < 0) {
3077                 ret = ctb_addr_ts;
3078                 goto fail;
3079             }
3080         }
3081         break;
3082     case HEVC_NAL_EOS_NUT:
3083     case HEVC_NAL_EOB_NUT:
3084         s->seq_decode = (s->seq_decode + 1) & 0xff;
3085         s->max_ra     = INT_MAX;
3086         break;
3087     case HEVC_NAL_AUD:
3088     case HEVC_NAL_FD_NUT:
3089         break;
3090     default:
3091         av_log(s->avctx, AV_LOG_INFO,
3092                "Skipping NAL unit %d\n", s->nal_unit_type);
3093     }
3094
3095     return 0;
3096 fail:
3097     if (s->avctx->err_recognition & AV_EF_EXPLODE)
3098         return ret;
3099     return 0;
3100 }
3101
3102 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
3103 {
3104     int i, ret = 0;
3105     int eos_at_start = 1;
3106
3107     s->ref = NULL;
3108     s->last_eos = s->eos;
3109     s->eos = 0;
3110     s->overlap = 0;
3111
3112     /* split the input packet into NAL units, so we know the upper bound on the
3113      * number of slices in the frame */
3114     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
3115                                 s->nal_length_size, s->avctx->codec_id, 1, 0);
3116     if (ret < 0) {
3117         av_log(s->avctx, AV_LOG_ERROR,
3118                "Error splitting the input into NAL units.\n");
3119         return ret;
3120     }
3121
3122     for (i = 0; i < s->pkt.nb_nals; i++) {
3123         if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
3124             s->pkt.nals[i].type == HEVC_NAL_EOS_NUT) {
3125             if (eos_at_start) {
3126                 s->last_eos = 1;
3127             } else {
3128                 s->eos = 1;
3129             }
3130         } else {
3131             eos_at_start = 0;
3132         }
3133     }
3134
3135     /* decode the NAL units */
3136     for (i = 0; i < s->pkt.nb_nals; i++) {
3137         H2645NAL *nal = &s->pkt.nals[i];
3138
3139         if (s->avctx->skip_frame >= AVDISCARD_ALL ||
3140             (s->avctx->skip_frame >= AVDISCARD_NONREF
3141             && ff_hevc_nal_is_nonref(nal->type)) || nal->nuh_layer_id > 0)
3142             continue;
3143
3144         ret = decode_nal_unit(s, nal);
3145         if (ret >= 0 && s->overlap > 2)
3146             ret = AVERROR_INVALIDDATA;
3147         if (ret < 0) {
3148             av_log(s->avctx, AV_LOG_WARNING,
3149                    "Error parsing NAL unit #%d.\n", i);
3150             goto fail;
3151         }
3152     }
3153
3154 fail:
3155     if (s->ref && s->threads_type == FF_THREAD_FRAME)
3156         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
3157
3158     return ret;
3159 }
3160
/**
 * Log a 16-byte MD5 digest as 32 lowercase hex digits, without a trailing
 * newline.
 *
 * @param log_ctx context passed through to av_log()
 * @param level   log level passed through to av_log()
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *const end = md5 + 16;

    for (const uint8_t *byte = md5; byte < end; byte++)
        av_log(log_ctx, level, "%02"PRIx8, *byte);
}
3167
3168 static int verify_md5(HEVCContext *s, AVFrame *frame)
3169 {
3170     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3171     int pixel_shift;
3172     int i, j;
3173
3174     if (!desc)
3175         return AVERROR(EINVAL);
3176
3177     pixel_shift = desc->comp[0].depth > 8;
3178
3179     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3180            s->poc);
3181
3182     /* the checksums are LE, so we have to byteswap for >8bpp formats
3183      * on BE arches */
3184 #if HAVE_BIGENDIAN
3185     if (pixel_shift && !s->checksum_buf) {
3186         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3187                        FFMAX3(frame->linesize[0], frame->linesize[1],
3188                               frame->linesize[2]));
3189         if (!s->checksum_buf)
3190             return AVERROR(ENOMEM);
3191     }
3192 #endif
3193
3194     for (i = 0; frame->data[i]; i++) {
3195         int width  = s->avctx->coded_width;
3196         int height = s->avctx->coded_height;
3197         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
3198         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3199         uint8_t md5[16];
3200
3201         av_md5_init(s->md5_ctx);
3202         for (j = 0; j < h; j++) {
3203             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3204 #if HAVE_BIGENDIAN
3205             if (pixel_shift) {
3206                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3207                                     (const uint16_t *) src, w);
3208                 src = s->checksum_buf;
3209             }
3210 #endif
3211             av_md5_update(s->md5_ctx, src, w << pixel_shift);
3212         }
3213         av_md5_final(s->md5_ctx, md5);
3214
3215         if (!memcmp(md5, s->sei.picture_hash.md5[i], 16)) {
3216             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3217             print_md5(s->avctx, AV_LOG_DEBUG, md5);
3218             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
3219         } else {
3220             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3221             print_md5(s->avctx, AV_LOG_ERROR, md5);
3222             av_log   (s->avctx, AV_LOG_ERROR, " != ");
3223             print_md5(s->avctx, AV_LOG_ERROR, s->sei.picture_hash.md5[i]);
3224             av_log   (s->avctx, AV_LOG_ERROR, "\n");
3225             return AVERROR_INVALIDDATA;
3226         }
3227     }
3228
3229     av_log(s->avctx, AV_LOG_DEBUG, "\n");
3230
3231     return 0;
3232 }
3233
3234 static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length, int first)
3235 {
3236     int ret, i;
3237
3238     ret = ff_hevc_decode_extradata(buf, length, &s->ps, &s->sei, &s->is_nalff,
3239                                    &s->nal_length_size, s->avctx->err_recognition,
3240                                    s->apply_defdispwin, s->avctx);
3241     if (ret < 0)
3242         return ret;
3243
3244     /* export stream parameters from the first SPS */
3245     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3246         if (first && s->ps.sps_list[i]) {
3247             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3248             export_stream_params(s, sps);
3249             break;
3250         }
3251     }
3252
3253     return 0;
3254 }
3255
3256 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3257                              AVPacket *avpkt)
3258 {
3259     int ret;
3260     int new_extradata_size;
3261     uint8_t *new_extradata;
3262     HEVCContext *s = avctx->priv_data;
3263
3264     if (!avpkt->size) {
3265         ret = ff_hevc_output_frame(s, data, 1);
3266         if (ret < 0)
3267             return ret;
3268
3269         *got_output = ret;
3270         return 0;
3271     }
3272
3273     new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
3274                                             &new_extradata_size);
3275     if (new_extradata && new_extradata_size > 0) {
3276         ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0);
3277         if (ret < 0)
3278             return ret;
3279     }
3280
3281     s->ref = NULL;
3282     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
3283     if (ret < 0)
3284         return ret;
3285
3286     if (avctx->hwaccel) {
3287         if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
3288             av_log(avctx, AV_LOG_ERROR,
3289                    "hardware accelerator failed to decode picture\n");
3290             ff_hevc_unref_frame(s, s->ref, ~0);
3291             return ret;
3292         }
3293     } else {
3294         /* verify the SEI checksum */
3295         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3296             s->sei.picture_hash.is_md5) {
3297             ret = verify_md5(s, s->ref->frame);
3298             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3299                 ff_hevc_unref_frame(s, s->ref, ~0);
3300                 return ret;
3301             }
3302         }
3303     }
3304     s->sei.picture_hash.is_md5 = 0;
3305
3306     if (s->is_decoded) {
3307         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3308         s->is_decoded = 0;
3309     }
3310
3311     if (s->output_frame->buf[0]) {
3312         av_frame_move_ref(data, s->output_frame);
3313         *got_output = 1;
3314     }
3315
3316     return avpkt->size;
3317 }
3318
3319 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3320 {
3321     int ret;
3322
3323     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3324     if (ret < 0)
3325         return ret;
3326
3327     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3328     if (!dst->tab_mvf_buf)
3329         goto fail;
3330     dst->tab_mvf = src->tab_mvf;
3331
3332     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3333     if (!dst->rpl_tab_buf)
3334         goto fail;
3335     dst->rpl_tab = src->rpl_tab;
3336
3337     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3338     if (!dst->rpl_buf)
3339         goto fail;
3340
3341     dst->poc        = src->poc;
3342     dst->ctb_count  = src->ctb_count;
3343     dst->flags      = src->flags;
3344     dst->sequence   = src->sequence;
3345
3346     if (src->hwaccel_picture_private) {
3347         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3348         if (!dst->hwaccel_priv_buf)
3349             goto fail;
3350         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
3351     }
3352
3353     return 0;
3354 fail:
3355     ff_hevc_unref_frame(s, dst, ~0);
3356     return AVERROR(ENOMEM);
3357 }
3358
3359 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3360 {
3361     HEVCContext       *s = avctx->priv_data;
3362     int i;
3363
3364     pic_arrays_free(s);
3365
3366     av_freep(&s->md5_ctx);
3367
3368     av_freep(&s->cabac_state);
3369
3370     for (i = 0; i < 3; i++) {
3371         av_freep(&s->sao_pixel_buffer_h[i]);
3372         av_freep(&s->sao_pixel_buffer_v[i]);
3373     }
3374     av_frame_free(&s->output_frame);
3375
3376     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3377         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3378         av_frame_free(&s->DPB[i].frame);
3379     }
3380
3381     ff_hevc_ps_uninit(&s->ps);
3382
3383     av_freep(&s->sh.entry_point_offset);
3384     av_freep(&s->sh.offset);
3385     av_freep(&s->sh.size);
3386
3387     for (i = 1; i < s->threads_number; i++) {
3388         HEVCLocalContext *lc = s->HEVClcList[i];
3389         if (lc) {
3390             av_freep(&s->HEVClcList[i]);
3391             av_freep(&s->sList[i]);
3392         }
3393     }
3394     if (s->HEVClc == s->HEVClcList[0])
3395         s->HEVClc = NULL;
3396     av_freep(&s->HEVClcList[0]);
3397
3398     ff_h2645_packet_uninit(&s->pkt);
3399
3400     ff_hevc_reset_sei(&s->sei);
3401
3402     return 0;
3403 }
3404
3405 static av_cold int hevc_init_context(AVCodecContext *avctx)
3406 {
3407     HEVCContext *s = avctx->priv_data;
3408     int i;
3409
3410     s->avctx = avctx;
3411
3412     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3413     if (!s->HEVClc)
3414         goto fail;
3415     s->HEVClcList[0] = s->HEVClc;
3416     s->sList[0] = s;
3417
3418     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3419     if (!s->cabac_state)
3420         goto fail;
3421
3422     s->output_frame = av_frame_alloc();
3423     if (!s->output_frame)
3424         goto fail;
3425
3426     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3427         s->DPB[i].frame = av_frame_alloc();
3428         if (!s->DPB[i].frame)
3429             goto fail;
3430         s->DPB[i].tf.f = s->DPB[i].frame;
3431     }
3432
3433     s->max_ra = INT_MAX;
3434
3435     s->md5_ctx = av_md5_alloc();
3436     if (!s->md5_ctx)
3437         goto fail;
3438
3439     ff_bswapdsp_init(&s->bdsp);
3440
3441     s->context_initialized = 1;
3442     s->eos = 0;
3443
3444     ff_hevc_reset_sei(&s->sei);
3445
3446     return 0;
3447
3448 fail:
3449     hevc_decode_free(avctx);
3450     return AVERROR(ENOMEM);
3451 }
3452
#if HAVE_THREADS
/**
 * Drop every reference held in dst_list and replace it with a new reference
 * to the corresponding entry of src_list (or leave it NULL if the source
 * entry is NULL).
 *
 * @return 0 on success, AVERROR(ENOMEM) if a reference cannot be created
 */
static int hevc_sync_buf_list(AVBufferRef **dst_list, AVBufferRef **src_list,
                              int count)
{
    for (int idx = 0; idx < count; idx++) {
        av_buffer_unref(&dst_list[idx]);
        if (!src_list[idx])
            continue;

        dst_list[idx] = av_buffer_ref(src_list[idx]);
        if (!dst_list[idx])
            return AVERROR(ENOMEM);
    }

    return 0;
}

/**
 * Frame-threading callback: bring the decoder state of dst in line with src.
 *
 * Copies DPB references, parameter sets, sequence counters, threading
 * configuration and the SEI state that is carried across frames.
 *
 * @return 0 on success, a negative error code on failure
 */
static int hevc_update_thread_context(AVCodecContext *dst,
                                      const AVCodecContext *src)
{
    HEVCContext *s  = dst->priv_data;
    HEVCContext *s0 = src->priv_data;
    int i, ret;

    if (!s->context_initialized && (ret = hevc_init_context(dst)) < 0)
        return ret;

    /* mirror the source DPB */
    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
        ff_hevc_unref_frame(s, &s->DPB[i], ~0);
        if (!s0->DPB[i].frame->buf[0])
            continue;

        ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
        if (ret < 0)
            return ret;
    }

    /* the active SPS pointer is about to become stale if it differs */
    if (s->ps.sps != s0->ps.sps)
        s->ps.sps = NULL;

    ret = hevc_sync_buf_list(s->ps.vps_list, s0->ps.vps_list,
                             FF_ARRAY_ELEMS(s->ps.vps_list));
    if (ret < 0)
        return ret;

    ret = hevc_sync_buf_list(s->ps.sps_list, s0->ps.sps_list,
                             FF_ARRAY_ELEMS(s->ps.sps_list));
    if (ret < 0)
        return ret;

    ret = hevc_sync_buf_list(s->ps.pps_list, s0->ps.pps_list,
                             FF_ARRAY_ELEMS(s->ps.pps_list));
    if (ret < 0)
        return ret;

    if (s->ps.sps != s0->ps.sps) {
        ret = set_sps(s, s0->ps.sps, src->pix_fmt);
        if (ret < 0)
            return ret;
    }

    s->seq_decode          = s0->seq_decode;
    s->seq_output          = s0->seq_output;
    s->pocTid0             = s0->pocTid0;
    s->max_ra              = s0->max_ra;
    s->eos                 = s0->eos;
    s->no_rasl_output_flag = s0->no_rasl_output_flag;

    s->is_nalff            = s0->is_nalff;
    s->nal_length_size     = s0->nal_length_size;

    s->threads_number      = s0->threads_number;
    s->threads_type        = s0->threads_type;

    /* same bookkeeping decode_nal_unit() performs for an EOS/EOB NAL unit */
    if (s0->eos) {
        s->seq_decode = (s->seq_decode + 1) & 0xff;
        s->max_ra     = INT_MAX;
    }

    av_buffer_unref(&s->sei.a53_caption.buf_ref);
    if (s0->sei.a53_caption.buf_ref) {
        s->sei.a53_caption.buf_ref = av_buffer_ref(s0->sei.a53_caption.buf_ref);
        if (!s->sei.a53_caption.buf_ref)
            return AVERROR(ENOMEM);
    }

    s->sei.frame_packing        = s0->sei.frame_packing;
    s->sei.display_orientation  = s0->sei.display_orientation;
    s->sei.mastering_display    = s0->sei.mastering_display;
    s->sei.content_light        = s0->sei.content_light;
    s->sei.alternative_transfer = s0->sei.alternative_transfer;

    return 0;
}
#endif
3543
3544 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3545 {
3546     HEVCContext *s = avctx->priv_data;
3547     int ret;
3548
3549     ret = hevc_init_context(avctx);
3550     if (ret < 0)
3551         return ret;
3552
3553     s->enable_parallel_tiles = 0;
3554     s->sei.picture_timing.picture_struct = 0;
3555     s->eos = 1;
3556
3557     atomic_init(&s->wpp_err, 0);
3558
3559     if(avctx->active_thread_type & FF_THREAD_SLICE)
3560         s->threads_number = avctx->thread_count;
3561     else
3562         s->threads_number = 1;
3563
3564     if (!avctx->internal->is_copy) {
3565         if (avctx->extradata_size > 0 && avctx->extradata) {
3566             ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size, 1);
3567             if (ret < 0) {
3568                 hevc_decode_free(avctx);
3569                 return ret;
3570             }
3571         }
3572     }
3573
3574     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3575             s->threads_type = FF_THREAD_FRAME;
3576         else
3577             s->threads_type = FF_THREAD_SLICE;
3578
3579     return 0;
3580 }
3581
3582 static void hevc_decode_flush(AVCodecContext *avctx)
3583 {
3584     HEVCContext *s = avctx->priv_data;
3585     ff_hevc_flush_dpb(s);
3586     ff_hevc_reset_sei(&s->sei);
3587     s->max_ra = INT_MAX;
3588     s->eos = 1;
3589 }
3590
3591 #define OFFSET(x) offsetof(HEVCContext, x)
3592 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3593
3594 static const AVOption options[] = {
3595     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3596         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3597     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3598         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3599     { NULL },
3600 };
3601
3602 static const AVClass hevc_decoder_class = {
3603     .class_name = "HEVC decoder",
3604     .item_name  = av_default_item_name,
3605     .option     = options,
3606     .version    = LIBAVUTIL_VERSION_INT,
3607 };
3608
3609 AVCodec ff_hevc_decoder = {
3610     .name                  = "hevc",
3611     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3612     .type                  = AVMEDIA_TYPE_VIDEO,
3613     .id                    = AV_CODEC_ID_HEVC,
3614     .priv_data_size        = sizeof(HEVCContext),
3615     .priv_class            = &hevc_decoder_class,
3616     .init                  = hevc_decode_init,
3617     .close                 = hevc_decode_free,
3618     .decode                = hevc_decode_frame,
3619     .flush                 = hevc_decode_flush,
3620     .update_thread_context = ONLY_IF_THREADS_ENABLED(hevc_update_thread_context),
3621     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3622                              AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3623     .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING |
3624                              FF_CODEC_CAP_ALLOCATE_PROGRESS,
3625     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3626     .hw_configs            = (const AVCodecHWConfigInternal*[]) {
3627 #if CONFIG_HEVC_DXVA2_HWACCEL
3628                                HWACCEL_DXVA2(hevc),
3629 #endif
3630 #if CONFIG_HEVC_D3D11VA_HWACCEL
3631                                HWACCEL_D3D11VA(hevc),
3632 #endif
3633 #if CONFIG_HEVC_D3D11VA2_HWACCEL
3634                                HWACCEL_D3D11VA2(hevc),
3635 #endif
3636 #if CONFIG_HEVC_NVDEC_HWACCEL
3637                                HWACCEL_NVDEC(hevc),
3638 #endif
3639 #if CONFIG_HEVC_VAAPI_HWACCEL
3640                                HWACCEL_VAAPI(hevc),
3641 #endif
3642 #if CONFIG_HEVC_VDPAU_HWACCEL
3643                                HWACCEL_VDPAU(hevc),
3644 #endif
3645 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
3646                                HWACCEL_VIDEOTOOLBOX(hevc),
3647 #endif
3648                                NULL
3649                            },
3650 };