]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevcdec.c
Merge commit '156ea66c91b1986a87916f187216978d686725f6'
[ffmpeg] / libavcodec / hevcdec.c
1 /*
2  * HEVC video Decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/mastering_display_metadata.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
35
36 #include "bswapdsp.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
39 #include "golomb.h"
40 #include "hevc.h"
41 #include "hevc_data.h"
42 #include "hevc_parse.h"
43 #include "hevcdec.h"
44 #include "hwaccel.h"
45 #include "profiles.h"
46
/*
 * Lookup table with 65 entries: indices 2, 4, 6, 8, 12, 16, 24, 32, 48 and 64
 * map to 0..9 in that order; every index not listed is implicitly zero.
 * NOTE(review): the listed indices look like prediction block widths --
 * confirm against the users of this table.
 */
const uint8_t ff_hevc_pel_weight[65] = {
    [2]  = 0,
    [4]  = 1,
    [6]  = 2,
    [8]  = 3,
    [12] = 4,
    [16] = 5,
    [24] = 6,
    [32] = 7,
    [48] = 8,
    [64] = 9,
};
48
49 /**
50  * NOTE: Each function hls_foo corresponds to the function foo in the
51  * specification (HLS stands for High Level Syntax).
52  */
53
54 /**
55  * Section 5.7
56  */
57
58 /* free everything allocated  by pic_arrays_init() */
59 static void pic_arrays_free(HEVCContext *s)
60 {
61     av_freep(&s->sao);
62     av_freep(&s->deblock);
63
64     av_freep(&s->skip_flag);
65     av_freep(&s->tab_ct_depth);
66
67     av_freep(&s->tab_ipm);
68     av_freep(&s->cbf_luma);
69     av_freep(&s->is_pcm);
70
71     av_freep(&s->qp_y_tab);
72     av_freep(&s->tab_slice_address);
73     av_freep(&s->filter_slice_edges);
74
75     av_freep(&s->horizontal_bs);
76     av_freep(&s->vertical_bs);
77
78     av_freep(&s->sh.entry_point_offset);
79     av_freep(&s->sh.size);
80     av_freep(&s->sh.offset);
81
82     av_buffer_pool_uninit(&s->tab_mvf_pool);
83     av_buffer_pool_uninit(&s->rpl_tab_pool);
84 }
85
86 /* allocate arrays that depend on frame dimensions */
87 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
88 {
89     int log2_min_cb_size = sps->log2_min_cb_size;
90     int width            = sps->width;
91     int height           = sps->height;
92     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
93                            ((height >> log2_min_cb_size) + 1);
94     int ctb_count        = sps->ctb_width * sps->ctb_height;
95     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
96
97     s->bs_width  = (width  >> 2) + 1;
98     s->bs_height = (height >> 2) + 1;
99
100     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
101     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
102     if (!s->sao || !s->deblock)
103         goto fail;
104
105     s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
106     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
107     if (!s->skip_flag || !s->tab_ct_depth)
108         goto fail;
109
110     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
111     s->tab_ipm  = av_mallocz(min_pu_size);
112     s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
113     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
114         goto fail;
115
116     s->filter_slice_edges = av_mallocz(ctb_count);
117     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
118                                       sizeof(*s->tab_slice_address));
119     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
120                                       sizeof(*s->qp_y_tab));
121     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
122         goto fail;
123
124     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
125     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
126     if (!s->horizontal_bs || !s->vertical_bs)
127         goto fail;
128
129     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
130                                           av_buffer_allocz);
131     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
132                                           av_buffer_allocz);
133     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
134         goto fail;
135
136     return 0;
137
138 fail:
139     pic_arrays_free(s);
140     return AVERROR(ENOMEM);
141 }
142
143 static int pred_weight_table(HEVCContext *s, GetBitContext *gb)
144 {
145     int i = 0;
146     int j = 0;
147     uint8_t luma_weight_l0_flag[16];
148     uint8_t chroma_weight_l0_flag[16];
149     uint8_t luma_weight_l1_flag[16];
150     uint8_t chroma_weight_l1_flag[16];
151     int luma_log2_weight_denom;
152
153     luma_log2_weight_denom = get_ue_golomb_long(gb);
154     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7) {
155         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
156         return AVERROR_INVALIDDATA;
157     }
158     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
159     if (s->ps.sps->chroma_format_idc != 0) {
160         int64_t chroma_log2_weight_denom = luma_log2_weight_denom + (int64_t)get_se_golomb(gb);
161         if (chroma_log2_weight_denom < 0 || chroma_log2_weight_denom > 7) {
162             av_log(s->avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %"PRId64" is invalid\n", chroma_log2_weight_denom);
163             return AVERROR_INVALIDDATA;
164         }
165         s->sh.chroma_log2_weight_denom = chroma_log2_weight_denom;
166     }
167
168     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
169         luma_weight_l0_flag[i] = get_bits1(gb);
170         if (!luma_weight_l0_flag[i]) {
171             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
172             s->sh.luma_offset_l0[i] = 0;
173         }
174     }
175     if (s->ps.sps->chroma_format_idc != 0) {
176         for (i = 0; i < s->sh.nb_refs[L0]; i++)
177             chroma_weight_l0_flag[i] = get_bits1(gb);
178     } else {
179         for (i = 0; i < s->sh.nb_refs[L0]; i++)
180             chroma_weight_l0_flag[i] = 0;
181     }
182     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
183         if (luma_weight_l0_flag[i]) {
184             int delta_luma_weight_l0 = get_se_golomb(gb);
185             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
186             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
187         }
188         if (chroma_weight_l0_flag[i]) {
189             for (j = 0; j < 2; j++) {
190                 int delta_chroma_weight_l0 = get_se_golomb(gb);
191                 int delta_chroma_offset_l0 = get_se_golomb(gb);
192
193                 if (   (int8_t)delta_chroma_weight_l0 != delta_chroma_weight_l0
194                     || delta_chroma_offset_l0 < -(1<<17) || delta_chroma_offset_l0 > (1<<17)) {
195                     return AVERROR_INVALIDDATA;
196                 }
197
198                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
199                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
200                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
201             }
202         } else {
203             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
204             s->sh.chroma_offset_l0[i][0] = 0;
205             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
206             s->sh.chroma_offset_l0[i][1] = 0;
207         }
208     }
209     if (s->sh.slice_type == HEVC_SLICE_B) {
210         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
211             luma_weight_l1_flag[i] = get_bits1(gb);
212             if (!luma_weight_l1_flag[i]) {
213                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
214                 s->sh.luma_offset_l1[i] = 0;
215             }
216         }
217         if (s->ps.sps->chroma_format_idc != 0) {
218             for (i = 0; i < s->sh.nb_refs[L1]; i++)
219                 chroma_weight_l1_flag[i] = get_bits1(gb);
220         } else {
221             for (i = 0; i < s->sh.nb_refs[L1]; i++)
222                 chroma_weight_l1_flag[i] = 0;
223         }
224         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
225             if (luma_weight_l1_flag[i]) {
226                 int delta_luma_weight_l1 = get_se_golomb(gb);
227                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
228                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
229             }
230             if (chroma_weight_l1_flag[i]) {
231                 for (j = 0; j < 2; j++) {
232                     int delta_chroma_weight_l1 = get_se_golomb(gb);
233                     int delta_chroma_offset_l1 = get_se_golomb(gb);
234
235                     if (   (int8_t)delta_chroma_weight_l1 != delta_chroma_weight_l1
236                         || delta_chroma_offset_l1 < -(1<<17) || delta_chroma_offset_l1 > (1<<17)) {
237                         return AVERROR_INVALIDDATA;
238                     }
239
240                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
241                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
242                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
243                 }
244             } else {
245                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
246                 s->sh.chroma_offset_l1[i][0] = 0;
247                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
248                 s->sh.chroma_offset_l1[i][1] = 0;
249             }
250         }
251     }
252     return 0;
253 }
254
255 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
256 {
257     const HEVCSPS *sps = s->ps.sps;
258     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
259     int prev_delta_msb = 0;
260     unsigned int nb_sps = 0, nb_sh;
261     int i;
262
263     rps->nb_refs = 0;
264     if (!sps->long_term_ref_pics_present_flag)
265         return 0;
266
267     if (sps->num_long_term_ref_pics_sps > 0)
268         nb_sps = get_ue_golomb_long(gb);
269     nb_sh = get_ue_golomb_long(gb);
270
271     if (nb_sps > sps->num_long_term_ref_pics_sps)
272         return AVERROR_INVALIDDATA;
273     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
274         return AVERROR_INVALIDDATA;
275
276     rps->nb_refs = nb_sh + nb_sps;
277
278     for (i = 0; i < rps->nb_refs; i++) {
279         uint8_t delta_poc_msb_present;
280
281         if (i < nb_sps) {
282             uint8_t lt_idx_sps = 0;
283
284             if (sps->num_long_term_ref_pics_sps > 1)
285                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
286
287             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
288             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
289         } else {
290             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
291             rps->used[i] = get_bits1(gb);
292         }
293
294         delta_poc_msb_present = get_bits1(gb);
295         if (delta_poc_msb_present) {
296             int64_t delta = get_ue_golomb_long(gb);
297             int64_t poc;
298
299             if (i && i != nb_sps)
300                 delta += prev_delta_msb;
301
302             poc = rps->poc[i] + s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
303             if (poc != (int32_t)poc)
304                 return AVERROR_INVALIDDATA;
305             rps->poc[i] = poc;
306             prev_delta_msb = delta;
307         }
308     }
309
310     return 0;
311 }
312
313 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
314                                  const HEVCSPS *sps)
315 {
316     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
317     const HEVCWindow *ow = &sps->output_window;
318     unsigned int num = 0, den = 0;
319
320     avctx->pix_fmt             = sps->pix_fmt;
321     avctx->coded_width         = sps->width;
322     avctx->coded_height        = sps->height;
323     avctx->width               = sps->width  - ow->left_offset - ow->right_offset;
324     avctx->height              = sps->height - ow->top_offset  - ow->bottom_offset;
325     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
326     avctx->profile             = sps->ptl.general_ptl.profile_idc;
327     avctx->level               = sps->ptl.general_ptl.level_idc;
328
329     ff_set_sar(avctx, sps->vui.sar);
330
331     if (sps->vui.video_signal_type_present_flag)
332         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
333                                                             : AVCOL_RANGE_MPEG;
334     else
335         avctx->color_range = AVCOL_RANGE_MPEG;
336
337     if (sps->vui.colour_description_present_flag) {
338         avctx->color_primaries = sps->vui.colour_primaries;
339         avctx->color_trc       = sps->vui.transfer_characteristic;
340         avctx->colorspace      = sps->vui.matrix_coeffs;
341     } else {
342         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
343         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
344         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
345     }
346
347     if (vps->vps_timing_info_present_flag) {
348         num = vps->vps_num_units_in_tick;
349         den = vps->vps_time_scale;
350     } else if (sps->vui.vui_timing_info_present_flag) {
351         num = sps->vui.vui_num_units_in_tick;
352         den = sps->vui.vui_time_scale;
353     }
354
355     if (num != 0 && den != 0)
356         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
357                   num, den, 1 << 30);
358 }
359
360 static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
361 {
362 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
363                      CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
364                      CONFIG_HEVC_NVDEC_HWACCEL + \
365                      CONFIG_HEVC_VAAPI_HWACCEL + \
366                      CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
367                      CONFIG_HEVC_VDPAU_HWACCEL)
368     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
369
370     switch (sps->pix_fmt) {
371     case AV_PIX_FMT_YUV420P:
372     case AV_PIX_FMT_YUVJ420P:
373 #if CONFIG_HEVC_DXVA2_HWACCEL
374         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
375 #endif
376 #if CONFIG_HEVC_D3D11VA_HWACCEL
377         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
378         *fmt++ = AV_PIX_FMT_D3D11;
379 #endif
380 #if CONFIG_HEVC_VAAPI_HWACCEL
381         *fmt++ = AV_PIX_FMT_VAAPI;
382 #endif
383 #if CONFIG_HEVC_VDPAU_HWACCEL
384         *fmt++ = AV_PIX_FMT_VDPAU;
385 #endif
386 #if CONFIG_HEVC_NVDEC_HWACCEL
387         *fmt++ = AV_PIX_FMT_CUDA;
388 #endif
389 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
390         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
391 #endif
392         break;
393     case AV_PIX_FMT_YUV420P10:
394 #if CONFIG_HEVC_DXVA2_HWACCEL
395         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
396 #endif
397 #if CONFIG_HEVC_D3D11VA_HWACCEL
398         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
399         *fmt++ = AV_PIX_FMT_D3D11;
400 #endif
401 #if CONFIG_HEVC_VAAPI_HWACCEL
402         *fmt++ = AV_PIX_FMT_VAAPI;
403 #endif
404 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
405         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
406 #endif
407 #if CONFIG_HEVC_NVDEC_HWACCEL
408         *fmt++ = AV_PIX_FMT_CUDA;
409 #endif
410         break;
411     case AV_PIX_FMT_YUV420P12:
412     case AV_PIX_FMT_YUV444P:
413     case AV_PIX_FMT_YUV444P10:
414     case AV_PIX_FMT_YUV444P12:
415 #if CONFIG_HEVC_NVDEC_HWACCEL
416         *fmt++ = AV_PIX_FMT_CUDA;
417 #endif
418         break;
419     }
420
421     *fmt++ = sps->pix_fmt;
422     *fmt = AV_PIX_FMT_NONE;
423
424     return ff_thread_get_format(s->avctx, pix_fmts);
425 }
426
427 static int set_sps(HEVCContext *s, const HEVCSPS *sps,
428                    enum AVPixelFormat pix_fmt)
429 {
430     int ret, i;
431
432     pic_arrays_free(s);
433     s->ps.sps = NULL;
434     s->ps.vps = NULL;
435
436     if (!sps)
437         return 0;
438
439     ret = pic_arrays_init(s, sps);
440     if (ret < 0)
441         goto fail;
442
443     export_stream_params(s->avctx, &s->ps, sps);
444
445     s->avctx->pix_fmt = pix_fmt;
446
447     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
448     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
449     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
450
451     for (i = 0; i < 3; i++) {
452         av_freep(&s->sao_pixel_buffer_h[i]);
453         av_freep(&s->sao_pixel_buffer_v[i]);
454     }
455
456     if (sps->sao_enabled && !s->avctx->hwaccel) {
457         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
458         int c_idx;
459
460         for(c_idx = 0; c_idx < c_count; c_idx++) {
461             int w = sps->width >> sps->hshift[c_idx];
462             int h = sps->height >> sps->vshift[c_idx];
463             s->sao_pixel_buffer_h[c_idx] =
464                 av_malloc((w * 2 * sps->ctb_height) <<
465                           sps->pixel_shift);
466             s->sao_pixel_buffer_v[c_idx] =
467                 av_malloc((h * 2 * sps->ctb_width) <<
468                           sps->pixel_shift);
469         }
470     }
471
472     s->ps.sps = sps;
473     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
474
475     return 0;
476
477 fail:
478     pic_arrays_free(s);
479     s->ps.sps = NULL;
480     return ret;
481 }
482
483 static int hls_slice_header(HEVCContext *s)
484 {
485     GetBitContext *gb = &s->HEVClc->gb;
486     SliceHeader *sh   = &s->sh;
487     int i, ret;
488
489     // Coded parameters
490     sh->first_slice_in_pic_flag = get_bits1(gb);
491     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
492         s->seq_decode = (s->seq_decode + 1) & 0xff;
493         s->max_ra     = INT_MAX;
494         if (IS_IDR(s))
495             ff_hevc_clear_refs(s);
496     }
497     sh->no_output_of_prior_pics_flag = 0;
498     if (IS_IRAP(s))
499         sh->no_output_of_prior_pics_flag = get_bits1(gb);
500
501     sh->pps_id = get_ue_golomb_long(gb);
502     if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
503         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
504         return AVERROR_INVALIDDATA;
505     }
506     if (!sh->first_slice_in_pic_flag &&
507         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
508         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
509         return AVERROR_INVALIDDATA;
510     }
511     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
512     if (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos == 1)
513         sh->no_output_of_prior_pics_flag = 1;
514
515     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
516         const HEVCSPS *sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
517         const HEVCSPS *last_sps = s->ps.sps;
518         enum AVPixelFormat pix_fmt;
519
520         if (last_sps && IS_IRAP(s) && s->nal_unit_type != HEVC_NAL_CRA_NUT) {
521             if (sps->width != last_sps->width || sps->height != last_sps->height ||
522                 sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering !=
523                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
524                 sh->no_output_of_prior_pics_flag = 0;
525         }
526         ff_hevc_clear_refs(s);
527
528         ret = set_sps(s, sps, sps->pix_fmt);
529         if (ret < 0)
530             return ret;
531
532         pix_fmt = get_format(s, sps);
533         if (pix_fmt < 0)
534             return pix_fmt;
535         s->avctx->pix_fmt = pix_fmt;
536
537         s->seq_decode = (s->seq_decode + 1) & 0xff;
538         s->max_ra     = INT_MAX;
539     }
540
541     sh->dependent_slice_segment_flag = 0;
542     if (!sh->first_slice_in_pic_flag) {
543         int slice_address_length;
544
545         if (s->ps.pps->dependent_slice_segments_enabled_flag)
546             sh->dependent_slice_segment_flag = get_bits1(gb);
547
548         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
549                                             s->ps.sps->ctb_height);
550         sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
551         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
552             av_log(s->avctx, AV_LOG_ERROR,
553                    "Invalid slice segment address: %u.\n",
554                    sh->slice_segment_addr);
555             return AVERROR_INVALIDDATA;
556         }
557
558         if (!sh->dependent_slice_segment_flag) {
559             sh->slice_addr = sh->slice_segment_addr;
560             s->slice_idx++;
561         }
562     } else {
563         sh->slice_segment_addr = sh->slice_addr = 0;
564         s->slice_idx           = 0;
565         s->slice_initialized   = 0;
566     }
567
568     if (!sh->dependent_slice_segment_flag) {
569         s->slice_initialized = 0;
570
571         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
572             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
573
574         sh->slice_type = get_ue_golomb_long(gb);
575         if (!(sh->slice_type == HEVC_SLICE_I ||
576               sh->slice_type == HEVC_SLICE_P ||
577               sh->slice_type == HEVC_SLICE_B)) {
578             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
579                    sh->slice_type);
580             return AVERROR_INVALIDDATA;
581         }
582         if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I) {
583             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
584             return AVERROR_INVALIDDATA;
585         }
586
587         // when flag is not present, picture is inferred to be output
588         sh->pic_output_flag = 1;
589         if (s->ps.pps->output_flag_present_flag)
590             sh->pic_output_flag = get_bits1(gb);
591
592         if (s->ps.sps->separate_colour_plane_flag)
593             sh->colour_plane_id = get_bits(gb, 2);
594
595         if (!IS_IDR(s)) {
596             int poc, pos;
597
598             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
599             poc = ff_hevc_compute_poc(s->ps.sps, s->pocTid0, sh->pic_order_cnt_lsb, s->nal_unit_type);
600             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
601                 av_log(s->avctx, AV_LOG_WARNING,
602                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
603                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
604                     return AVERROR_INVALIDDATA;
605                 poc = s->poc;
606             }
607             s->poc = poc;
608
609             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
610             pos = get_bits_left(gb);
611             if (!sh->short_term_ref_pic_set_sps_flag) {
612                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
613                 if (ret < 0)
614                     return ret;
615
616                 sh->short_term_rps = &sh->slice_rps;
617             } else {
618                 int numbits, rps_idx;
619
620                 if (!s->ps.sps->nb_st_rps) {
621                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
622                     return AVERROR_INVALIDDATA;
623                 }
624
625                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
626                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
627                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
628             }
629             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
630
631             pos = get_bits_left(gb);
632             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
633             if (ret < 0) {
634                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
635                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
636                     return AVERROR_INVALIDDATA;
637             }
638             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
639
640             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
641                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
642             else
643                 sh->slice_temporal_mvp_enabled_flag = 0;
644         } else {
645             s->sh.short_term_rps = NULL;
646             s->poc               = 0;
647         }
648
649         /* 8.3.1 */
650         if (sh->first_slice_in_pic_flag && s->temporal_id == 0 &&
651             s->nal_unit_type != HEVC_NAL_TRAIL_N &&
652             s->nal_unit_type != HEVC_NAL_TSA_N   &&
653             s->nal_unit_type != HEVC_NAL_STSA_N  &&
654             s->nal_unit_type != HEVC_NAL_RADL_N  &&
655             s->nal_unit_type != HEVC_NAL_RADL_R  &&
656             s->nal_unit_type != HEVC_NAL_RASL_N  &&
657             s->nal_unit_type != HEVC_NAL_RASL_R)
658             s->pocTid0 = s->poc;
659
660         if (s->ps.sps->sao_enabled) {
661             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
662             if (s->ps.sps->chroma_format_idc) {
663                 sh->slice_sample_adaptive_offset_flag[1] =
664                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
665             }
666         } else {
667             sh->slice_sample_adaptive_offset_flag[0] = 0;
668             sh->slice_sample_adaptive_offset_flag[1] = 0;
669             sh->slice_sample_adaptive_offset_flag[2] = 0;
670         }
671
672         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
673         if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
674             int nb_refs;
675
676             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
677             if (sh->slice_type == HEVC_SLICE_B)
678                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
679
680             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
681                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
682                 if (sh->slice_type == HEVC_SLICE_B)
683                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
684             }
685             if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
686                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
687                        sh->nb_refs[L0], sh->nb_refs[L1]);
688                 return AVERROR_INVALIDDATA;
689             }
690
691             sh->rpl_modification_flag[0] = 0;
692             sh->rpl_modification_flag[1] = 0;
693             nb_refs = ff_hevc_frame_nb_refs(s);
694             if (!nb_refs) {
695                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
696                 return AVERROR_INVALIDDATA;
697             }
698
699             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
700                 sh->rpl_modification_flag[0] = get_bits1(gb);
701                 if (sh->rpl_modification_flag[0]) {
702                     for (i = 0; i < sh->nb_refs[L0]; i++)
703                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
704                 }
705
706                 if (sh->slice_type == HEVC_SLICE_B) {
707                     sh->rpl_modification_flag[1] = get_bits1(gb);
708                     if (sh->rpl_modification_flag[1] == 1)
709                         for (i = 0; i < sh->nb_refs[L1]; i++)
710                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
711                 }
712             }
713
714             if (sh->slice_type == HEVC_SLICE_B)
715                 sh->mvd_l1_zero_flag = get_bits1(gb);
716
717             if (s->ps.pps->cabac_init_present_flag)
718                 sh->cabac_init_flag = get_bits1(gb);
719             else
720                 sh->cabac_init_flag = 0;
721
722             sh->collocated_ref_idx = 0;
723             if (sh->slice_temporal_mvp_enabled_flag) {
724                 sh->collocated_list = L0;
725                 if (sh->slice_type == HEVC_SLICE_B)
726                     sh->collocated_list = !get_bits1(gb);
727
728                 if (sh->nb_refs[sh->collocated_list] > 1) {
729                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
730                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
731                         av_log(s->avctx, AV_LOG_ERROR,
732                                "Invalid collocated_ref_idx: %d.\n",
733                                sh->collocated_ref_idx);
734                         return AVERROR_INVALIDDATA;
735                     }
736                 }
737             }
738
739             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == HEVC_SLICE_P) ||
740                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) {
741                 int ret = pred_weight_table(s, gb);
742                 if (ret < 0)
743                     return ret;
744             }
745
746             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
747             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
748                 av_log(s->avctx, AV_LOG_ERROR,
749                        "Invalid number of merging MVP candidates: %d.\n",
750                        sh->max_num_merge_cand);
751                 return AVERROR_INVALIDDATA;
752             }
753         }
754
755         sh->slice_qp_delta = get_se_golomb(gb);
756
757         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
758             sh->slice_cb_qp_offset = get_se_golomb(gb);
759             sh->slice_cr_qp_offset = get_se_golomb(gb);
760         } else {
761             sh->slice_cb_qp_offset = 0;
762             sh->slice_cr_qp_offset = 0;
763         }
764
765         if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
766             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
767         else
768             sh->cu_chroma_qp_offset_enabled_flag = 0;
769
770         if (s->ps.pps->deblocking_filter_control_present_flag) {
771             int deblocking_filter_override_flag = 0;
772
773             if (s->ps.pps->deblocking_filter_override_enabled_flag)
774                 deblocking_filter_override_flag = get_bits1(gb);
775
776             if (deblocking_filter_override_flag) {
777                 sh->disable_deblocking_filter_flag = get_bits1(gb);
778                 if (!sh->disable_deblocking_filter_flag) {
779                     int beta_offset_div2 = get_se_golomb(gb);
780                     int tc_offset_div2   = get_se_golomb(gb) ;
781                     if (beta_offset_div2 < -6 || beta_offset_div2 > 6 ||
782                         tc_offset_div2   < -6 || tc_offset_div2   > 6) {
783                         av_log(s->avctx, AV_LOG_ERROR,
784                             "Invalid deblock filter offsets: %d, %d\n",
785                             beta_offset_div2, tc_offset_div2);
786                         return AVERROR_INVALIDDATA;
787                     }
788                     sh->beta_offset = beta_offset_div2 * 2;
789                     sh->tc_offset   =   tc_offset_div2 * 2;
790                 }
791             } else {
792                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
793                 sh->beta_offset                    = s->ps.pps->beta_offset;
794                 sh->tc_offset                      = s->ps.pps->tc_offset;
795             }
796         } else {
797             sh->disable_deblocking_filter_flag = 0;
798             sh->beta_offset                    = 0;
799             sh->tc_offset                      = 0;
800         }
801
802         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
803             (sh->slice_sample_adaptive_offset_flag[0] ||
804              sh->slice_sample_adaptive_offset_flag[1] ||
805              !sh->disable_deblocking_filter_flag)) {
806             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
807         } else {
808             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
809         }
810     } else if (!s->slice_initialized) {
811         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
812         return AVERROR_INVALIDDATA;
813     }
814
815     sh->num_entry_point_offsets = 0;
816     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
817         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
818         // It would be possible to bound this tighter but this here is simpler
819         if (num_entry_point_offsets > get_bits_left(gb)) {
820             av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
821             return AVERROR_INVALIDDATA;
822         }
823
824         sh->num_entry_point_offsets = num_entry_point_offsets;
825         if (sh->num_entry_point_offsets > 0) {
826             int offset_len = get_ue_golomb_long(gb) + 1;
827
828             if (offset_len < 1 || offset_len > 32) {
829                 sh->num_entry_point_offsets = 0;
830                 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
831                 return AVERROR_INVALIDDATA;
832             }
833
834             av_freep(&sh->entry_point_offset);
835             av_freep(&sh->offset);
836             av_freep(&sh->size);
837             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned));
838             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
839             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
840             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
841                 sh->num_entry_point_offsets = 0;
842                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
843                 return AVERROR(ENOMEM);
844             }
845             for (i = 0; i < sh->num_entry_point_offsets; i++) {
846                 unsigned val = get_bits_long(gb, offset_len);
847                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
848             }
849             if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
850                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
851                 s->threads_number = 1;
852             } else
853                 s->enable_parallel_tiles = 0;
854         } else
855             s->enable_parallel_tiles = 0;
856     }
857
858     if (s->ps.pps->slice_header_extension_present_flag) {
859         unsigned int length = get_ue_golomb_long(gb);
860         if (length*8LL > get_bits_left(gb)) {
861             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
862             return AVERROR_INVALIDDATA;
863         }
864         for (i = 0; i < length; i++)
865             skip_bits(gb, 8);  // slice_header_extension_data_byte
866     }
867
868     // Inferred parameters
869     sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
870     if (sh->slice_qp > 51 ||
871         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
872         av_log(s->avctx, AV_LOG_ERROR,
873                "The slice_qp %d is outside the valid range "
874                "[%d, 51].\n",
875                sh->slice_qp,
876                -s->ps.sps->qp_bd_offset);
877         return AVERROR_INVALIDDATA;
878     }
879
880     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
881
882     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
883         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
884         return AVERROR_INVALIDDATA;
885     }
886
887     if (get_bits_left(gb) < 0) {
888         av_log(s->avctx, AV_LOG_ERROR,
889                "Overread slice header by %d bits\n", -get_bits_left(gb));
890         return AVERROR_INVALIDDATA;
891     }
892
893     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
894
895     if (!s->ps.pps->cu_qp_delta_enabled_flag)
896         s->HEVClc->qp_y = s->sh.slice_qp;
897
898     s->slice_initialized = 1;
899     s->HEVClc->tu.cu_qp_offset_cb = 0;
900     s->HEVClc->tu.cu_qp_offset_cr = 0;
901
902     return 0;
903 }
904
905 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
906
907 #define SET_SAO(elem, value)                            \
908 do {                                                    \
909     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
910         sao->elem = value;                              \
911     else if (sao_merge_left_flag)                       \
912         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
913     else if (sao_merge_up_flag)                         \
914         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
915     else                                                \
916         sao->elem = 0;                                  \
917 } while (0)
918
919 static void hls_sao_param(HEVCContext *s, int rx, int ry)
920 {
921     HEVCLocalContext *lc    = s->HEVClc;
922     int sao_merge_left_flag = 0;
923     int sao_merge_up_flag   = 0;
924     SAOParams *sao          = &CTB(s->sao, rx, ry);
925     int c_idx, i;
926
927     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
928         s->sh.slice_sample_adaptive_offset_flag[1]) {
929         if (rx > 0) {
930             if (lc->ctb_left_flag)
931                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
932         }
933         if (ry > 0 && !sao_merge_left_flag) {
934             if (lc->ctb_up_flag)
935                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
936         }
937     }
938
939     for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
940         int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
941                                                  s->ps.pps->log2_sao_offset_scale_chroma;
942
943         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
944             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
945             continue;
946         }
947
948         if (c_idx == 2) {
949             sao->type_idx[2] = sao->type_idx[1];
950             sao->eo_class[2] = sao->eo_class[1];
951         } else {
952             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
953         }
954
955         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
956             continue;
957
958         for (i = 0; i < 4; i++)
959             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
960
961         if (sao->type_idx[c_idx] == SAO_BAND) {
962             for (i = 0; i < 4; i++) {
963                 if (sao->offset_abs[c_idx][i]) {
964                     SET_SAO(offset_sign[c_idx][i],
965                             ff_hevc_sao_offset_sign_decode(s));
966                 } else {
967                     sao->offset_sign[c_idx][i] = 0;
968                 }
969             }
970             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
971         } else if (c_idx != 2) {
972             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
973         }
974
975         // Inferred parameters
976         sao->offset_val[c_idx][0] = 0;
977         for (i = 0; i < 4; i++) {
978             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
979             if (sao->type_idx[c_idx] == SAO_EDGE) {
980                 if (i > 1)
981                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
982             } else if (sao->offset_sign[c_idx][i]) {
983                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
984             }
985             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
986         }
987     }
988 }
989
990 #undef SET_SAO
991 #undef CTB
992
993 static int hls_cross_component_pred(HEVCContext *s, int idx) {
994     HEVCLocalContext *lc    = s->HEVClc;
995     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
996
997     if (log2_res_scale_abs_plus1 !=  0) {
998         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
999         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
1000                                (1 - 2 * res_scale_sign_flag);
1001     } else {
1002         lc->tu.res_scale_val = 0;
1003     }
1004
1005
1006     return 0;
1007 }
1008
1009 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1010                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1011                               int log2_cb_size, int log2_trafo_size,
1012                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
1013 {
1014     HEVCLocalContext *lc = s->HEVClc;
1015     const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
1016     int i;
1017
1018     if (lc->cu.pred_mode == MODE_INTRA) {
1019         int trafo_size = 1 << log2_trafo_size;
1020         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1021
1022         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1023     }
1024
1025     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
1026         (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1027         int scan_idx   = SCAN_DIAG;
1028         int scan_idx_c = SCAN_DIAG;
1029         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
1030                          (s->ps.sps->chroma_format_idc == 2 &&
1031                          (cbf_cb[1] || cbf_cr[1]));
1032
1033         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1034             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1035             if (lc->tu.cu_qp_delta != 0)
1036                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1037                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1038             lc->tu.is_cu_qp_delta_coded = 1;
1039
1040             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1041                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1042                 av_log(s->avctx, AV_LOG_ERROR,
1043                        "The cu_qp_delta %d is outside the valid range "
1044                        "[%d, %d].\n",
1045                        lc->tu.cu_qp_delta,
1046                        -(26 + s->ps.sps->qp_bd_offset / 2),
1047                         (25 + s->ps.sps->qp_bd_offset / 2));
1048                 return AVERROR_INVALIDDATA;
1049             }
1050
1051             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
1052         }
1053
1054         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
1055             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
1056             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
1057             if (cu_chroma_qp_offset_flag) {
1058                 int cu_chroma_qp_offset_idx  = 0;
1059                 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
1060                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
1061                     av_log(s->avctx, AV_LOG_ERROR,
1062                         "cu_chroma_qp_offset_idx not yet tested.\n");
1063                 }
1064                 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
1065                 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
1066             } else {
1067                 lc->tu.cu_qp_offset_cb = 0;
1068                 lc->tu.cu_qp_offset_cr = 0;
1069             }
1070             lc->tu.is_cu_chroma_qp_offset_coded = 1;
1071         }
1072
1073         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1074             if (lc->tu.intra_pred_mode >= 6 &&
1075                 lc->tu.intra_pred_mode <= 14) {
1076                 scan_idx = SCAN_VERT;
1077             } else if (lc->tu.intra_pred_mode >= 22 &&
1078                        lc->tu.intra_pred_mode <= 30) {
1079                 scan_idx = SCAN_HORIZ;
1080             }
1081
1082             if (lc->tu.intra_pred_mode_c >=  6 &&
1083                 lc->tu.intra_pred_mode_c <= 14) {
1084                 scan_idx_c = SCAN_VERT;
1085             } else if (lc->tu.intra_pred_mode_c >= 22 &&
1086                        lc->tu.intra_pred_mode_c <= 30) {
1087                 scan_idx_c = SCAN_HORIZ;
1088             }
1089         }
1090
1091         lc->tu.cross_pf = 0;
1092
1093         if (cbf_luma)
1094             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1095         if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1096             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1097             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1098             lc->tu.cross_pf  = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1099                                 (lc->cu.pred_mode == MODE_INTER ||
1100                                  (lc->tu.chroma_mode_c ==  4)));
1101
1102             if (lc->tu.cross_pf) {
1103                 hls_cross_component_pred(s, 0);
1104             }
1105             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1106                 if (lc->cu.pred_mode == MODE_INTRA) {
1107                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1108                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1109                 }
1110                 if (cbf_cb[i])
1111                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1112                                                 log2_trafo_size_c, scan_idx_c, 1);
1113                 else
1114                     if (lc->tu.cross_pf) {
1115                         ptrdiff_t stride = s->frame->linesize[1];
1116                         int hshift = s->ps.sps->hshift[1];
1117                         int vshift = s->ps.sps->vshift[1];
1118                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1119                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1120                         int size = 1 << log2_trafo_size_c;
1121
1122                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1123                                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1124                         for (i = 0; i < (size * size); i++) {
1125                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1126                         }
1127                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1128                     }
1129             }
1130
1131             if (lc->tu.cross_pf) {
1132                 hls_cross_component_pred(s, 1);
1133             }
1134             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1135                 if (lc->cu.pred_mode == MODE_INTRA) {
1136                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1137                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1138                 }
1139                 if (cbf_cr[i])
1140                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1141                                                 log2_trafo_size_c, scan_idx_c, 2);
1142                 else
1143                     if (lc->tu.cross_pf) {
1144                         ptrdiff_t stride = s->frame->linesize[2];
1145                         int hshift = s->ps.sps->hshift[2];
1146                         int vshift = s->ps.sps->vshift[2];
1147                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1148                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1149                         int size = 1 << log2_trafo_size_c;
1150
1151                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1152                                                           ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1153                         for (i = 0; i < (size * size); i++) {
1154                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1155                         }
1156                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1157                     }
1158             }
1159         } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1160             int trafo_size_h = 1 << (log2_trafo_size + 1);
1161             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1162             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1163                 if (lc->cu.pred_mode == MODE_INTRA) {
1164                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1165                                                     trafo_size_h, trafo_size_v);
1166                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1167                 }
1168                 if (cbf_cb[i])
1169                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1170                                                 log2_trafo_size, scan_idx_c, 1);
1171             }
1172             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1173                 if (lc->cu.pred_mode == MODE_INTRA) {
1174                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1175                                                 trafo_size_h, trafo_size_v);
1176                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1177                 }
1178                 if (cbf_cr[i])
1179                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1180                                                 log2_trafo_size, scan_idx_c, 2);
1181             }
1182         }
1183     } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1184         if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1185             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1186             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1187             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1188             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1189             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1190             if (s->ps.sps->chroma_format_idc == 2) {
1191                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1192                                                 trafo_size_h, trafo_size_v);
1193                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1194                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1195             }
1196         } else if (blk_idx == 3) {
1197             int trafo_size_h = 1 << (log2_trafo_size + 1);
1198             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1199             ff_hevc_set_neighbour_available(s, xBase, yBase,
1200                                             trafo_size_h, trafo_size_v);
1201             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1202             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1203             if (s->ps.sps->chroma_format_idc == 2) {
1204                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1205                                                 trafo_size_h, trafo_size_v);
1206                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1207                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1208             }
1209         }
1210     }
1211
1212     return 0;
1213 }
1214
1215 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1216 {
1217     int cb_size          = 1 << log2_cb_size;
1218     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1219
1220     int min_pu_width     = s->ps.sps->min_pu_width;
1221     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1222     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1223     int i, j;
1224
1225     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1226         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1227             s->is_pcm[i + j * min_pu_width] = 2;
1228 }
1229
1230 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1231                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1232                               int log2_cb_size, int log2_trafo_size,
1233                               int trafo_depth, int blk_idx,
1234                               const int *base_cbf_cb, const int *base_cbf_cr)
1235 {
1236     HEVCLocalContext *lc = s->HEVClc;
1237     uint8_t split_transform_flag;
1238     int cbf_cb[2];
1239     int cbf_cr[2];
1240     int ret;
1241
1242     cbf_cb[0] = base_cbf_cb[0];
1243     cbf_cb[1] = base_cbf_cb[1];
1244     cbf_cr[0] = base_cbf_cr[0];
1245     cbf_cr[1] = base_cbf_cr[1];
1246
1247     if (lc->cu.intra_split_flag) {
1248         if (trafo_depth == 1) {
1249             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1250             if (s->ps.sps->chroma_format_idc == 3) {
1251                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1252                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1253             } else {
1254                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1255                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1256             }
1257         }
1258     } else {
1259         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1260         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1261         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1262     }
1263
1264     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1265         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1266         trafo_depth     < lc->cu.max_trafo_depth       &&
1267         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1268         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1269     } else {
1270         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1271                           lc->cu.pred_mode == MODE_INTER &&
1272                           lc->cu.part_mode != PART_2Nx2N &&
1273                           trafo_depth == 0;
1274
1275         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1276                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1277                                inter_split;
1278     }
1279
1280     if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1281         if (trafo_depth == 0 || cbf_cb[0]) {
1282             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1283             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1284                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1285             }
1286         }
1287
1288         if (trafo_depth == 0 || cbf_cr[0]) {
1289             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1290             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1291                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1292             }
1293         }
1294     }
1295
1296     if (split_transform_flag) {
1297         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1298         const int x1 = x0 + trafo_size_split;
1299         const int y1 = y0 + trafo_size_split;
1300
1301 #define SUBDIVIDE(x, y, idx)                                                    \
1302 do {                                                                            \
1303     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1304                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1305                              cbf_cb, cbf_cr);                                   \
1306     if (ret < 0)                                                                \
1307         return ret;                                                             \
1308 } while (0)
1309
1310         SUBDIVIDE(x0, y0, 0);
1311         SUBDIVIDE(x1, y0, 1);
1312         SUBDIVIDE(x0, y1, 2);
1313         SUBDIVIDE(x1, y1, 3);
1314
1315 #undef SUBDIVIDE
1316     } else {
1317         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1318         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1319         int min_tu_width     = s->ps.sps->min_tb_width;
1320         int cbf_luma         = 1;
1321
1322         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1323             cbf_cb[0] || cbf_cr[0] ||
1324             (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1325             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1326         }
1327
1328         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1329                                  log2_cb_size, log2_trafo_size,
1330                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1331         if (ret < 0)
1332             return ret;
1333         // TODO: store cbf_luma somewhere else
1334         if (cbf_luma) {
1335             int i, j;
1336             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1337                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1338                     int x_tu = (x0 + j) >> log2_min_tu_size;
1339                     int y_tu = (y0 + i) >> log2_min_tu_size;
1340                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1341                 }
1342         }
1343         if (!s->sh.disable_deblocking_filter_flag) {
1344             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1345             if (s->ps.pps->transquant_bypass_enable_flag &&
1346                 lc->cu.cu_transquant_bypass_flag)
1347                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1348         }
1349     }
1350     return 0;
1351 }
1352
1353 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1354 {
1355     HEVCLocalContext *lc = s->HEVClc;
1356     GetBitContext gb;
1357     int cb_size   = 1 << log2_cb_size;
1358     ptrdiff_t stride0 = s->frame->linesize[0];
1359     ptrdiff_t stride1 = s->frame->linesize[1];
1360     ptrdiff_t stride2 = s->frame->linesize[2];
1361     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1362     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1363     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1364
1365     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1366                          (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1367                           ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1368                           s->ps.sps->pcm.bit_depth_chroma;
1369     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1370     int ret;
1371
1372     if (!s->sh.disable_deblocking_filter_flag)
1373         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1374
1375     ret = init_get_bits(&gb, pcm, length);
1376     if (ret < 0)
1377         return ret;
1378
1379     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1380     if (s->ps.sps->chroma_format_idc) {
1381         s->hevcdsp.put_pcm(dst1, stride1,
1382                            cb_size >> s->ps.sps->hshift[1],
1383                            cb_size >> s->ps.sps->vshift[1],
1384                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1385         s->hevcdsp.put_pcm(dst2, stride2,
1386                            cb_size >> s->ps.sps->hshift[2],
1387                            cb_size >> s->ps.sps->vshift[2],
1388                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1389     }
1390
1391     return 0;
1392 }
1393
1394 /**
1395  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1396  *
1397  * @param s HEVC decoding context
1398  * @param dst target buffer for block data at block position
1399  * @param dststride stride of the dst buffer
1400  * @param ref reference picture buffer at origin (0, 0)
1401  * @param mv motion vector (relative to block position) to get pixel data from
1402  * @param x_off horizontal position of block from origin (0, 0)
1403  * @param y_off vertical position of block from origin (0, 0)
1404  * @param block_w width of block
1405  * @param block_h height of block
1406  * @param luma_weight weighting factor applied to the luma prediction
1407  * @param luma_offset additive offset applied to the luma prediction value
1408  */
1409
1410 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1411                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1412                         int block_w, int block_h, int luma_weight, int luma_offset)
1413 {
1414     HEVCLocalContext *lc = s->HEVClc;
1415     uint8_t *src         = ref->data[0];
1416     ptrdiff_t srcstride  = ref->linesize[0];
1417     int pic_width        = s->ps.sps->width;
1418     int pic_height       = s->ps.sps->height;
1419     int mx               = mv->x & 3;
1420     int my               = mv->y & 3;
1421     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1422                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1423     int idx              = ff_hevc_pel_weight[block_w];
1424
1425     x_off += mv->x >> 2;
1426     y_off += mv->y >> 2;
1427     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1428
1429     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1430         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1431         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1432         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1433         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1434         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1435
1436         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1437                                  edge_emu_stride, srcstride,
1438                                  block_w + QPEL_EXTRA,
1439                                  block_h + QPEL_EXTRA,
1440                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1441                                  pic_width, pic_height);
1442         src = lc->edge_emu_buffer + buf_offset;
1443         srcstride = edge_emu_stride;
1444     }
1445
1446     if (!weight_flag)
1447         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1448                                                       block_h, mx, my, block_w);
1449     else
1450         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1451                                                         block_h, s->sh.luma_log2_weight_denom,
1452                                                         luma_weight, luma_offset, mx, my, block_w);
1453 }
1454
1455 /**
1456  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1457  *
1458  * @param s HEVC decoding context
1459  * @param dst target buffer for block data at block position
1460  * @param dststride stride of the dst buffer
1461  * @param ref0 reference picture0 buffer at origin (0, 0)
1462  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1463  * @param x_off horizontal position of block from origin (0, 0)
1464  * @param y_off vertical position of block from origin (0, 0)
1465  * @param block_w width of block
1466  * @param block_h height of block
1467  * @param ref1 reference picture1 buffer at origin (0, 0)
1468  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1469  * @param current_mv current motion vector structure
1470  */
1471  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1472                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1473                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1474 {
1475     HEVCLocalContext *lc = s->HEVClc;
1476     ptrdiff_t src0stride  = ref0->linesize[0];
1477     ptrdiff_t src1stride  = ref1->linesize[0];
1478     int pic_width        = s->ps.sps->width;
1479     int pic_height       = s->ps.sps->height;
1480     int mx0              = mv0->x & 3;
1481     int my0              = mv0->y & 3;
1482     int mx1              = mv1->x & 3;
1483     int my1              = mv1->y & 3;
1484     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1485                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1486     int x_off0           = x_off + (mv0->x >> 2);
1487     int y_off0           = y_off + (mv0->y >> 2);
1488     int x_off1           = x_off + (mv1->x >> 2);
1489     int y_off1           = y_off + (mv1->y >> 2);
1490     int idx              = ff_hevc_pel_weight[block_w];
1491
1492     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1493     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1494
1495     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1496         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1497         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1498         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1499         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1500         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1501
1502         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1503                                  edge_emu_stride, src0stride,
1504                                  block_w + QPEL_EXTRA,
1505                                  block_h + QPEL_EXTRA,
1506                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1507                                  pic_width, pic_height);
1508         src0 = lc->edge_emu_buffer + buf_offset;
1509         src0stride = edge_emu_stride;
1510     }
1511
1512     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1513         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1514         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1515         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1516         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1517         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1518
1519         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1520                                  edge_emu_stride, src1stride,
1521                                  block_w + QPEL_EXTRA,
1522                                  block_h + QPEL_EXTRA,
1523                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1524                                  pic_width, pic_height);
1525         src1 = lc->edge_emu_buffer2 + buf_offset;
1526         src1stride = edge_emu_stride;
1527     }
1528
1529     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1530                                                 block_h, mx0, my0, block_w);
1531     if (!weight_flag)
1532         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1533                                                        block_h, mx1, my1, block_w);
1534     else
1535         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1536                                                          block_h, s->sh.luma_log2_weight_denom,
1537                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1538                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1539                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1540                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1541                                                          mx1, my1, block_w);
1542
1543 }
1544
1545 /**
1546  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1547  *
1548  * @param s HEVC decoding context
1549  * @param dst1 target buffer for block data at block position (U plane)
1550  * @param dst2 target buffer for block data at block position (V plane)
1551  * @param dststride stride of the dst1 and dst2 buffers
1552  * @param ref reference picture buffer at origin (0, 0)
1553  * @param mv motion vector (relative to block position) to get pixel data from
1554  * @param x_off horizontal position of block from origin (0, 0)
1555  * @param y_off vertical position of block from origin (0, 0)
1556  * @param block_w width of block
1557  * @param block_h height of block
1558  * @param chroma_weight weighting factor applied to the chroma prediction
1559  * @param chroma_offset additive offset applied to the chroma prediction value
1560  */
1561
1562 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1563                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1564                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1565 {
1566     HEVCLocalContext *lc = s->HEVClc;
1567     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1568     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1569     const Mv *mv         = &current_mv->mv[reflist];
1570     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1571                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1572     int idx              = ff_hevc_pel_weight[block_w];
1573     int hshift           = s->ps.sps->hshift[1];
1574     int vshift           = s->ps.sps->vshift[1];
1575     intptr_t mx          = av_mod_uintp2(mv->x, 2 + hshift);
1576     intptr_t my          = av_mod_uintp2(mv->y, 2 + vshift);
1577     intptr_t _mx         = mx << (1 - hshift);
1578     intptr_t _my         = my << (1 - vshift);
1579
1580     x_off += mv->x >> (2 + hshift);
1581     y_off += mv->y >> (2 + vshift);
1582     src0  += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1583
1584     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1585         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1586         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1587         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1588         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1589         int buf_offset0 = EPEL_EXTRA_BEFORE *
1590                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1591         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1592                                  edge_emu_stride, srcstride,
1593                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1594                                  x_off - EPEL_EXTRA_BEFORE,
1595                                  y_off - EPEL_EXTRA_BEFORE,
1596                                  pic_width, pic_height);
1597
1598         src0 = lc->edge_emu_buffer + buf_offset0;
1599         srcstride = edge_emu_stride;
1600     }
1601     if (!weight_flag)
1602         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1603                                                   block_h, _mx, _my, block_w);
1604     else
1605         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1606                                                         block_h, s->sh.chroma_log2_weight_denom,
1607                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1608 }
1609
1610 /**
1611  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1612  *
1613  * @param s HEVC decoding context
1614  * @param dst target buffer for block data at block position
1615  * @param dststride stride of the dst buffer
1616  * @param ref0 reference picture0 buffer at origin (0, 0)
1617  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1618  * @param x_off horizontal position of block from origin (0, 0)
1619  * @param y_off vertical position of block from origin (0, 0)
1620  * @param block_w width of block
1621  * @param block_h height of block
1622  * @param ref1 reference picture1 buffer at origin (0, 0)
1623  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1624  * @param current_mv current motion vector structure
1625  * @param cidx chroma component(cb, cr)
1626  */
1627 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1628                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1629 {
1630     HEVCLocalContext *lc = s->HEVClc;
1631     uint8_t *src1        = ref0->data[cidx+1];
1632     uint8_t *src2        = ref1->data[cidx+1];
1633     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1634     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1635     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1636                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1637     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1638     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1639     Mv *mv0              = &current_mv->mv[0];
1640     Mv *mv1              = &current_mv->mv[1];
1641     int hshift = s->ps.sps->hshift[1];
1642     int vshift = s->ps.sps->vshift[1];
1643
1644     intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1645     intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1646     intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1647     intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1648     intptr_t _mx0 = mx0 << (1 - hshift);
1649     intptr_t _my0 = my0 << (1 - vshift);
1650     intptr_t _mx1 = mx1 << (1 - hshift);
1651     intptr_t _my1 = my1 << (1 - vshift);
1652
1653     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1654     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1655     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1656     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1657     int idx = ff_hevc_pel_weight[block_w];
1658     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1659     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1660
1661     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1662         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1663         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1664         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1665         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1666         int buf_offset1 = EPEL_EXTRA_BEFORE *
1667                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1668
1669         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1670                                  edge_emu_stride, src1stride,
1671                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1672                                  x_off0 - EPEL_EXTRA_BEFORE,
1673                                  y_off0 - EPEL_EXTRA_BEFORE,
1674                                  pic_width, pic_height);
1675
1676         src1 = lc->edge_emu_buffer + buf_offset1;
1677         src1stride = edge_emu_stride;
1678     }
1679
1680     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1681         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1682         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1683         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1684         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1685         int buf_offset1 = EPEL_EXTRA_BEFORE *
1686                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1687
1688         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1689                                  edge_emu_stride, src2stride,
1690                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1691                                  x_off1 - EPEL_EXTRA_BEFORE,
1692                                  y_off1 - EPEL_EXTRA_BEFORE,
1693                                  pic_width, pic_height);
1694
1695         src2 = lc->edge_emu_buffer2 + buf_offset1;
1696         src2stride = edge_emu_stride;
1697     }
1698
1699     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1700                                                 block_h, _mx0, _my0, block_w);
1701     if (!weight_flag)
1702         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1703                                                        src2, src2stride, lc->tmp,
1704                                                        block_h, _mx1, _my1, block_w);
1705     else
1706         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1707                                                          src2, src2stride, lc->tmp,
1708                                                          block_h,
1709                                                          s->sh.chroma_log2_weight_denom,
1710                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1711                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1712                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1713                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1714                                                          _mx1, _my1, block_w);
1715 }
1716
1717 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1718                                 const Mv *mv, int y0, int height)
1719 {
1720     if (s->threads_type == FF_THREAD_FRAME ) {
1721         int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1722
1723         ff_thread_await_progress(&ref->tf, y, 0);
1724     }
1725 }
1726
1727 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1728                                   int nPbH, int log2_cb_size, int part_idx,
1729                                   int merge_idx, MvField *mv)
1730 {
1731     HEVCLocalContext *lc = s->HEVClc;
1732     enum InterPredIdc inter_pred_idc = PRED_L0;
1733     int mvp_flag;
1734
1735     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1736     mv->pred_flag = 0;
1737     if (s->sh.slice_type == HEVC_SLICE_B)
1738         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1739
1740     if (inter_pred_idc != PRED_L1) {
1741         if (s->sh.nb_refs[L0])
1742             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1743
1744         mv->pred_flag = PF_L0;
1745         ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1746         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1747         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1748                                  part_idx, merge_idx, mv, mvp_flag, 0);
1749         mv->mv[0].x += lc->pu.mvd.x;
1750         mv->mv[0].y += lc->pu.mvd.y;
1751     }
1752
1753     if (inter_pred_idc != PRED_L0) {
1754         if (s->sh.nb_refs[L1])
1755             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1756
1757         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1758             AV_ZERO32(&lc->pu.mvd);
1759         } else {
1760             ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1761         }
1762
1763         mv->pred_flag += PF_L1;
1764         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1765         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1766                                  part_idx, merge_idx, mv, mvp_flag, 1);
1767         mv->mv[1].x += lc->pu.mvd.x;
1768         mv->mv[1].y += lc->pu.mvd.y;
1769     }
1770 }
1771
1772 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1773                                 int nPbW, int nPbH,
1774                                 int log2_cb_size, int partIdx, int idx)
1775 {
1776 #define POS(c_idx, x, y)                                                              \
1777     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1778                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1779     HEVCLocalContext *lc = s->HEVClc;
1780     int merge_idx = 0;
1781     struct MvField current_mv = {{{ 0 }}};
1782
1783     int min_pu_width = s->ps.sps->min_pu_width;
1784
1785     MvField *tab_mvf = s->ref->tab_mvf;
1786     RefPicList  *refPicList = s->ref->refPicList;
1787     HEVCFrame *ref0 = NULL, *ref1 = NULL;
1788     uint8_t *dst0 = POS(0, x0, y0);
1789     uint8_t *dst1 = POS(1, x0, y0);
1790     uint8_t *dst2 = POS(2, x0, y0);
1791     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1792     int min_cb_width     = s->ps.sps->min_cb_width;
1793     int x_cb             = x0 >> log2_min_cb_size;
1794     int y_cb             = y0 >> log2_min_cb_size;
1795     int x_pu, y_pu;
1796     int i, j;
1797
1798     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1799
1800     if (!skip_flag)
1801         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1802
1803     if (skip_flag || lc->pu.merge_flag) {
1804         if (s->sh.max_num_merge_cand > 1)
1805             merge_idx = ff_hevc_merge_idx_decode(s);
1806         else
1807             merge_idx = 0;
1808
1809         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1810                                    partIdx, merge_idx, &current_mv);
1811     } else {
1812         hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1813                               partIdx, merge_idx, &current_mv);
1814     }
1815
1816     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1817     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1818
1819     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1820         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1821             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1822
1823     if (current_mv.pred_flag & PF_L0) {
1824         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1825         if (!ref0)
1826             return;
1827         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1828     }
1829     if (current_mv.pred_flag & PF_L1) {
1830         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1831         if (!ref1)
1832             return;
1833         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1834     }
1835
1836     if (current_mv.pred_flag == PF_L0) {
1837         int x0_c = x0 >> s->ps.sps->hshift[1];
1838         int y0_c = y0 >> s->ps.sps->vshift[1];
1839         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1840         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1841
1842         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1843                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1844                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1845                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1846
1847         if (s->ps.sps->chroma_format_idc) {
1848             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1849                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1850                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1851             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1852                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1853                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1854         }
1855     } else if (current_mv.pred_flag == PF_L1) {
1856         int x0_c = x0 >> s->ps.sps->hshift[1];
1857         int y0_c = y0 >> s->ps.sps->vshift[1];
1858         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1859         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1860
1861         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1862                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1863                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1864                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1865
1866         if (s->ps.sps->chroma_format_idc) {
1867             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1868                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1869                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1870
1871             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1872                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1873                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1874         }
1875     } else if (current_mv.pred_flag == PF_BI) {
1876         int x0_c = x0 >> s->ps.sps->hshift[1];
1877         int y0_c = y0 >> s->ps.sps->vshift[1];
1878         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1879         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1880
1881         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1882                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1883                    ref1->frame, &current_mv.mv[1], &current_mv);
1884
1885         if (s->ps.sps->chroma_format_idc) {
1886             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1887                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1888
1889             chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1890                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1891         }
1892     }
1893 }
1894
1895 /**
1896  * 8.4.1
1897  */
1898 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1899                                 int prev_intra_luma_pred_flag)
1900 {
1901     HEVCLocalContext *lc = s->HEVClc;
1902     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1903     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1904     int min_pu_width     = s->ps.sps->min_pu_width;
1905     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1906     int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1907     int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
1908
1909     int cand_up   = (lc->ctb_up_flag || y0b) ?
1910                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1911     int cand_left = (lc->ctb_left_flag || x0b) ?
1912                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1913
1914     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1915
1916     MvField *tab_mvf = s->ref->tab_mvf;
1917     int intra_pred_mode;
1918     int candidate[3];
1919     int i, j;
1920
1921     // intra_pred_mode prediction does not cross vertical CTB boundaries
1922     if ((y0 - 1) < y_ctb)
1923         cand_up = INTRA_DC;
1924
1925     if (cand_left == cand_up) {
1926         if (cand_left < 2) {
1927             candidate[0] = INTRA_PLANAR;
1928             candidate[1] = INTRA_DC;
1929             candidate[2] = INTRA_ANGULAR_26;
1930         } else {
1931             candidate[0] = cand_left;
1932             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1933             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1934         }
1935     } else {
1936         candidate[0] = cand_left;
1937         candidate[1] = cand_up;
1938         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1939             candidate[2] = INTRA_PLANAR;
1940         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1941             candidate[2] = INTRA_DC;
1942         } else {
1943             candidate[2] = INTRA_ANGULAR_26;
1944         }
1945     }
1946
1947     if (prev_intra_luma_pred_flag) {
1948         intra_pred_mode = candidate[lc->pu.mpm_idx];
1949     } else {
1950         if (candidate[0] > candidate[1])
1951             FFSWAP(uint8_t, candidate[0], candidate[1]);
1952         if (candidate[0] > candidate[2])
1953             FFSWAP(uint8_t, candidate[0], candidate[2]);
1954         if (candidate[1] > candidate[2])
1955             FFSWAP(uint8_t, candidate[1], candidate[2]);
1956
1957         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1958         for (i = 0; i < 3; i++)
1959             if (intra_pred_mode >= candidate[i])
1960                 intra_pred_mode++;
1961     }
1962
1963     /* write the intra prediction units into the mv array */
1964     if (!size_in_pus)
1965         size_in_pus = 1;
1966     for (i = 0; i < size_in_pus; i++) {
1967         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1968                intra_pred_mode, size_in_pus);
1969
1970         for (j = 0; j < size_in_pus; j++) {
1971             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1972         }
1973     }
1974
1975     return intra_pred_mode;
1976 }
1977
1978 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1979                                           int log2_cb_size, int ct_depth)
1980 {
1981     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1982     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
1983     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
1984     int y;
1985
1986     for (y = 0; y < length; y++)
1987         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
1988                ct_depth, length);
1989 }
1990
/*
 * Intra prediction mode remapping table with one entry per mode 0..34.
 * intra_prediction_unit() indexes it with the derived chroma mode when
 * chroma_format_idc == 2 and stores the result as the chroma mode.
 */
static const uint8_t tab_mode_idx[] = {
     0,  1,  2,  2,  2,  2,  3,  5,  7,  8, /* modes  0 ..  9 */
    10, 12, 13, 15, 17, 18, 19, 20, 21, 22, /* modes 10 .. 19 */
    23, 23, 24, 24, 25, 25, 26, 27, 27, 28, /* modes 20 .. 29 */
    28, 29, 29, 30, 31,                     /* modes 30 .. 34 */
};
1994
1995 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1996                                   int log2_cb_size)
1997 {
1998     HEVCLocalContext *lc = s->HEVClc;
1999     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
2000     uint8_t prev_intra_luma_pred_flag[4];
2001     int split   = lc->cu.part_mode == PART_NxN;
2002     int pb_size = (1 << log2_cb_size) >> split;
2003     int side    = split + 1;
2004     int chroma_mode;
2005     int i, j;
2006
2007     for (i = 0; i < side; i++)
2008         for (j = 0; j < side; j++)
2009             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2010
2011     for (i = 0; i < side; i++) {
2012         for (j = 0; j < side; j++) {
2013             if (prev_intra_luma_pred_flag[2 * i + j])
2014                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2015             else
2016                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2017
2018             lc->pu.intra_pred_mode[2 * i + j] =
2019                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2020                                      prev_intra_luma_pred_flag[2 * i + j]);
2021         }
2022     }
2023
2024     if (s->ps.sps->chroma_format_idc == 3) {
2025         for (i = 0; i < side; i++) {
2026             for (j = 0; j < side; j++) {
2027                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2028                 if (chroma_mode != 4) {
2029                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
2030                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
2031                     else
2032                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
2033                 } else {
2034                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
2035                 }
2036             }
2037         }
2038     } else if (s->ps.sps->chroma_format_idc == 2) {
2039         int mode_idx;
2040         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2041         if (chroma_mode != 4) {
2042             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2043                 mode_idx = 34;
2044             else
2045                 mode_idx = intra_chroma_table[chroma_mode];
2046         } else {
2047             mode_idx = lc->pu.intra_pred_mode[0];
2048         }
2049         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
2050     } else if (s->ps.sps->chroma_format_idc != 0) {
2051         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2052         if (chroma_mode != 4) {
2053             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2054                 lc->pu.intra_pred_mode_c[0] = 34;
2055             else
2056                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
2057         } else {
2058             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
2059         }
2060     }
2061 }
2062
2063 static void intra_prediction_unit_default_value(HEVCContext *s,
2064                                                 int x0, int y0,
2065                                                 int log2_cb_size)
2066 {
2067     HEVCLocalContext *lc = s->HEVClc;
2068     int pb_size          = 1 << log2_cb_size;
2069     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2070     int min_pu_width     = s->ps.sps->min_pu_width;
2071     MvField *tab_mvf     = s->ref->tab_mvf;
2072     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2073     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2074     int j, k;
2075
2076     if (size_in_pus == 0)
2077         size_in_pus = 1;
2078     for (j = 0; j < size_in_pus; j++)
2079         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2080     if (lc->cu.pred_mode == MODE_INTRA)
2081         for (j = 0; j < size_in_pus; j++)
2082             for (k = 0; k < size_in_pus; k++)
2083                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
2084 }
2085
2086 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2087 {
2088     int cb_size          = 1 << log2_cb_size;
2089     HEVCLocalContext *lc = s->HEVClc;
2090     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2091     int length           = cb_size >> log2_min_cb_size;
2092     int min_cb_width     = s->ps.sps->min_cb_width;
2093     int x_cb             = x0 >> log2_min_cb_size;
2094     int y_cb             = y0 >> log2_min_cb_size;
2095     int idx              = log2_cb_size - 2;
2096     int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2097     int x, y, ret;
2098
2099     lc->cu.x                = x0;
2100     lc->cu.y                = y0;
2101     lc->cu.pred_mode        = MODE_INTRA;
2102     lc->cu.part_mode        = PART_2Nx2N;
2103     lc->cu.intra_split_flag = 0;
2104
2105     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2106     for (x = 0; x < 4; x++)
2107         lc->pu.intra_pred_mode[x] = 1;
2108     if (s->ps.pps->transquant_bypass_enable_flag) {
2109         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2110         if (lc->cu.cu_transquant_bypass_flag)
2111             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2112     } else
2113         lc->cu.cu_transquant_bypass_flag = 0;
2114
2115     if (s->sh.slice_type != HEVC_SLICE_I) {
2116         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2117
2118         x = y_cb * min_cb_width + x_cb;
2119         for (y = 0; y < length; y++) {
2120             memset(&s->skip_flag[x], skip_flag, length);
2121             x += min_cb_width;
2122         }
2123         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2124     } else {
2125         x = y_cb * min_cb_width + x_cb;
2126         for (y = 0; y < length; y++) {
2127             memset(&s->skip_flag[x], 0, length);
2128             x += min_cb_width;
2129         }
2130     }
2131
2132     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2133         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2134         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2135
2136         if (!s->sh.disable_deblocking_filter_flag)
2137             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2138     } else {
2139         int pcm_flag = 0;
2140
2141         if (s->sh.slice_type != HEVC_SLICE_I)
2142             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2143         if (lc->cu.pred_mode != MODE_INTRA ||
2144             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2145             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2146             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2147                                       lc->cu.pred_mode == MODE_INTRA;
2148         }
2149
2150         if (lc->cu.pred_mode == MODE_INTRA) {
2151             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2152                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2153                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2154                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2155             }
2156             if (pcm_flag) {
2157                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2158                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2159                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2160                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2161
2162                 if (ret < 0)
2163                     return ret;
2164             } else {
2165                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2166             }
2167         } else {
2168             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2169             switch (lc->cu.part_mode) {
2170             case PART_2Nx2N:
2171                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2172                 break;
2173             case PART_2NxN:
2174                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2175                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2176                 break;
2177             case PART_Nx2N:
2178                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2179                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2180                 break;
2181             case PART_2NxnU:
2182                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2183                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2184                 break;
2185             case PART_2NxnD:
2186                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2187                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2188                 break;
2189             case PART_nLx2N:
2190                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2191                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2192                 break;
2193             case PART_nRx2N:
2194                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2195                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2196                 break;
2197             case PART_NxN:
2198                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2199                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2200                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2201                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2202                 break;
2203             }
2204         }
2205
2206         if (!pcm_flag) {
2207             int rqt_root_cbf = 1;
2208
2209             if (lc->cu.pred_mode != MODE_INTRA &&
2210                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2211                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2212             }
2213             if (rqt_root_cbf) {
2214                 const static int cbf[2] = { 0 };
2215                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2216                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2217                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2218                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2219                                          log2_cb_size,
2220                                          log2_cb_size, 0, 0, cbf, cbf);
2221                 if (ret < 0)
2222                     return ret;
2223             } else {
2224                 if (!s->sh.disable_deblocking_filter_flag)
2225                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2226             }
2227         }
2228     }
2229
2230     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2231         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2232
2233     x = y_cb * min_cb_width + x_cb;
2234     for (y = 0; y < length; y++) {
2235         memset(&s->qp_y_tab[x], lc->qp_y, length);
2236         x += min_cb_width;
2237     }
2238
2239     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2240        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2241         lc->qPy_pred = lc->qp_y;
2242     }
2243
2244     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
2245
2246     return 0;
2247 }
2248
2249 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2250                                int log2_cb_size, int cb_depth)
2251 {
2252     HEVCLocalContext *lc = s->HEVClc;
2253     const int cb_size    = 1 << log2_cb_size;
2254     int ret;
2255     int split_cu;
2256
2257     lc->ct_depth = cb_depth;
2258     if (x0 + cb_size <= s->ps.sps->width  &&
2259         y0 + cb_size <= s->ps.sps->height &&
2260         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2261         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2262     } else {
2263         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2264     }
2265     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2266         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2267         lc->tu.is_cu_qp_delta_coded = 0;
2268         lc->tu.cu_qp_delta          = 0;
2269     }
2270
2271     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2272         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2273         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2274     }
2275
2276     if (split_cu) {
2277         int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2278         const int cb_size_split = cb_size >> 1;
2279         const int x1 = x0 + cb_size_split;
2280         const int y1 = y0 + cb_size_split;
2281
2282         int more_data = 0;
2283
2284         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2285         if (more_data < 0)
2286             return more_data;
2287
2288         if (more_data && x1 < s->ps.sps->width) {
2289             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2290             if (more_data < 0)
2291                 return more_data;
2292         }
2293         if (more_data && y1 < s->ps.sps->height) {
2294             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2295             if (more_data < 0)
2296                 return more_data;
2297         }
2298         if (more_data && x1 < s->ps.sps->width &&
2299             y1 < s->ps.sps->height) {
2300             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2301             if (more_data < 0)
2302                 return more_data;
2303         }
2304
2305         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2306             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2307             lc->qPy_pred = lc->qp_y;
2308
2309         if (more_data)
2310             return ((x1 + cb_size_split) < s->ps.sps->width ||
2311                     (y1 + cb_size_split) < s->ps.sps->height);
2312         else
2313             return 0;
2314     } else {
2315         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2316         if (ret < 0)
2317             return ret;
2318         if ((!((x0 + cb_size) %
2319                (1 << (s->ps.sps->log2_ctb_size))) ||
2320              (x0 + cb_size >= s->ps.sps->width)) &&
2321             (!((y0 + cb_size) %
2322                (1 << (s->ps.sps->log2_ctb_size))) ||
2323              (y0 + cb_size >= s->ps.sps->height))) {
2324             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2325             return !end_of_slice_flag;
2326         } else {
2327             return 1;
2328         }
2329     }
2330
2331     return 0;
2332 }
2333
2334 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2335                                  int ctb_addr_ts)
2336 {
2337     HEVCLocalContext *lc  = s->HEVClc;
2338     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2339     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2340     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2341
2342     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2343
2344     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2345         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2346             lc->first_qp_group = 1;
2347         lc->end_of_tiles_x = s->ps.sps->width;
2348     } else if (s->ps.pps->tiles_enabled_flag) {
2349         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2350             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2351             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2352             lc->first_qp_group   = 1;
2353         }
2354     } else {
2355         lc->end_of_tiles_x = s->ps.sps->width;
2356     }
2357
2358     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2359
2360     lc->boundary_flags = 0;
2361     if (s->ps.pps->tiles_enabled_flag) {
2362         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2363             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2364         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2365             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2366         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2367             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2368         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2369             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2370     } else {
2371         if (ctb_addr_in_slice <= 0)
2372             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2373         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2374             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2375     }
2376
2377     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2378     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2379     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2380     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2381 }
2382
2383 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2384 {
2385     HEVCContext *s  = avctxt->priv_data;
2386     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2387     int more_data   = 1;
2388     int x_ctb       = 0;
2389     int y_ctb       = 0;
2390     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2391     int ret;
2392
2393     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2394         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2395         return AVERROR_INVALIDDATA;
2396     }
2397
2398     if (s->sh.dependent_slice_segment_flag) {
2399         int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2400         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2401             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2402             return AVERROR_INVALIDDATA;
2403         }
2404     }
2405
2406     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2407         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2408
2409         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2410         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2411         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2412
2413         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2414         if (ret < 0) {
2415             s->tab_slice_address[ctb_addr_rs] = -1;
2416             return ret;
2417         }
2418
2419         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2420
2421         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2422         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2423         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2424
2425         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2426         if (more_data < 0) {
2427             s->tab_slice_address[ctb_addr_rs] = -1;
2428             return more_data;
2429         }
2430
2431
2432         ctb_addr_ts++;
2433         ff_hevc_save_states(s, ctb_addr_ts);
2434         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2435     }
2436
2437     if (x_ctb + ctb_size >= s->ps.sps->width &&
2438         y_ctb + ctb_size >= s->ps.sps->height)
2439         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2440
2441     return ctb_addr_ts;
2442 }
2443
2444 static int hls_slice_data(HEVCContext *s)
2445 {
2446     int arg[2];
2447     int ret[2];
2448
2449     arg[0] = 0;
2450     arg[1] = 1;
2451
2452     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2453     return ret[0];
2454 }
2455 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2456 {
2457     HEVCContext *s1  = avctxt->priv_data, *s;
2458     HEVCLocalContext *lc;
2459     int ctb_size    = 1<< s1->ps.sps->log2_ctb_size;
2460     int more_data   = 1;
2461     int *ctb_row_p    = input_ctb_row;
2462     int ctb_row = ctb_row_p[job];
2463     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2464     int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2465     int thread = ctb_row % s1->threads_number;
2466     int ret;
2467
2468     s = s1->sList[self_id];
2469     lc = s->HEVClc;
2470
2471     if(ctb_row) {
2472         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2473         if (ret < 0)
2474             goto error;
2475         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2476     }
2477
2478     while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2479         int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2480         int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2481
2482         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2483
2484         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2485
2486         if (atomic_load(&s1->wpp_err)) {
2487             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2488             return 0;
2489         }
2490
2491         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2492         if (ret < 0)
2493             goto error;
2494         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2495         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2496
2497         if (more_data < 0) {
2498             ret = more_data;
2499             goto error;
2500         }
2501
2502         ctb_addr_ts++;
2503
2504         ff_hevc_save_states(s, ctb_addr_ts);
2505         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2506         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2507
2508         if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2509             atomic_store(&s1->wpp_err, 1);
2510             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2511             return 0;
2512         }
2513
2514         if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2515             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2516             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2517             return ctb_addr_ts;
2518         }
2519         ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2520         x_ctb+=ctb_size;
2521
2522         if(x_ctb >= s->ps.sps->width) {
2523             break;
2524         }
2525     }
2526     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2527
2528     return 0;
2529 error:
2530     s->tab_slice_address[ctb_addr_rs] = -1;
2531     atomic_store(&s1->wpp_err, 1);
2532     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2533     return ret;
2534 }
2535
2536 static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
2537 {
2538     const uint8_t *data = nal->data;
2539     int length          = nal->size;
2540     HEVCLocalContext *lc = s->HEVClc;
2541     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2542     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2543     int64_t offset;
2544     int64_t startheader, cmpt = 0;
2545     int i, j, res = 0;
2546
2547     if (!ret || !arg) {
2548         av_free(ret);
2549         av_free(arg);
2550         return AVERROR(ENOMEM);
2551     }
2552
2553     if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
2554         av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
2555             s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
2556             s->ps.sps->ctb_width, s->ps.sps->ctb_height
2557         );
2558         res = AVERROR_INVALIDDATA;
2559         goto error;
2560     }
2561
2562     ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2563
2564     if (!s->sList[1]) {
2565         for (i = 1; i < s->threads_number; i++) {
2566             s->sList[i] = av_malloc(sizeof(HEVCContext));
2567             memcpy(s->sList[i], s, sizeof(HEVCContext));
2568             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2569             s->sList[i]->HEVClc = s->HEVClcList[i];
2570         }
2571     }
2572
2573     offset = (lc->gb.index >> 3);
2574
2575     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2576         if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2577             startheader--;
2578             cmpt++;
2579         }
2580     }
2581
2582     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2583         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2584         for (j = 0, cmpt = 0, startheader = offset
2585              + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2586             if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2587                 startheader--;
2588                 cmpt++;
2589             }
2590         }
2591         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2592         s->sh.offset[i - 1] = offset;
2593
2594     }
2595     if (s->sh.num_entry_point_offsets != 0) {
2596         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2597         if (length < offset) {
2598             av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
2599             res = AVERROR_INVALIDDATA;
2600             goto error;
2601         }
2602         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2603         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2604
2605     }
2606     s->data = data;
2607
2608     for (i = 1; i < s->threads_number; i++) {
2609         s->sList[i]->HEVClc->first_qp_group = 1;
2610         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2611         memcpy(s->sList[i], s, sizeof(HEVCContext));
2612         s->sList[i]->HEVClc = s->HEVClcList[i];
2613     }
2614
2615     atomic_store(&s->wpp_err, 0);
2616     ff_reset_entries(s->avctx);
2617
2618     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2619         arg[i] = i;
2620         ret[i] = 0;
2621     }
2622
2623     if (s->ps.pps->entropy_coding_sync_enabled_flag)
2624         s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2625
2626     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2627         res += ret[i];
2628 error:
2629     av_free(ret);
2630     av_free(arg);
2631     return res;
2632 }
2633
2634 static int set_side_data(HEVCContext *s)
2635 {
2636     AVFrame *out = s->ref->frame;
2637
2638     if (s->sei.frame_packing.present &&
2639         s->sei.frame_packing.arrangement_type >= 3 &&
2640         s->sei.frame_packing.arrangement_type <= 5 &&
2641         s->sei.frame_packing.content_interpretation_type > 0 &&
2642         s->sei.frame_packing.content_interpretation_type < 3) {
2643         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2644         if (!stereo)
2645             return AVERROR(ENOMEM);
2646
2647         switch (s->sei.frame_packing.arrangement_type) {
2648         case 3:
2649             if (s->sei.frame_packing.quincunx_subsampling)
2650                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2651             else
2652                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2653             break;
2654         case 4:
2655             stereo->type = AV_STEREO3D_TOPBOTTOM;
2656             break;
2657         case 5:
2658             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2659             break;
2660         }
2661
2662         if (s->sei.frame_packing.content_interpretation_type == 2)
2663             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2664
2665         if (s->sei.frame_packing.arrangement_type == 5) {
2666             if (s->sei.frame_packing.current_frame_is_frame0_flag)
2667                 stereo->view = AV_STEREO3D_VIEW_LEFT;
2668             else
2669                 stereo->view = AV_STEREO3D_VIEW_RIGHT;
2670         }
2671     }
2672
2673     if (s->sei.display_orientation.present &&
2674         (s->sei.display_orientation.anticlockwise_rotation ||
2675          s->sei.display_orientation.hflip || s->sei.display_orientation.vflip)) {
2676         double angle = s->sei.display_orientation.anticlockwise_rotation * 360 / (double) (1 << 16);
2677         AVFrameSideData *rotation = av_frame_new_side_data(out,
2678                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2679                                                            sizeof(int32_t) * 9);
2680         if (!rotation)
2681             return AVERROR(ENOMEM);
2682
2683         av_display_rotation_set((int32_t *)rotation->data, angle);
2684         av_display_matrix_flip((int32_t *)rotation->data,
2685                                s->sei.display_orientation.hflip,
2686                                s->sei.display_orientation.vflip);
2687     }
2688
2689     // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2690     // so the side data persists for the entire coded video sequence.
2691     if (s->sei.mastering_display.present > 0 &&
2692         IS_IRAP(s) && s->no_rasl_output_flag) {
2693         s->sei.mastering_display.present--;
2694     }
2695     if (s->sei.mastering_display.present) {
2696         // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
2697         const int mapping[3] = {2, 0, 1};
2698         const int chroma_den = 50000;
2699         const int luma_den = 10000;
2700         int i;
2701         AVMasteringDisplayMetadata *metadata =
2702             av_mastering_display_metadata_create_side_data(out);
2703         if (!metadata)
2704             return AVERROR(ENOMEM);
2705
2706         for (i = 0; i < 3; i++) {
2707             const int j = mapping[i];
2708             metadata->display_primaries[i][0].num = s->sei.mastering_display.display_primaries[j][0];
2709             metadata->display_primaries[i][0].den = chroma_den;
2710             metadata->display_primaries[i][1].num = s->sei.mastering_display.display_primaries[j][1];
2711             metadata->display_primaries[i][1].den = chroma_den;
2712         }
2713         metadata->white_point[0].num = s->sei.mastering_display.white_point[0];
2714         metadata->white_point[0].den = chroma_den;
2715         metadata->white_point[1].num = s->sei.mastering_display.white_point[1];
2716         metadata->white_point[1].den = chroma_den;
2717
2718         metadata->max_luminance.num = s->sei.mastering_display.max_luminance;
2719         metadata->max_luminance.den = luma_den;
2720         metadata->min_luminance.num = s->sei.mastering_display.min_luminance;
2721         metadata->min_luminance.den = luma_den;
2722         metadata->has_luminance = 1;
2723         metadata->has_primaries = 1;
2724
2725         av_log(s->avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
2726         av_log(s->avctx, AV_LOG_DEBUG,
2727                "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
2728                av_q2d(metadata->display_primaries[0][0]),
2729                av_q2d(metadata->display_primaries[0][1]),
2730                av_q2d(metadata->display_primaries[1][0]),
2731                av_q2d(metadata->display_primaries[1][1]),
2732                av_q2d(metadata->display_primaries[2][0]),
2733                av_q2d(metadata->display_primaries[2][1]),
2734                av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
2735         av_log(s->avctx, AV_LOG_DEBUG,
2736                "min_luminance=%f, max_luminance=%f\n",
2737                av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
2738     }
2739     // Decrement the content light level flag when IRAP frame has no_rasl_output_flag=1
2740     // so the side data persists for the entire coded video sequence.
2741     if (s->sei.content_light.present > 0 &&
2742         IS_IRAP(s) && s->no_rasl_output_flag) {
2743         s->sei.content_light.present--;
2744     }
2745     if (s->sei.content_light.present) {
2746         AVContentLightMetadata *metadata =
2747             av_content_light_metadata_create_side_data(out);
2748         if (!metadata)
2749             return AVERROR(ENOMEM);
2750         metadata->MaxCLL  = s->sei.content_light.max_content_light_level;
2751         metadata->MaxFALL = s->sei.content_light.max_pic_average_light_level;
2752
2753         av_log(s->avctx, AV_LOG_DEBUG, "Content Light Level Metadata:\n");
2754         av_log(s->avctx, AV_LOG_DEBUG, "MaxCLL=%d, MaxFALL=%d\n",
2755                metadata->MaxCLL, metadata->MaxFALL);
2756     }
2757
2758     if (s->sei.a53_caption.a53_caption) {
2759         AVFrameSideData* sd = av_frame_new_side_data(out,
2760                                                      AV_FRAME_DATA_A53_CC,
2761                                                      s->sei.a53_caption.a53_caption_size);
2762         if (sd)
2763             memcpy(sd->data, s->sei.a53_caption.a53_caption, s->sei.a53_caption.a53_caption_size);
2764         av_freep(&s->sei.a53_caption.a53_caption);
2765         s->sei.a53_caption.a53_caption_size = 0;
2766         s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
2767     }
2768
2769     if (s->sei.alternative_transfer.present &&
2770         av_color_transfer_name(s->sei.alternative_transfer.preferred_transfer_characteristics) &&
2771         s->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) {
2772         s->avctx->color_trc = out->color_trc = s->sei.alternative_transfer.preferred_transfer_characteristics;
2773     }
2774
2775     return 0;
2776 }
2777
     /**
      * Prepare the decoder for a new picture. Called from decode_nal_unit() when
      * a slice has first_slice_in_pic_flag set.
      *
      * Clears the per-picture tables, derives no_rasl_output_flag, obtains a new
      * current frame through ff_hevc_set_new_ref(), builds the frame RPS,
      * attaches side data and fills s->output_frame via ff_hevc_output_frame().
      *
      * @param s decoder context
      * @return 0 on success; on failure the negative error code of the failing
      *         step, after unreferencing s->ref (if set) and resetting it to NULL
      */
2778 static int hevc_frame_start(HEVCContext *s)
2779 {
2780     HEVCLocalContext *lc = s->HEVClc;
         /* NOTE(review): despite its name this count is derived from
          * log2_min_cb_size; confirm it matches how tab_slice_address is
          * allocated. */
2781     int pic_size_in_ctb  = ((s->ps.sps->width  >> s->ps.sps->log2_min_cb_size) + 1) *
2782                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
2783     int ret;
2784
         /* reset the per-picture tables; tab_slice_address is filled with 0xff
          * bytes, the others with zero */
2785     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2786     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2787     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2788     memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2789     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2790
2791     s->is_decoded        = 0;
         /* remembered so decode_nal_unit() can reject later slices of this
          * picture that carry a different NAL type */
2792     s->first_nal_type    = s->nal_unit_type;
2793
         /* set for IDR and BLA pictures, and for a CRA picture when last_eos is set */
2794     s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos);
2795
2796     if (s->ps.pps->tiles_enabled_flag)
2797         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2798
2799     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2800     if (ret < 0)
2801         goto fail;
2802
2803     ret = ff_hevc_frame_rps(s);
2804     if (ret < 0) {
2805         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2806         goto fail;
2807     }
2808
2809     s->ref->frame->key_frame = IS_IRAP(s);
2810
2811     ret = set_side_data(s);
2812     if (ret < 0)
2813         goto fail;
2814
         /* NOTE(review): presumably converts the HEVC slice_type numbering to
          * the AVPictureType numbering; confirm against both enums. */
2815     s->frame->pict_type = 3 - s->sh.slice_type;
2816
2817     if (!IS_IRAP(s))
2818         ff_hevc_bump_frame(s);
2819
2820     av_frame_unref(s->output_frame);
2821     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2822     if (ret < 0)
2823         goto fail;
2824
         /* only the software path signals the end of setup here */
2825     if (!s->avctx->hwaccel)
2826         ff_thread_finish_setup(s->avctx);
2827
2828     return 0;
2829
2830 fail:
2831     if (s->ref)
2832         ff_hevc_unref_frame(s, s->ref, ~0);
2833     s->ref = NULL;
2834     return ret;
2835 }
2836
     /**
      * Decode a single NAL unit, dispatching on its type.
      *
      * VPS/SPS/PPS/SEI units are first handed to the hwaccel's decode_params()
      * callback (when one is set) and then parsed. Slice units go through
      * hls_slice_header(), may start a new frame via hevc_frame_start(), and are
      * then decoded either by the hwaccel or by hls_slice_data() /
      * hls_slice_data_wpp(). EOS/EOB units bump seq_decode and reset max_ra.
      * AUD and filler data are ignored; any other type is logged and skipped.
      *
      * @param s   decoder context
      * @param nal NAL unit to decode
      * @return 0 on success or when a failure is tolerated, a negative error code
      *         otherwise. Failures routed through the fail label are reported
      *         only when AV_EF_EXPLODE is set in err_recognition; slice header
      *         errors, hevc_frame_start() errors and mismatching VCL NAL types
      *         are always returned.
      */
2837 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2838 {
2839     HEVCLocalContext *lc = s->HEVClc;
2840     GetBitContext *gb    = &lc->gb;
2841     int ctb_addr_ts, ret;
2842
2843     *gb              = nal->gb;
2844     s->nal_unit_type = nal->type;
2845     s->temporal_id   = nal->temporal_id;
2846
2847     switch (s->nal_unit_type) {
2848     case HEVC_NAL_VPS:
2849         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2850             ret = s->avctx->hwaccel->decode_params(s->avctx,
2851                                                    nal->type,
2852                                                    nal->raw_data,
2853                                                    nal->raw_size);
2854             if (ret < 0)
2855                 goto fail;
2856         }
2857         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2858         if (ret < 0)
2859             goto fail;
2860         break;
2861     case HEVC_NAL_SPS:
2862         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2863             ret = s->avctx->hwaccel->decode_params(s->avctx,
2864                                                    nal->type,
2865                                                    nal->raw_data,
2866                                                    nal->raw_size);
2867             if (ret < 0)
2868                 goto fail;
2869         }
2870         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2871                                      s->apply_defdispwin);
2872         if (ret < 0)
2873             goto fail;
2874         break;
2875     case HEVC_NAL_PPS:
2876         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2877             ret = s->avctx->hwaccel->decode_params(s->avctx,
2878                                                    nal->type,
2879                                                    nal->raw_data,
2880                                                    nal->raw_size);
2881             if (ret < 0)
2882                 goto fail;
2883         }
2884         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2885         if (ret < 0)
2886             goto fail;
2887         break;
2888     case HEVC_NAL_SEI_PREFIX:
2889     case HEVC_NAL_SEI_SUFFIX:
2890         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2891             ret = s->avctx->hwaccel->decode_params(s->avctx,
2892                                                    nal->type,
2893                                                    nal->raw_data,
2894                                                    nal->raw_size);
2895             if (ret < 0)
2896                 goto fail;
2897         }
2898         ret = ff_hevc_decode_nal_sei(gb, s->avctx, &s->sei, &s->ps, s->nal_unit_type);
2899         if (ret < 0)
2900             goto fail;
2901         break;
2902     case HEVC_NAL_TRAIL_R:
2903     case HEVC_NAL_TRAIL_N:
2904     case HEVC_NAL_TSA_N:
2905     case HEVC_NAL_TSA_R:
2906     case HEVC_NAL_STSA_N:
2907     case HEVC_NAL_STSA_R:
2908     case HEVC_NAL_BLA_W_LP:
2909     case HEVC_NAL_BLA_W_RADL:
2910     case HEVC_NAL_BLA_N_LP:
2911     case HEVC_NAL_IDR_W_RADL:
2912     case HEVC_NAL_IDR_N_LP:
2913     case HEVC_NAL_CRA_NUT:
2914     case HEVC_NAL_RADL_N:
2915     case HEVC_NAL_RADL_R:
2916     case HEVC_NAL_RASL_N:
2917     case HEVC_NAL_RASL_R:
2918         ret = hls_slice_header(s);
2919         if (ret < 0)
2920             return ret;
2921
             /* honour skip_frame: drop B slices, non-intra slices or non-IRAP
              * pictures depending on the requested discard level */
2922         if (
2923             (s->avctx->skip_frame >= AVDISCARD_BIDIR && s->sh.slice_type == HEVC_SLICE_B) ||
2924             (s->avctx->skip_frame >= AVDISCARD_NONINTRA && s->sh.slice_type != HEVC_SLICE_I) ||
2925             (s->avctx->skip_frame >= AVDISCARD_NONKEY && !IS_IRAP(s))) {
2926             break;
2927         }
2928
2929         if (s->sh.first_slice_in_pic_flag) {
                 /* max_ra == INT_MAX is the value set at init, on EOS/EOB and
                  * on flush; the first CRA/BLA picture seen in that state
                  * records its POC, an IDR picture sets INT_MIN instead */
2930             if (s->max_ra == INT_MAX) {
2931                 if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
2932                     s->max_ra = s->poc;
2933                 } else {
2934                     if (IS_IDR(s))
2935                         s->max_ra = INT_MIN;
2936                 }
2937             }
2938
                 /* RASL pictures whose POC is not above max_ra are not decoded */
2939             if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
2940                 s->poc <= s->max_ra) {
2941                 s->is_decoded = 0;
2942                 break;
2943             } else {
2944                 if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
2945                     s->max_ra = INT_MIN;
2946             }
2947
                 /* counts frames started in this packet; decode_nal_units()
                  * rejects the packet once the count exceeds 2 */
2948             s->overlap ++;
2949             ret = hevc_frame_start(s);
2950             if (ret < 0)
2951                 return ret;
2952         } else if (!s->ref) {
2953             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
                 /* ret still holds the non-negative hls_slice_header() result;
                  * set a real error so that AV_EF_EXPLODE reports this failure */
                 ret = AVERROR_INVALIDDATA;
2954             goto fail;
2955         }
2956
2957         if (s->nal_unit_type != s->first_nal_type) {
2958             av_log(s->avctx, AV_LOG_ERROR,
2959                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2960                    s->first_nal_type, s->nal_unit_type);
2961             return AVERROR_INVALIDDATA;
2962         }
2963
             /* reference picture lists are built only for independent
              * non-intra slice segments */
2964         if (!s->sh.dependent_slice_segment_flag &&
2965             s->sh.slice_type != HEVC_SLICE_I) {
2966             ret = ff_hevc_slice_rpl(s);
2967             if (ret < 0) {
2968                 av_log(s->avctx, AV_LOG_WARNING,
2969                        "Error constructing the reference lists for the current slice.\n");
2970                 goto fail;
2971             }
2972         }
2973
2974         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2975             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2976             if (ret < 0)
2977                 goto fail;
2978         }
2979
2980         if (s->avctx->hwaccel) {
2981             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2982             if (ret < 0)
2983                 goto fail;
2984         } else {
                 /* the WPP path is used only with several slice threads and
                  * when the slice carries entry points */
2985             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2986                 ctb_addr_ts = hls_slice_data_wpp(s, nal);
2987             else
2988                 ctb_addr_ts = hls_slice_data(s);
                 /* the picture is complete once the returned CTB address
                  * reaches the number of CTBs in the picture */
2989             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2990                 s->is_decoded = 1;
2991             }
2992
2993             if (ctb_addr_ts < 0) {
2994                 ret = ctb_addr_ts;
2995                 goto fail;
2996             }
2997         }
2998         break;
2999     case HEVC_NAL_EOS_NUT:
3000     case HEVC_NAL_EOB_NUT:
3001         s->seq_decode = (s->seq_decode + 1) & 0xff;
3002         s->max_ra     = INT_MAX;
3003         break;
3004     case HEVC_NAL_AUD:
3005     case HEVC_NAL_FD_NUT:
3006         break;
3007     default:
3008         av_log(s->avctx, AV_LOG_INFO,
3009                "Skipping NAL unit %d\n", s->nal_unit_type);
3010     }
3011
3012     return 0;
3013 fail:
         /* errors are fatal only when the caller asked for AV_EF_EXPLODE */
3014     if (s->avctx->err_recognition & AV_EF_EXPLODE)
3015         return ret;
3016     return 0;
3017 }
3018
     /**
      * Split a packet into NAL units and decode them in order.
      *
      * @param s      decoder context
      * @param buf    packet data
      * @param length size of buf in bytes
      * @return a negative error code if splitting fails, if a NAL unit fails to
      *         decode, or if more than two frames are started within the packet
      *         (s->overlap > 2); a non-negative value otherwise
      */
3019 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
3020 {
3021     int i, ret = 0;
3022     int eos_at_start = 1;
3023
3024     s->ref = NULL;
3025     s->last_eos = s->eos;
3026     s->eos = 0;
3027     s->overlap = 0;
3028
3029     /* split the input packet into NAL units, so we know the upper bound on the
3030      * number of slices in the frame */
3031     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
3032                                 s->nal_length_size, s->avctx->codec_id, 1, 0);
3033     if (ret < 0) {
3034         av_log(s->avctx, AV_LOG_ERROR,
3035                "Error splitting the input into NAL units.\n");
3036         return ret;
3037     }
3038
         /* EOS/EOB units found before any other NAL unit set last_eos; those
          * found after another NAL unit set eos */
3039     for (i = 0; i < s->pkt.nb_nals; i++) {
3040         if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
3041             s->pkt.nals[i].type == HEVC_NAL_EOS_NUT) {
3042             if (eos_at_start) {
3043                 s->last_eos = 1;
3044             } else {
3045                 s->eos = 1;
3046             }
3047         } else {
3048             eos_at_start = 0;
3049         }
3050     }
3051
3052     /* decode the NAL units */
3053     for (i = 0; i < s->pkt.nb_nals; i++) {
3054         H2645NAL *nal = &s->pkt.nals[i];
3055
             /* skip everything for AVDISCARD_ALL, and non-reference NAL units
              * for AVDISCARD_NONREF and above */
3056         if (s->avctx->skip_frame >= AVDISCARD_ALL ||
3057             (s->avctx->skip_frame >= AVDISCARD_NONREF
3058             && ff_hevc_nal_is_nonref(nal->type)))
3059             continue;
3060
3061         ret = decode_nal_unit(s, nal);
3062         if (ret >= 0 && s->overlap > 2)
3063             ret = AVERROR_INVALIDDATA;
3064         if (ret < 0) {
3065             av_log(s->avctx, AV_LOG_WARNING,
3066                    "Error parsing NAL unit #%d.\n", i);
3067             goto fail;
3068         }
3069     }
3070
         /* reached on success as well as on error */
3071 fail:
         /* with frame threading, report full progress on the current frame;
          * presumably so that other frame threads are not left waiting on it */
3072     if (s->ref && s->threads_type == FF_THREAD_FRAME)
3073         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
3074
3075     return ret;
3076 }
3077
     /**
      * Log a 16-byte MD5 digest as 32 hexadecimal digits, one av_log() call per
      * byte and without a trailing newline.
      *
      * @param log_ctx context passed to av_log()
      * @param level   av_log() level to use
      * @param md5     digest to print
      */
3078 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
3079 {
3080     int i;
3081     for (i = 0; i < 16; i++)
3082         av_log(log_ctx, level, "%02"PRIx8, md5[i]);
3083 }
3084
     /**
      * Compute the MD5 of every plane of a decoded frame and compare it with the
      * digest stored in s->sei.picture_hash.md5[].
      *
      * @param s     decoder context
      * @param frame frame to check
      * @return 0 if all planes match; AVERROR(EINVAL) if the pixel format has no
      *         descriptor; AVERROR(ENOMEM) if the big-endian scratch buffer
      *         cannot be allocated; AVERROR_INVALIDDATA on the first mismatching
      *         plane
      */
3085 static int verify_md5(HEVCContext *s, AVFrame *frame)
3086 {
3087     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3088     int pixel_shift;
3089     int i, j;
3090
3091     if (!desc)
3092         return AVERROR(EINVAL);
3093
         /* 1 when samples are deeper than 8 bits, so a row of w samples is
          * hashed as w << pixel_shift bytes */
3094     pixel_shift = desc->comp[0].depth > 8;
3095
3096     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3097            s->poc);
3098
3099     /* the checksums are LE, so we have to byteswap for >8bpp formats
3100      * on BE arches */
3101 #if HAVE_BIGENDIAN
         /* NOTE(review): the buffer is only sized when it is still NULL; confirm
          * that linesize cannot grow afterwards (e.g. on a resolution change). */
3102     if (pixel_shift && !s->checksum_buf) {
3103         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3104                        FFMAX3(frame->linesize[0], frame->linesize[1],
3105                               frame->linesize[2]));
3106         if (!s->checksum_buf)
3107             return AVERROR(ENOMEM);
3108     }
3109 #endif
3110
3111     for (i = 0; frame->data[i]; i++) {
             /* the hash covers the coded size; planes 1 and 2 are scaled down
              * by the chroma subsampling shifts */
3112         int width  = s->avctx->coded_width;
3113         int height = s->avctx->coded_height;
3114         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
3115         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3116         uint8_t md5[16];
3117
3118         av_md5_init(s->md5_ctx);
             /* hash row by row so that linesize padding is excluded */
3119         for (j = 0; j < h; j++) {
3120             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3121 #if HAVE_BIGENDIAN
3122             if (pixel_shift) {
3123                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3124                                     (const uint16_t *) src, w);
3125                 src = s->checksum_buf;
3126             }
3127 #endif
3128             av_md5_update(s->md5_ctx, src, w << pixel_shift);
3129         }
3130         av_md5_final(s->md5_ctx, md5);
3131
3132         if (!memcmp(md5, s->sei.picture_hash.md5[i], 16)) {
3133             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3134             print_md5(s->avctx, AV_LOG_DEBUG, md5);
3135             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
3136         } else {
3137             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3138             print_md5(s->avctx, AV_LOG_ERROR, md5);
3139             av_log   (s->avctx, AV_LOG_ERROR, " != ");
3140             print_md5(s->avctx, AV_LOG_ERROR, s->sei.picture_hash.md5[i]);
3141             av_log   (s->avctx, AV_LOG_ERROR, "\n");
3142             return AVERROR_INVALIDDATA;
3143         }
3144     }
3145
3146     av_log(s->avctx, AV_LOG_DEBUG, "\n");
3147
3148     return 0;
3149 }
3150
     /**
      * Parse codec extradata through ff_hevc_decode_extradata() and, on the
      * first call, export stream parameters from the first SPS found.
      *
      * @param s      decoder context
      * @param buf    extradata buffer
      * @param length size of buf in bytes
      * @param first  non-zero to export stream parameters from the first
      *               non-NULL entry of s->ps.sps_list
      * @return 0 on success, or the negative error code returned by
      *         ff_hevc_decode_extradata()
      */
3151 static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length, int first)
3152 {
3153     int ret, i;
3154
3155     ret = ff_hevc_decode_extradata(buf, length, &s->ps, &s->sei, &s->is_nalff,
3156                                    &s->nal_length_size, s->avctx->err_recognition,
3157                                    s->apply_defdispwin, s->avctx);
3158     if (ret < 0)
3159         return ret;
3160
3161     /* export stream parameters from the first SPS */
3162     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3163         if (first && s->ps.sps_list[i]) {
3164             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3165             export_stream_params(s->avctx, &s->ps, sps);
3166             break;
3167         }
3168     }
3169
3170     return 0;
3171 }
3172
     /**
      * Decode callback (AVCodec.decode): decode one packet and possibly return a
      * frame.
      *
      * @param avctx      codec context
      * @param data       destination frame, filled when *got_output is set
      * @param got_output set to 1 when a frame was moved into data; for an empty
      *                   packet it receives the result of ff_hevc_output_frame()
      * @param avpkt      input packet; an empty packet (size 0) requests output
      *                   of a frame already held by the decoder
      * @return avpkt->size on success, 0 for an empty packet, or a negative
      *         error code
      */
3173 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3174                              AVPacket *avpkt)
3175 {
3176     int ret;
3177     int new_extradata_size;
3178     uint8_t *new_extradata;
3179     HEVCContext *s = avctx->priv_data;
3180
         /* empty packet: no decoding, only try to output a frame */
3181     if (!avpkt->size) {
3182         ret = ff_hevc_output_frame(s, data, 1);
3183         if (ret < 0)
3184             return ret;
3185
3186         *got_output = ret;
3187         return 0;
3188     }
3189
         /* apply extradata attached to the packet as side data, without
          * re-exporting stream parameters (first = 0) */
3190     new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
3191                                             &new_extradata_size);
3192     if (new_extradata && new_extradata_size > 0) {
3193         ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0);
3194         if (ret < 0)
3195             return ret;
3196     }
3197
3198     s->ref = NULL;
3199     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
3200     if (ret < 0)
3201         return ret;
3202
3203     if (avctx->hwaccel) {
             /* end_frame() is called only if a frame was started in this packet */
3204         if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
3205             av_log(avctx, AV_LOG_ERROR,
3206                    "hardware accelerator failed to decode picture\n");
3207             ff_hevc_unref_frame(s, s->ref, ~0);
3208             return ret;
3209         }
3210     } else {
3211         /* verify the SEI checksum */
3212         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3213             s->sei.picture_hash.is_md5) {
3214             ret = verify_md5(s, s->ref->frame);
                 /* a mismatch is fatal only with AV_EF_EXPLODE */
3215             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3216                 ff_hevc_unref_frame(s, s->ref, ~0);
3217                 return ret;
3218             }
3219         }
3220     }
         /* the picture hash flag is cleared after every packet */
3221     s->sei.picture_hash.is_md5 = 0;
3222
3223     if (s->is_decoded) {
3224         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3225         s->is_decoded = 0;
3226     }
3227
         /* hand over the frame that hevc_frame_start() placed in output_frame */
3228     if (s->output_frame->buf[0]) {
3229         av_frame_move_ref(data, s->output_frame);
3230         *got_output = 1;
3231     }
3232
3233     return avpkt->size;
3234 }
3235
     /**
      * Make dst a new reference to src: reference the thread frame and the
      * tab_mvf, rpl_tab, rpl and (if present) hwaccel private buffers, and copy
      * the scalar fields.
      *
      * @param s   decoder context, used to unreference dst on failure
      * @param dst destination frame
      * @param src source frame
      * @return 0 on success; the error from ff_thread_ref_frame() if that fails;
      *         AVERROR(ENOMEM) if a buffer reference cannot be created, in which
      *         case dst is unreferenced
      */
3236 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3237 {
3238     int ret;
3239
3240     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3241     if (ret < 0)
3242         return ret;
3243
3244     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3245     if (!dst->tab_mvf_buf)
3246         goto fail;
3247     dst->tab_mvf = src->tab_mvf;
3248
3249     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3250     if (!dst->rpl_tab_buf)
3251         goto fail;
3252     dst->rpl_tab = src->rpl_tab;
3253
3254     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3255     if (!dst->rpl_buf)
3256         goto fail;
3257
3258     dst->poc        = src->poc;
3259     dst->ctb_count  = src->ctb_count;
3260     dst->flags      = src->flags;
3261     dst->sequence   = src->sequence;
3262
3263     if (src->hwaccel_picture_private) {
3264         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3265         if (!dst->hwaccel_priv_buf)
3266             goto fail;
3267         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
3268     }
3269
3270     return 0;
3271 fail:
3272     ff_hevc_unref_frame(s, dst, ~0);
3273     return AVERROR(ENOMEM);
3274 }
3275
     /**
      * Close callback (AVCodec.close): release everything owned by the decoder
      * context. Also used by hevc_init_context() and hevc_decode_init() to clean
      * up after a failure.
      *
      * @param avctx codec context
      * @return always 0
      */
3276 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3277 {
3278     HEVCContext       *s = avctx->priv_data;
3279     int i;
3280
3281     pic_arrays_free(s);
3282
3283     av_freep(&s->md5_ctx);
3284
3285     av_freep(&s->cabac_state);
3286
3287     for (i = 0; i < 3; i++) {
3288         av_freep(&s->sao_pixel_buffer_h[i]);
3289         av_freep(&s->sao_pixel_buffer_v[i]);
3290     }
3291     av_frame_free(&s->output_frame);
3292
3293     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3294         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3295         av_frame_free(&s->DPB[i].frame);
3296     }
3297
3298     ff_hevc_ps_uninit(&s->ps);
3299
3300     av_freep(&s->sh.entry_point_offset);
3301     av_freep(&s->sh.offset);
3302     av_freep(&s->sh.size);
3303
         /* entries 1..threads_number-1 are freed here; sList[0] is the context
          * itself (set in hevc_init_context()) and HEVClcList[0] is freed below */
3304     for (i = 1; i < s->threads_number; i++) {
3305         HEVCLocalContext *lc = s->HEVClcList[i];
3306         if (lc) {
3307             av_freep(&s->HEVClcList[i]);
3308             av_freep(&s->sList[i]);
3309         }
3310     }
         /* HEVClc normally aliases HEVClcList[0]; clear it so that it does not
          * dangle once that entry is freed */
3311     if (s->HEVClc == s->HEVClcList[0])
3312         s->HEVClc = NULL;
3313     av_freep(&s->HEVClcList[0]);
3314
3315     ff_h2645_packet_uninit(&s->pkt);
3316
3317     return 0;
3318 }
3319
     /**
      * Allocate the per-context resources: the main local context, the CABAC
      * state, the output frame, one AVFrame per DPB slot and the MD5 context;
      * then initialise the bswap DSP functions and reset the SEI state.
      *
      * @param avctx codec context whose priv_data is the HEVCContext to set up
      * @return 0 on success; AVERROR(ENOMEM) if any allocation fails, after
      *         releasing everything through hevc_decode_free()
      */
3320 static av_cold int hevc_init_context(AVCodecContext *avctx)
3321 {
3322     HEVCContext *s = avctx->priv_data;
3323     int i;
3324
3325     s->avctx = avctx;
3326
3327     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3328     if (!s->HEVClc)
3329         goto fail;
         /* slot 0 of both lists refers to the main local context and to this
          * decoder context itself */
3330     s->HEVClcList[0] = s->HEVClc;
3331     s->sList[0] = s;
3332
3333     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3334     if (!s->cabac_state)
3335         goto fail;
3336
3337     s->output_frame = av_frame_alloc();
3338     if (!s->output_frame)
3339         goto fail;
3340
3341     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3342         s->DPB[i].frame = av_frame_alloc();
3343         if (!s->DPB[i].frame)
3344             goto fail;
3345         s->DPB[i].tf.f = s->DPB[i].frame;
3346     }
3347
         /* INT_MAX is the state in which decode_nal_unit() records max_ra from
          * the next CRA/BLA/IDR picture */
3348     s->max_ra = INT_MAX;
3349
3350     s->md5_ctx = av_md5_alloc();
3351     if (!s->md5_ctx)
3352         goto fail;
3353
3354     ff_bswapdsp_init(&s->bdsp);
3355
3356     s->context_initialized = 1;
3357     s->eos = 0;
3358
3359     ff_hevc_reset_sei(&s->sei);
3360
3361     return 0;
3362
3363 fail:
3364     hevc_decode_free(avctx);
3365     return AVERROR(ENOMEM);
3366 }
3367
3368 #if HAVE_THREADS
     /**
      * Frame-threading callback (AVCodec.update_thread_context): copy decoder
      * state from the source thread's context into the destination's.
      *
      * Re-references every DPB frame and every VPS/SPS/PPS buffer from the
      * source, activates the source's SPS if it differs, and copies the
      * sequence counters, NAL framing parameters, threading settings and the
      * persistent SEI state.
      *
      * @param dst codec context to update
      * @param src codec context to copy from
      * @return 0 on success, a negative error code on failure
      */
3369 static int hevc_update_thread_context(AVCodecContext *dst,
3370                                       const AVCodecContext *src)
3371 {
3372     HEVCContext *s  = dst->priv_data;
3373     HEVCContext *s0 = src->priv_data;
3374     int i, ret;
3375
3376     if (!s->context_initialized) {
3377         ret = hevc_init_context(dst);
3378         if (ret < 0)
3379             return ret;
3380     }
3381
         /* mirror the source DPB: drop our reference in every slot, then
          * re-reference the slots that hold a frame in the source */
3382     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3383         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3384         if (s0->DPB[i].frame->buf[0]) {
3385             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3386             if (ret < 0)
3387                 return ret;
3388         }
3389     }
3390
         /* clear a differing active SPS before the lists are replaced; it is
          * set again through set_sps() below */
3391     if (s->ps.sps != s0->ps.sps)
3392         s->ps.sps = NULL;
3393     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
3394         av_buffer_unref(&s->ps.vps_list[i]);
3395         if (s0->ps.vps_list[i]) {
3396             s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
3397             if (!s->ps.vps_list[i])
3398                 return AVERROR(ENOMEM);
3399         }
3400     }
3401
3402     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3403         av_buffer_unref(&s->ps.sps_list[i]);
3404         if (s0->ps.sps_list[i]) {
3405             s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
3406             if (!s->ps.sps_list[i])
3407                 return AVERROR(ENOMEM);
3408         }
3409     }
3410
3411     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
3412         av_buffer_unref(&s->ps.pps_list[i]);
3413         if (s0->ps.pps_list[i]) {
3414             s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
3415             if (!s->ps.pps_list[i])
3416                 return AVERROR(ENOMEM);
3417         }
3418     }
3419
3420     if (s->ps.sps != s0->ps.sps)
3421         if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
3422             return ret;
3423
3424     s->seq_decode = s0->seq_decode;
3425     s->seq_output = s0->seq_output;
3426     s->pocTid0    = s0->pocTid0;
3427     s->max_ra     = s0->max_ra;
3428     s->eos        = s0->eos;
3429     s->no_rasl_output_flag = s0->no_rasl_output_flag;
3430
3431     s->is_nalff        = s0->is_nalff;
3432     s->nal_length_size = s0->nal_length_size;
3433
3434     s->threads_number      = s0->threads_number;
3435     s->threads_type        = s0->threads_type;
3436
         /* same update that decode_nal_unit() applies for an EOS/EOB NAL unit */
3437     if (s0->eos) {
3438         s->seq_decode = (s->seq_decode + 1) & 0xff;
3439         s->max_ra = INT_MAX;
3440     }
3441
3442     s->sei.frame_packing        = s0->sei.frame_packing;
3443     s->sei.display_orientation  = s0->sei.display_orientation;
3444     s->sei.mastering_display    = s0->sei.mastering_display;
3445     s->sei.content_light        = s0->sei.content_light;
3446     s->sei.alternative_transfer = s0->sei.alternative_transfer;
3447
3448     return 0;
3449 }
3450 #endif
3451
     /**
      * Init callback (AVCodec.init): set up the decoder context, choose the
      * threading configuration and parse any extradata.
      *
      * @param avctx codec context
      * @return 0 on success, a negative error code on failure; if extradata
      *         parsing fails the context is released with hevc_decode_free()
      */
3452 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3453 {
3454     HEVCContext *s = avctx->priv_data;
3455     int ret;
3456
3457     avctx->internal->allocate_progress = 1;
3458
3459     ret = hevc_init_context(avctx);
3460     if (ret < 0)
3461         return ret;
3462
3463     s->enable_parallel_tiles = 0;
3464     s->sei.picture_timing.picture_struct = 0;
         /* overrides the eos = 0 set by hevc_init_context(); the same value is
          * set by hevc_decode_flush() */
3465     s->eos = 1;
3466
3467     atomic_init(&s->wpp_err, 0);
3468
         /* several slice threads are used only when slice threading is active */
3469     if(avctx->active_thread_type & FF_THREAD_SLICE)
3470         s->threads_number = avctx->thread_count;
3471     else
3472         s->threads_number = 1;
3473
         /* first = 1: also export stream parameters from the first SPS */
3474     if (avctx->extradata_size > 0 && avctx->extradata) {
3475         ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size, 1);
3476         if (ret < 0) {
3477             hevc_decode_free(avctx);
3478             return ret;
3479         }
3480     }
3481
         /* frame threading needs more than one thread; otherwise the type falls
          * back to FF_THREAD_SLICE */
3482     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3483             s->threads_type = FF_THREAD_FRAME;
3484         else
3485             s->threads_type = FF_THREAD_SLICE;
3486
3487     return 0;
3488 }
3489
3490 #if HAVE_THREADS
     /**
      * Frame-threading callback (AVCodec.init_thread_copy): zero the private
      * context and initialise it with hevc_init_context().
      *
      * @param avctx codec context of the thread copy
      * @return 0 on success, a negative error code from hevc_init_context()
      */
3491 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3492 {
3493     HEVCContext *s = avctx->priv_data;
3494     int ret;
3495
         /* start from an all-zero context */
3496     memset(s, 0, sizeof(*s));
3497
3498     ret = hevc_init_context(avctx);
3499     if (ret < 0)
3500         return ret;
3501
3502     return 0;
3503 }
3504 #endif
3505
     /**
      * Flush callback (AVCodec.flush): flush the DPB and return max_ra and eos
      * to the values they have after hevc_decode_init().
      *
      * @param avctx codec context
      */
3506 static void hevc_decode_flush(AVCodecContext *avctx)
3507 {
3508     HEVCContext *s = avctx->priv_data;
3509     ff_hevc_flush_dpb(s);
3510     s->max_ra = INT_MAX;
3511     s->eos = 1;
3512 }
3513
     /* OFFSET() gives the byte offset of an option's storage field inside
      * HEVCContext; PAR marks an option as a video decoding parameter. */
3514 #define OFFSET(x) offsetof(HEVCContext, x)
3515 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3516
     /* Private decoder options. Both entries are booleans that default to 0 and
      * are stored in the same apply_defdispwin field. */
3517 static const AVOption options[] = {
3518     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3519         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3520     { "strict-displaywin", "strictly apply default display window size", OFFSET(apply_defdispwin),
3521         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3522     { NULL },
3523 };
3524
     /* AVClass of the decoder's private context; it exposes the options[] table
      * above and is referenced by ff_hevc_decoder.priv_class. */
3525 static const AVClass hevc_decoder_class = {
3526     .class_name = "HEVC decoder",
3527     .item_name  = av_default_item_name,
3528     .option     = options,
3529     .version    = LIBAVUTIL_VERSION_INT,
3530 };
3531
     /* Codec descriptor for the HEVC decoder: binds the callbacks defined in
      * this file, declares slice- and frame-threading capabilities, and lists
      * the hardware acceleration configurations enabled at build time. */
3532 AVCodec ff_hevc_decoder = {
3533     .name                  = "hevc",
3534     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3535     .type                  = AVMEDIA_TYPE_VIDEO,
3536     .id                    = AV_CODEC_ID_HEVC,
3537     .priv_data_size        = sizeof(HEVCContext),
3538     .priv_class            = &hevc_decoder_class,
3539     .init                  = hevc_decode_init,
3540     .close                 = hevc_decode_free,
3541     .decode                = hevc_decode_frame,
3542     .flush                 = hevc_decode_flush,
3543     .update_thread_context = ONLY_IF_THREADS_ENABLED(hevc_update_thread_context),
3544     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(hevc_init_thread_copy),
3545     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3546                              AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3547     .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING,
3548     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
         /* NULL-terminated list; each entry is compiled in only when the
          * corresponding hwaccel is enabled */
3549     .hw_configs            = (const AVCodecHWConfigInternal*[]) {
3550 #if CONFIG_HEVC_DXVA2_HWACCEL
3551                                HWACCEL_DXVA2(hevc),
3552 #endif
3553 #if CONFIG_HEVC_D3D11VA_HWACCEL
3554                                HWACCEL_D3D11VA(hevc),
3555 #endif
3556 #if CONFIG_HEVC_D3D11VA2_HWACCEL
3557                                HWACCEL_D3D11VA2(hevc),
3558 #endif
3559 #if CONFIG_HEVC_NVDEC_HWACCEL
3560                                HWACCEL_NVDEC(hevc),
3561 #endif
3562 #if CONFIG_HEVC_VAAPI_HWACCEL
3563                                HWACCEL_VAAPI(hevc),
3564 #endif
3565 #if CONFIG_HEVC_VDPAU_HWACCEL
3566                                HWACCEL_VDPAU(hevc),
3567 #endif
3568 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
3569                                HWACCEL_VIDEOTOOLBOX(hevc),
3570 #endif
3571                                NULL
3572                            },
3573 };