]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevcdec.c
avcodec/hevc: export chroma sample location
[ffmpeg] / libavcodec / hevcdec.c
1 /*
2  * HEVC video Decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/mastering_display_metadata.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
35
36 #include "bswapdsp.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
39 #include "golomb.h"
40 #include "hevc.h"
41 #include "hevc_data.h"
42 #include "hevc_parse.h"
43 #include "hevcdec.h"
44 #include "hwconfig.h"
45 #include "profiles.h"
46
/*
 * Sparse lookup table: the indices 2, 4, 6, 8, 12, 16, 24, 32, 48 and 64 map
 * to the consecutive values 0..9; every index not listed is zero.
 * NOTE(review): the indices look like block widths in pixels - confirm
 * against the users of this table.
 */
const uint8_t ff_hevc_pel_weight[65] = {
    [2]  = 0,
    [4]  = 1,
    [6]  = 2,
    [8]  = 3,
    [12] = 4,
    [16] = 5,
    [24] = 6,
    [32] = 7,
    [48] = 8,
    [64] = 9,
};
48
49 /**
50  * NOTE: Each function hls_foo corresponds to the function foo in the
51  * specification (HLS stands for High Level Syntax).
52  */
53
54 /**
55  * Section 5.7
56  */
57
58 /* free everything allocated  by pic_arrays_init() */
59 static void pic_arrays_free(HEVCContext *s)
60 {
61     av_freep(&s->sao);
62     av_freep(&s->deblock);
63
64     av_freep(&s->skip_flag);
65     av_freep(&s->tab_ct_depth);
66
67     av_freep(&s->tab_ipm);
68     av_freep(&s->cbf_luma);
69     av_freep(&s->is_pcm);
70
71     av_freep(&s->qp_y_tab);
72     av_freep(&s->tab_slice_address);
73     av_freep(&s->filter_slice_edges);
74
75     av_freep(&s->horizontal_bs);
76     av_freep(&s->vertical_bs);
77
78     av_freep(&s->sh.entry_point_offset);
79     av_freep(&s->sh.size);
80     av_freep(&s->sh.offset);
81
82     av_buffer_pool_uninit(&s->tab_mvf_pool);
83     av_buffer_pool_uninit(&s->rpl_tab_pool);
84 }
85
86 /* allocate arrays that depend on frame dimensions */
87 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
88 {
89     int log2_min_cb_size = sps->log2_min_cb_size;
90     int width            = sps->width;
91     int height           = sps->height;
92     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
93                            ((height >> log2_min_cb_size) + 1);
94     int ctb_count        = sps->ctb_width * sps->ctb_height;
95     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
96
97     s->bs_width  = (width  >> 2) + 1;
98     s->bs_height = (height >> 2) + 1;
99
100     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
101     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
102     if (!s->sao || !s->deblock)
103         goto fail;
104
105     s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
106     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
107     if (!s->skip_flag || !s->tab_ct_depth)
108         goto fail;
109
110     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
111     s->tab_ipm  = av_mallocz(min_pu_size);
112     s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
113     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
114         goto fail;
115
116     s->filter_slice_edges = av_mallocz(ctb_count);
117     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
118                                       sizeof(*s->tab_slice_address));
119     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
120                                       sizeof(*s->qp_y_tab));
121     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
122         goto fail;
123
124     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
125     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
126     if (!s->horizontal_bs || !s->vertical_bs)
127         goto fail;
128
129     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
130                                           av_buffer_allocz);
131     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
132                                           av_buffer_allocz);
133     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
134         goto fail;
135
136     return 0;
137
138 fail:
139     pic_arrays_free(s);
140     return AVERROR(ENOMEM);
141 }
142
143 static int pred_weight_table(HEVCContext *s, GetBitContext *gb)
144 {
145     int i = 0;
146     int j = 0;
147     uint8_t luma_weight_l0_flag[16];
148     uint8_t chroma_weight_l0_flag[16];
149     uint8_t luma_weight_l1_flag[16];
150     uint8_t chroma_weight_l1_flag[16];
151     int luma_log2_weight_denom;
152
153     luma_log2_weight_denom = get_ue_golomb_long(gb);
154     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7) {
155         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
156         return AVERROR_INVALIDDATA;
157     }
158     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
159     if (s->ps.sps->chroma_format_idc != 0) {
160         int64_t chroma_log2_weight_denom = luma_log2_weight_denom + (int64_t)get_se_golomb(gb);
161         if (chroma_log2_weight_denom < 0 || chroma_log2_weight_denom > 7) {
162             av_log(s->avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %"PRId64" is invalid\n", chroma_log2_weight_denom);
163             return AVERROR_INVALIDDATA;
164         }
165         s->sh.chroma_log2_weight_denom = chroma_log2_weight_denom;
166     }
167
168     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
169         luma_weight_l0_flag[i] = get_bits1(gb);
170         if (!luma_weight_l0_flag[i]) {
171             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
172             s->sh.luma_offset_l0[i] = 0;
173         }
174     }
175     if (s->ps.sps->chroma_format_idc != 0) {
176         for (i = 0; i < s->sh.nb_refs[L0]; i++)
177             chroma_weight_l0_flag[i] = get_bits1(gb);
178     } else {
179         for (i = 0; i < s->sh.nb_refs[L0]; i++)
180             chroma_weight_l0_flag[i] = 0;
181     }
182     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
183         if (luma_weight_l0_flag[i]) {
184             int delta_luma_weight_l0 = get_se_golomb(gb);
185             if ((int8_t)delta_luma_weight_l0 != delta_luma_weight_l0)
186                 return AVERROR_INVALIDDATA;
187             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
188             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
189         }
190         if (chroma_weight_l0_flag[i]) {
191             for (j = 0; j < 2; j++) {
192                 int delta_chroma_weight_l0 = get_se_golomb(gb);
193                 int delta_chroma_offset_l0 = get_se_golomb(gb);
194
195                 if (   (int8_t)delta_chroma_weight_l0 != delta_chroma_weight_l0
196                     || delta_chroma_offset_l0 < -(1<<17) || delta_chroma_offset_l0 > (1<<17)) {
197                     return AVERROR_INVALIDDATA;
198                 }
199
200                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
201                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
202                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
203             }
204         } else {
205             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
206             s->sh.chroma_offset_l0[i][0] = 0;
207             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
208             s->sh.chroma_offset_l0[i][1] = 0;
209         }
210     }
211     if (s->sh.slice_type == HEVC_SLICE_B) {
212         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
213             luma_weight_l1_flag[i] = get_bits1(gb);
214             if (!luma_weight_l1_flag[i]) {
215                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
216                 s->sh.luma_offset_l1[i] = 0;
217             }
218         }
219         if (s->ps.sps->chroma_format_idc != 0) {
220             for (i = 0; i < s->sh.nb_refs[L1]; i++)
221                 chroma_weight_l1_flag[i] = get_bits1(gb);
222         } else {
223             for (i = 0; i < s->sh.nb_refs[L1]; i++)
224                 chroma_weight_l1_flag[i] = 0;
225         }
226         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
227             if (luma_weight_l1_flag[i]) {
228                 int delta_luma_weight_l1 = get_se_golomb(gb);
229                 if ((int8_t)delta_luma_weight_l1 != delta_luma_weight_l1)
230                     return AVERROR_INVALIDDATA;
231                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
232                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
233             }
234             if (chroma_weight_l1_flag[i]) {
235                 for (j = 0; j < 2; j++) {
236                     int delta_chroma_weight_l1 = get_se_golomb(gb);
237                     int delta_chroma_offset_l1 = get_se_golomb(gb);
238
239                     if (   (int8_t)delta_chroma_weight_l1 != delta_chroma_weight_l1
240                         || delta_chroma_offset_l1 < -(1<<17) || delta_chroma_offset_l1 > (1<<17)) {
241                         return AVERROR_INVALIDDATA;
242                     }
243
244                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
245                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
246                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
247                 }
248             } else {
249                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
250                 s->sh.chroma_offset_l1[i][0] = 0;
251                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
252                 s->sh.chroma_offset_l1[i][1] = 0;
253             }
254         }
255     }
256     return 0;
257 }
258
259 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
260 {
261     const HEVCSPS *sps = s->ps.sps;
262     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
263     int prev_delta_msb = 0;
264     unsigned int nb_sps = 0, nb_sh;
265     int i;
266
267     rps->nb_refs = 0;
268     if (!sps->long_term_ref_pics_present_flag)
269         return 0;
270
271     if (sps->num_long_term_ref_pics_sps > 0)
272         nb_sps = get_ue_golomb_long(gb);
273     nb_sh = get_ue_golomb_long(gb);
274
275     if (nb_sps > sps->num_long_term_ref_pics_sps)
276         return AVERROR_INVALIDDATA;
277     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
278         return AVERROR_INVALIDDATA;
279
280     rps->nb_refs = nb_sh + nb_sps;
281
282     for (i = 0; i < rps->nb_refs; i++) {
283
284         if (i < nb_sps) {
285             uint8_t lt_idx_sps = 0;
286
287             if (sps->num_long_term_ref_pics_sps > 1)
288                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
289
290             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
291             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
292         } else {
293             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
294             rps->used[i] = get_bits1(gb);
295         }
296
297         rps->poc_msb_present[i] = get_bits1(gb);
298         if (rps->poc_msb_present[i]) {
299             int64_t delta = get_ue_golomb_long(gb);
300             int64_t poc;
301
302             if (i && i != nb_sps)
303                 delta += prev_delta_msb;
304
305             poc = rps->poc[i] + s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
306             if (poc != (int32_t)poc)
307                 return AVERROR_INVALIDDATA;
308             rps->poc[i] = poc;
309             prev_delta_msb = delta;
310         }
311     }
312
313     return 0;
314 }
315
316 static void export_stream_params(HEVCContext *s, const HEVCSPS *sps)
317 {
318     AVCodecContext *avctx = s->avctx;
319     const HEVCParamSets *ps = &s->ps;
320     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
321     const HEVCWindow *ow = &sps->output_window;
322     unsigned int num = 0, den = 0;
323
324     avctx->pix_fmt             = sps->pix_fmt;
325     avctx->coded_width         = sps->width;
326     avctx->coded_height        = sps->height;
327     avctx->width               = sps->width  - ow->left_offset - ow->right_offset;
328     avctx->height              = sps->height - ow->top_offset  - ow->bottom_offset;
329     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
330     avctx->profile             = sps->ptl.general_ptl.profile_idc;
331     avctx->level               = sps->ptl.general_ptl.level_idc;
332
333     ff_set_sar(avctx, sps->vui.sar);
334
335     if (sps->vui.video_signal_type_present_flag)
336         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
337                                                             : AVCOL_RANGE_MPEG;
338     else
339         avctx->color_range = AVCOL_RANGE_MPEG;
340
341     if (sps->vui.colour_description_present_flag) {
342         avctx->color_primaries = sps->vui.colour_primaries;
343         avctx->color_trc       = sps->vui.transfer_characteristic;
344         avctx->colorspace      = sps->vui.matrix_coeffs;
345     } else {
346         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
347         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
348         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
349     }
350
351     avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
352     if (sps->chroma_format_idc == 1) {
353         if (sps->vui.chroma_loc_info_present_flag) {
354             if (sps->vui.chroma_sample_loc_type_top_field <= 5)
355                 avctx->chroma_sample_location = sps->vui.chroma_sample_loc_type_top_field + 1;
356         } else
357             avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
358     }
359
360     if (vps->vps_timing_info_present_flag) {
361         num = vps->vps_num_units_in_tick;
362         den = vps->vps_time_scale;
363     } else if (sps->vui.vui_timing_info_present_flag) {
364         num = sps->vui.vui_num_units_in_tick;
365         den = sps->vui.vui_time_scale;
366     }
367
368     if (num != 0 && den != 0)
369         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
370                   num, den, 1 << 30);
371
372     if (s->sei.alternative_transfer.present &&
373         av_color_transfer_name(s->sei.alternative_transfer.preferred_transfer_characteristics) &&
374         s->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) {
375         avctx->color_trc = s->sei.alternative_transfer.preferred_transfer_characteristics;
376     }
377 }
378
379 static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
380 {
381 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
382                      CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
383                      CONFIG_HEVC_NVDEC_HWACCEL + \
384                      CONFIG_HEVC_VAAPI_HWACCEL + \
385                      CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
386                      CONFIG_HEVC_VDPAU_HWACCEL)
387     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
388
389     switch (sps->pix_fmt) {
390     case AV_PIX_FMT_YUV420P:
391     case AV_PIX_FMT_YUVJ420P:
392 #if CONFIG_HEVC_DXVA2_HWACCEL
393         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
394 #endif
395 #if CONFIG_HEVC_D3D11VA_HWACCEL
396         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
397         *fmt++ = AV_PIX_FMT_D3D11;
398 #endif
399 #if CONFIG_HEVC_VAAPI_HWACCEL
400         *fmt++ = AV_PIX_FMT_VAAPI;
401 #endif
402 #if CONFIG_HEVC_VDPAU_HWACCEL
403         *fmt++ = AV_PIX_FMT_VDPAU;
404 #endif
405 #if CONFIG_HEVC_NVDEC_HWACCEL
406         *fmt++ = AV_PIX_FMT_CUDA;
407 #endif
408 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
409         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
410 #endif
411         break;
412     case AV_PIX_FMT_YUV420P10:
413 #if CONFIG_HEVC_DXVA2_HWACCEL
414         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
415 #endif
416 #if CONFIG_HEVC_D3D11VA_HWACCEL
417         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
418         *fmt++ = AV_PIX_FMT_D3D11;
419 #endif
420 #if CONFIG_HEVC_VAAPI_HWACCEL
421         *fmt++ = AV_PIX_FMT_VAAPI;
422 #endif
423 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
424         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
425 #endif
426 #if CONFIG_HEVC_NVDEC_HWACCEL
427         *fmt++ = AV_PIX_FMT_CUDA;
428 #endif
429         break;
430     case AV_PIX_FMT_YUV444P:
431 #if CONFIG_HEVC_VDPAU_HWACCEL
432         *fmt++ = AV_PIX_FMT_VDPAU;
433 #endif
434 #if CONFIG_HEVC_NVDEC_HWACCEL
435         *fmt++ = AV_PIX_FMT_CUDA;
436 #endif
437         break;
438     case AV_PIX_FMT_YUV422P:
439     case AV_PIX_FMT_YUV422P10LE:
440 #if CONFIG_HEVC_VAAPI_HWACCEL
441        *fmt++ = AV_PIX_FMT_VAAPI;
442 #endif
443         break;
444     case AV_PIX_FMT_YUV420P12:
445     case AV_PIX_FMT_YUV444P10:
446     case AV_PIX_FMT_YUV444P12:
447 #if CONFIG_HEVC_NVDEC_HWACCEL
448         *fmt++ = AV_PIX_FMT_CUDA;
449 #endif
450         break;
451     }
452
453     *fmt++ = sps->pix_fmt;
454     *fmt = AV_PIX_FMT_NONE;
455
456     return ff_thread_get_format(s->avctx, pix_fmts);
457 }
458
459 static int set_sps(HEVCContext *s, const HEVCSPS *sps,
460                    enum AVPixelFormat pix_fmt)
461 {
462     int ret, i;
463
464     pic_arrays_free(s);
465     s->ps.sps = NULL;
466     s->ps.vps = NULL;
467
468     if (!sps)
469         return 0;
470
471     ret = pic_arrays_init(s, sps);
472     if (ret < 0)
473         goto fail;
474
475     export_stream_params(s, sps);
476
477     s->avctx->pix_fmt = pix_fmt;
478
479     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
480     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
481     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
482
483     for (i = 0; i < 3; i++) {
484         av_freep(&s->sao_pixel_buffer_h[i]);
485         av_freep(&s->sao_pixel_buffer_v[i]);
486     }
487
488     if (sps->sao_enabled && !s->avctx->hwaccel) {
489         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
490         int c_idx;
491
492         for(c_idx = 0; c_idx < c_count; c_idx++) {
493             int w = sps->width >> sps->hshift[c_idx];
494             int h = sps->height >> sps->vshift[c_idx];
495             s->sao_pixel_buffer_h[c_idx] =
496                 av_malloc((w * 2 * sps->ctb_height) <<
497                           sps->pixel_shift);
498             s->sao_pixel_buffer_v[c_idx] =
499                 av_malloc((h * 2 * sps->ctb_width) <<
500                           sps->pixel_shift);
501         }
502     }
503
504     s->ps.sps = sps;
505     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
506
507     return 0;
508
509 fail:
510     pic_arrays_free(s);
511     s->ps.sps = NULL;
512     return ret;
513 }
514
515 static int hls_slice_header(HEVCContext *s)
516 {
517     GetBitContext *gb = &s->HEVClc->gb;
518     SliceHeader *sh   = &s->sh;
519     int i, ret;
520
521     // Coded parameters
522     sh->first_slice_in_pic_flag = get_bits1(gb);
523     if (s->ref && sh->first_slice_in_pic_flag) {
524         av_log(s->avctx, AV_LOG_ERROR, "Two slices reporting being the first in the same frame.\n");
525         return 1; // This slice will be skipped later, do not corrupt state
526     }
527
528     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
529         s->seq_decode = (s->seq_decode + 1) & 0xff;
530         s->max_ra     = INT_MAX;
531         if (IS_IDR(s))
532             ff_hevc_clear_refs(s);
533     }
534     sh->no_output_of_prior_pics_flag = 0;
535     if (IS_IRAP(s))
536         sh->no_output_of_prior_pics_flag = get_bits1(gb);
537
538     sh->pps_id = get_ue_golomb_long(gb);
539     if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
540         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
541         return AVERROR_INVALIDDATA;
542     }
543     if (!sh->first_slice_in_pic_flag &&
544         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
545         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
546         return AVERROR_INVALIDDATA;
547     }
548     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
549     if (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos == 1)
550         sh->no_output_of_prior_pics_flag = 1;
551
552     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
553         const HEVCSPS *sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
554         const HEVCSPS *last_sps = s->ps.sps;
555         enum AVPixelFormat pix_fmt;
556
557         if (last_sps && IS_IRAP(s) && s->nal_unit_type != HEVC_NAL_CRA_NUT) {
558             if (sps->width != last_sps->width || sps->height != last_sps->height ||
559                 sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering !=
560                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
561                 sh->no_output_of_prior_pics_flag = 0;
562         }
563         ff_hevc_clear_refs(s);
564
565         ret = set_sps(s, sps, sps->pix_fmt);
566         if (ret < 0)
567             return ret;
568
569         pix_fmt = get_format(s, sps);
570         if (pix_fmt < 0)
571             return pix_fmt;
572         s->avctx->pix_fmt = pix_fmt;
573
574         s->seq_decode = (s->seq_decode + 1) & 0xff;
575         s->max_ra     = INT_MAX;
576     }
577
578     sh->dependent_slice_segment_flag = 0;
579     if (!sh->first_slice_in_pic_flag) {
580         int slice_address_length;
581
582         if (s->ps.pps->dependent_slice_segments_enabled_flag)
583             sh->dependent_slice_segment_flag = get_bits1(gb);
584
585         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
586                                             s->ps.sps->ctb_height);
587         sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
588         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
589             av_log(s->avctx, AV_LOG_ERROR,
590                    "Invalid slice segment address: %u.\n",
591                    sh->slice_segment_addr);
592             return AVERROR_INVALIDDATA;
593         }
594
595         if (!sh->dependent_slice_segment_flag) {
596             sh->slice_addr = sh->slice_segment_addr;
597             s->slice_idx++;
598         }
599     } else {
600         sh->slice_segment_addr = sh->slice_addr = 0;
601         s->slice_idx           = 0;
602         s->slice_initialized   = 0;
603     }
604
605     if (!sh->dependent_slice_segment_flag) {
606         s->slice_initialized = 0;
607
608         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
609             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
610
611         sh->slice_type = get_ue_golomb_long(gb);
612         if (!(sh->slice_type == HEVC_SLICE_I ||
613               sh->slice_type == HEVC_SLICE_P ||
614               sh->slice_type == HEVC_SLICE_B)) {
615             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
616                    sh->slice_type);
617             return AVERROR_INVALIDDATA;
618         }
619         if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I) {
620             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
621             return AVERROR_INVALIDDATA;
622         }
623
624         // when flag is not present, picture is inferred to be output
625         sh->pic_output_flag = 1;
626         if (s->ps.pps->output_flag_present_flag)
627             sh->pic_output_flag = get_bits1(gb);
628
629         if (s->ps.sps->separate_colour_plane_flag)
630             sh->colour_plane_id = get_bits(gb, 2);
631
632         if (!IS_IDR(s)) {
633             int poc, pos;
634
635             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
636             poc = ff_hevc_compute_poc(s->ps.sps, s->pocTid0, sh->pic_order_cnt_lsb, s->nal_unit_type);
637             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
638                 av_log(s->avctx, AV_LOG_WARNING,
639                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
640                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
641                     return AVERROR_INVALIDDATA;
642                 poc = s->poc;
643             }
644             s->poc = poc;
645
646             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
647             pos = get_bits_left(gb);
648             if (!sh->short_term_ref_pic_set_sps_flag) {
649                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
650                 if (ret < 0)
651                     return ret;
652
653                 sh->short_term_rps = &sh->slice_rps;
654             } else {
655                 int numbits, rps_idx;
656
657                 if (!s->ps.sps->nb_st_rps) {
658                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
659                     return AVERROR_INVALIDDATA;
660                 }
661
662                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
663                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
664                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
665             }
666             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
667
668             pos = get_bits_left(gb);
669             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
670             if (ret < 0) {
671                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
672                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
673                     return AVERROR_INVALIDDATA;
674             }
675             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
676
677             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
678                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
679             else
680                 sh->slice_temporal_mvp_enabled_flag = 0;
681         } else {
682             s->sh.short_term_rps = NULL;
683             s->poc               = 0;
684         }
685
686         /* 8.3.1 */
687         if (sh->first_slice_in_pic_flag && s->temporal_id == 0 &&
688             s->nal_unit_type != HEVC_NAL_TRAIL_N &&
689             s->nal_unit_type != HEVC_NAL_TSA_N   &&
690             s->nal_unit_type != HEVC_NAL_STSA_N  &&
691             s->nal_unit_type != HEVC_NAL_RADL_N  &&
692             s->nal_unit_type != HEVC_NAL_RADL_R  &&
693             s->nal_unit_type != HEVC_NAL_RASL_N  &&
694             s->nal_unit_type != HEVC_NAL_RASL_R)
695             s->pocTid0 = s->poc;
696
697         if (s->ps.sps->sao_enabled) {
698             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
699             if (s->ps.sps->chroma_format_idc) {
700                 sh->slice_sample_adaptive_offset_flag[1] =
701                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
702             }
703         } else {
704             sh->slice_sample_adaptive_offset_flag[0] = 0;
705             sh->slice_sample_adaptive_offset_flag[1] = 0;
706             sh->slice_sample_adaptive_offset_flag[2] = 0;
707         }
708
709         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
710         if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
711             int nb_refs;
712
713             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
714             if (sh->slice_type == HEVC_SLICE_B)
715                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
716
717             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
718                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
719                 if (sh->slice_type == HEVC_SLICE_B)
720                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
721             }
722             if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
723                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
724                        sh->nb_refs[L0], sh->nb_refs[L1]);
725                 return AVERROR_INVALIDDATA;
726             }
727
728             sh->rpl_modification_flag[0] = 0;
729             sh->rpl_modification_flag[1] = 0;
730             nb_refs = ff_hevc_frame_nb_refs(s);
731             if (!nb_refs) {
732                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
733                 return AVERROR_INVALIDDATA;
734             }
735
736             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
737                 sh->rpl_modification_flag[0] = get_bits1(gb);
738                 if (sh->rpl_modification_flag[0]) {
739                     for (i = 0; i < sh->nb_refs[L0]; i++)
740                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
741                 }
742
743                 if (sh->slice_type == HEVC_SLICE_B) {
744                     sh->rpl_modification_flag[1] = get_bits1(gb);
745                     if (sh->rpl_modification_flag[1] == 1)
746                         for (i = 0; i < sh->nb_refs[L1]; i++)
747                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
748                 }
749             }
750
751             if (sh->slice_type == HEVC_SLICE_B)
752                 sh->mvd_l1_zero_flag = get_bits1(gb);
753
754             if (s->ps.pps->cabac_init_present_flag)
755                 sh->cabac_init_flag = get_bits1(gb);
756             else
757                 sh->cabac_init_flag = 0;
758
759             sh->collocated_ref_idx = 0;
760             if (sh->slice_temporal_mvp_enabled_flag) {
761                 sh->collocated_list = L0;
762                 if (sh->slice_type == HEVC_SLICE_B)
763                     sh->collocated_list = !get_bits1(gb);
764
765                 if (sh->nb_refs[sh->collocated_list] > 1) {
766                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
767                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
768                         av_log(s->avctx, AV_LOG_ERROR,
769                                "Invalid collocated_ref_idx: %d.\n",
770                                sh->collocated_ref_idx);
771                         return AVERROR_INVALIDDATA;
772                     }
773                 }
774             }
775
776             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == HEVC_SLICE_P) ||
777                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) {
778                 int ret = pred_weight_table(s, gb);
779                 if (ret < 0)
780                     return ret;
781             }
782
783             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
784             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
785                 av_log(s->avctx, AV_LOG_ERROR,
786                        "Invalid number of merging MVP candidates: %d.\n",
787                        sh->max_num_merge_cand);
788                 return AVERROR_INVALIDDATA;
789             }
790         }
791
792         sh->slice_qp_delta = get_se_golomb(gb);
793
794         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
795             sh->slice_cb_qp_offset = get_se_golomb(gb);
796             sh->slice_cr_qp_offset = get_se_golomb(gb);
797         } else {
798             sh->slice_cb_qp_offset = 0;
799             sh->slice_cr_qp_offset = 0;
800         }
801
802         if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
803             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
804         else
805             sh->cu_chroma_qp_offset_enabled_flag = 0;
806
807         if (s->ps.pps->deblocking_filter_control_present_flag) {
808             int deblocking_filter_override_flag = 0;
809
810             if (s->ps.pps->deblocking_filter_override_enabled_flag)
811                 deblocking_filter_override_flag = get_bits1(gb);
812
813             if (deblocking_filter_override_flag) {
814                 sh->disable_deblocking_filter_flag = get_bits1(gb);
815                 if (!sh->disable_deblocking_filter_flag) {
816                     int beta_offset_div2 = get_se_golomb(gb);
817                     int tc_offset_div2   = get_se_golomb(gb) ;
818                     if (beta_offset_div2 < -6 || beta_offset_div2 > 6 ||
819                         tc_offset_div2   < -6 || tc_offset_div2   > 6) {
820                         av_log(s->avctx, AV_LOG_ERROR,
821                             "Invalid deblock filter offsets: %d, %d\n",
822                             beta_offset_div2, tc_offset_div2);
823                         return AVERROR_INVALIDDATA;
824                     }
825                     sh->beta_offset = beta_offset_div2 * 2;
826                     sh->tc_offset   =   tc_offset_div2 * 2;
827                 }
828             } else {
829                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
830                 sh->beta_offset                    = s->ps.pps->beta_offset;
831                 sh->tc_offset                      = s->ps.pps->tc_offset;
832             }
833         } else {
834             sh->disable_deblocking_filter_flag = 0;
835             sh->beta_offset                    = 0;
836             sh->tc_offset                      = 0;
837         }
838
839         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
840             (sh->slice_sample_adaptive_offset_flag[0] ||
841              sh->slice_sample_adaptive_offset_flag[1] ||
842              !sh->disable_deblocking_filter_flag)) {
843             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
844         } else {
845             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
846         }
847     } else if (!s->slice_initialized) {
848         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
849         return AVERROR_INVALIDDATA;
850     }
851
852     sh->num_entry_point_offsets = 0;
853     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
854         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
855         // It would be possible to bound this tighter but this here is simpler
856         if (num_entry_point_offsets > get_bits_left(gb)) {
857             av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
858             return AVERROR_INVALIDDATA;
859         }
860
861         sh->num_entry_point_offsets = num_entry_point_offsets;
862         if (sh->num_entry_point_offsets > 0) {
863             int offset_len = get_ue_golomb_long(gb) + 1;
864
865             if (offset_len < 1 || offset_len > 32) {
866                 sh->num_entry_point_offsets = 0;
867                 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
868                 return AVERROR_INVALIDDATA;
869             }
870
871             av_freep(&sh->entry_point_offset);
872             av_freep(&sh->offset);
873             av_freep(&sh->size);
874             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned));
875             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
876             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
877             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
878                 sh->num_entry_point_offsets = 0;
879                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
880                 return AVERROR(ENOMEM);
881             }
882             for (i = 0; i < sh->num_entry_point_offsets; i++) {
883                 unsigned val = get_bits_long(gb, offset_len);
884                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
885             }
886             if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
887                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
888                 s->threads_number = 1;
889             } else
890                 s->enable_parallel_tiles = 0;
891         } else
892             s->enable_parallel_tiles = 0;
893     }
894
895     if (s->ps.pps->slice_header_extension_present_flag) {
896         unsigned int length = get_ue_golomb_long(gb);
897         if (length*8LL > get_bits_left(gb)) {
898             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
899             return AVERROR_INVALIDDATA;
900         }
901         for (i = 0; i < length; i++)
902             skip_bits(gb, 8);  // slice_header_extension_data_byte
903     }
904
905     // Inferred parameters
906     sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
907     if (sh->slice_qp > 51 ||
908         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
909         av_log(s->avctx, AV_LOG_ERROR,
910                "The slice_qp %d is outside the valid range "
911                "[%d, 51].\n",
912                sh->slice_qp,
913                -s->ps.sps->qp_bd_offset);
914         return AVERROR_INVALIDDATA;
915     }
916
917     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
918
919     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
920         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
921         return AVERROR_INVALIDDATA;
922     }
923
924     if (get_bits_left(gb) < 0) {
925         av_log(s->avctx, AV_LOG_ERROR,
926                "Overread slice header by %d bits\n", -get_bits_left(gb));
927         return AVERROR_INVALIDDATA;
928     }
929
930     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
931
932     if (!s->ps.pps->cu_qp_delta_enabled_flag)
933         s->HEVClc->qp_y = s->sh.slice_qp;
934
935     s->slice_initialized = 1;
936     s->HEVClc->tu.cu_qp_offset_cb = 0;
937     s->HEVClc->tu.cu_qp_offset_cr = 0;
938
939     return 0;
940 }
941
942 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
943
944 #define SET_SAO(elem, value)                            \
945 do {                                                    \
946     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
947         sao->elem = value;                              \
948     else if (sao_merge_left_flag)                       \
949         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
950     else if (sao_merge_up_flag)                         \
951         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
952     else                                                \
953         sao->elem = 0;                                  \
954 } while (0)
955
956 static void hls_sao_param(HEVCContext *s, int rx, int ry)
957 {
958     HEVCLocalContext *lc    = s->HEVClc;
959     int sao_merge_left_flag = 0;
960     int sao_merge_up_flag   = 0;
961     SAOParams *sao          = &CTB(s->sao, rx, ry);
962     int c_idx, i;
963
964     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
965         s->sh.slice_sample_adaptive_offset_flag[1]) {
966         if (rx > 0) {
967             if (lc->ctb_left_flag)
968                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
969         }
970         if (ry > 0 && !sao_merge_left_flag) {
971             if (lc->ctb_up_flag)
972                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
973         }
974     }
975
976     for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
977         int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
978                                                  s->ps.pps->log2_sao_offset_scale_chroma;
979
980         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
981             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
982             continue;
983         }
984
985         if (c_idx == 2) {
986             sao->type_idx[2] = sao->type_idx[1];
987             sao->eo_class[2] = sao->eo_class[1];
988         } else {
989             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
990         }
991
992         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
993             continue;
994
995         for (i = 0; i < 4; i++)
996             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
997
998         if (sao->type_idx[c_idx] == SAO_BAND) {
999             for (i = 0; i < 4; i++) {
1000                 if (sao->offset_abs[c_idx][i]) {
1001                     SET_SAO(offset_sign[c_idx][i],
1002                             ff_hevc_sao_offset_sign_decode(s));
1003                 } else {
1004                     sao->offset_sign[c_idx][i] = 0;
1005                 }
1006             }
1007             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
1008         } else if (c_idx != 2) {
1009             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
1010         }
1011
1012         // Inferred parameters
1013         sao->offset_val[c_idx][0] = 0;
1014         for (i = 0; i < 4; i++) {
1015             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
1016             if (sao->type_idx[c_idx] == SAO_EDGE) {
1017                 if (i > 1)
1018                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
1019             } else if (sao->offset_sign[c_idx][i]) {
1020                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
1021             }
1022             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
1023         }
1024     }
1025 }
1026
1027 #undef SET_SAO
1028 #undef CTB
1029
1030 static int hls_cross_component_pred(HEVCContext *s, int idx) {
1031     HEVCLocalContext *lc    = s->HEVClc;
1032     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
1033
1034     if (log2_res_scale_abs_plus1 !=  0) {
1035         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
1036         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
1037                                (1 - 2 * res_scale_sign_flag);
1038     } else {
1039         lc->tu.res_scale_val = 0;
1040     }
1041
1042
1043     return 0;
1044 }
1045
1046 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1047                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1048                               int log2_cb_size, int log2_trafo_size,
1049                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
1050 {
1051     HEVCLocalContext *lc = s->HEVClc;
1052     const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
1053     int i;
1054
1055     if (lc->cu.pred_mode == MODE_INTRA) {
1056         int trafo_size = 1 << log2_trafo_size;
1057         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1058
1059         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1060     }
1061
1062     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
1063         (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1064         int scan_idx   = SCAN_DIAG;
1065         int scan_idx_c = SCAN_DIAG;
1066         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
1067                          (s->ps.sps->chroma_format_idc == 2 &&
1068                          (cbf_cb[1] || cbf_cr[1]));
1069
1070         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1071             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1072             if (lc->tu.cu_qp_delta != 0)
1073                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1074                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1075             lc->tu.is_cu_qp_delta_coded = 1;
1076
1077             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1078                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1079                 av_log(s->avctx, AV_LOG_ERROR,
1080                        "The cu_qp_delta %d is outside the valid range "
1081                        "[%d, %d].\n",
1082                        lc->tu.cu_qp_delta,
1083                        -(26 + s->ps.sps->qp_bd_offset / 2),
1084                         (25 + s->ps.sps->qp_bd_offset / 2));
1085                 return AVERROR_INVALIDDATA;
1086             }
1087
1088             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
1089         }
1090
1091         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
1092             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
1093             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
1094             if (cu_chroma_qp_offset_flag) {
1095                 int cu_chroma_qp_offset_idx  = 0;
1096                 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
1097                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
1098                     av_log(s->avctx, AV_LOG_ERROR,
1099                         "cu_chroma_qp_offset_idx not yet tested.\n");
1100                 }
1101                 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
1102                 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
1103             } else {
1104                 lc->tu.cu_qp_offset_cb = 0;
1105                 lc->tu.cu_qp_offset_cr = 0;
1106             }
1107             lc->tu.is_cu_chroma_qp_offset_coded = 1;
1108         }
1109
1110         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1111             if (lc->tu.intra_pred_mode >= 6 &&
1112                 lc->tu.intra_pred_mode <= 14) {
1113                 scan_idx = SCAN_VERT;
1114             } else if (lc->tu.intra_pred_mode >= 22 &&
1115                        lc->tu.intra_pred_mode <= 30) {
1116                 scan_idx = SCAN_HORIZ;
1117             }
1118
1119             if (lc->tu.intra_pred_mode_c >=  6 &&
1120                 lc->tu.intra_pred_mode_c <= 14) {
1121                 scan_idx_c = SCAN_VERT;
1122             } else if (lc->tu.intra_pred_mode_c >= 22 &&
1123                        lc->tu.intra_pred_mode_c <= 30) {
1124                 scan_idx_c = SCAN_HORIZ;
1125             }
1126         }
1127
1128         lc->tu.cross_pf = 0;
1129
1130         if (cbf_luma)
1131             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1132         if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1133             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1134             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1135             lc->tu.cross_pf  = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1136                                 (lc->cu.pred_mode == MODE_INTER ||
1137                                  (lc->tu.chroma_mode_c ==  4)));
1138
1139             if (lc->tu.cross_pf) {
1140                 hls_cross_component_pred(s, 0);
1141             }
1142             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1143                 if (lc->cu.pred_mode == MODE_INTRA) {
1144                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1145                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1146                 }
1147                 if (cbf_cb[i])
1148                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1149                                                 log2_trafo_size_c, scan_idx_c, 1);
1150                 else
1151                     if (lc->tu.cross_pf) {
1152                         ptrdiff_t stride = s->frame->linesize[1];
1153                         int hshift = s->ps.sps->hshift[1];
1154                         int vshift = s->ps.sps->vshift[1];
1155                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1156                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1157                         int size = 1 << log2_trafo_size_c;
1158
1159                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1160                                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1161                         for (i = 0; i < (size * size); i++) {
1162                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1163                         }
1164                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1165                     }
1166             }
1167
1168             if (lc->tu.cross_pf) {
1169                 hls_cross_component_pred(s, 1);
1170             }
1171             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1172                 if (lc->cu.pred_mode == MODE_INTRA) {
1173                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1174                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1175                 }
1176                 if (cbf_cr[i])
1177                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1178                                                 log2_trafo_size_c, scan_idx_c, 2);
1179                 else
1180                     if (lc->tu.cross_pf) {
1181                         ptrdiff_t stride = s->frame->linesize[2];
1182                         int hshift = s->ps.sps->hshift[2];
1183                         int vshift = s->ps.sps->vshift[2];
1184                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1185                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1186                         int size = 1 << log2_trafo_size_c;
1187
1188                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1189                                                           ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1190                         for (i = 0; i < (size * size); i++) {
1191                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1192                         }
1193                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1194                     }
1195             }
1196         } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1197             int trafo_size_h = 1 << (log2_trafo_size + 1);
1198             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1199             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1200                 if (lc->cu.pred_mode == MODE_INTRA) {
1201                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1202                                                     trafo_size_h, trafo_size_v);
1203                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1204                 }
1205                 if (cbf_cb[i])
1206                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1207                                                 log2_trafo_size, scan_idx_c, 1);
1208             }
1209             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1210                 if (lc->cu.pred_mode == MODE_INTRA) {
1211                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1212                                                 trafo_size_h, trafo_size_v);
1213                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1214                 }
1215                 if (cbf_cr[i])
1216                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1217                                                 log2_trafo_size, scan_idx_c, 2);
1218             }
1219         }
1220     } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1221         if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1222             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1223             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1224             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1225             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1226             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1227             if (s->ps.sps->chroma_format_idc == 2) {
1228                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1229                                                 trafo_size_h, trafo_size_v);
1230                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1231                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1232             }
1233         } else if (blk_idx == 3) {
1234             int trafo_size_h = 1 << (log2_trafo_size + 1);
1235             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1236             ff_hevc_set_neighbour_available(s, xBase, yBase,
1237                                             trafo_size_h, trafo_size_v);
1238             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1239             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1240             if (s->ps.sps->chroma_format_idc == 2) {
1241                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1242                                                 trafo_size_h, trafo_size_v);
1243                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1244                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1245             }
1246         }
1247     }
1248
1249     return 0;
1250 }
1251
1252 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1253 {
1254     int cb_size          = 1 << log2_cb_size;
1255     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1256
1257     int min_pu_width     = s->ps.sps->min_pu_width;
1258     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1259     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1260     int i, j;
1261
1262     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1263         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1264             s->is_pcm[i + j * min_pu_width] = 2;
1265 }
1266
1267 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1268                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1269                               int log2_cb_size, int log2_trafo_size,
1270                               int trafo_depth, int blk_idx,
1271                               const int *base_cbf_cb, const int *base_cbf_cr)
1272 {
1273     HEVCLocalContext *lc = s->HEVClc;
1274     uint8_t split_transform_flag;
1275     int cbf_cb[2];
1276     int cbf_cr[2];
1277     int ret;
1278
1279     cbf_cb[0] = base_cbf_cb[0];
1280     cbf_cb[1] = base_cbf_cb[1];
1281     cbf_cr[0] = base_cbf_cr[0];
1282     cbf_cr[1] = base_cbf_cr[1];
1283
1284     if (lc->cu.intra_split_flag) {
1285         if (trafo_depth == 1) {
1286             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1287             if (s->ps.sps->chroma_format_idc == 3) {
1288                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1289                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1290             } else {
1291                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1292                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1293             }
1294         }
1295     } else {
1296         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1297         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1298         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1299     }
1300
1301     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1302         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1303         trafo_depth     < lc->cu.max_trafo_depth       &&
1304         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1305         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1306     } else {
1307         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1308                           lc->cu.pred_mode == MODE_INTER &&
1309                           lc->cu.part_mode != PART_2Nx2N &&
1310                           trafo_depth == 0;
1311
1312         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1313                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1314                                inter_split;
1315     }
1316
1317     if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1318         if (trafo_depth == 0 || cbf_cb[0]) {
1319             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1320             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1321                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1322             }
1323         }
1324
1325         if (trafo_depth == 0 || cbf_cr[0]) {
1326             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1327             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1328                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1329             }
1330         }
1331     }
1332
1333     if (split_transform_flag) {
1334         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1335         const int x1 = x0 + trafo_size_split;
1336         const int y1 = y0 + trafo_size_split;
1337
1338 #define SUBDIVIDE(x, y, idx)                                                    \
1339 do {                                                                            \
1340     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1341                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1342                              cbf_cb, cbf_cr);                                   \
1343     if (ret < 0)                                                                \
1344         return ret;                                                             \
1345 } while (0)
1346
1347         SUBDIVIDE(x0, y0, 0);
1348         SUBDIVIDE(x1, y0, 1);
1349         SUBDIVIDE(x0, y1, 2);
1350         SUBDIVIDE(x1, y1, 3);
1351
1352 #undef SUBDIVIDE
1353     } else {
1354         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1355         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1356         int min_tu_width     = s->ps.sps->min_tb_width;
1357         int cbf_luma         = 1;
1358
1359         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1360             cbf_cb[0] || cbf_cr[0] ||
1361             (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1362             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1363         }
1364
1365         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1366                                  log2_cb_size, log2_trafo_size,
1367                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1368         if (ret < 0)
1369             return ret;
1370         // TODO: store cbf_luma somewhere else
1371         if (cbf_luma) {
1372             int i, j;
1373             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1374                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1375                     int x_tu = (x0 + j) >> log2_min_tu_size;
1376                     int y_tu = (y0 + i) >> log2_min_tu_size;
1377                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1378                 }
1379         }
1380         if (!s->sh.disable_deblocking_filter_flag) {
1381             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1382             if (s->ps.pps->transquant_bypass_enable_flag &&
1383                 lc->cu.cu_transquant_bypass_flag)
1384                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1385         }
1386     }
1387     return 0;
1388 }
1389
1390 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1391 {
1392     HEVCLocalContext *lc = s->HEVClc;
1393     GetBitContext gb;
1394     int cb_size   = 1 << log2_cb_size;
1395     ptrdiff_t stride0 = s->frame->linesize[0];
1396     ptrdiff_t stride1 = s->frame->linesize[1];
1397     ptrdiff_t stride2 = s->frame->linesize[2];
1398     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1399     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1400     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1401
1402     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1403                          (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1404                           ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1405                           s->ps.sps->pcm.bit_depth_chroma;
1406     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1407     int ret;
1408
1409     if (!s->sh.disable_deblocking_filter_flag)
1410         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1411
1412     ret = init_get_bits(&gb, pcm, length);
1413     if (ret < 0)
1414         return ret;
1415
1416     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1417     if (s->ps.sps->chroma_format_idc) {
1418         s->hevcdsp.put_pcm(dst1, stride1,
1419                            cb_size >> s->ps.sps->hshift[1],
1420                            cb_size >> s->ps.sps->vshift[1],
1421                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1422         s->hevcdsp.put_pcm(dst2, stride2,
1423                            cb_size >> s->ps.sps->hshift[2],
1424                            cb_size >> s->ps.sps->vshift[2],
1425                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1426     }
1427
1428     return 0;
1429 }
1430
1431 /**
1432  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1433  *
1434  * @param s HEVC decoding context
1435  * @param dst target buffer for block data at block position
1436  * @param dststride stride of the dst buffer
1437  * @param ref reference picture buffer at origin (0, 0)
1438  * @param mv motion vector (relative to block position) to get pixel data from
1439  * @param x_off horizontal position of block from origin (0, 0)
1440  * @param y_off vertical position of block from origin (0, 0)
1441  * @param block_w width of block
1442  * @param block_h height of block
1443  * @param luma_weight weighting factor applied to the luma prediction
1444  * @param luma_offset additive offset applied to the luma prediction value
1445  */
1446
1447 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1448                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1449                         int block_w, int block_h, int luma_weight, int luma_offset)
1450 {
1451     HEVCLocalContext *lc = s->HEVClc;
1452     uint8_t *src         = ref->data[0];
1453     ptrdiff_t srcstride  = ref->linesize[0];
1454     int pic_width        = s->ps.sps->width;
1455     int pic_height       = s->ps.sps->height;
1456     int mx               = mv->x & 3;
1457     int my               = mv->y & 3;
1458     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1459                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1460     int idx              = ff_hevc_pel_weight[block_w];
1461
1462     x_off += mv->x >> 2;
1463     y_off += mv->y >> 2;
1464     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1465
1466     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1467         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1468         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1469         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1470         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1471         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1472
1473         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1474                                  edge_emu_stride, srcstride,
1475                                  block_w + QPEL_EXTRA,
1476                                  block_h + QPEL_EXTRA,
1477                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1478                                  pic_width, pic_height);
1479         src = lc->edge_emu_buffer + buf_offset;
1480         srcstride = edge_emu_stride;
1481     }
1482
1483     if (!weight_flag)
1484         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1485                                                       block_h, mx, my, block_w);
1486     else
1487         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1488                                                         block_h, s->sh.luma_log2_weight_denom,
1489                                                         luma_weight, luma_offset, mx, my, block_w);
1490 }
1491
1492 /**
1493  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1494  *
1495  * @param s HEVC decoding context
1496  * @param dst target buffer for block data at block position
1497  * @param dststride stride of the dst buffer
1498  * @param ref0 reference picture0 buffer at origin (0, 0)
1499  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1500  * @param x_off horizontal position of block from origin (0, 0)
1501  * @param y_off vertical position of block from origin (0, 0)
1502  * @param block_w width of block
1503  * @param block_h height of block
1504  * @param ref1 reference picture1 buffer at origin (0, 0)
1505  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1506  * @param current_mv current motion vector structure
1507  */
1508  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1509                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1510                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1511 {
1512     HEVCLocalContext *lc = s->HEVClc;
1513     ptrdiff_t src0stride  = ref0->linesize[0];
1514     ptrdiff_t src1stride  = ref1->linesize[0];
1515     int pic_width        = s->ps.sps->width;
1516     int pic_height       = s->ps.sps->height;
1517     int mx0              = mv0->x & 3;
1518     int my0              = mv0->y & 3;
1519     int mx1              = mv1->x & 3;
1520     int my1              = mv1->y & 3;
1521     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1522                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1523     int x_off0           = x_off + (mv0->x >> 2);
1524     int y_off0           = y_off + (mv0->y >> 2);
1525     int x_off1           = x_off + (mv1->x >> 2);
1526     int y_off1           = y_off + (mv1->y >> 2);
1527     int idx              = ff_hevc_pel_weight[block_w];
1528
1529     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1530     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1531
1532     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1533         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1534         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1535         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1536         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1537         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1538
1539         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1540                                  edge_emu_stride, src0stride,
1541                                  block_w + QPEL_EXTRA,
1542                                  block_h + QPEL_EXTRA,
1543                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1544                                  pic_width, pic_height);
1545         src0 = lc->edge_emu_buffer + buf_offset;
1546         src0stride = edge_emu_stride;
1547     }
1548
1549     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1550         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1551         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1552         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1553         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1554         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1555
1556         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1557                                  edge_emu_stride, src1stride,
1558                                  block_w + QPEL_EXTRA,
1559                                  block_h + QPEL_EXTRA,
1560                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1561                                  pic_width, pic_height);
1562         src1 = lc->edge_emu_buffer2 + buf_offset;
1563         src1stride = edge_emu_stride;
1564     }
1565
1566     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1567                                                 block_h, mx0, my0, block_w);
1568     if (!weight_flag)
1569         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1570                                                        block_h, mx1, my1, block_w);
1571     else
1572         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1573                                                          block_h, s->sh.luma_log2_weight_denom,
1574                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1575                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1576                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1577                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1578                                                          mx1, my1, block_w);
1579
1580 }
1581
1582 /**
1583  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1584  *
1585  * @param s HEVC decoding context
1586  * @param dst1 target buffer for block data at block position (U plane)
1587  * @param dst2 target buffer for block data at block position (V plane)
1588  * @param dststride stride of the dst1 and dst2 buffers
1589  * @param ref reference picture buffer at origin (0, 0)
1590  * @param mv motion vector (relative to block position) to get pixel data from
1591  * @param x_off horizontal position of block from origin (0, 0)
1592  * @param y_off vertical position of block from origin (0, 0)
1593  * @param block_w width of block
1594  * @param block_h height of block
1595  * @param chroma_weight weighting factor applied to the chroma prediction
1596  * @param chroma_offset additive offset applied to the chroma prediction value
1597  */
1598
1599 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1600                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1601                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1602 {
1603     HEVCLocalContext *lc = s->HEVClc;
1604     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1605     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1606     const Mv *mv         = &current_mv->mv[reflist];
1607     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1608                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1609     int idx              = ff_hevc_pel_weight[block_w];
1610     int hshift           = s->ps.sps->hshift[1];
1611     int vshift           = s->ps.sps->vshift[1];
1612     intptr_t mx          = av_mod_uintp2(mv->x, 2 + hshift);
1613     intptr_t my          = av_mod_uintp2(mv->y, 2 + vshift);
1614     intptr_t _mx         = mx << (1 - hshift);
1615     intptr_t _my         = my << (1 - vshift);
1616
1617     x_off += mv->x >> (2 + hshift);
1618     y_off += mv->y >> (2 + vshift);
1619     src0  += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1620
1621     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1622         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1623         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1624         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1625         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1626         int buf_offset0 = EPEL_EXTRA_BEFORE *
1627                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1628         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1629                                  edge_emu_stride, srcstride,
1630                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1631                                  x_off - EPEL_EXTRA_BEFORE,
1632                                  y_off - EPEL_EXTRA_BEFORE,
1633                                  pic_width, pic_height);
1634
1635         src0 = lc->edge_emu_buffer + buf_offset0;
1636         srcstride = edge_emu_stride;
1637     }
1638     if (!weight_flag)
1639         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1640                                                   block_h, _mx, _my, block_w);
1641     else
1642         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1643                                                         block_h, s->sh.chroma_log2_weight_denom,
1644                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1645 }
1646
1647 /**
1648  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1649  *
1650  * @param s HEVC decoding context
1651  * @param dst target buffer for block data at block position
1652  * @param dststride stride of the dst buffer
1653  * @param ref0 reference picture0 buffer at origin (0, 0)
1654  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1655  * @param x_off horizontal position of block from origin (0, 0)
1656  * @param y_off vertical position of block from origin (0, 0)
1657  * @param block_w width of block
1658  * @param block_h height of block
1659  * @param ref1 reference picture1 buffer at origin (0, 0)
1660  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1661  * @param current_mv current motion vector structure
1662  * @param cidx chroma component(cb, cr)
1663  */
1664 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1665                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1666 {
1667     HEVCLocalContext *lc = s->HEVClc;
1668     uint8_t *src1        = ref0->data[cidx+1];
1669     uint8_t *src2        = ref1->data[cidx+1];
1670     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1671     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1672     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1673                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1674     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1675     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1676     Mv *mv0              = &current_mv->mv[0];
1677     Mv *mv1              = &current_mv->mv[1];
1678     int hshift = s->ps.sps->hshift[1];
1679     int vshift = s->ps.sps->vshift[1];
1680
1681     intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1682     intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1683     intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1684     intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1685     intptr_t _mx0 = mx0 << (1 - hshift);
1686     intptr_t _my0 = my0 << (1 - vshift);
1687     intptr_t _mx1 = mx1 << (1 - hshift);
1688     intptr_t _my1 = my1 << (1 - vshift);
1689
1690     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1691     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1692     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1693     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1694     int idx = ff_hevc_pel_weight[block_w];
1695     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1696     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1697
1698     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1699         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1700         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1701         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1702         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1703         int buf_offset1 = EPEL_EXTRA_BEFORE *
1704                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1705
1706         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1707                                  edge_emu_stride, src1stride,
1708                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1709                                  x_off0 - EPEL_EXTRA_BEFORE,
1710                                  y_off0 - EPEL_EXTRA_BEFORE,
1711                                  pic_width, pic_height);
1712
1713         src1 = lc->edge_emu_buffer + buf_offset1;
1714         src1stride = edge_emu_stride;
1715     }
1716
1717     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1718         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1719         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1720         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1721         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1722         int buf_offset1 = EPEL_EXTRA_BEFORE *
1723                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1724
1725         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1726                                  edge_emu_stride, src2stride,
1727                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1728                                  x_off1 - EPEL_EXTRA_BEFORE,
1729                                  y_off1 - EPEL_EXTRA_BEFORE,
1730                                  pic_width, pic_height);
1731
1732         src2 = lc->edge_emu_buffer2 + buf_offset1;
1733         src2stride = edge_emu_stride;
1734     }
1735
1736     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1737                                                 block_h, _mx0, _my0, block_w);
1738     if (!weight_flag)
1739         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1740                                                        src2, src2stride, lc->tmp,
1741                                                        block_h, _mx1, _my1, block_w);
1742     else
1743         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1744                                                          src2, src2stride, lc->tmp,
1745                                                          block_h,
1746                                                          s->sh.chroma_log2_weight_denom,
1747                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1748                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1749                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1750                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1751                                                          _mx1, _my1, block_w);
1752 }
1753
1754 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1755                                 const Mv *mv, int y0, int height)
1756 {
1757     if (s->threads_type == FF_THREAD_FRAME ) {
1758         int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1759
1760         ff_thread_await_progress(&ref->tf, y, 0);
1761     }
1762 }
1763
1764 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1765                                   int nPbH, int log2_cb_size, int part_idx,
1766                                   int merge_idx, MvField *mv)
1767 {
1768     HEVCLocalContext *lc = s->HEVClc;
1769     enum InterPredIdc inter_pred_idc = PRED_L0;
1770     int mvp_flag;
1771
1772     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1773     mv->pred_flag = 0;
1774     if (s->sh.slice_type == HEVC_SLICE_B)
1775         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1776
1777     if (inter_pred_idc != PRED_L1) {
1778         if (s->sh.nb_refs[L0])
1779             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1780
1781         mv->pred_flag = PF_L0;
1782         ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1783         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1784         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1785                                  part_idx, merge_idx, mv, mvp_flag, 0);
1786         mv->mv[0].x += lc->pu.mvd.x;
1787         mv->mv[0].y += lc->pu.mvd.y;
1788     }
1789
1790     if (inter_pred_idc != PRED_L0) {
1791         if (s->sh.nb_refs[L1])
1792             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1793
1794         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1795             AV_ZERO32(&lc->pu.mvd);
1796         } else {
1797             ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1798         }
1799
1800         mv->pred_flag += PF_L1;
1801         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1802         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1803                                  part_idx, merge_idx, mv, mvp_flag, 1);
1804         mv->mv[1].x += lc->pu.mvd.x;
1805         mv->mv[1].y += lc->pu.mvd.y;
1806     }
1807 }
1808
1809 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1810                                 int nPbW, int nPbH,
1811                                 int log2_cb_size, int partIdx, int idx)
1812 {
1813 #define POS(c_idx, x, y)                                                              \
1814     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1815                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1816     HEVCLocalContext *lc = s->HEVClc;
1817     int merge_idx = 0;
1818     struct MvField current_mv = {{{ 0 }}};
1819
1820     int min_pu_width = s->ps.sps->min_pu_width;
1821
1822     MvField *tab_mvf = s->ref->tab_mvf;
1823     RefPicList  *refPicList = s->ref->refPicList;
1824     HEVCFrame *ref0 = NULL, *ref1 = NULL;
1825     uint8_t *dst0 = POS(0, x0, y0);
1826     uint8_t *dst1 = POS(1, x0, y0);
1827     uint8_t *dst2 = POS(2, x0, y0);
1828     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1829     int min_cb_width     = s->ps.sps->min_cb_width;
1830     int x_cb             = x0 >> log2_min_cb_size;
1831     int y_cb             = y0 >> log2_min_cb_size;
1832     int x_pu, y_pu;
1833     int i, j;
1834
1835     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1836
1837     if (!skip_flag)
1838         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1839
1840     if (skip_flag || lc->pu.merge_flag) {
1841         if (s->sh.max_num_merge_cand > 1)
1842             merge_idx = ff_hevc_merge_idx_decode(s);
1843         else
1844             merge_idx = 0;
1845
1846         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1847                                    partIdx, merge_idx, &current_mv);
1848     } else {
1849         hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1850                               partIdx, merge_idx, &current_mv);
1851     }
1852
1853     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1854     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1855
1856     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1857         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1858             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1859
1860     if (current_mv.pred_flag & PF_L0) {
1861         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1862         if (!ref0)
1863             return;
1864         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1865     }
1866     if (current_mv.pred_flag & PF_L1) {
1867         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1868         if (!ref1)
1869             return;
1870         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1871     }
1872
1873     if (current_mv.pred_flag == PF_L0) {
1874         int x0_c = x0 >> s->ps.sps->hshift[1];
1875         int y0_c = y0 >> s->ps.sps->vshift[1];
1876         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1877         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1878
1879         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1880                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1881                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1882                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1883
1884         if (s->ps.sps->chroma_format_idc) {
1885             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1886                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1887                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1888             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1889                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1890                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1891         }
1892     } else if (current_mv.pred_flag == PF_L1) {
1893         int x0_c = x0 >> s->ps.sps->hshift[1];
1894         int y0_c = y0 >> s->ps.sps->vshift[1];
1895         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1896         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1897
1898         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1899                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1900                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1901                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1902
1903         if (s->ps.sps->chroma_format_idc) {
1904             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1905                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1906                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1907
1908             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1909                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1910                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1911         }
1912     } else if (current_mv.pred_flag == PF_BI) {
1913         int x0_c = x0 >> s->ps.sps->hshift[1];
1914         int y0_c = y0 >> s->ps.sps->vshift[1];
1915         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1916         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1917
1918         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1919                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1920                    ref1->frame, &current_mv.mv[1], &current_mv);
1921
1922         if (s->ps.sps->chroma_format_idc) {
1923             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1924                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1925
1926             chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1927                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1928         }
1929     }
1930 }
1931
1932 /**
1933  * 8.4.1
1934  */
1935 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1936                                 int prev_intra_luma_pred_flag)
1937 {
1938     HEVCLocalContext *lc = s->HEVClc;
1939     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1940     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1941     int min_pu_width     = s->ps.sps->min_pu_width;
1942     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1943     int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1944     int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
1945
1946     int cand_up   = (lc->ctb_up_flag || y0b) ?
1947                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1948     int cand_left = (lc->ctb_left_flag || x0b) ?
1949                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1950
1951     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1952
1953     MvField *tab_mvf = s->ref->tab_mvf;
1954     int intra_pred_mode;
1955     int candidate[3];
1956     int i, j;
1957
1958     // intra_pred_mode prediction does not cross vertical CTB boundaries
1959     if ((y0 - 1) < y_ctb)
1960         cand_up = INTRA_DC;
1961
1962     if (cand_left == cand_up) {
1963         if (cand_left < 2) {
1964             candidate[0] = INTRA_PLANAR;
1965             candidate[1] = INTRA_DC;
1966             candidate[2] = INTRA_ANGULAR_26;
1967         } else {
1968             candidate[0] = cand_left;
1969             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1970             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1971         }
1972     } else {
1973         candidate[0] = cand_left;
1974         candidate[1] = cand_up;
1975         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1976             candidate[2] = INTRA_PLANAR;
1977         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1978             candidate[2] = INTRA_DC;
1979         } else {
1980             candidate[2] = INTRA_ANGULAR_26;
1981         }
1982     }
1983
1984     if (prev_intra_luma_pred_flag) {
1985         intra_pred_mode = candidate[lc->pu.mpm_idx];
1986     } else {
1987         if (candidate[0] > candidate[1])
1988             FFSWAP(uint8_t, candidate[0], candidate[1]);
1989         if (candidate[0] > candidate[2])
1990             FFSWAP(uint8_t, candidate[0], candidate[2]);
1991         if (candidate[1] > candidate[2])
1992             FFSWAP(uint8_t, candidate[1], candidate[2]);
1993
1994         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1995         for (i = 0; i < 3; i++)
1996             if (intra_pred_mode >= candidate[i])
1997                 intra_pred_mode++;
1998     }
1999
2000     /* write the intra prediction units into the mv array */
2001     if (!size_in_pus)
2002         size_in_pus = 1;
2003     for (i = 0; i < size_in_pus; i++) {
2004         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
2005                intra_pred_mode, size_in_pus);
2006
2007         for (j = 0; j < size_in_pus; j++) {
2008             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
2009         }
2010     }
2011
2012     return intra_pred_mode;
2013 }
2014
2015 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
2016                                           int log2_cb_size, int ct_depth)
2017 {
2018     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
2019     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
2020     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
2021     int y;
2022
2023     for (y = 0; y < length; y++)
2024         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
2025                ct_depth, length);
2026 }
2027
/* 35-entry remapping table indexed by an intra prediction mode (0..34).
 * NOTE(review): presumably the 4:2:2 chroma mode mapping of the
 * specification; its use lies outside this part of the file. */
static const uint8_t tab_mode_idx[] = {
     0,  1,  2,  2,  2,  2,  3,
     5,  7,  8, 10, 12, 13, 15,
    17, 18, 19, 20, 21, 22, 23,
    23, 24, 24, 25, 25, 26, 27,
    27, 28, 28, 29, 29, 30, 31,
};
2031
2032 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
2033                                   int log2_cb_size)
2034 {
2035     HEVCLocalContext *lc = s->HEVClc;
2036     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
2037     uint8_t prev_intra_luma_pred_flag[4];
2038     int split   = lc->cu.part_mode == PART_NxN;
2039     int pb_size = (1 << log2_cb_size) >> split;
2040     int side    = split + 1;
2041     int chroma_mode;
2042     int i, j;
2043
2044     for (i = 0; i < side; i++)
2045         for (j = 0; j < side; j++)
2046             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2047
2048     for (i = 0; i < side; i++) {
2049         for (j = 0; j < side; j++) {
2050             if (prev_intra_luma_pred_flag[2 * i + j])
2051                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2052             else
2053                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2054
2055             lc->pu.intra_pred_mode[2 * i + j] =
2056                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2057                                      prev_intra_luma_pred_flag[2 * i + j]);
2058         }
2059     }
2060
2061     if (s->ps.sps->chroma_format_idc == 3) {
2062         for (i = 0; i < side; i++) {
2063             for (j = 0; j < side; j++) {
2064                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2065                 if (chroma_mode != 4) {
2066                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
2067                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
2068                     else
2069                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
2070                 } else {
2071                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
2072                 }
2073             }
2074         }
2075     } else if (s->ps.sps->chroma_format_idc == 2) {
2076         int mode_idx;
2077         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2078         if (chroma_mode != 4) {
2079             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2080                 mode_idx = 34;
2081             else
2082                 mode_idx = intra_chroma_table[chroma_mode];
2083         } else {
2084             mode_idx = lc->pu.intra_pred_mode[0];
2085         }
2086         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
2087     } else if (s->ps.sps->chroma_format_idc != 0) {
2088         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2089         if (chroma_mode != 4) {
2090             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2091                 lc->pu.intra_pred_mode_c[0] = 34;
2092             else
2093                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
2094         } else {
2095             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
2096         }
2097     }
2098 }
2099
2100 static void intra_prediction_unit_default_value(HEVCContext *s,
2101                                                 int x0, int y0,
2102                                                 int log2_cb_size)
2103 {
2104     HEVCLocalContext *lc = s->HEVClc;
2105     int pb_size          = 1 << log2_cb_size;
2106     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2107     int min_pu_width     = s->ps.sps->min_pu_width;
2108     MvField *tab_mvf     = s->ref->tab_mvf;
2109     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2110     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2111     int j, k;
2112
2113     if (size_in_pus == 0)
2114         size_in_pus = 1;
2115     for (j = 0; j < size_in_pus; j++)
2116         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2117     if (lc->cu.pred_mode == MODE_INTRA)
2118         for (j = 0; j < size_in_pus; j++)
2119             for (k = 0; k < size_in_pus; k++)
2120                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
2121 }
2122
2123 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2124 {
2125     int cb_size          = 1 << log2_cb_size;
2126     HEVCLocalContext *lc = s->HEVClc;
2127     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2128     int length           = cb_size >> log2_min_cb_size;
2129     int min_cb_width     = s->ps.sps->min_cb_width;
2130     int x_cb             = x0 >> log2_min_cb_size;
2131     int y_cb             = y0 >> log2_min_cb_size;
2132     int idx              = log2_cb_size - 2;
2133     int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2134     int x, y, ret;
2135
2136     lc->cu.x                = x0;
2137     lc->cu.y                = y0;
2138     lc->cu.pred_mode        = MODE_INTRA;
2139     lc->cu.part_mode        = PART_2Nx2N;
2140     lc->cu.intra_split_flag = 0;
2141
2142     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2143     for (x = 0; x < 4; x++)
2144         lc->pu.intra_pred_mode[x] = 1;
2145     if (s->ps.pps->transquant_bypass_enable_flag) {
2146         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2147         if (lc->cu.cu_transquant_bypass_flag)
2148             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2149     } else
2150         lc->cu.cu_transquant_bypass_flag = 0;
2151
2152     if (s->sh.slice_type != HEVC_SLICE_I) {
2153         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2154
2155         x = y_cb * min_cb_width + x_cb;
2156         for (y = 0; y < length; y++) {
2157             memset(&s->skip_flag[x], skip_flag, length);
2158             x += min_cb_width;
2159         }
2160         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2161     } else {
2162         x = y_cb * min_cb_width + x_cb;
2163         for (y = 0; y < length; y++) {
2164             memset(&s->skip_flag[x], 0, length);
2165             x += min_cb_width;
2166         }
2167     }
2168
2169     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2170         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2171         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2172
2173         if (!s->sh.disable_deblocking_filter_flag)
2174             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2175     } else {
2176         int pcm_flag = 0;
2177
2178         if (s->sh.slice_type != HEVC_SLICE_I)
2179             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2180         if (lc->cu.pred_mode != MODE_INTRA ||
2181             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2182             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2183             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2184                                       lc->cu.pred_mode == MODE_INTRA;
2185         }
2186
2187         if (lc->cu.pred_mode == MODE_INTRA) {
2188             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2189                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2190                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2191                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2192             }
2193             if (pcm_flag) {
2194                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2195                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2196                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2197                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2198
2199                 if (ret < 0)
2200                     return ret;
2201             } else {
2202                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2203             }
2204         } else {
2205             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2206             switch (lc->cu.part_mode) {
2207             case PART_2Nx2N:
2208                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2209                 break;
2210             case PART_2NxN:
2211                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2212                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2213                 break;
2214             case PART_Nx2N:
2215                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2216                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2217                 break;
2218             case PART_2NxnU:
2219                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2220                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2221                 break;
2222             case PART_2NxnD:
2223                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2224                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2225                 break;
2226             case PART_nLx2N:
2227                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2228                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2229                 break;
2230             case PART_nRx2N:
2231                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2232                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2233                 break;
2234             case PART_NxN:
2235                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2236                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2237                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2238                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2239                 break;
2240             }
2241         }
2242
2243         if (!pcm_flag) {
2244             int rqt_root_cbf = 1;
2245
2246             if (lc->cu.pred_mode != MODE_INTRA &&
2247                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2248                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2249             }
2250             if (rqt_root_cbf) {
2251                 const static int cbf[2] = { 0 };
2252                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2253                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2254                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2255                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2256                                          log2_cb_size,
2257                                          log2_cb_size, 0, 0, cbf, cbf);
2258                 if (ret < 0)
2259                     return ret;
2260             } else {
2261                 if (!s->sh.disable_deblocking_filter_flag)
2262                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2263             }
2264         }
2265     }
2266
2267     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2268         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2269
2270     x = y_cb * min_cb_width + x_cb;
2271     for (y = 0; y < length; y++) {
2272         memset(&s->qp_y_tab[x], lc->qp_y, length);
2273         x += min_cb_width;
2274     }
2275
2276     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2277        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2278         lc->qPy_pred = lc->qp_y;
2279     }
2280
2281     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
2282
2283     return 0;
2284 }
2285
2286 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2287                                int log2_cb_size, int cb_depth)
2288 {
2289     HEVCLocalContext *lc = s->HEVClc;
2290     const int cb_size    = 1 << log2_cb_size;
2291     int ret;
2292     int split_cu;
2293
2294     lc->ct_depth = cb_depth;
2295     if (x0 + cb_size <= s->ps.sps->width  &&
2296         y0 + cb_size <= s->ps.sps->height &&
2297         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2298         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2299     } else {
2300         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2301     }
2302     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2303         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2304         lc->tu.is_cu_qp_delta_coded = 0;
2305         lc->tu.cu_qp_delta          = 0;
2306     }
2307
2308     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2309         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2310         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2311     }
2312
2313     if (split_cu) {
2314         int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2315         const int cb_size_split = cb_size >> 1;
2316         const int x1 = x0 + cb_size_split;
2317         const int y1 = y0 + cb_size_split;
2318
2319         int more_data = 0;
2320
2321         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2322         if (more_data < 0)
2323             return more_data;
2324
2325         if (more_data && x1 < s->ps.sps->width) {
2326             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2327             if (more_data < 0)
2328                 return more_data;
2329         }
2330         if (more_data && y1 < s->ps.sps->height) {
2331             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2332             if (more_data < 0)
2333                 return more_data;
2334         }
2335         if (more_data && x1 < s->ps.sps->width &&
2336             y1 < s->ps.sps->height) {
2337             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2338             if (more_data < 0)
2339                 return more_data;
2340         }
2341
2342         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2343             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2344             lc->qPy_pred = lc->qp_y;
2345
2346         if (more_data)
2347             return ((x1 + cb_size_split) < s->ps.sps->width ||
2348                     (y1 + cb_size_split) < s->ps.sps->height);
2349         else
2350             return 0;
2351     } else {
2352         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2353         if (ret < 0)
2354             return ret;
2355         if ((!((x0 + cb_size) %
2356                (1 << (s->ps.sps->log2_ctb_size))) ||
2357              (x0 + cb_size >= s->ps.sps->width)) &&
2358             (!((y0 + cb_size) %
2359                (1 << (s->ps.sps->log2_ctb_size))) ||
2360              (y0 + cb_size >= s->ps.sps->height))) {
2361             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2362             return !end_of_slice_flag;
2363         } else {
2364             return 1;
2365         }
2366     }
2367
2368     return 0;
2369 }
2370
2371 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2372                                  int ctb_addr_ts)
2373 {
2374     HEVCLocalContext *lc  = s->HEVClc;
2375     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2376     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2377     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2378
2379     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2380
2381     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2382         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2383             lc->first_qp_group = 1;
2384         lc->end_of_tiles_x = s->ps.sps->width;
2385     } else if (s->ps.pps->tiles_enabled_flag) {
2386         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2387             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2388             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2389             lc->first_qp_group   = 1;
2390         }
2391     } else {
2392         lc->end_of_tiles_x = s->ps.sps->width;
2393     }
2394
2395     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2396
2397     lc->boundary_flags = 0;
2398     if (s->ps.pps->tiles_enabled_flag) {
2399         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2400             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2401         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2402             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2403         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2404             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2405         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2406             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2407     } else {
2408         if (ctb_addr_in_slice <= 0)
2409             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2410         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2411             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2412     }
2413
2414     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2415     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2416     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2417     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2418 }
2419
2420 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2421 {
2422     HEVCContext *s  = avctxt->priv_data;
2423     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2424     int more_data   = 1;
2425     int x_ctb       = 0;
2426     int y_ctb       = 0;
2427     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2428     int ret;
2429
2430     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2431         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2432         return AVERROR_INVALIDDATA;
2433     }
2434
2435     if (s->sh.dependent_slice_segment_flag) {
2436         int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2437         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2438             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2439             return AVERROR_INVALIDDATA;
2440         }
2441     }
2442
2443     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2444         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2445
2446         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2447         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2448         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2449
2450         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2451         if (ret < 0) {
2452             s->tab_slice_address[ctb_addr_rs] = -1;
2453             return ret;
2454         }
2455
2456         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2457
2458         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2459         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2460         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2461
2462         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2463         if (more_data < 0) {
2464             s->tab_slice_address[ctb_addr_rs] = -1;
2465             return more_data;
2466         }
2467
2468
2469         ctb_addr_ts++;
2470         ff_hevc_save_states(s, ctb_addr_ts);
2471         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2472     }
2473
2474     if (x_ctb + ctb_size >= s->ps.sps->width &&
2475         y_ctb + ctb_size >= s->ps.sps->height)
2476         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2477
2478     return ctb_addr_ts;
2479 }
2480
2481 static int hls_slice_data(HEVCContext *s)
2482 {
2483     int arg[2];
2484     int ret[2];
2485
2486     arg[0] = 0;
2487     arg[1] = 1;
2488
2489     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2490     return ret[0];
2491 }
2492 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2493 {
2494     HEVCContext *s1  = avctxt->priv_data, *s;
2495     HEVCLocalContext *lc;
2496     int ctb_size    = 1<< s1->ps.sps->log2_ctb_size;
2497     int more_data   = 1;
2498     int *ctb_row_p    = input_ctb_row;
2499     int ctb_row = ctb_row_p[job];
2500     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2501     int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2502     int thread = ctb_row % s1->threads_number;
2503     int ret;
2504
2505     s = s1->sList[self_id];
2506     lc = s->HEVClc;
2507
2508     if(ctb_row) {
2509         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2510         if (ret < 0)
2511             goto error;
2512         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2513     }
2514
2515     while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2516         int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2517         int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2518
2519         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2520
2521         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2522
2523         if (atomic_load(&s1->wpp_err)) {
2524             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2525             return 0;
2526         }
2527
2528         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2529         if (ret < 0)
2530             goto error;
2531         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2532         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2533
2534         if (more_data < 0) {
2535             ret = more_data;
2536             goto error;
2537         }
2538
2539         ctb_addr_ts++;
2540
2541         ff_hevc_save_states(s, ctb_addr_ts);
2542         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2543         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2544
2545         if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2546             atomic_store(&s1->wpp_err, 1);
2547             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2548             return 0;
2549         }
2550
2551         if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2552             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2553             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2554             return ctb_addr_ts;
2555         }
2556         ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2557         x_ctb+=ctb_size;
2558
2559         if(x_ctb >= s->ps.sps->width) {
2560             break;
2561         }
2562     }
2563     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2564
2565     return 0;
2566 error:
2567     s->tab_slice_address[ctb_addr_rs] = -1;
2568     atomic_store(&s1->wpp_err, 1);
2569     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2570     return ret;
2571 }
2572
2573 static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
2574 {
2575     const uint8_t *data = nal->data;
2576     int length          = nal->size;
2577     HEVCLocalContext *lc = s->HEVClc;
2578     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2579     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2580     int64_t offset;
2581     int64_t startheader, cmpt = 0;
2582     int i, j, res = 0;
2583
2584     if (!ret || !arg) {
2585         av_free(ret);
2586         av_free(arg);
2587         return AVERROR(ENOMEM);
2588     }
2589
2590     if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
2591         av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
2592             s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
2593             s->ps.sps->ctb_width, s->ps.sps->ctb_height
2594         );
2595         res = AVERROR_INVALIDDATA;
2596         goto error;
2597     }
2598
2599     ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2600
2601     if (!s->sList[1]) {
2602         for (i = 1; i < s->threads_number; i++) {
2603             s->sList[i] = av_malloc(sizeof(HEVCContext));
2604             memcpy(s->sList[i], s, sizeof(HEVCContext));
2605             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2606             s->sList[i]->HEVClc = s->HEVClcList[i];
2607         }
2608     }
2609
2610     offset = (lc->gb.index >> 3);
2611
2612     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2613         if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2614             startheader--;
2615             cmpt++;
2616         }
2617     }
2618
2619     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2620         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2621         for (j = 0, cmpt = 0, startheader = offset
2622              + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2623             if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2624                 startheader--;
2625                 cmpt++;
2626             }
2627         }
2628         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2629         s->sh.offset[i - 1] = offset;
2630
2631     }
2632     if (s->sh.num_entry_point_offsets != 0) {
2633         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2634         if (length < offset) {
2635             av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
2636             res = AVERROR_INVALIDDATA;
2637             goto error;
2638         }
2639         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2640         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2641
2642     }
2643     s->data = data;
2644
2645     for (i = 1; i < s->threads_number; i++) {
2646         s->sList[i]->HEVClc->first_qp_group = 1;
2647         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2648         memcpy(s->sList[i], s, sizeof(HEVCContext));
2649         s->sList[i]->HEVClc = s->HEVClcList[i];
2650     }
2651
2652     atomic_store(&s->wpp_err, 0);
2653     ff_reset_entries(s->avctx);
2654
2655     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2656         arg[i] = i;
2657         ret[i] = 0;
2658     }
2659
2660     if (s->ps.pps->entropy_coding_sync_enabled_flag)
2661         s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2662
2663     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2664         res += ret[i];
2665 error:
2666     av_free(ret);
2667     av_free(arg);
2668     return res;
2669 }
2670
2671 static int set_side_data(HEVCContext *s)
2672 {
2673     AVFrame *out = s->ref->frame;
2674
2675     if (s->sei.frame_packing.present &&
2676         s->sei.frame_packing.arrangement_type >= 3 &&
2677         s->sei.frame_packing.arrangement_type <= 5 &&
2678         s->sei.frame_packing.content_interpretation_type > 0 &&
2679         s->sei.frame_packing.content_interpretation_type < 3) {
2680         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2681         if (!stereo)
2682             return AVERROR(ENOMEM);
2683
2684         switch (s->sei.frame_packing.arrangement_type) {
2685         case 3:
2686             if (s->sei.frame_packing.quincunx_subsampling)
2687                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2688             else
2689                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2690             break;
2691         case 4:
2692             stereo->type = AV_STEREO3D_TOPBOTTOM;
2693             break;
2694         case 5:
2695             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2696             break;
2697         }
2698
2699         if (s->sei.frame_packing.content_interpretation_type == 2)
2700             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2701
2702         if (s->sei.frame_packing.arrangement_type == 5) {
2703             if (s->sei.frame_packing.current_frame_is_frame0_flag)
2704                 stereo->view = AV_STEREO3D_VIEW_LEFT;
2705             else
2706                 stereo->view = AV_STEREO3D_VIEW_RIGHT;
2707         }
2708     }
2709
2710     if (s->sei.display_orientation.present &&
2711         (s->sei.display_orientation.anticlockwise_rotation ||
2712          s->sei.display_orientation.hflip || s->sei.display_orientation.vflip)) {
2713         double angle = s->sei.display_orientation.anticlockwise_rotation * 360 / (double) (1 << 16);
2714         AVFrameSideData *rotation = av_frame_new_side_data(out,
2715                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2716                                                            sizeof(int32_t) * 9);
2717         if (!rotation)
2718             return AVERROR(ENOMEM);
2719
2720         av_display_rotation_set((int32_t *)rotation->data, angle);
2721         av_display_matrix_flip((int32_t *)rotation->data,
2722                                s->sei.display_orientation.hflip,
2723                                s->sei.display_orientation.vflip);
2724     }
2725
2726     // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2727     // so the side data persists for the entire coded video sequence.
2728     if (s->sei.mastering_display.present > 0 &&
2729         IS_IRAP(s) && s->no_rasl_output_flag) {
2730         s->sei.mastering_display.present--;
2731     }
2732     if (s->sei.mastering_display.present) {
2733         // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
2734         const int mapping[3] = {2, 0, 1};
2735         const int chroma_den = 50000;
2736         const int luma_den = 10000;
2737         int i;
2738         AVMasteringDisplayMetadata *metadata =
2739             av_mastering_display_metadata_create_side_data(out);
2740         if (!metadata)
2741             return AVERROR(ENOMEM);
2742
2743         for (i = 0; i < 3; i++) {
2744             const int j = mapping[i];
2745             metadata->display_primaries[i][0].num = s->sei.mastering_display.display_primaries[j][0];
2746             metadata->display_primaries[i][0].den = chroma_den;
2747             metadata->display_primaries[i][1].num = s->sei.mastering_display.display_primaries[j][1];
2748             metadata->display_primaries[i][1].den = chroma_den;
2749         }
2750         metadata->white_point[0].num = s->sei.mastering_display.white_point[0];
2751         metadata->white_point[0].den = chroma_den;
2752         metadata->white_point[1].num = s->sei.mastering_display.white_point[1];
2753         metadata->white_point[1].den = chroma_den;
2754
2755         metadata->max_luminance.num = s->sei.mastering_display.max_luminance;
2756         metadata->max_luminance.den = luma_den;
2757         metadata->min_luminance.num = s->sei.mastering_display.min_luminance;
2758         metadata->min_luminance.den = luma_den;
2759         metadata->has_luminance = 1;
2760         metadata->has_primaries = 1;
2761
2762         av_log(s->avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
2763         av_log(s->avctx, AV_LOG_DEBUG,
2764                "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
2765                av_q2d(metadata->display_primaries[0][0]),
2766                av_q2d(metadata->display_primaries[0][1]),
2767                av_q2d(metadata->display_primaries[1][0]),
2768                av_q2d(metadata->display_primaries[1][1]),
2769                av_q2d(metadata->display_primaries[2][0]),
2770                av_q2d(metadata->display_primaries[2][1]),
2771                av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
2772         av_log(s->avctx, AV_LOG_DEBUG,
2773                "min_luminance=%f, max_luminance=%f\n",
2774                av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
2775     }
2776     // Decrement the content light level flag when IRAP frame has no_rasl_output_flag=1
2777     // so the side data persists for the entire coded video sequence.
2778     if (s->sei.content_light.present > 0 &&
2779         IS_IRAP(s) && s->no_rasl_output_flag) {
2780         s->sei.content_light.present--;
2781     }
2782     if (s->sei.content_light.present) {
2783         AVContentLightMetadata *metadata =
2784             av_content_light_metadata_create_side_data(out);
2785         if (!metadata)
2786             return AVERROR(ENOMEM);
2787         metadata->MaxCLL  = s->sei.content_light.max_content_light_level;
2788         metadata->MaxFALL = s->sei.content_light.max_pic_average_light_level;
2789
2790         av_log(s->avctx, AV_LOG_DEBUG, "Content Light Level Metadata:\n");
2791         av_log(s->avctx, AV_LOG_DEBUG, "MaxCLL=%d, MaxFALL=%d\n",
2792                metadata->MaxCLL, metadata->MaxFALL);
2793     }
2794
2795     if (s->sei.a53_caption.buf_ref) {
2796         HEVCSEIA53Caption *a53 = &s->sei.a53_caption;
2797
2798         AVFrameSideData *sd = av_frame_new_side_data_from_buf(out, AV_FRAME_DATA_A53_CC, a53->buf_ref);
2799         if (!sd)
2800             av_buffer_unref(&a53->buf_ref);
2801         a53->buf_ref = NULL;
2802
2803         s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
2804     }
2805
2806     for (int i = 0; i < s->sei.unregistered.nb_buf_ref; i++) {
2807         HEVCSEIUnregistered *unreg = &s->sei.unregistered;
2808
2809         if (unreg->buf_ref[i]) {
2810             AVFrameSideData *sd = av_frame_new_side_data_from_buf(out,
2811                     AV_FRAME_DATA_SEI_UNREGISTERED,
2812                     unreg->buf_ref[i]);
2813             if (!sd)
2814                 av_buffer_unref(&unreg->buf_ref[i]);
2815             unreg->buf_ref[i] = NULL;
2816         }
2817     }
2818     s->sei.unregistered.nb_buf_ref = 0;
2819
2820     return 0;
2821 }
2822
2823 static int hevc_frame_start(HEVCContext *s)
2824 {
2825     HEVCLocalContext *lc = s->HEVClc;
2826     int pic_size_in_ctb  = ((s->ps.sps->width  >> s->ps.sps->log2_min_cb_size) + 1) *
2827                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
2828     int ret;
2829
2830     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2831     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2832     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2833     memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2834     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2835
2836     s->is_decoded        = 0;
2837     s->first_nal_type    = s->nal_unit_type;
2838
2839     s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos);
2840
2841     if (s->ps.pps->tiles_enabled_flag)
2842         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2843
2844     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2845     if (ret < 0)
2846         goto fail;
2847
2848     ret = ff_hevc_frame_rps(s);
2849     if (ret < 0) {
2850         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2851         goto fail;
2852     }
2853
2854     s->ref->frame->key_frame = IS_IRAP(s);
2855
2856     ret = set_side_data(s);
2857     if (ret < 0)
2858         goto fail;
2859
2860     s->frame->pict_type = 3 - s->sh.slice_type;
2861
2862     if (!IS_IRAP(s))
2863         ff_hevc_bump_frame(s);
2864
2865     av_frame_unref(s->output_frame);
2866     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2867     if (ret < 0)
2868         goto fail;
2869
2870     if (!s->avctx->hwaccel)
2871         ff_thread_finish_setup(s->avctx);
2872
2873     return 0;
2874
2875 fail:
2876     if (s->ref)
2877         ff_hevc_unref_frame(s, s->ref, ~0);
2878     s->ref = NULL;
2879     return ret;
2880 }
2881
2882 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2883 {
2884     HEVCLocalContext *lc = s->HEVClc;
2885     GetBitContext *gb    = &lc->gb;
2886     int ctb_addr_ts, ret;
2887
2888     *gb              = nal->gb;
2889     s->nal_unit_type = nal->type;
2890     s->temporal_id   = nal->temporal_id;
2891
2892     switch (s->nal_unit_type) {
2893     case HEVC_NAL_VPS:
2894         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2895             ret = s->avctx->hwaccel->decode_params(s->avctx,
2896                                                    nal->type,
2897                                                    nal->raw_data,
2898                                                    nal->raw_size);
2899             if (ret < 0)
2900                 goto fail;
2901         }
2902         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2903         if (ret < 0)
2904             goto fail;
2905         break;
2906     case HEVC_NAL_SPS:
2907         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2908             ret = s->avctx->hwaccel->decode_params(s->avctx,
2909                                                    nal->type,
2910                                                    nal->raw_data,
2911                                                    nal->raw_size);
2912             if (ret < 0)
2913                 goto fail;
2914         }
2915         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2916                                      s->apply_defdispwin);
2917         if (ret < 0)
2918             goto fail;
2919         break;
2920     case HEVC_NAL_PPS:
2921         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2922             ret = s->avctx->hwaccel->decode_params(s->avctx,
2923                                                    nal->type,
2924                                                    nal->raw_data,
2925                                                    nal->raw_size);
2926             if (ret < 0)
2927                 goto fail;
2928         }
2929         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2930         if (ret < 0)
2931             goto fail;
2932         break;
2933     case HEVC_NAL_SEI_PREFIX:
2934     case HEVC_NAL_SEI_SUFFIX:
2935         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2936             ret = s->avctx->hwaccel->decode_params(s->avctx,
2937                                                    nal->type,
2938                                                    nal->raw_data,
2939                                                    nal->raw_size);
2940             if (ret < 0)
2941                 goto fail;
2942         }
2943         ret = ff_hevc_decode_nal_sei(gb, s->avctx, &s->sei, &s->ps, s->nal_unit_type);
2944         if (ret < 0)
2945             goto fail;
2946         break;
2947     case HEVC_NAL_TRAIL_R:
2948     case HEVC_NAL_TRAIL_N:
2949     case HEVC_NAL_TSA_N:
2950     case HEVC_NAL_TSA_R:
2951     case HEVC_NAL_STSA_N:
2952     case HEVC_NAL_STSA_R:
2953     case HEVC_NAL_BLA_W_LP:
2954     case HEVC_NAL_BLA_W_RADL:
2955     case HEVC_NAL_BLA_N_LP:
2956     case HEVC_NAL_IDR_W_RADL:
2957     case HEVC_NAL_IDR_N_LP:
2958     case HEVC_NAL_CRA_NUT:
2959     case HEVC_NAL_RADL_N:
2960     case HEVC_NAL_RADL_R:
2961     case HEVC_NAL_RASL_N:
2962     case HEVC_NAL_RASL_R:
2963         ret = hls_slice_header(s);
2964         if (ret < 0)
2965             return ret;
2966         if (ret == 1) {
2967             ret = AVERROR_INVALIDDATA;
2968             goto fail;
2969         }
2970
2971
2972         if (
2973             (s->avctx->skip_frame >= AVDISCARD_BIDIR && s->sh.slice_type == HEVC_SLICE_B) ||
2974             (s->avctx->skip_frame >= AVDISCARD_NONINTRA && s->sh.slice_type != HEVC_SLICE_I) ||
2975             (s->avctx->skip_frame >= AVDISCARD_NONKEY && !IS_IRAP(s))) {
2976             break;
2977         }
2978
2979         if (s->sh.first_slice_in_pic_flag) {
2980             if (s->max_ra == INT_MAX) {
2981                 if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
2982                     s->max_ra = s->poc;
2983                 } else {
2984                     if (IS_IDR(s))
2985                         s->max_ra = INT_MIN;
2986                 }
2987             }
2988
2989             if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
2990                 s->poc <= s->max_ra) {
2991                 s->is_decoded = 0;
2992                 break;
2993             } else {
2994                 if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
2995                     s->max_ra = INT_MIN;
2996             }
2997
2998             s->overlap ++;
2999             ret = hevc_frame_start(s);
3000             if (ret < 0)
3001                 return ret;
3002         } else if (!s->ref) {
3003             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
3004             goto fail;
3005         }
3006
3007         if (s->nal_unit_type != s->first_nal_type) {
3008             av_log(s->avctx, AV_LOG_ERROR,
3009                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
3010                    s->first_nal_type, s->nal_unit_type);
3011             return AVERROR_INVALIDDATA;
3012         }
3013
3014         if (!s->sh.dependent_slice_segment_flag &&
3015             s->sh.slice_type != HEVC_SLICE_I) {
3016             ret = ff_hevc_slice_rpl(s);
3017             if (ret < 0) {
3018                 av_log(s->avctx, AV_LOG_WARNING,
3019                        "Error constructing the reference lists for the current slice.\n");
3020                 goto fail;
3021             }
3022         }
3023
3024         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
3025             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
3026             if (ret < 0)
3027                 goto fail;
3028         }
3029
3030         if (s->avctx->hwaccel) {
3031             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
3032             if (ret < 0)
3033                 goto fail;
3034         } else {
3035             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
3036                 ctb_addr_ts = hls_slice_data_wpp(s, nal);
3037             else
3038                 ctb_addr_ts = hls_slice_data(s);
3039             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
3040                 s->is_decoded = 1;
3041             }
3042
3043             if (ctb_addr_ts < 0) {
3044                 ret = ctb_addr_ts;
3045                 goto fail;
3046             }
3047         }
3048         break;
3049     case HEVC_NAL_EOS_NUT:
3050     case HEVC_NAL_EOB_NUT:
3051         s->seq_decode = (s->seq_decode + 1) & 0xff;
3052         s->max_ra     = INT_MAX;
3053         break;
3054     case HEVC_NAL_AUD:
3055     case HEVC_NAL_FD_NUT:
3056         break;
3057     default:
3058         av_log(s->avctx, AV_LOG_INFO,
3059                "Skipping NAL unit %d\n", s->nal_unit_type);
3060     }
3061
3062     return 0;
3063 fail:
3064     if (s->avctx->err_recognition & AV_EF_EXPLODE)
3065         return ret;
3066     return 0;
3067 }
3068
3069 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
3070 {
3071     int i, ret = 0;
3072     int eos_at_start = 1;
3073
3074     s->ref = NULL;
3075     s->last_eos = s->eos;
3076     s->eos = 0;
3077     s->overlap = 0;
3078
3079     /* split the input packet into NAL units, so we know the upper bound on the
3080      * number of slices in the frame */
3081     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
3082                                 s->nal_length_size, s->avctx->codec_id, 1, 0);
3083     if (ret < 0) {
3084         av_log(s->avctx, AV_LOG_ERROR,
3085                "Error splitting the input into NAL units.\n");
3086         return ret;
3087     }
3088
3089     for (i = 0; i < s->pkt.nb_nals; i++) {
3090         if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
3091             s->pkt.nals[i].type == HEVC_NAL_EOS_NUT) {
3092             if (eos_at_start) {
3093                 s->last_eos = 1;
3094             } else {
3095                 s->eos = 1;
3096             }
3097         } else {
3098             eos_at_start = 0;
3099         }
3100     }
3101
3102     /* decode the NAL units */
3103     for (i = 0; i < s->pkt.nb_nals; i++) {
3104         H2645NAL *nal = &s->pkt.nals[i];
3105
3106         if (s->avctx->skip_frame >= AVDISCARD_ALL ||
3107             (s->avctx->skip_frame >= AVDISCARD_NONREF
3108             && ff_hevc_nal_is_nonref(nal->type)) || nal->nuh_layer_id > 0)
3109             continue;
3110
3111         ret = decode_nal_unit(s, nal);
3112         if (ret >= 0 && s->overlap > 2)
3113             ret = AVERROR_INVALIDDATA;
3114         if (ret < 0) {
3115             av_log(s->avctx, AV_LOG_WARNING,
3116                    "Error parsing NAL unit #%d.\n", i);
3117             goto fail;
3118         }
3119     }
3120
3121 fail:
3122     if (s->ref && s->threads_type == FF_THREAD_FRAME)
3123         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
3124
3125     return ret;
3126 }
3127
/**
 * Log a 16-byte MD5 digest as 32 lowercase hex digits (no separator and no
 * trailing newline).
 *
 * @param log_ctx context passed through to av_log()
 * @param level   log level passed through to av_log()
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte      = md5;
    const uint8_t *const end = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
3134
3135 static int verify_md5(HEVCContext *s, AVFrame *frame)
3136 {
3137     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3138     int pixel_shift;
3139     int i, j;
3140
3141     if (!desc)
3142         return AVERROR(EINVAL);
3143
3144     pixel_shift = desc->comp[0].depth > 8;
3145
3146     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3147            s->poc);
3148
3149     /* the checksums are LE, so we have to byteswap for >8bpp formats
3150      * on BE arches */
3151 #if HAVE_BIGENDIAN
3152     if (pixel_shift && !s->checksum_buf) {
3153         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3154                        FFMAX3(frame->linesize[0], frame->linesize[1],
3155                               frame->linesize[2]));
3156         if (!s->checksum_buf)
3157             return AVERROR(ENOMEM);
3158     }
3159 #endif
3160
3161     for (i = 0; frame->data[i]; i++) {
3162         int width  = s->avctx->coded_width;
3163         int height = s->avctx->coded_height;
3164         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
3165         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3166         uint8_t md5[16];
3167
3168         av_md5_init(s->md5_ctx);
3169         for (j = 0; j < h; j++) {
3170             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3171 #if HAVE_BIGENDIAN
3172             if (pixel_shift) {
3173                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3174                                     (const uint16_t *) src, w);
3175                 src = s->checksum_buf;
3176             }
3177 #endif
3178             av_md5_update(s->md5_ctx, src, w << pixel_shift);
3179         }
3180         av_md5_final(s->md5_ctx, md5);
3181
3182         if (!memcmp(md5, s->sei.picture_hash.md5[i], 16)) {
3183             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3184             print_md5(s->avctx, AV_LOG_DEBUG, md5);
3185             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
3186         } else {
3187             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3188             print_md5(s->avctx, AV_LOG_ERROR, md5);
3189             av_log   (s->avctx, AV_LOG_ERROR, " != ");
3190             print_md5(s->avctx, AV_LOG_ERROR, s->sei.picture_hash.md5[i]);
3191             av_log   (s->avctx, AV_LOG_ERROR, "\n");
3192             return AVERROR_INVALIDDATA;
3193         }
3194     }
3195
3196     av_log(s->avctx, AV_LOG_DEBUG, "\n");
3197
3198     return 0;
3199 }
3200
3201 static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length, int first)
3202 {
3203     int ret, i;
3204
3205     ret = ff_hevc_decode_extradata(buf, length, &s->ps, &s->sei, &s->is_nalff,
3206                                    &s->nal_length_size, s->avctx->err_recognition,
3207                                    s->apply_defdispwin, s->avctx);
3208     if (ret < 0)
3209         return ret;
3210
3211     /* export stream parameters from the first SPS */
3212     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3213         if (first && s->ps.sps_list[i]) {
3214             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3215             export_stream_params(s, sps);
3216             break;
3217         }
3218     }
3219
3220     return 0;
3221 }
3222
3223 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3224                              AVPacket *avpkt)
3225 {
3226     int ret;
3227     int new_extradata_size;
3228     uint8_t *new_extradata;
3229     HEVCContext *s = avctx->priv_data;
3230
3231     if (!avpkt->size) {
3232         ret = ff_hevc_output_frame(s, data, 1);
3233         if (ret < 0)
3234             return ret;
3235
3236         *got_output = ret;
3237         return 0;
3238     }
3239
3240     new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
3241                                             &new_extradata_size);
3242     if (new_extradata && new_extradata_size > 0) {
3243         ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0);
3244         if (ret < 0)
3245             return ret;
3246     }
3247
3248     s->ref = NULL;
3249     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
3250     if (ret < 0)
3251         return ret;
3252
3253     if (avctx->hwaccel) {
3254         if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
3255             av_log(avctx, AV_LOG_ERROR,
3256                    "hardware accelerator failed to decode picture\n");
3257             ff_hevc_unref_frame(s, s->ref, ~0);
3258             return ret;
3259         }
3260     } else {
3261         /* verify the SEI checksum */
3262         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3263             s->sei.picture_hash.is_md5) {
3264             ret = verify_md5(s, s->ref->frame);
3265             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3266                 ff_hevc_unref_frame(s, s->ref, ~0);
3267                 return ret;
3268             }
3269         }
3270     }
3271     s->sei.picture_hash.is_md5 = 0;
3272
3273     if (s->is_decoded) {
3274         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3275         s->is_decoded = 0;
3276     }
3277
3278     if (s->output_frame->buf[0]) {
3279         av_frame_move_ref(data, s->output_frame);
3280         *got_output = 1;
3281     }
3282
3283     return avpkt->size;
3284 }
3285
3286 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3287 {
3288     int ret;
3289
3290     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3291     if (ret < 0)
3292         return ret;
3293
3294     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3295     if (!dst->tab_mvf_buf)
3296         goto fail;
3297     dst->tab_mvf = src->tab_mvf;
3298
3299     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3300     if (!dst->rpl_tab_buf)
3301         goto fail;
3302     dst->rpl_tab = src->rpl_tab;
3303
3304     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3305     if (!dst->rpl_buf)
3306         goto fail;
3307
3308     dst->poc        = src->poc;
3309     dst->ctb_count  = src->ctb_count;
3310     dst->flags      = src->flags;
3311     dst->sequence   = src->sequence;
3312
3313     if (src->hwaccel_picture_private) {
3314         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3315         if (!dst->hwaccel_priv_buf)
3316             goto fail;
3317         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
3318     }
3319
3320     return 0;
3321 fail:
3322     ff_hevc_unref_frame(s, dst, ~0);
3323     return AVERROR(ENOMEM);
3324 }
3325
3326 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3327 {
3328     HEVCContext       *s = avctx->priv_data;
3329     int i;
3330
3331     pic_arrays_free(s);
3332
3333     av_freep(&s->md5_ctx);
3334
3335     av_freep(&s->cabac_state);
3336
3337     for (i = 0; i < 3; i++) {
3338         av_freep(&s->sao_pixel_buffer_h[i]);
3339         av_freep(&s->sao_pixel_buffer_v[i]);
3340     }
3341     av_frame_free(&s->output_frame);
3342
3343     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3344         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3345         av_frame_free(&s->DPB[i].frame);
3346     }
3347
3348     ff_hevc_ps_uninit(&s->ps);
3349
3350     av_freep(&s->sh.entry_point_offset);
3351     av_freep(&s->sh.offset);
3352     av_freep(&s->sh.size);
3353
3354     for (i = 1; i < s->threads_number; i++) {
3355         HEVCLocalContext *lc = s->HEVClcList[i];
3356         if (lc) {
3357             av_freep(&s->HEVClcList[i]);
3358             av_freep(&s->sList[i]);
3359         }
3360     }
3361     if (s->HEVClc == s->HEVClcList[0])
3362         s->HEVClc = NULL;
3363     av_freep(&s->HEVClcList[0]);
3364
3365     ff_h2645_packet_uninit(&s->pkt);
3366
3367     ff_hevc_reset_sei(&s->sei);
3368
3369     return 0;
3370 }
3371
3372 static av_cold int hevc_init_context(AVCodecContext *avctx)
3373 {
3374     HEVCContext *s = avctx->priv_data;
3375     int i;
3376
3377     s->avctx = avctx;
3378
3379     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3380     if (!s->HEVClc)
3381         goto fail;
3382     s->HEVClcList[0] = s->HEVClc;
3383     s->sList[0] = s;
3384
3385     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3386     if (!s->cabac_state)
3387         goto fail;
3388
3389     s->output_frame = av_frame_alloc();
3390     if (!s->output_frame)
3391         goto fail;
3392
3393     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3394         s->DPB[i].frame = av_frame_alloc();
3395         if (!s->DPB[i].frame)
3396             goto fail;
3397         s->DPB[i].tf.f = s->DPB[i].frame;
3398     }
3399
3400     s->max_ra = INT_MAX;
3401
3402     s->md5_ctx = av_md5_alloc();
3403     if (!s->md5_ctx)
3404         goto fail;
3405
3406     ff_bswapdsp_init(&s->bdsp);
3407
3408     s->context_initialized = 1;
3409     s->eos = 0;
3410
3411     ff_hevc_reset_sei(&s->sei);
3412
3413     return 0;
3414
3415 fail:
3416     hevc_decode_free(avctx);
3417     return AVERROR(ENOMEM);
3418 }
3419
#if HAVE_THREADS
/**
 * Frame-threading callback: bring the destination thread's decoder context
 * in sync with the source thread's.
 *
 * Re-references the DPB frames and the VPS/SPS/PPS lists, re-applies the
 * active SPS when it changed, and copies the sequence counters, bitstream
 * format settings, threading settings and the SEI state listed below.
 *
 * @param dst codec context to update
 * @param src codec context to copy from
 * @return 0 on success, a negative error code otherwise
 */
static int hevc_update_thread_context(AVCodecContext *dst,
                                      const AVCodecContext *src)
{
    HEVCContext *to   = dst->priv_data;
    HEVCContext *from = src->priv_data;
    int idx, err;

    if (!to->context_initialized) {
        err = hevc_init_context(dst);
        if (err < 0)
            return err;
    }

    /* mirror the source DPB */
    for (idx = 0; idx < FF_ARRAY_ELEMS(to->DPB); idx++) {
        ff_hevc_unref_frame(to, &to->DPB[idx], ~0);
        if (!from->DPB[idx].frame->buf[0])
            continue;

        err = hevc_ref_frame(to, &to->DPB[idx], &from->DPB[idx]);
        if (err < 0)
            return err;
    }

    /* forget a differing active SPS before the lists are replaced; it is
     * set again through set_sps() below */
    if (to->ps.sps != from->ps.sps)
        to->ps.sps = NULL;

    for (idx = 0; idx < FF_ARRAY_ELEMS(to->ps.vps_list); idx++) {
        av_buffer_unref(&to->ps.vps_list[idx]);
        if (!from->ps.vps_list[idx])
            continue;

        to->ps.vps_list[idx] = av_buffer_ref(from->ps.vps_list[idx]);
        if (!to->ps.vps_list[idx])
            return AVERROR(ENOMEM);
    }

    for (idx = 0; idx < FF_ARRAY_ELEMS(to->ps.sps_list); idx++) {
        av_buffer_unref(&to->ps.sps_list[idx]);
        if (!from->ps.sps_list[idx])
            continue;

        to->ps.sps_list[idx] = av_buffer_ref(from->ps.sps_list[idx]);
        if (!to->ps.sps_list[idx])
            return AVERROR(ENOMEM);
    }

    for (idx = 0; idx < FF_ARRAY_ELEMS(to->ps.pps_list); idx++) {
        av_buffer_unref(&to->ps.pps_list[idx]);
        if (!from->ps.pps_list[idx])
            continue;

        to->ps.pps_list[idx] = av_buffer_ref(from->ps.pps_list[idx]);
        if (!to->ps.pps_list[idx])
            return AVERROR(ENOMEM);
    }

    if (to->ps.sps != from->ps.sps) {
        err = set_sps(to, from->ps.sps, src->pix_fmt);
        if (err < 0)
            return err;
    }

    to->seq_decode          = from->seq_decode;
    to->seq_output          = from->seq_output;
    to->pocTid0             = from->pocTid0;
    to->max_ra              = from->max_ra;
    to->eos                 = from->eos;
    to->no_rasl_output_flag = from->no_rasl_output_flag;

    to->is_nalff        = from->is_nalff;
    to->nal_length_size = from->nal_length_size;

    to->threads_number = from->threads_number;
    to->threads_type   = from->threads_type;

    /* the source saw an end of sequence: start a new sequence here */
    if (from->eos) {
        to->seq_decode = (to->seq_decode + 1) & 0xff;
        to->max_ra     = INT_MAX;
    }

    av_buffer_unref(&to->sei.a53_caption.buf_ref);
    if (from->sei.a53_caption.buf_ref) {
        to->sei.a53_caption.buf_ref = av_buffer_ref(from->sei.a53_caption.buf_ref);
        if (!to->sei.a53_caption.buf_ref)
            return AVERROR(ENOMEM);
    }

    to->sei.frame_packing        = from->sei.frame_packing;
    to->sei.display_orientation  = from->sei.display_orientation;
    to->sei.mastering_display    = from->sei.mastering_display;
    to->sei.content_light        = from->sei.content_light;
    to->sei.alternative_transfer = from->sei.alternative_transfer;

    return 0;
}
#endif
3510
3511 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3512 {
3513     HEVCContext *s = avctx->priv_data;
3514     int ret;
3515
3516     ret = hevc_init_context(avctx);
3517     if (ret < 0)
3518         return ret;
3519
3520     s->enable_parallel_tiles = 0;
3521     s->sei.picture_timing.picture_struct = 0;
3522     s->eos = 1;
3523
3524     atomic_init(&s->wpp_err, 0);
3525
3526     if(avctx->active_thread_type & FF_THREAD_SLICE)
3527         s->threads_number = avctx->thread_count;
3528     else
3529         s->threads_number = 1;
3530
3531     if (!avctx->internal->is_copy) {
3532         if (avctx->extradata_size > 0 && avctx->extradata) {
3533             ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size, 1);
3534             if (ret < 0) {
3535                 hevc_decode_free(avctx);
3536                 return ret;
3537             }
3538         }
3539     }
3540
3541     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3542             s->threads_type = FF_THREAD_FRAME;
3543         else
3544             s->threads_type = FF_THREAD_SLICE;
3545
3546     return 0;
3547 }
3548
3549 static void hevc_decode_flush(AVCodecContext *avctx)
3550 {
3551     HEVCContext *s = avctx->priv_data;
3552     ff_hevc_flush_dpb(s);
3553     ff_hevc_reset_sei(&s->sei);
3554     s->max_ra = INT_MAX;
3555     s->eos = 1;
3556 }
3557
3558 #define OFFSET(x) offsetof(HEVCContext, x)
3559 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3560
3561 static const AVOption options[] = {
3562     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3563         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3564     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3565         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3566     { NULL },
3567 };
3568
3569 static const AVClass hevc_decoder_class = {
3570     .class_name = "HEVC decoder",
3571     .item_name  = av_default_item_name,
3572     .option     = options,
3573     .version    = LIBAVUTIL_VERSION_INT,
3574 };
3575
3576 AVCodec ff_hevc_decoder = {
3577     .name                  = "hevc",
3578     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3579     .type                  = AVMEDIA_TYPE_VIDEO,
3580     .id                    = AV_CODEC_ID_HEVC,
3581     .priv_data_size        = sizeof(HEVCContext),
3582     .priv_class            = &hevc_decoder_class,
3583     .init                  = hevc_decode_init,
3584     .close                 = hevc_decode_free,
3585     .decode                = hevc_decode_frame,
3586     .flush                 = hevc_decode_flush,
3587     .update_thread_context = ONLY_IF_THREADS_ENABLED(hevc_update_thread_context),
3588     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3589                              AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3590     .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING |
3591                              FF_CODEC_CAP_ALLOCATE_PROGRESS,
3592     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3593     .hw_configs            = (const AVCodecHWConfigInternal*[]) {
3594 #if CONFIG_HEVC_DXVA2_HWACCEL
3595                                HWACCEL_DXVA2(hevc),
3596 #endif
3597 #if CONFIG_HEVC_D3D11VA_HWACCEL
3598                                HWACCEL_D3D11VA(hevc),
3599 #endif
3600 #if CONFIG_HEVC_D3D11VA2_HWACCEL
3601                                HWACCEL_D3D11VA2(hevc),
3602 #endif
3603 #if CONFIG_HEVC_NVDEC_HWACCEL
3604                                HWACCEL_NVDEC(hevc),
3605 #endif
3606 #if CONFIG_HEVC_VAAPI_HWACCEL
3607                                HWACCEL_VAAPI(hevc),
3608 #endif
3609 #if CONFIG_HEVC_VDPAU_HWACCEL
3610                                HWACCEL_VDPAU(hevc),
3611 #endif
3612 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
3613                                HWACCEL_VIDEOTOOLBOX(hevc),
3614 #endif
3615                                NULL
3616                            },
3617 };