]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevcdec.c
avcodec/hevcdec: sync SEI derived AVCodecContext fields across threads
[ffmpeg] / libavcodec / hevcdec.c
1 /*
2  * HEVC video Decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/mastering_display_metadata.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
35 #include "libavutil/timecode.h"
36
37 #include "bswapdsp.h"
38 #include "bytestream.h"
39 #include "cabac_functions.h"
40 #include "golomb.h"
41 #include "hevc.h"
42 #include "hevc_data.h"
43 #include "hevc_parse.h"
44 #include "hevcdec.h"
45 #include "hwconfig.h"
46 #include "profiles.h"
47
/*
 * Sparse lookup table: entries 2, 4, 6, 8, 12, 16, 24, 32, 48 and 64 hold
 * the values 0..9 in that order; every other entry is implicitly zero.
 * NOTE(review): presumably indexed by a prediction block width in pixels --
 * confirm against the users of this table.
 */
const uint8_t ff_hevc_pel_weight[65] = {
    [2]  = 0,
    [4]  = 1,
    [6]  = 2,
    [8]  = 3,
    [12] = 4,
    [16] = 5,
    [24] = 6,
    [32] = 7,
    [48] = 8,
    [64] = 9,
};
49
50 /**
51  * NOTE: Each function hls_foo corresponds to the function foo in the
52  * specification (HLS stands for High Level Syntax).
53  */
54
55 /**
56  * Section 5.7
57  */
58
59 /* free everything allocated  by pic_arrays_init() */
60 static void pic_arrays_free(HEVCContext *s)
61 {
62     av_freep(&s->sao);
63     av_freep(&s->deblock);
64
65     av_freep(&s->skip_flag);
66     av_freep(&s->tab_ct_depth);
67
68     av_freep(&s->tab_ipm);
69     av_freep(&s->cbf_luma);
70     av_freep(&s->is_pcm);
71
72     av_freep(&s->qp_y_tab);
73     av_freep(&s->tab_slice_address);
74     av_freep(&s->filter_slice_edges);
75
76     av_freep(&s->horizontal_bs);
77     av_freep(&s->vertical_bs);
78
79     av_freep(&s->sh.entry_point_offset);
80     av_freep(&s->sh.size);
81     av_freep(&s->sh.offset);
82
83     av_buffer_pool_uninit(&s->tab_mvf_pool);
84     av_buffer_pool_uninit(&s->rpl_tab_pool);
85 }
86
87 /* allocate arrays that depend on frame dimensions */
88 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
89 {
90     int log2_min_cb_size = sps->log2_min_cb_size;
91     int width            = sps->width;
92     int height           = sps->height;
93     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
94                            ((height >> log2_min_cb_size) + 1);
95     int ctb_count        = sps->ctb_width * sps->ctb_height;
96     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
97
98     s->bs_width  = (width  >> 2) + 1;
99     s->bs_height = (height >> 2) + 1;
100
101     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
102     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
103     if (!s->sao || !s->deblock)
104         goto fail;
105
106     s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
107     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
108     if (!s->skip_flag || !s->tab_ct_depth)
109         goto fail;
110
111     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
112     s->tab_ipm  = av_mallocz(min_pu_size);
113     s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
114     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
115         goto fail;
116
117     s->filter_slice_edges = av_mallocz(ctb_count);
118     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
119                                       sizeof(*s->tab_slice_address));
120     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
121                                       sizeof(*s->qp_y_tab));
122     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
123         goto fail;
124
125     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
126     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
127     if (!s->horizontal_bs || !s->vertical_bs)
128         goto fail;
129
130     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
131                                           av_buffer_allocz);
132     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
133                                           av_buffer_allocz);
134     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
135         goto fail;
136
137     return 0;
138
139 fail:
140     pic_arrays_free(s);
141     return AVERROR(ENOMEM);
142 }
143
144 static int pred_weight_table(HEVCContext *s, GetBitContext *gb)
145 {
146     int i = 0;
147     int j = 0;
148     uint8_t luma_weight_l0_flag[16];
149     uint8_t chroma_weight_l0_flag[16];
150     uint8_t luma_weight_l1_flag[16];
151     uint8_t chroma_weight_l1_flag[16];
152     int luma_log2_weight_denom;
153
154     luma_log2_weight_denom = get_ue_golomb_long(gb);
155     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7) {
156         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
157         return AVERROR_INVALIDDATA;
158     }
159     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
160     if (s->ps.sps->chroma_format_idc != 0) {
161         int64_t chroma_log2_weight_denom = luma_log2_weight_denom + (int64_t)get_se_golomb(gb);
162         if (chroma_log2_weight_denom < 0 || chroma_log2_weight_denom > 7) {
163             av_log(s->avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %"PRId64" is invalid\n", chroma_log2_weight_denom);
164             return AVERROR_INVALIDDATA;
165         }
166         s->sh.chroma_log2_weight_denom = chroma_log2_weight_denom;
167     }
168
169     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
170         luma_weight_l0_flag[i] = get_bits1(gb);
171         if (!luma_weight_l0_flag[i]) {
172             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
173             s->sh.luma_offset_l0[i] = 0;
174         }
175     }
176     if (s->ps.sps->chroma_format_idc != 0) {
177         for (i = 0; i < s->sh.nb_refs[L0]; i++)
178             chroma_weight_l0_flag[i] = get_bits1(gb);
179     } else {
180         for (i = 0; i < s->sh.nb_refs[L0]; i++)
181             chroma_weight_l0_flag[i] = 0;
182     }
183     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
184         if (luma_weight_l0_flag[i]) {
185             int delta_luma_weight_l0 = get_se_golomb(gb);
186             if ((int8_t)delta_luma_weight_l0 != delta_luma_weight_l0)
187                 return AVERROR_INVALIDDATA;
188             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
189             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
190         }
191         if (chroma_weight_l0_flag[i]) {
192             for (j = 0; j < 2; j++) {
193                 int delta_chroma_weight_l0 = get_se_golomb(gb);
194                 int delta_chroma_offset_l0 = get_se_golomb(gb);
195
196                 if (   (int8_t)delta_chroma_weight_l0 != delta_chroma_weight_l0
197                     || delta_chroma_offset_l0 < -(1<<17) || delta_chroma_offset_l0 > (1<<17)) {
198                     return AVERROR_INVALIDDATA;
199                 }
200
201                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
202                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
203                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
204             }
205         } else {
206             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
207             s->sh.chroma_offset_l0[i][0] = 0;
208             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
209             s->sh.chroma_offset_l0[i][1] = 0;
210         }
211     }
212     if (s->sh.slice_type == HEVC_SLICE_B) {
213         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
214             luma_weight_l1_flag[i] = get_bits1(gb);
215             if (!luma_weight_l1_flag[i]) {
216                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
217                 s->sh.luma_offset_l1[i] = 0;
218             }
219         }
220         if (s->ps.sps->chroma_format_idc != 0) {
221             for (i = 0; i < s->sh.nb_refs[L1]; i++)
222                 chroma_weight_l1_flag[i] = get_bits1(gb);
223         } else {
224             for (i = 0; i < s->sh.nb_refs[L1]; i++)
225                 chroma_weight_l1_flag[i] = 0;
226         }
227         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
228             if (luma_weight_l1_flag[i]) {
229                 int delta_luma_weight_l1 = get_se_golomb(gb);
230                 if ((int8_t)delta_luma_weight_l1 != delta_luma_weight_l1)
231                     return AVERROR_INVALIDDATA;
232                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
233                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
234             }
235             if (chroma_weight_l1_flag[i]) {
236                 for (j = 0; j < 2; j++) {
237                     int delta_chroma_weight_l1 = get_se_golomb(gb);
238                     int delta_chroma_offset_l1 = get_se_golomb(gb);
239
240                     if (   (int8_t)delta_chroma_weight_l1 != delta_chroma_weight_l1
241                         || delta_chroma_offset_l1 < -(1<<17) || delta_chroma_offset_l1 > (1<<17)) {
242                         return AVERROR_INVALIDDATA;
243                     }
244
245                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
246                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
247                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
248                 }
249             } else {
250                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
251                 s->sh.chroma_offset_l1[i][0] = 0;
252                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
253                 s->sh.chroma_offset_l1[i][1] = 0;
254             }
255         }
256     }
257     return 0;
258 }
259
260 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
261 {
262     const HEVCSPS *sps = s->ps.sps;
263     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
264     int prev_delta_msb = 0;
265     unsigned int nb_sps = 0, nb_sh;
266     int i;
267
268     rps->nb_refs = 0;
269     if (!sps->long_term_ref_pics_present_flag)
270         return 0;
271
272     if (sps->num_long_term_ref_pics_sps > 0)
273         nb_sps = get_ue_golomb_long(gb);
274     nb_sh = get_ue_golomb_long(gb);
275
276     if (nb_sps > sps->num_long_term_ref_pics_sps)
277         return AVERROR_INVALIDDATA;
278     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
279         return AVERROR_INVALIDDATA;
280
281     rps->nb_refs = nb_sh + nb_sps;
282
283     for (i = 0; i < rps->nb_refs; i++) {
284
285         if (i < nb_sps) {
286             uint8_t lt_idx_sps = 0;
287
288             if (sps->num_long_term_ref_pics_sps > 1)
289                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
290
291             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
292             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
293         } else {
294             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
295             rps->used[i] = get_bits1(gb);
296         }
297
298         rps->poc_msb_present[i] = get_bits1(gb);
299         if (rps->poc_msb_present[i]) {
300             int64_t delta = get_ue_golomb_long(gb);
301             int64_t poc;
302
303             if (i && i != nb_sps)
304                 delta += prev_delta_msb;
305
306             poc = rps->poc[i] + s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
307             if (poc != (int32_t)poc)
308                 return AVERROR_INVALIDDATA;
309             rps->poc[i] = poc;
310             prev_delta_msb = delta;
311         }
312     }
313
314     return 0;
315 }
316
317 static void export_stream_params(HEVCContext *s, const HEVCSPS *sps)
318 {
319     AVCodecContext *avctx = s->avctx;
320     const HEVCParamSets *ps = &s->ps;
321     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
322     const HEVCWindow *ow = &sps->output_window;
323     unsigned int num = 0, den = 0;
324
325     avctx->pix_fmt             = sps->pix_fmt;
326     avctx->coded_width         = sps->width;
327     avctx->coded_height        = sps->height;
328     avctx->width               = sps->width  - ow->left_offset - ow->right_offset;
329     avctx->height              = sps->height - ow->top_offset  - ow->bottom_offset;
330     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
331     avctx->profile             = sps->ptl.general_ptl.profile_idc;
332     avctx->level               = sps->ptl.general_ptl.level_idc;
333
334     ff_set_sar(avctx, sps->vui.sar);
335
336     if (sps->vui.video_signal_type_present_flag)
337         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
338                                                             : AVCOL_RANGE_MPEG;
339     else
340         avctx->color_range = AVCOL_RANGE_MPEG;
341
342     if (sps->vui.colour_description_present_flag) {
343         avctx->color_primaries = sps->vui.colour_primaries;
344         avctx->color_trc       = sps->vui.transfer_characteristic;
345         avctx->colorspace      = sps->vui.matrix_coeffs;
346     } else {
347         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
348         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
349         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
350     }
351
352     avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
353     if (sps->chroma_format_idc == 1) {
354         if (sps->vui.chroma_loc_info_present_flag) {
355             if (sps->vui.chroma_sample_loc_type_top_field <= 5)
356                 avctx->chroma_sample_location = sps->vui.chroma_sample_loc_type_top_field + 1;
357         } else
358             avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
359     }
360
361     if (vps->vps_timing_info_present_flag) {
362         num = vps->vps_num_units_in_tick;
363         den = vps->vps_time_scale;
364     } else if (sps->vui.vui_timing_info_present_flag) {
365         num = sps->vui.vui_num_units_in_tick;
366         den = sps->vui.vui_time_scale;
367     }
368
369     if (num != 0 && den != 0)
370         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
371                   num, den, 1 << 30);
372 }
373
374 static int export_stream_params_from_sei(HEVCContext *s)
375 {
376     AVCodecContext *avctx = s->avctx;
377
378     if (s->sei.a53_caption.buf_ref)
379         s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
380
381     if (s->sei.alternative_transfer.present &&
382         av_color_transfer_name(s->sei.alternative_transfer.preferred_transfer_characteristics) &&
383         s->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) {
384         avctx->color_trc = s->sei.alternative_transfer.preferred_transfer_characteristics;
385     }
386
387     return 0;
388 }
389
390 static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
391 {
392 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
393                      CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
394                      CONFIG_HEVC_NVDEC_HWACCEL + \
395                      CONFIG_HEVC_VAAPI_HWACCEL + \
396                      CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
397                      CONFIG_HEVC_VDPAU_HWACCEL)
398     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
399
400     switch (sps->pix_fmt) {
401     case AV_PIX_FMT_YUV420P:
402     case AV_PIX_FMT_YUVJ420P:
403 #if CONFIG_HEVC_DXVA2_HWACCEL
404         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
405 #endif
406 #if CONFIG_HEVC_D3D11VA_HWACCEL
407         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
408         *fmt++ = AV_PIX_FMT_D3D11;
409 #endif
410 #if CONFIG_HEVC_VAAPI_HWACCEL
411         *fmt++ = AV_PIX_FMT_VAAPI;
412 #endif
413 #if CONFIG_HEVC_VDPAU_HWACCEL
414         *fmt++ = AV_PIX_FMT_VDPAU;
415 #endif
416 #if CONFIG_HEVC_NVDEC_HWACCEL
417         *fmt++ = AV_PIX_FMT_CUDA;
418 #endif
419 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
420         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
421 #endif
422         break;
423     case AV_PIX_FMT_YUV420P10:
424 #if CONFIG_HEVC_DXVA2_HWACCEL
425         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
426 #endif
427 #if CONFIG_HEVC_D3D11VA_HWACCEL
428         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
429         *fmt++ = AV_PIX_FMT_D3D11;
430 #endif
431 #if CONFIG_HEVC_VAAPI_HWACCEL
432         *fmt++ = AV_PIX_FMT_VAAPI;
433 #endif
434 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
435         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
436 #endif
437 #if CONFIG_HEVC_VDPAU_HWACCEL
438         *fmt++ = AV_PIX_FMT_VDPAU;
439 #endif
440 #if CONFIG_HEVC_NVDEC_HWACCEL
441         *fmt++ = AV_PIX_FMT_CUDA;
442 #endif
443         break;
444     case AV_PIX_FMT_YUV444P:
445 #if CONFIG_HEVC_VDPAU_HWACCEL
446         *fmt++ = AV_PIX_FMT_VDPAU;
447 #endif
448 #if CONFIG_HEVC_NVDEC_HWACCEL
449         *fmt++ = AV_PIX_FMT_CUDA;
450 #endif
451         break;
452     case AV_PIX_FMT_YUV422P:
453     case AV_PIX_FMT_YUV422P10LE:
454 #if CONFIG_HEVC_VAAPI_HWACCEL
455        *fmt++ = AV_PIX_FMT_VAAPI;
456 #endif
457         break;
458     case AV_PIX_FMT_YUV420P12:
459     case AV_PIX_FMT_YUV444P10:
460     case AV_PIX_FMT_YUV444P12:
461 #if CONFIG_HEVC_VDPAU_HWACCEL
462         *fmt++ = AV_PIX_FMT_VDPAU;
463 #endif
464 #if CONFIG_HEVC_NVDEC_HWACCEL
465         *fmt++ = AV_PIX_FMT_CUDA;
466 #endif
467         break;
468     }
469
470     *fmt++ = sps->pix_fmt;
471     *fmt = AV_PIX_FMT_NONE;
472
473     return ff_thread_get_format(s->avctx, pix_fmts);
474 }
475
476 static int set_sps(HEVCContext *s, const HEVCSPS *sps,
477                    enum AVPixelFormat pix_fmt)
478 {
479     int ret, i;
480
481     pic_arrays_free(s);
482     s->ps.sps = NULL;
483     s->ps.vps = NULL;
484
485     if (!sps)
486         return 0;
487
488     ret = pic_arrays_init(s, sps);
489     if (ret < 0)
490         goto fail;
491
492     export_stream_params(s, sps);
493
494     s->avctx->pix_fmt = pix_fmt;
495
496     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
497     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
498     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
499
500     for (i = 0; i < 3; i++) {
501         av_freep(&s->sao_pixel_buffer_h[i]);
502         av_freep(&s->sao_pixel_buffer_v[i]);
503     }
504
505     if (sps->sao_enabled && !s->avctx->hwaccel) {
506         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
507         int c_idx;
508
509         for(c_idx = 0; c_idx < c_count; c_idx++) {
510             int w = sps->width >> sps->hshift[c_idx];
511             int h = sps->height >> sps->vshift[c_idx];
512             s->sao_pixel_buffer_h[c_idx] =
513                 av_malloc((w * 2 * sps->ctb_height) <<
514                           sps->pixel_shift);
515             s->sao_pixel_buffer_v[c_idx] =
516                 av_malloc((h * 2 * sps->ctb_width) <<
517                           sps->pixel_shift);
518         }
519     }
520
521     s->ps.sps = sps;
522     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
523
524     return 0;
525
526 fail:
527     pic_arrays_free(s);
528     s->ps.sps = NULL;
529     return ret;
530 }
531
532 static int hls_slice_header(HEVCContext *s)
533 {
534     GetBitContext *gb = &s->HEVClc->gb;
535     SliceHeader *sh   = &s->sh;
536     int i, ret;
537
538     // Coded parameters
539     sh->first_slice_in_pic_flag = get_bits1(gb);
540     if (s->ref && sh->first_slice_in_pic_flag) {
541         av_log(s->avctx, AV_LOG_ERROR, "Two slices reporting being the first in the same frame.\n");
542         return 1; // This slice will be skipped later, do not corrupt state
543     }
544
545     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
546         s->seq_decode = (s->seq_decode + 1) & 0xff;
547         s->max_ra     = INT_MAX;
548         if (IS_IDR(s))
549             ff_hevc_clear_refs(s);
550     }
551     sh->no_output_of_prior_pics_flag = 0;
552     if (IS_IRAP(s))
553         sh->no_output_of_prior_pics_flag = get_bits1(gb);
554
555     sh->pps_id = get_ue_golomb_long(gb);
556     if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
557         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
558         return AVERROR_INVALIDDATA;
559     }
560     if (!sh->first_slice_in_pic_flag &&
561         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
562         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
563         return AVERROR_INVALIDDATA;
564     }
565     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
566     if (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos == 1)
567         sh->no_output_of_prior_pics_flag = 1;
568
569     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
570         const HEVCSPS *sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
571         const HEVCSPS *last_sps = s->ps.sps;
572         enum AVPixelFormat pix_fmt;
573
574         if (last_sps && IS_IRAP(s) && s->nal_unit_type != HEVC_NAL_CRA_NUT) {
575             if (sps->width != last_sps->width || sps->height != last_sps->height ||
576                 sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering !=
577                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
578                 sh->no_output_of_prior_pics_flag = 0;
579         }
580         ff_hevc_clear_refs(s);
581
582         ret = set_sps(s, sps, sps->pix_fmt);
583         if (ret < 0)
584             return ret;
585
586         pix_fmt = get_format(s, sps);
587         if (pix_fmt < 0)
588             return pix_fmt;
589         s->avctx->pix_fmt = pix_fmt;
590
591         s->seq_decode = (s->seq_decode + 1) & 0xff;
592         s->max_ra     = INT_MAX;
593     }
594
595     ret = export_stream_params_from_sei(s);
596     if (ret < 0)
597         return ret;
598
599     sh->dependent_slice_segment_flag = 0;
600     if (!sh->first_slice_in_pic_flag) {
601         int slice_address_length;
602
603         if (s->ps.pps->dependent_slice_segments_enabled_flag)
604             sh->dependent_slice_segment_flag = get_bits1(gb);
605
606         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
607                                             s->ps.sps->ctb_height);
608         sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
609         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
610             av_log(s->avctx, AV_LOG_ERROR,
611                    "Invalid slice segment address: %u.\n",
612                    sh->slice_segment_addr);
613             return AVERROR_INVALIDDATA;
614         }
615
616         if (!sh->dependent_slice_segment_flag) {
617             sh->slice_addr = sh->slice_segment_addr;
618             s->slice_idx++;
619         }
620     } else {
621         sh->slice_segment_addr = sh->slice_addr = 0;
622         s->slice_idx           = 0;
623         s->slice_initialized   = 0;
624     }
625
626     if (!sh->dependent_slice_segment_flag) {
627         s->slice_initialized = 0;
628
629         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
630             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
631
632         sh->slice_type = get_ue_golomb_long(gb);
633         if (!(sh->slice_type == HEVC_SLICE_I ||
634               sh->slice_type == HEVC_SLICE_P ||
635               sh->slice_type == HEVC_SLICE_B)) {
636             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
637                    sh->slice_type);
638             return AVERROR_INVALIDDATA;
639         }
640         if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I) {
641             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
642             return AVERROR_INVALIDDATA;
643         }
644
645         // when flag is not present, picture is inferred to be output
646         sh->pic_output_flag = 1;
647         if (s->ps.pps->output_flag_present_flag)
648             sh->pic_output_flag = get_bits1(gb);
649
650         if (s->ps.sps->separate_colour_plane_flag)
651             sh->colour_plane_id = get_bits(gb, 2);
652
653         if (!IS_IDR(s)) {
654             int poc, pos;
655
656             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
657             poc = ff_hevc_compute_poc(s->ps.sps, s->pocTid0, sh->pic_order_cnt_lsb, s->nal_unit_type);
658             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
659                 av_log(s->avctx, AV_LOG_WARNING,
660                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
661                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
662                     return AVERROR_INVALIDDATA;
663                 poc = s->poc;
664             }
665             s->poc = poc;
666
667             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
668             pos = get_bits_left(gb);
669             if (!sh->short_term_ref_pic_set_sps_flag) {
670                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
671                 if (ret < 0)
672                     return ret;
673
674                 sh->short_term_rps = &sh->slice_rps;
675             } else {
676                 int numbits, rps_idx;
677
678                 if (!s->ps.sps->nb_st_rps) {
679                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
680                     return AVERROR_INVALIDDATA;
681                 }
682
683                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
684                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
685                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
686             }
687             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
688
689             pos = get_bits_left(gb);
690             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
691             if (ret < 0) {
692                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
693                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
694                     return AVERROR_INVALIDDATA;
695             }
696             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
697
698             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
699                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
700             else
701                 sh->slice_temporal_mvp_enabled_flag = 0;
702         } else {
703             s->sh.short_term_rps = NULL;
704             s->poc               = 0;
705         }
706
707         /* 8.3.1 */
708         if (sh->first_slice_in_pic_flag && s->temporal_id == 0 &&
709             s->nal_unit_type != HEVC_NAL_TRAIL_N &&
710             s->nal_unit_type != HEVC_NAL_TSA_N   &&
711             s->nal_unit_type != HEVC_NAL_STSA_N  &&
712             s->nal_unit_type != HEVC_NAL_RADL_N  &&
713             s->nal_unit_type != HEVC_NAL_RADL_R  &&
714             s->nal_unit_type != HEVC_NAL_RASL_N  &&
715             s->nal_unit_type != HEVC_NAL_RASL_R)
716             s->pocTid0 = s->poc;
717
718         if (s->ps.sps->sao_enabled) {
719             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
720             if (s->ps.sps->chroma_format_idc) {
721                 sh->slice_sample_adaptive_offset_flag[1] =
722                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
723             }
724         } else {
725             sh->slice_sample_adaptive_offset_flag[0] = 0;
726             sh->slice_sample_adaptive_offset_flag[1] = 0;
727             sh->slice_sample_adaptive_offset_flag[2] = 0;
728         }
729
730         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
731         if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
732             int nb_refs;
733
734             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
735             if (sh->slice_type == HEVC_SLICE_B)
736                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
737
738             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
739                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
740                 if (sh->slice_type == HEVC_SLICE_B)
741                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
742             }
743             if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
744                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
745                        sh->nb_refs[L0], sh->nb_refs[L1]);
746                 return AVERROR_INVALIDDATA;
747             }
748
749             sh->rpl_modification_flag[0] = 0;
750             sh->rpl_modification_flag[1] = 0;
751             nb_refs = ff_hevc_frame_nb_refs(s);
752             if (!nb_refs) {
753                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
754                 return AVERROR_INVALIDDATA;
755             }
756
757             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
758                 sh->rpl_modification_flag[0] = get_bits1(gb);
759                 if (sh->rpl_modification_flag[0]) {
760                     for (i = 0; i < sh->nb_refs[L0]; i++)
761                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
762                 }
763
764                 if (sh->slice_type == HEVC_SLICE_B) {
765                     sh->rpl_modification_flag[1] = get_bits1(gb);
766                     if (sh->rpl_modification_flag[1] == 1)
767                         for (i = 0; i < sh->nb_refs[L1]; i++)
768                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
769                 }
770             }
771
772             if (sh->slice_type == HEVC_SLICE_B)
773                 sh->mvd_l1_zero_flag = get_bits1(gb);
774
775             if (s->ps.pps->cabac_init_present_flag)
776                 sh->cabac_init_flag = get_bits1(gb);
777             else
778                 sh->cabac_init_flag = 0;
779
780             sh->collocated_ref_idx = 0;
781             if (sh->slice_temporal_mvp_enabled_flag) {
782                 sh->collocated_list = L0;
783                 if (sh->slice_type == HEVC_SLICE_B)
784                     sh->collocated_list = !get_bits1(gb);
785
786                 if (sh->nb_refs[sh->collocated_list] > 1) {
787                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
788                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
789                         av_log(s->avctx, AV_LOG_ERROR,
790                                "Invalid collocated_ref_idx: %d.\n",
791                                sh->collocated_ref_idx);
792                         return AVERROR_INVALIDDATA;
793                     }
794                 }
795             }
796
797             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == HEVC_SLICE_P) ||
798                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) {
799                 int ret = pred_weight_table(s, gb);
800                 if (ret < 0)
801                     return ret;
802             }
803
804             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
805             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
806                 av_log(s->avctx, AV_LOG_ERROR,
807                        "Invalid number of merging MVP candidates: %d.\n",
808                        sh->max_num_merge_cand);
809                 return AVERROR_INVALIDDATA;
810             }
811         }
812
813         sh->slice_qp_delta = get_se_golomb(gb);
814
815         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
816             sh->slice_cb_qp_offset = get_se_golomb(gb);
817             sh->slice_cr_qp_offset = get_se_golomb(gb);
818         } else {
819             sh->slice_cb_qp_offset = 0;
820             sh->slice_cr_qp_offset = 0;
821         }
822
823         if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
824             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
825         else
826             sh->cu_chroma_qp_offset_enabled_flag = 0;
827
828         if (s->ps.pps->deblocking_filter_control_present_flag) {
829             int deblocking_filter_override_flag = 0;
830
831             if (s->ps.pps->deblocking_filter_override_enabled_flag)
832                 deblocking_filter_override_flag = get_bits1(gb);
833
834             if (deblocking_filter_override_flag) {
835                 sh->disable_deblocking_filter_flag = get_bits1(gb);
836                 if (!sh->disable_deblocking_filter_flag) {
837                     int beta_offset_div2 = get_se_golomb(gb);
838                     int tc_offset_div2   = get_se_golomb(gb) ;
839                     if (beta_offset_div2 < -6 || beta_offset_div2 > 6 ||
840                         tc_offset_div2   < -6 || tc_offset_div2   > 6) {
841                         av_log(s->avctx, AV_LOG_ERROR,
842                             "Invalid deblock filter offsets: %d, %d\n",
843                             beta_offset_div2, tc_offset_div2);
844                         return AVERROR_INVALIDDATA;
845                     }
846                     sh->beta_offset = beta_offset_div2 * 2;
847                     sh->tc_offset   =   tc_offset_div2 * 2;
848                 }
849             } else {
850                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
851                 sh->beta_offset                    = s->ps.pps->beta_offset;
852                 sh->tc_offset                      = s->ps.pps->tc_offset;
853             }
854         } else {
855             sh->disable_deblocking_filter_flag = 0;
856             sh->beta_offset                    = 0;
857             sh->tc_offset                      = 0;
858         }
859
860         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
861             (sh->slice_sample_adaptive_offset_flag[0] ||
862              sh->slice_sample_adaptive_offset_flag[1] ||
863              !sh->disable_deblocking_filter_flag)) {
864             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
865         } else {
866             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
867         }
868     } else if (!s->slice_initialized) {
869         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
870         return AVERROR_INVALIDDATA;
871     }
872
873     sh->num_entry_point_offsets = 0;
874     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
875         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
876         // It would be possible to bound this tighter but this here is simpler
877         if (num_entry_point_offsets > get_bits_left(gb)) {
878             av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
879             return AVERROR_INVALIDDATA;
880         }
881
882         sh->num_entry_point_offsets = num_entry_point_offsets;
883         if (sh->num_entry_point_offsets > 0) {
884             int offset_len = get_ue_golomb_long(gb) + 1;
885
886             if (offset_len < 1 || offset_len > 32) {
887                 sh->num_entry_point_offsets = 0;
888                 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
889                 return AVERROR_INVALIDDATA;
890             }
891
892             av_freep(&sh->entry_point_offset);
893             av_freep(&sh->offset);
894             av_freep(&sh->size);
895             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned));
896             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
897             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
898             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
899                 sh->num_entry_point_offsets = 0;
900                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
901                 return AVERROR(ENOMEM);
902             }
903             for (i = 0; i < sh->num_entry_point_offsets; i++) {
904                 unsigned val = get_bits_long(gb, offset_len);
905                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
906             }
907             if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
908                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
909                 s->threads_number = 1;
910             } else
911                 s->enable_parallel_tiles = 0;
912         } else
913             s->enable_parallel_tiles = 0;
914     }
915
916     if (s->ps.pps->slice_header_extension_present_flag) {
917         unsigned int length = get_ue_golomb_long(gb);
918         if (length*8LL > get_bits_left(gb)) {
919             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
920             return AVERROR_INVALIDDATA;
921         }
922         for (i = 0; i < length; i++)
923             skip_bits(gb, 8);  // slice_header_extension_data_byte
924     }
925
926     // Inferred parameters
927     sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
928     if (sh->slice_qp > 51 ||
929         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
930         av_log(s->avctx, AV_LOG_ERROR,
931                "The slice_qp %d is outside the valid range "
932                "[%d, 51].\n",
933                sh->slice_qp,
934                -s->ps.sps->qp_bd_offset);
935         return AVERROR_INVALIDDATA;
936     }
937
938     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
939
940     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
941         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
942         return AVERROR_INVALIDDATA;
943     }
944
945     if (get_bits_left(gb) < 0) {
946         av_log(s->avctx, AV_LOG_ERROR,
947                "Overread slice header by %d bits\n", -get_bits_left(gb));
948         return AVERROR_INVALIDDATA;
949     }
950
951     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
952
953     if (!s->ps.pps->cu_qp_delta_enabled_flag)
954         s->HEVClc->qp_y = s->sh.slice_qp;
955
956     s->slice_initialized = 1;
957     s->HEVClc->tu.cu_qp_offset_cb = 0;
958     s->HEVClc->tu.cu_qp_offset_cr = 0;
959
960     return 0;
961 }
962
/* Access the entry for CTB column x, row y of a per-CTB table laid out row by
 * row with s->ps.sps->ctb_width entries per row. The expansion refers to a
 * variable named `s`, so it is only usable where one is in scope. */
963 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
964
/* Assign one field (elem) of the current CTB's SAO parameters.
 *  - neither merge flag set: store `value`; `value` appears only in this
 *    branch, so an expression passed for it is evaluated only in this case
 *  - merge-left set: copy the field from the CTB at (rx-1, ry)
 *  - merge-up set:   copy the field from the CTB at (rx, ry-1)
 * Relies on the locals sao, sao_merge_left_flag, sao_merge_up_flag, rx, ry and
 * s of the expanding function. The final else cannot be reached: once the
 * first test fails, at least one of the two merge flags is nonzero. */
965 #define SET_SAO(elem, value)                            \
966 do {                                                    \
967     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
968         sao->elem = value;                              \
969     else if (sao_merge_left_flag)                       \
970         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
971     else if (sao_merge_up_flag)                         \
972         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
973     else                                                \
974         sao->elem = 0;                                  \
975 } while (0)
976
/**
 * Decode the sample adaptive offset (SAO) parameters of one CTB and store
 * them in that CTB's entry of s->sao.
 *
 * The merge-left / merge-up flags are read first; every later field is then
 * set through SET_SAO, i.e. either decoded from the bitstream or copied from
 * the left / upper neighbour CTB. Finally the signed, scaled offset_val[]
 * table is derived from offset_abs[] and offset_sign[].
 *
 * @param s  HEVC decoding context
 * @param rx CTB column of the CTB being decoded
 * @param ry CTB row of the CTB being decoded
 */
977 static void hls_sao_param(HEVCContext *s, int rx, int ry)
978 {
979     HEVCLocalContext *lc    = s->HEVClc;
980     int sao_merge_left_flag = 0;
981     int sao_merge_up_flag   = 0;
982     SAOParams *sao          = &CTB(s->sao, rx, ry);
983     int c_idx, i;
984
        /* Merge flags are only coded when SAO is on for component 0 or 1.
         * Merge-left needs a CTB to the left (rx > 0 and ctb_left_flag);
         * merge-up is only read when merge-left was not chosen. */
985     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
986         s->sh.slice_sample_adaptive_offset_flag[1]) {
987         if (rx > 0) {
988             if (lc->ctb_left_flag)
989                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
990         }
991         if (ry > 0 && !sao_merge_left_flag) {
992             if (lc->ctb_up_flag)
993                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
994         }
995     }
996
        /* One pass per component: 3 when chroma_format_idc is nonzero, else 1. */
997     for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
998         int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
999                                                  s->ps.pps->log2_sao_offset_scale_chroma;
1000
1001         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
1002             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
1003             continue;
1004         }
1005
             /* Component 2 reuses the type and edge class of component 1
              * instead of reading its own. */
1006         if (c_idx == 2) {
1007             sao->type_idx[2] = sao->type_idx[1];
1008             sao->eo_class[2] = sao->eo_class[1];
1009         } else {
1010             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
1011         }
1012
1013         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
1014             continue;
1015
1016         for (i = 0; i < 4; i++)
1017             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
1018
             /* Band type: a sign for each nonzero offset (forced to 0 for a
              * zero offset) plus the band position. Otherwise an edge class
              * is set for components 0 and 1 only. */
1019         if (sao->type_idx[c_idx] == SAO_BAND) {
1020             for (i = 0; i < 4; i++) {
1021                 if (sao->offset_abs[c_idx][i]) {
1022                     SET_SAO(offset_sign[c_idx][i],
1023                             ff_hevc_sao_offset_sign_decode(s));
1024                 } else {
1025                     sao->offset_sign[c_idx][i] = 0;
1026                 }
1027             }
1028             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
1029         } else if (c_idx != 2) {
1030             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
1031         }
1032
1033         // Inferred parameters
             /* offset_val[0] is always 0; entries 1..4 hold the four offsets.
              * For the edge type the last two offsets (i > 1) are negated;
              * otherwise the sign comes from offset_sign[]. Each value is
              * then scaled by 1 << log2_sao_offset_scale. */
1034         sao->offset_val[c_idx][0] = 0;
1035         for (i = 0; i < 4; i++) {
1036             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
1037             if (sao->type_idx[c_idx] == SAO_EDGE) {
1038                 if (i > 1)
1039                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
1040             } else if (sao->offset_sign[c_idx][i]) {
1041                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
1042             }
1043             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
1044         }
1045     }
1046 }
1047
1048 #undef SET_SAO
1049 #undef CTB
1050
/**
 * Decode the cross-component prediction scale and store it in
 * s->HEVClc->tu.res_scale_val.
 *
 * The stored value is 0 when the decoded log2_res_scale_abs_plus1 is 0;
 * otherwise it is 1 << (log2_res_scale_abs_plus1 - 1), negated when the
 * decoded sign flag is 1.
 *
 * @param s   HEVC decoding context
 * @param idx index forwarded to both decode helpers
 * @return always 0
 */
1051 static int hls_cross_component_pred(HEVCContext *s, int idx) {
1052     HEVCLocalContext *lc    = s->HEVClc;
1053     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
1054
         /* The sign flag is only read for a nonzero magnitude. */
1055     if (log2_res_scale_abs_plus1 !=  0) {
1056         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
1057         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
1058                                (1 - 2 * res_scale_sign_flag);
1059     } else {
1060         lc->tu.res_scale_val = 0;
1061     }
1062
1063
1064     return 0;
1065 }
1066
/**
 * Decode and reconstruct one transform unit.
 *
 * For an intra CU the luma block is predicted first. Then:
 *  - if any coded-block flag is set, the QP delta and chroma QP offset are
 *    read when not yet coded for this CU, the coefficient scan order is
 *    chosen, and luma and chroma residuals are decoded (each chroma block
 *    is intra predicted just before its residual);
 *  - otherwise, for an intra CU with chroma, only chroma intra prediction
 *    is run.
 * Chroma is handled at (x0, y0) when log2_trafo_size > 2 or the format is
 * 4:4:4 (chroma_format_idc == 3). Otherwise it is handled once, at
 * (xBase, yBase), by the unit with blk_idx == 3.
 *
 * @param s               HEVC decoding context
 * @param x0              horizontal position of the unit; shifted by
 *                        hshift[] to address the chroma planes
 * @param y0              vertical position of the unit; shifted by vshift[]
 *                        to address the chroma planes
 * @param xBase           horizontal position used for chroma in the
 *                        blk_idx == 3 case
 * @param yBase           vertical position used for chroma in the
 *                        blk_idx == 3 case
 * @param cb_xBase        forwarded to ff_hevc_set_qPy()
 * @param cb_yBase        forwarded to ff_hevc_set_qPy()
 * @param log2_cb_size    forwarded to ff_hevc_set_qPy()
 * @param log2_trafo_size log2 of the unit size
 * @param blk_idx         index of this unit; 3 triggers the deferred chroma
 *                        handling described above
 * @param cbf_luma        nonzero if a luma residual is coded
 * @param cbf_cb          Cb coded-block flags; [1] is only consulted when
 *                        chroma_format_idc == 2
 * @param cbf_cr          Cr coded-block flags; [1] is only consulted when
 *                        chroma_format_idc == 2
 * @return 0 on success, AVERROR_INVALIDDATA if the decoded cu_qp_delta is
 *         outside its valid range
 */
1067 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1068                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1069                               int log2_cb_size, int log2_trafo_size,
1070                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
1071 {
1072     HEVCLocalContext *lc = s->HEVClc;
1073     const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
         /* i counts chroma sub-blocks; k is a separate index for the
          * coefficient-scaling loops nested inside the sub-block loops. */
1074     int i, k;
1075
1076     if (lc->cu.pred_mode == MODE_INTRA) {
1077         int trafo_size = 1 << log2_trafo_size;
1078         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1079
1080         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1081     }
1082
1083     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
1084         (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1085         int scan_idx   = SCAN_DIAG;
1086         int scan_idx_c = SCAN_DIAG;
1087         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
1088                          (s->ps.sps->chroma_format_idc == 2 &&
1089                          (cbf_cb[1] || cbf_cr[1]));
1090
             /* Read the QP delta once per CU (guarded by
              * is_cu_qp_delta_coded), range-check it and update the QP. */
1091         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1092             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1093             if (lc->tu.cu_qp_delta != 0)
1094                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1095                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1096             lc->tu.is_cu_qp_delta_coded = 1;
1097
1098             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1099                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1100                 av_log(s->avctx, AV_LOG_ERROR,
1101                        "The cu_qp_delta %d is outside the valid range "
1102                        "[%d, %d].\n",
1103                        lc->tu.cu_qp_delta,
1104                        -(26 + s->ps.sps->qp_bd_offset / 2),
1105                         (25 + s->ps.sps->qp_bd_offset / 2));
1106                 return AVERROR_INVALIDDATA;
1107             }
1108
1109             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
1110         }
1111
             /* Per-CU chroma QP offset: read once, only when chroma residual
              * is present and the CU is not transquant-bypassed. */
1112         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
1113             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
1114             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
1115             if (cu_chroma_qp_offset_flag) {
1116                 int cu_chroma_qp_offset_idx  = 0;
1117                 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
1118                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
1119                     av_log(s->avctx, AV_LOG_ERROR,
1120                         "cu_chroma_qp_offset_idx not yet tested.\n");
1121                 }
1122                 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
1123                 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
1124             } else {
1125                 lc->tu.cu_qp_offset_cb = 0;
1126                 lc->tu.cu_qp_offset_cr = 0;
1127             }
1128             lc->tu.is_cu_chroma_qp_offset_coded = 1;
1129         }
1130
             /* Intra units with log2 size below 4 pick the scan from the
              * prediction mode: modes 6..14 use the vertical scan, 22..30
              * the horizontal scan, everything else stays diagonal. */
1131         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1132             if (lc->tu.intra_pred_mode >= 6 &&
1133                 lc->tu.intra_pred_mode <= 14) {
1134                 scan_idx = SCAN_VERT;
1135             } else if (lc->tu.intra_pred_mode >= 22 &&
1136                        lc->tu.intra_pred_mode <= 30) {
1137                 scan_idx = SCAN_HORIZ;
1138             }
1139
1140             if (lc->tu.intra_pred_mode_c >=  6 &&
1141                 lc->tu.intra_pred_mode_c <= 14) {
1142                 scan_idx_c = SCAN_VERT;
1143             } else if (lc->tu.intra_pred_mode_c >= 22 &&
1144                        lc->tu.intra_pred_mode_c <= 30) {
1145                 scan_idx_c = SCAN_HORIZ;
1146             }
1147         }
1148
1149         lc->tu.cross_pf = 0;
1150
1151         if (cbf_luma)
1152             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1153         if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1154             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1155             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1156             lc->tu.cross_pf  = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1157                                 (lc->cu.pred_mode == MODE_INTER ||
1158                                  (lc->tu.chroma_mode_c ==  4)));
1159
1160             if (lc->tu.cross_pf) {
1161                 hls_cross_component_pred(s, 0);
1162             }
                 /* Cb: two vertically stacked sub-blocks for
                  * chroma_format_idc == 2, one otherwise. */
1163             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1164                 if (lc->cu.pred_mode == MODE_INTRA) {
1165                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1166                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1167                 }
1168                 if (cbf_cb[i])
1169                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1170                                                 log2_trafo_size_c, scan_idx_c, 1);
1171                 else
                         /* No coded Cb residual: with cross-component
                          * prediction the residual is the scaled contents of
                          * edge_emu_buffer. */
1172                     if (lc->tu.cross_pf) {
1173                         ptrdiff_t stride = s->frame->linesize[1];
1174                         int hshift = s->ps.sps->hshift[1];
1175                         int vshift = s->ps.sps->vshift[1];
1176                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1177                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1178                         int size = 1 << log2_trafo_size_c;
1179
1180                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1181                                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
                             /* Uses k, not i: overwriting the sub-block
                              * counter would end the enclosing loop early. */
1182                         for (k = 0; k < (size * size); k++) {
1183                             coeffs[k] = ((lc->tu.res_scale_val * coeffs_y[k]) >> 3);
1184                         }
1185                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1186                     }
1187             }
1188
1189             if (lc->tu.cross_pf) {
1190                 hls_cross_component_pred(s, 1);
1191             }
                 /* Cr: same structure as the Cb pass above. */
1192             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1193                 if (lc->cu.pred_mode == MODE_INTRA) {
1194                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1195                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1196                 }
1197                 if (cbf_cr[i])
1198                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1199                                                 log2_trafo_size_c, scan_idx_c, 2);
1200                 else
1201                     if (lc->tu.cross_pf) {
1202                         ptrdiff_t stride = s->frame->linesize[2];
1203                         int hshift = s->ps.sps->hshift[2];
1204                         int vshift = s->ps.sps->vshift[2];
1205                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1206                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1207                         int size = 1 << log2_trafo_size_c;
1208
1209                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1210                                                           ((x0 >> hshift) << s->ps.sps->pixel_shift)];
                             /* Separate index for the same reason as in the
                              * Cb pass. */
1211                         for (k = 0; k < (size * size); k++) {
1212                             coeffs[k] = ((lc->tu.res_scale_val * coeffs_y[k]) >> 3);
1213                         }
1214                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1215                     }
1216             }
1217         } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
                 /* Unit too small for its own chroma block: the unit with
                  * blk_idx 3 handles chroma once, at the base position. */
1218             int trafo_size_h = 1 << (log2_trafo_size + 1);
1219             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1220             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1221                 if (lc->cu.pred_mode == MODE_INTRA) {
1222                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1223                                                     trafo_size_h, trafo_size_v);
1224                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1225                 }
1226                 if (cbf_cb[i])
1227                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1228                                                 log2_trafo_size, scan_idx_c, 1);
1229             }
1230             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1231                 if (lc->cu.pred_mode == MODE_INTRA) {
1232                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1233                                                 trafo_size_h, trafo_size_v);
1234                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1235                 }
1236                 if (cbf_cr[i])
1237                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1238                                                 log2_trafo_size, scan_idx_c, 2);
1239             }
1240         }
1241     } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
             /* Nothing coded at all: an intra CU still needs its chroma
              * prediction, with the same placement rules as above. */
1242         if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1243             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1244             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1245             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1246             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1247             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1248             if (s->ps.sps->chroma_format_idc == 2) {
1249                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1250                                                 trafo_size_h, trafo_size_v);
1251                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1252                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1253             }
1254         } else if (blk_idx == 3) {
1255             int trafo_size_h = 1 << (log2_trafo_size + 1);
1256             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1257             ff_hevc_set_neighbour_available(s, xBase, yBase,
1258                                             trafo_size_h, trafo_size_v);
1259             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1260             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1261             if (s->ps.sps->chroma_format_idc == 2) {
1262                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1263                                                 trafo_size_h, trafo_size_v);
1264                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1265                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1266             }
1267         }
1268     }
1269
1270     return 0;
1271 }
1272
/**
 * Write the value 2 into s->is_pcm for every minimum-PU cell covered by a
 * square block, with the block's right and bottom edges clipped to the
 * picture width and height.
 *
 * NOTE(review): presumably the value 2 tells the deblocking filter to leave
 * these samples untouched; confirm against the code that reads is_pcm.
 *
 * @param s            HEVC decoding context
 * @param x0           horizontal position of the block
 * @param y0           vertical position of the block
 * @param log2_cb_size log2 of the block size
 */
1273 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1274 {
1275     int cb_size          = 1 << log2_cb_size;
1276     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1277
1278     int min_pu_width     = s->ps.sps->min_pu_width;
1279     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1280     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1281     int i, j;
1282
         /* is_pcm is stored row by row with min_pu_width entries per row. */
1283     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1284         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1285             s->is_pcm[i + j * min_pu_width] = 2;
1286 }
1287
/**
 * Recursively decode a transform tree node.
 *
 * The node either splits into four half-size children, visited in the order
 * (x0,y0), (x1,y0), (x0,y1), (x1,y1), or becomes a leaf that is handed to
 * hls_transform_unit(). For a leaf the function also records coded luma in
 * s->cbf_luma and, unless deblocking is disabled for the slice, computes
 * deblocking boundary strengths.
 *
 * @param s               HEVC decoding context
 * @param x0              horizontal position of this node
 * @param y0              vertical position of this node
 * @param xBase           horizontal position of the parent node (children
 *                        receive this node's x0)
 * @param yBase           vertical position of the parent node (children
 *                        receive this node's y0)
 * @param cb_xBase        passed through unchanged to hls_transform_unit()
 * @param cb_yBase        passed through unchanged to hls_transform_unit()
 * @param log2_cb_size    passed through unchanged to hls_transform_unit()
 * @param log2_trafo_size log2 of this node's size
 * @param trafo_depth     depth of this node, incremented by 1 per split
 * @param blk_idx         index (0..3) of this node among its siblings
 * @param base_cbf_cb     parent's Cb coded-block flags (two entries)
 * @param base_cbf_cr     parent's Cr coded-block flags (two entries)
 * @return 0 on success, or the negative error returned by a child node or
 *         by hls_transform_unit()
 */
1288 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1289                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1290                               int log2_cb_size, int log2_trafo_size,
1291                               int trafo_depth, int blk_idx,
1292                               const int *base_cbf_cb, const int *base_cbf_cr)
1293 {
1294     HEVCLocalContext *lc = s->HEVClc;
1295     uint8_t split_transform_flag;
1296     int cbf_cb[2];
1297     int cbf_cr[2];
1298     int ret;
1299
         /* Start from the parent's chroma flags; they may be re-read below. */
1300     cbf_cb[0] = base_cbf_cb[0];
1301     cbf_cb[1] = base_cbf_cb[1];
1302     cbf_cr[0] = base_cbf_cr[0];
1303     cbf_cr[1] = base_cbf_cr[1];
1304
         /* Select the intra modes for this TU. With an intra split they are
          * picked per block at depth 1 (chroma per block only for
          * chroma_format_idc == 3); without a split, entry 0 is used. */
1305     if (lc->cu.intra_split_flag) {
1306         if (trafo_depth == 1) {
1307             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1308             if (s->ps.sps->chroma_format_idc == 3) {
1309                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1310                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1311             } else {
1312                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1313                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1314             }
1315         }
1316     } else {
1317         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1318         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1319         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1320     }
1321
         /* The split flag is read from the bitstream only when the size and
          * depth limits leave a choice; otherwise it is inferred. */
1322     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1323         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1324         trafo_depth     < lc->cu.max_trafo_depth       &&
1325         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1326         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1327     } else {
1328         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1329                           lc->cu.pred_mode == MODE_INTER &&
1330                           lc->cu.part_mode != PART_2Nx2N &&
1331                           trafo_depth == 0;
1332
1333         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1334                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1335                                inter_split;
1336     }
1337
         /* Chroma coded-block flags are read at depth 0 or when the
          * inherited flag is set. For chroma_format_idc == 2 a second flag
          * is read when the node is not split or log2_trafo_size is 3. */
1338     if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1339         if (trafo_depth == 0 || cbf_cb[0]) {
1340             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1341             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1342                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1343             }
1344         }
1345
1346         if (trafo_depth == 0 || cbf_cr[0]) {
1347             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1348             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1349                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1350             }
1351         }
1352     }
1353
1354     if (split_transform_flag) {
1355         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1356         const int x1 = x0 + trafo_size_split;
1357         const int y1 = y0 + trafo_size_split;
1358
         /* SUBDIVIDE recurses into one child and returns from this function
          * on the first error. */
1359 #define SUBDIVIDE(x, y, idx)                                                    \
1360 do {                                                                            \
1361     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1362                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1363                              cbf_cb, cbf_cr);                                   \
1364     if (ret < 0)                                                                \
1365         return ret;                                                             \
1366 } while (0)
1367
1368         SUBDIVIDE(x0, y0, 0);
1369         SUBDIVIDE(x1, y0, 1);
1370         SUBDIVIDE(x0, y1, 2);
1371         SUBDIVIDE(x1, y1, 3);
1372
1373 #undef SUBDIVIDE
1374     } else {
1375         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1376         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1377         int min_tu_width     = s->ps.sps->min_tb_width;
1378         int cbf_luma         = 1;
1379
             /* cbf_luma keeps its default of 1 only for a non-intra CU at
              * depth 0 with no chroma flag set; otherwise it is decoded. */
1380         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1381             cbf_cb[0] || cbf_cr[0] ||
1382             (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1383             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1384         }
1385
1386         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1387                                  log2_cb_size, log2_trafo_size,
1388                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1389         if (ret < 0)
1390             return ret;
1391         // TODO: store cbf_luma somewhere else
             /* Flag every minimum-size transform block covered by this unit
              * as having coded luma. */
1392         if (cbf_luma) {
1393             int i, j;
1394             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1395                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1396                     int x_tu = (x0 + j) >> log2_min_tu_size;
1397                     int y_tu = (y0 + i) >> log2_min_tu_size;
1398                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1399                 }
1400         }
1401         if (!s->sh.disable_deblocking_filter_flag) {
1402             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1403             if (s->ps.pps->transquant_bypass_enable_flag &&
1404                 lc->cu.cu_transquant_bypass_flag)
1405                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1406         }
1407     }
1408     return 0;
1409 }
1410
/**
 * Copy the raw PCM samples of one coding block into the current frame.
 *
 * The payload size in bits is computed from the block size and the PCM bit
 * depths. That many bytes (rounded up) are taken from the CABAC reader with
 * skip_bytes(), and the returned pointer is parsed with a local
 * GetBitContext. Luma is always written; the two chroma planes are written
 * only when chroma_format_idc is nonzero. Deblocking boundary strengths are
 * computed unless deblocking is disabled for the slice.
 *
 * NOTE(review): dst1/dst2 and the chroma term of `length` are computed
 * unconditionally, while the chroma put_pcm() calls are guarded by
 * chroma_format_idc; confirm this is intended for monochrome streams.
 *
 * @param s            HEVC decoding context
 * @param x0           horizontal position of the block; shifted by hshift[]
 *                     to address the chroma planes
 * @param y0           vertical position of the block; shifted by vshift[]
 *                     to address the chroma planes
 * @param log2_cb_size log2 of the block size
 * @return 0 on success, or the negative error returned by init_get_bits()
 */
1411 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1412 {
1413     HEVCLocalContext *lc = s->HEVClc;
1414     GetBitContext gb;
1415     int cb_size   = 1 << log2_cb_size;
1416     ptrdiff_t stride0 = s->frame->linesize[0];
1417     ptrdiff_t stride1 = s->frame->linesize[1];
1418     ptrdiff_t stride2 = s->frame->linesize[2];
1419     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1420     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1421     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1422
         /* Payload size in bits: luma samples at pcm.bit_depth plus both
          * chroma planes at pcm.bit_depth_chroma. */
1423     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1424                          (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1425                           ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1426                           s->ps.sps->pcm.bit_depth_chroma;
1427     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1428     int ret;
1429
1430     if (!s->sh.disable_deblocking_filter_flag)
1431         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1432
1433     ret = init_get_bits(&gb, pcm, length);
1434     if (ret < 0)
1435         return ret;
1436
1437     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1438     if (s->ps.sps->chroma_format_idc) {
1439         s->hevcdsp.put_pcm(dst1, stride1,
1440                            cb_size >> s->ps.sps->hshift[1],
1441                            cb_size >> s->ps.sps->vshift[1],
1442                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1443         s->hevcdsp.put_pcm(dst2, stride2,
1444                            cb_size >> s->ps.sps->hshift[2],
1445                            cb_size >> s->ps.sps->vshift[2],
1446                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1447     }
1448
1449     return 0;
1450 }
1451
1452 /**
1453  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1454  *
1455  * @param s HEVC decoding context
1456  * @param dst target buffer for block data at block position
1457  * @param dststride stride of the dst buffer
1458  * @param ref reference picture buffer at origin (0, 0)
1459  * @param mv motion vector (relative to block position) to get pixel data from
1460  * @param x_off horizontal position of block from origin (0, 0)
1461  * @param y_off vertical position of block from origin (0, 0)
1462  * @param block_w width of block
1463  * @param block_h height of block
1464  * @param luma_weight weighting factor applied to the luma prediction
1465  * @param luma_offset additive offset applied to the luma prediction value
1466  */
1467
1468 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1469                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1470                         int block_w, int block_h, int luma_weight, int luma_offset)
1471 {
1472     HEVCLocalContext *lc = s->HEVClc;
1473     uint8_t *src         = ref->data[0];
1474     ptrdiff_t srcstride  = ref->linesize[0];
1475     int pic_width        = s->ps.sps->width;
1476     int pic_height       = s->ps.sps->height;
1477     int mx               = mv->x & 3;
1478     int my               = mv->y & 3;
1479     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1480                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1481     int idx              = ff_hevc_pel_weight[block_w];
1482
1483     x_off += mv->x >> 2;
1484     y_off += mv->y >> 2;
1485     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1486
1487     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1488         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1489         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1490         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1491         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1492         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1493
1494         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1495                                  edge_emu_stride, srcstride,
1496                                  block_w + QPEL_EXTRA,
1497                                  block_h + QPEL_EXTRA,
1498                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1499                                  pic_width, pic_height);
1500         src = lc->edge_emu_buffer + buf_offset;
1501         srcstride = edge_emu_stride;
1502     }
1503
1504     if (!weight_flag)
1505         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1506                                                       block_h, mx, my, block_w);
1507     else
1508         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1509                                                         block_h, s->sh.luma_log2_weight_denom,
1510                                                         luma_weight, luma_offset, mx, my, block_w);
1511 }
1512
1513 /**
1514  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1515  *
1516  * @param s HEVC decoding context
1517  * @param dst target buffer for block data at block position
1518  * @param dststride stride of the dst buffer
1519  * @param ref0 reference picture0 buffer at origin (0, 0)
1520  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1521  * @param x_off horizontal position of block from origin (0, 0)
1522  * @param y_off vertical position of block from origin (0, 0)
1523  * @param block_w width of block
1524  * @param block_h height of block
1525  * @param ref1 reference picture1 buffer at origin (0, 0)
1526  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1527  * @param current_mv current motion vector structure
1528  */
1529  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1530                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1531                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1532 {
1533     HEVCLocalContext *lc = s->HEVClc;
1534     ptrdiff_t src0stride  = ref0->linesize[0];
1535     ptrdiff_t src1stride  = ref1->linesize[0];
1536     int pic_width        = s->ps.sps->width;
1537     int pic_height       = s->ps.sps->height;
1538     int mx0              = mv0->x & 3;
1539     int my0              = mv0->y & 3;
1540     int mx1              = mv1->x & 3;
1541     int my1              = mv1->y & 3;
1542     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1543                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1544     int x_off0           = x_off + (mv0->x >> 2);
1545     int y_off0           = y_off + (mv0->y >> 2);
1546     int x_off1           = x_off + (mv1->x >> 2);
1547     int y_off1           = y_off + (mv1->y >> 2);
1548     int idx              = ff_hevc_pel_weight[block_w];
1549
1550     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1551     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1552
1553     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1554         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1555         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1556         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1557         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1558         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1559
1560         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1561                                  edge_emu_stride, src0stride,
1562                                  block_w + QPEL_EXTRA,
1563                                  block_h + QPEL_EXTRA,
1564                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1565                                  pic_width, pic_height);
1566         src0 = lc->edge_emu_buffer + buf_offset;
1567         src0stride = edge_emu_stride;
1568     }
1569
1570     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1571         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1572         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1573         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1574         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1575         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1576
1577         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1578                                  edge_emu_stride, src1stride,
1579                                  block_w + QPEL_EXTRA,
1580                                  block_h + QPEL_EXTRA,
1581                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1582                                  pic_width, pic_height);
1583         src1 = lc->edge_emu_buffer2 + buf_offset;
1584         src1stride = edge_emu_stride;
1585     }
1586
1587     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1588                                                 block_h, mx0, my0, block_w);
1589     if (!weight_flag)
1590         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1591                                                        block_h, mx1, my1, block_w);
1592     else
1593         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1594                                                          block_h, s->sh.luma_log2_weight_denom,
1595                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1596                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1597                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1598                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1599                                                          mx1, my1, block_w);
1600
1601 }
1602
1603 /**
1604  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1605  *
1606  * @param s HEVC decoding context
1607  * @param dst1 target buffer for block data at block position (U plane)
1608  * @param dst2 target buffer for block data at block position (V plane)
1609  * @param dststride stride of the dst1 and dst2 buffers
1610  * @param ref reference picture buffer at origin (0, 0)
1611  * @param mv motion vector (relative to block position) to get pixel data from
1612  * @param x_off horizontal position of block from origin (0, 0)
1613  * @param y_off vertical position of block from origin (0, 0)
1614  * @param block_w width of block
1615  * @param block_h height of block
1616  * @param chroma_weight weighting factor applied to the chroma prediction
1617  * @param chroma_offset additive offset applied to the chroma prediction value
1618  */
1619
1620 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1621                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1622                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1623 {
1624     HEVCLocalContext *lc = s->HEVClc;
1625     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1626     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1627     const Mv *mv         = &current_mv->mv[reflist];
1628     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1629                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1630     int idx              = ff_hevc_pel_weight[block_w];
1631     int hshift           = s->ps.sps->hshift[1];
1632     int vshift           = s->ps.sps->vshift[1];
1633     intptr_t mx          = av_mod_uintp2(mv->x, 2 + hshift);
1634     intptr_t my          = av_mod_uintp2(mv->y, 2 + vshift);
1635     intptr_t _mx         = mx << (1 - hshift);
1636     intptr_t _my         = my << (1 - vshift);
1637
1638     x_off += mv->x >> (2 + hshift);
1639     y_off += mv->y >> (2 + vshift);
1640     src0  += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1641
1642     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1643         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1644         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1645         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1646         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1647         int buf_offset0 = EPEL_EXTRA_BEFORE *
1648                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1649         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1650                                  edge_emu_stride, srcstride,
1651                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1652                                  x_off - EPEL_EXTRA_BEFORE,
1653                                  y_off - EPEL_EXTRA_BEFORE,
1654                                  pic_width, pic_height);
1655
1656         src0 = lc->edge_emu_buffer + buf_offset0;
1657         srcstride = edge_emu_stride;
1658     }
1659     if (!weight_flag)
1660         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1661                                                   block_h, _mx, _my, block_w);
1662     else
1663         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1664                                                         block_h, s->sh.chroma_log2_weight_denom,
1665                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1666 }
1667
1668 /**
1669  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1670  *
1671  * @param s HEVC decoding context
1672  * @param dst target buffer for block data at block position
1673  * @param dststride stride of the dst buffer
1674  * @param ref0 reference picture0 buffer at origin (0, 0)
1675  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1676  * @param x_off horizontal position of block from origin (0, 0)
1677  * @param y_off vertical position of block from origin (0, 0)
1678  * @param block_w width of block
1679  * @param block_h height of block
1680  * @param ref1 reference picture1 buffer at origin (0, 0)
1681  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1682  * @param current_mv current motion vector structure
1683  * @param cidx chroma component(cb, cr)
1684  */
1685 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1686                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1687 {
1688     HEVCLocalContext *lc = s->HEVClc;
1689     uint8_t *src1        = ref0->data[cidx+1];
1690     uint8_t *src2        = ref1->data[cidx+1];
1691     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1692     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1693     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1694                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1695     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1696     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1697     Mv *mv0              = &current_mv->mv[0];
1698     Mv *mv1              = &current_mv->mv[1];
1699     int hshift = s->ps.sps->hshift[1];
1700     int vshift = s->ps.sps->vshift[1];
1701
1702     intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1703     intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1704     intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1705     intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1706     intptr_t _mx0 = mx0 << (1 - hshift);
1707     intptr_t _my0 = my0 << (1 - vshift);
1708     intptr_t _mx1 = mx1 << (1 - hshift);
1709     intptr_t _my1 = my1 << (1 - vshift);
1710
1711     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1712     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1713     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1714     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1715     int idx = ff_hevc_pel_weight[block_w];
1716     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1717     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1718
1719     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1720         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1721         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1722         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1723         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1724         int buf_offset1 = EPEL_EXTRA_BEFORE *
1725                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1726
1727         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1728                                  edge_emu_stride, src1stride,
1729                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1730                                  x_off0 - EPEL_EXTRA_BEFORE,
1731                                  y_off0 - EPEL_EXTRA_BEFORE,
1732                                  pic_width, pic_height);
1733
1734         src1 = lc->edge_emu_buffer + buf_offset1;
1735         src1stride = edge_emu_stride;
1736     }
1737
1738     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1739         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1740         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1741         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1742         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1743         int buf_offset1 = EPEL_EXTRA_BEFORE *
1744                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1745
1746         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1747                                  edge_emu_stride, src2stride,
1748                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1749                                  x_off1 - EPEL_EXTRA_BEFORE,
1750                                  y_off1 - EPEL_EXTRA_BEFORE,
1751                                  pic_width, pic_height);
1752
1753         src2 = lc->edge_emu_buffer2 + buf_offset1;
1754         src2stride = edge_emu_stride;
1755     }
1756
1757     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1758                                                 block_h, _mx0, _my0, block_w);
1759     if (!weight_flag)
1760         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1761                                                        src2, src2stride, lc->tmp,
1762                                                        block_h, _mx1, _my1, block_w);
1763     else
1764         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1765                                                          src2, src2stride, lc->tmp,
1766                                                          block_h,
1767                                                          s->sh.chroma_log2_weight_denom,
1768                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1769                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1770                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1771                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1772                                                          _mx1, _my1, block_w);
1773 }
1774
1775 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1776                                 const Mv *mv, int y0, int height)
1777 {
1778     if (s->threads_type == FF_THREAD_FRAME ) {
1779         int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1780
1781         ff_thread_await_progress(&ref->tf, y, 0);
1782     }
1783 }
1784
1785 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1786                                   int nPbH, int log2_cb_size, int part_idx,
1787                                   int merge_idx, MvField *mv)
1788 {
1789     HEVCLocalContext *lc = s->HEVClc;
1790     enum InterPredIdc inter_pred_idc = PRED_L0;
1791     int mvp_flag;
1792
1793     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1794     mv->pred_flag = 0;
1795     if (s->sh.slice_type == HEVC_SLICE_B)
1796         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1797
1798     if (inter_pred_idc != PRED_L1) {
1799         if (s->sh.nb_refs[L0])
1800             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1801
1802         mv->pred_flag = PF_L0;
1803         ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1804         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1805         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1806                                  part_idx, merge_idx, mv, mvp_flag, 0);
1807         mv->mv[0].x += lc->pu.mvd.x;
1808         mv->mv[0].y += lc->pu.mvd.y;
1809     }
1810
1811     if (inter_pred_idc != PRED_L0) {
1812         if (s->sh.nb_refs[L1])
1813             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1814
1815         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1816             AV_ZERO32(&lc->pu.mvd);
1817         } else {
1818             ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1819         }
1820
1821         mv->pred_flag += PF_L1;
1822         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1823         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1824                                  part_idx, merge_idx, mv, mvp_flag, 1);
1825         mv->mv[1].x += lc->pu.mvd.x;
1826         mv->mv[1].y += lc->pu.mvd.y;
1827     }
1828 }
1829
1830 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1831                                 int nPbW, int nPbH,
1832                                 int log2_cb_size, int partIdx, int idx)
1833 {
1834 #define POS(c_idx, x, y)                                                              \
1835     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1836                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1837     HEVCLocalContext *lc = s->HEVClc;
1838     int merge_idx = 0;
1839     struct MvField current_mv = {{{ 0 }}};
1840
1841     int min_pu_width = s->ps.sps->min_pu_width;
1842
1843     MvField *tab_mvf = s->ref->tab_mvf;
1844     RefPicList  *refPicList = s->ref->refPicList;
1845     HEVCFrame *ref0 = NULL, *ref1 = NULL;
1846     uint8_t *dst0 = POS(0, x0, y0);
1847     uint8_t *dst1 = POS(1, x0, y0);
1848     uint8_t *dst2 = POS(2, x0, y0);
1849     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1850     int min_cb_width     = s->ps.sps->min_cb_width;
1851     int x_cb             = x0 >> log2_min_cb_size;
1852     int y_cb             = y0 >> log2_min_cb_size;
1853     int x_pu, y_pu;
1854     int i, j;
1855
1856     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1857
1858     if (!skip_flag)
1859         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1860
1861     if (skip_flag || lc->pu.merge_flag) {
1862         if (s->sh.max_num_merge_cand > 1)
1863             merge_idx = ff_hevc_merge_idx_decode(s);
1864         else
1865             merge_idx = 0;
1866
1867         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1868                                    partIdx, merge_idx, &current_mv);
1869     } else {
1870         hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1871                               partIdx, merge_idx, &current_mv);
1872     }
1873
1874     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1875     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1876
1877     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1878         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1879             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1880
1881     if (current_mv.pred_flag & PF_L0) {
1882         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1883         if (!ref0)
1884             return;
1885         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1886     }
1887     if (current_mv.pred_flag & PF_L1) {
1888         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1889         if (!ref1)
1890             return;
1891         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1892     }
1893
1894     if (current_mv.pred_flag == PF_L0) {
1895         int x0_c = x0 >> s->ps.sps->hshift[1];
1896         int y0_c = y0 >> s->ps.sps->vshift[1];
1897         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1898         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1899
1900         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1901                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1902                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1903                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1904
1905         if (s->ps.sps->chroma_format_idc) {
1906             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1907                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1908                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1909             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1910                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1911                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1912         }
1913     } else if (current_mv.pred_flag == PF_L1) {
1914         int x0_c = x0 >> s->ps.sps->hshift[1];
1915         int y0_c = y0 >> s->ps.sps->vshift[1];
1916         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1917         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1918
1919         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1920                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1921                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1922                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1923
1924         if (s->ps.sps->chroma_format_idc) {
1925             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1926                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1927                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1928
1929             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1930                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1931                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1932         }
1933     } else if (current_mv.pred_flag == PF_BI) {
1934         int x0_c = x0 >> s->ps.sps->hshift[1];
1935         int y0_c = y0 >> s->ps.sps->vshift[1];
1936         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1937         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1938
1939         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1940                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1941                    ref1->frame, &current_mv.mv[1], &current_mv);
1942
1943         if (s->ps.sps->chroma_format_idc) {
1944             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1945                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1946
1947             chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1948                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1949         }
1950     }
1951 }
1952
1953 /**
1954  * 8.4.1
1955  */
1956 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1957                                 int prev_intra_luma_pred_flag)
1958 {
1959     HEVCLocalContext *lc = s->HEVClc;
1960     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1961     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1962     int min_pu_width     = s->ps.sps->min_pu_width;
1963     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1964     int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1965     int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
1966
1967     int cand_up   = (lc->ctb_up_flag || y0b) ?
1968                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1969     int cand_left = (lc->ctb_left_flag || x0b) ?
1970                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1971
1972     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1973
1974     MvField *tab_mvf = s->ref->tab_mvf;
1975     int intra_pred_mode;
1976     int candidate[3];
1977     int i, j;
1978
1979     // intra_pred_mode prediction does not cross vertical CTB boundaries
1980     if ((y0 - 1) < y_ctb)
1981         cand_up = INTRA_DC;
1982
1983     if (cand_left == cand_up) {
1984         if (cand_left < 2) {
1985             candidate[0] = INTRA_PLANAR;
1986             candidate[1] = INTRA_DC;
1987             candidate[2] = INTRA_ANGULAR_26;
1988         } else {
1989             candidate[0] = cand_left;
1990             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1991             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1992         }
1993     } else {
1994         candidate[0] = cand_left;
1995         candidate[1] = cand_up;
1996         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1997             candidate[2] = INTRA_PLANAR;
1998         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1999             candidate[2] = INTRA_DC;
2000         } else {
2001             candidate[2] = INTRA_ANGULAR_26;
2002         }
2003     }
2004
2005     if (prev_intra_luma_pred_flag) {
2006         intra_pred_mode = candidate[lc->pu.mpm_idx];
2007     } else {
2008         if (candidate[0] > candidate[1])
2009             FFSWAP(uint8_t, candidate[0], candidate[1]);
2010         if (candidate[0] > candidate[2])
2011             FFSWAP(uint8_t, candidate[0], candidate[2]);
2012         if (candidate[1] > candidate[2])
2013             FFSWAP(uint8_t, candidate[1], candidate[2]);
2014
2015         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
2016         for (i = 0; i < 3; i++)
2017             if (intra_pred_mode >= candidate[i])
2018                 intra_pred_mode++;
2019     }
2020
2021     /* write the intra prediction units into the mv array */
2022     if (!size_in_pus)
2023         size_in_pus = 1;
2024     for (i = 0; i < size_in_pus; i++) {
2025         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
2026                intra_pred_mode, size_in_pus);
2027
2028         for (j = 0; j < size_in_pus; j++) {
2029             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
2030         }
2031     }
2032
2033     return intra_pred_mode;
2034 }
2035
2036 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
2037                                           int log2_cb_size, int ct_depth)
2038 {
2039     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
2040     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
2041     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
2042     int y;
2043
2044     for (y = 0; y < length; y++)
2045         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
2046                ct_depth, length);
2047 }
2048
/*
 * Remapping table with 35 entries (indices 0..34), indexed by an intra
 * prediction mode. intra_prediction_unit() uses it to translate the derived
 * chroma mode when chroma_format_idc == 2.
 */
static const uint8_t tab_mode_idx[] = {
     0,  1,  2,  2,  2,  2,  3,  5,  7,  8,
    10, 12, 13, 15, 17, 18, 19, 20, 21, 22,
    23, 23, 24, 24, 25, 25, 26, 27, 27, 28,
    28, 29, 29, 30, 31,
};
2052
2053 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
2054                                   int log2_cb_size)
2055 {
2056     HEVCLocalContext *lc = s->HEVClc;
2057     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
2058     uint8_t prev_intra_luma_pred_flag[4];
2059     int split   = lc->cu.part_mode == PART_NxN;
2060     int pb_size = (1 << log2_cb_size) >> split;
2061     int side    = split + 1;
2062     int chroma_mode;
2063     int i, j;
2064
2065     for (i = 0; i < side; i++)
2066         for (j = 0; j < side; j++)
2067             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2068
2069     for (i = 0; i < side; i++) {
2070         for (j = 0; j < side; j++) {
2071             if (prev_intra_luma_pred_flag[2 * i + j])
2072                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2073             else
2074                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2075
2076             lc->pu.intra_pred_mode[2 * i + j] =
2077                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2078                                      prev_intra_luma_pred_flag[2 * i + j]);
2079         }
2080     }
2081
2082     if (s->ps.sps->chroma_format_idc == 3) {
2083         for (i = 0; i < side; i++) {
2084             for (j = 0; j < side; j++) {
2085                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2086                 if (chroma_mode != 4) {
2087                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
2088                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
2089                     else
2090                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
2091                 } else {
2092                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
2093                 }
2094             }
2095         }
2096     } else if (s->ps.sps->chroma_format_idc == 2) {
2097         int mode_idx;
2098         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2099         if (chroma_mode != 4) {
2100             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2101                 mode_idx = 34;
2102             else
2103                 mode_idx = intra_chroma_table[chroma_mode];
2104         } else {
2105             mode_idx = lc->pu.intra_pred_mode[0];
2106         }
2107         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
2108     } else if (s->ps.sps->chroma_format_idc != 0) {
2109         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2110         if (chroma_mode != 4) {
2111             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2112                 lc->pu.intra_pred_mode_c[0] = 34;
2113             else
2114                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
2115         } else {
2116             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
2117         }
2118     }
2119 }
2120
2121 static void intra_prediction_unit_default_value(HEVCContext *s,
2122                                                 int x0, int y0,
2123                                                 int log2_cb_size)
2124 {
2125     HEVCLocalContext *lc = s->HEVClc;
2126     int pb_size          = 1 << log2_cb_size;
2127     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2128     int min_pu_width     = s->ps.sps->min_pu_width;
2129     MvField *tab_mvf     = s->ref->tab_mvf;
2130     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2131     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2132     int j, k;
2133
2134     if (size_in_pus == 0)
2135         size_in_pus = 1;
2136     for (j = 0; j < size_in_pus; j++)
2137         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2138     if (lc->cu.pred_mode == MODE_INTRA)
2139         for (j = 0; j < size_in_pus; j++)
2140             for (k = 0; k < size_in_pus; k++)
2141                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
2142 }
2143
2144 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2145 {
2146     int cb_size          = 1 << log2_cb_size;
2147     HEVCLocalContext *lc = s->HEVClc;
2148     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2149     int length           = cb_size >> log2_min_cb_size;
2150     int min_cb_width     = s->ps.sps->min_cb_width;
2151     int x_cb             = x0 >> log2_min_cb_size;
2152     int y_cb             = y0 >> log2_min_cb_size;
2153     int idx              = log2_cb_size - 2;
2154     int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2155     int x, y, ret;
2156
2157     lc->cu.x                = x0;
2158     lc->cu.y                = y0;
2159     lc->cu.pred_mode        = MODE_INTRA;
2160     lc->cu.part_mode        = PART_2Nx2N;
2161     lc->cu.intra_split_flag = 0;
2162
2163     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2164     for (x = 0; x < 4; x++)
2165         lc->pu.intra_pred_mode[x] = 1;
2166     if (s->ps.pps->transquant_bypass_enable_flag) {
2167         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2168         if (lc->cu.cu_transquant_bypass_flag)
2169             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2170     } else
2171         lc->cu.cu_transquant_bypass_flag = 0;
2172
2173     if (s->sh.slice_type != HEVC_SLICE_I) {
2174         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2175
2176         x = y_cb * min_cb_width + x_cb;
2177         for (y = 0; y < length; y++) {
2178             memset(&s->skip_flag[x], skip_flag, length);
2179             x += min_cb_width;
2180         }
2181         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2182     } else {
2183         x = y_cb * min_cb_width + x_cb;
2184         for (y = 0; y < length; y++) {
2185             memset(&s->skip_flag[x], 0, length);
2186             x += min_cb_width;
2187         }
2188     }
2189
2190     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2191         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2192         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2193
2194         if (!s->sh.disable_deblocking_filter_flag)
2195             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2196     } else {
2197         int pcm_flag = 0;
2198
2199         if (s->sh.slice_type != HEVC_SLICE_I)
2200             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2201         if (lc->cu.pred_mode != MODE_INTRA ||
2202             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2203             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2204             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2205                                       lc->cu.pred_mode == MODE_INTRA;
2206         }
2207
2208         if (lc->cu.pred_mode == MODE_INTRA) {
2209             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2210                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2211                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2212                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2213             }
2214             if (pcm_flag) {
2215                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2216                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2217                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2218                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2219
2220                 if (ret < 0)
2221                     return ret;
2222             } else {
2223                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2224             }
2225         } else {
2226             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2227             switch (lc->cu.part_mode) {
2228             case PART_2Nx2N:
2229                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2230                 break;
2231             case PART_2NxN:
2232                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2233                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2234                 break;
2235             case PART_Nx2N:
2236                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2237                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2238                 break;
2239             case PART_2NxnU:
2240                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2241                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2242                 break;
2243             case PART_2NxnD:
2244                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2245                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2246                 break;
2247             case PART_nLx2N:
2248                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2249                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2250                 break;
2251             case PART_nRx2N:
2252                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2253                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2254                 break;
2255             case PART_NxN:
2256                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2257                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2258                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2259                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2260                 break;
2261             }
2262         }
2263
2264         if (!pcm_flag) {
2265             int rqt_root_cbf = 1;
2266
2267             if (lc->cu.pred_mode != MODE_INTRA &&
2268                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2269                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2270             }
2271             if (rqt_root_cbf) {
2272                 const static int cbf[2] = { 0 };
2273                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2274                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2275                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2276                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2277                                          log2_cb_size,
2278                                          log2_cb_size, 0, 0, cbf, cbf);
2279                 if (ret < 0)
2280                     return ret;
2281             } else {
2282                 if (!s->sh.disable_deblocking_filter_flag)
2283                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2284             }
2285         }
2286     }
2287
2288     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2289         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2290
2291     x = y_cb * min_cb_width + x_cb;
2292     for (y = 0; y < length; y++) {
2293         memset(&s->qp_y_tab[x], lc->qp_y, length);
2294         x += min_cb_width;
2295     }
2296
2297     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2298        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2299         lc->qPy_pred = lc->qp_y;
2300     }
2301
2302     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
2303
2304     return 0;
2305 }
2306
2307 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2308                                int log2_cb_size, int cb_depth)
2309 {
2310     HEVCLocalContext *lc = s->HEVClc;
2311     const int cb_size    = 1 << log2_cb_size;
2312     int ret;
2313     int split_cu;
2314
2315     lc->ct_depth = cb_depth;
2316     if (x0 + cb_size <= s->ps.sps->width  &&
2317         y0 + cb_size <= s->ps.sps->height &&
2318         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2319         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2320     } else {
2321         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2322     }
2323     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2324         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2325         lc->tu.is_cu_qp_delta_coded = 0;
2326         lc->tu.cu_qp_delta          = 0;
2327     }
2328
2329     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2330         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2331         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2332     }
2333
2334     if (split_cu) {
2335         int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2336         const int cb_size_split = cb_size >> 1;
2337         const int x1 = x0 + cb_size_split;
2338         const int y1 = y0 + cb_size_split;
2339
2340         int more_data = 0;
2341
2342         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2343         if (more_data < 0)
2344             return more_data;
2345
2346         if (more_data && x1 < s->ps.sps->width) {
2347             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2348             if (more_data < 0)
2349                 return more_data;
2350         }
2351         if (more_data && y1 < s->ps.sps->height) {
2352             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2353             if (more_data < 0)
2354                 return more_data;
2355         }
2356         if (more_data && x1 < s->ps.sps->width &&
2357             y1 < s->ps.sps->height) {
2358             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2359             if (more_data < 0)
2360                 return more_data;
2361         }
2362
2363         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2364             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2365             lc->qPy_pred = lc->qp_y;
2366
2367         if (more_data)
2368             return ((x1 + cb_size_split) < s->ps.sps->width ||
2369                     (y1 + cb_size_split) < s->ps.sps->height);
2370         else
2371             return 0;
2372     } else {
2373         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2374         if (ret < 0)
2375             return ret;
2376         if ((!((x0 + cb_size) %
2377                (1 << (s->ps.sps->log2_ctb_size))) ||
2378              (x0 + cb_size >= s->ps.sps->width)) &&
2379             (!((y0 + cb_size) %
2380                (1 << (s->ps.sps->log2_ctb_size))) ||
2381              (y0 + cb_size >= s->ps.sps->height))) {
2382             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2383             return !end_of_slice_flag;
2384         } else {
2385             return 1;
2386         }
2387     }
2388
2389     return 0;
2390 }
2391
2392 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2393                                  int ctb_addr_ts)
2394 {
2395     HEVCLocalContext *lc  = s->HEVClc;
2396     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2397     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2398     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2399
2400     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2401
2402     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2403         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2404             lc->first_qp_group = 1;
2405         lc->end_of_tiles_x = s->ps.sps->width;
2406     } else if (s->ps.pps->tiles_enabled_flag) {
2407         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2408             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2409             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2410             lc->first_qp_group   = 1;
2411         }
2412     } else {
2413         lc->end_of_tiles_x = s->ps.sps->width;
2414     }
2415
2416     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2417
2418     lc->boundary_flags = 0;
2419     if (s->ps.pps->tiles_enabled_flag) {
2420         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2421             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2422         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2423             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2424         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2425             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2426         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2427             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2428     } else {
2429         if (ctb_addr_in_slice <= 0)
2430             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2431         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2432             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2433     }
2434
2435     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2436     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2437     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2438     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2439 }
2440
2441 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2442 {
2443     HEVCContext *s  = avctxt->priv_data;
2444     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2445     int more_data   = 1;
2446     int x_ctb       = 0;
2447     int y_ctb       = 0;
2448     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2449     int ret;
2450
2451     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2452         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2453         return AVERROR_INVALIDDATA;
2454     }
2455
2456     if (s->sh.dependent_slice_segment_flag) {
2457         int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2458         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2459             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2460             return AVERROR_INVALIDDATA;
2461         }
2462     }
2463
2464     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2465         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2466
2467         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2468         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2469         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2470
2471         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2472         if (ret < 0) {
2473             s->tab_slice_address[ctb_addr_rs] = -1;
2474             return ret;
2475         }
2476
2477         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2478
2479         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2480         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2481         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2482
2483         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2484         if (more_data < 0) {
2485             s->tab_slice_address[ctb_addr_rs] = -1;
2486             return more_data;
2487         }
2488
2489
2490         ctb_addr_ts++;
2491         ff_hevc_save_states(s, ctb_addr_ts);
2492         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2493     }
2494
2495     if (x_ctb + ctb_size >= s->ps.sps->width &&
2496         y_ctb + ctb_size >= s->ps.sps->height)
2497         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2498
2499     return ctb_addr_ts;
2500 }
2501
2502 static int hls_slice_data(HEVCContext *s)
2503 {
2504     int arg[2];
2505     int ret[2];
2506
2507     arg[0] = 0;
2508     arg[1] = 1;
2509
2510     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2511     return ret[0];
2512 }
2513 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2514 {
2515     HEVCContext *s1  = avctxt->priv_data, *s;
2516     HEVCLocalContext *lc;
2517     int ctb_size    = 1<< s1->ps.sps->log2_ctb_size;
2518     int more_data   = 1;
2519     int *ctb_row_p    = input_ctb_row;
2520     int ctb_row = ctb_row_p[job];
2521     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2522     int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2523     int thread = ctb_row % s1->threads_number;
2524     int ret;
2525
2526     s = s1->sList[self_id];
2527     lc = s->HEVClc;
2528
2529     if(ctb_row) {
2530         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2531         if (ret < 0)
2532             goto error;
2533         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2534     }
2535
2536     while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2537         int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2538         int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2539
2540         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2541
2542         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2543
2544         if (atomic_load(&s1->wpp_err)) {
2545             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2546             return 0;
2547         }
2548
2549         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2550         if (ret < 0)
2551             goto error;
2552         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2553         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2554
2555         if (more_data < 0) {
2556             ret = more_data;
2557             goto error;
2558         }
2559
2560         ctb_addr_ts++;
2561
2562         ff_hevc_save_states(s, ctb_addr_ts);
2563         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2564         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2565
2566         if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2567             atomic_store(&s1->wpp_err, 1);
2568             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2569             return 0;
2570         }
2571
2572         if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2573             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2574             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2575             return ctb_addr_ts;
2576         }
2577         ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2578         x_ctb+=ctb_size;
2579
2580         if(x_ctb >= s->ps.sps->width) {
2581             break;
2582         }
2583     }
2584     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2585
2586     return 0;
2587 error:
2588     s->tab_slice_address[ctb_addr_rs] = -1;
2589     atomic_store(&s1->wpp_err, 1);
2590     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2591     return ret;
2592 }
2593
2594 static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
2595 {
2596     const uint8_t *data = nal->data;
2597     int length          = nal->size;
2598     HEVCLocalContext *lc = s->HEVClc;
2599     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2600     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2601     int64_t offset;
2602     int64_t startheader, cmpt = 0;
2603     int i, j, res = 0;
2604
2605     if (!ret || !arg) {
2606         av_free(ret);
2607         av_free(arg);
2608         return AVERROR(ENOMEM);
2609     }
2610
2611     if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
2612         av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
2613             s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
2614             s->ps.sps->ctb_width, s->ps.sps->ctb_height
2615         );
2616         res = AVERROR_INVALIDDATA;
2617         goto error;
2618     }
2619
2620     ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2621
2622     if (!s->sList[1]) {
2623         for (i = 1; i < s->threads_number; i++) {
2624             s->sList[i] = av_malloc(sizeof(HEVCContext));
2625             memcpy(s->sList[i], s, sizeof(HEVCContext));
2626             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2627             s->sList[i]->HEVClc = s->HEVClcList[i];
2628         }
2629     }
2630
2631     offset = (lc->gb.index >> 3);
2632
2633     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2634         if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2635             startheader--;
2636             cmpt++;
2637         }
2638     }
2639
2640     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2641         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2642         for (j = 0, cmpt = 0, startheader = offset
2643              + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2644             if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2645                 startheader--;
2646                 cmpt++;
2647             }
2648         }
2649         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2650         s->sh.offset[i - 1] = offset;
2651
2652     }
2653     if (s->sh.num_entry_point_offsets != 0) {
2654         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2655         if (length < offset) {
2656             av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
2657             res = AVERROR_INVALIDDATA;
2658             goto error;
2659         }
2660         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2661         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2662
2663     }
2664     s->data = data;
2665
2666     for (i = 1; i < s->threads_number; i++) {
2667         s->sList[i]->HEVClc->first_qp_group = 1;
2668         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2669         memcpy(s->sList[i], s, sizeof(HEVCContext));
2670         s->sList[i]->HEVClc = s->HEVClcList[i];
2671     }
2672
2673     atomic_store(&s->wpp_err, 0);
2674     ff_reset_entries(s->avctx);
2675
2676     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2677         arg[i] = i;
2678         ret[i] = 0;
2679     }
2680
2681     if (s->ps.pps->entropy_coding_sync_enabled_flag)
2682         s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2683
2684     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2685         res += ret[i];
2686 error:
2687     av_free(ret);
2688     av_free(arg);
2689     return res;
2690 }
2691
2692 static int set_side_data(HEVCContext *s)
2693 {
2694     AVFrame *out = s->ref->frame;
2695
2696     if (s->sei.frame_packing.present &&
2697         s->sei.frame_packing.arrangement_type >= 3 &&
2698         s->sei.frame_packing.arrangement_type <= 5 &&
2699         s->sei.frame_packing.content_interpretation_type > 0 &&
2700         s->sei.frame_packing.content_interpretation_type < 3) {
2701         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2702         if (!stereo)
2703             return AVERROR(ENOMEM);
2704
2705         switch (s->sei.frame_packing.arrangement_type) {
2706         case 3:
2707             if (s->sei.frame_packing.quincunx_subsampling)
2708                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2709             else
2710                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2711             break;
2712         case 4:
2713             stereo->type = AV_STEREO3D_TOPBOTTOM;
2714             break;
2715         case 5:
2716             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2717             break;
2718         }
2719
2720         if (s->sei.frame_packing.content_interpretation_type == 2)
2721             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2722
2723         if (s->sei.frame_packing.arrangement_type == 5) {
2724             if (s->sei.frame_packing.current_frame_is_frame0_flag)
2725                 stereo->view = AV_STEREO3D_VIEW_LEFT;
2726             else
2727                 stereo->view = AV_STEREO3D_VIEW_RIGHT;
2728         }
2729     }
2730
2731     if (s->sei.display_orientation.present &&
2732         (s->sei.display_orientation.anticlockwise_rotation ||
2733          s->sei.display_orientation.hflip || s->sei.display_orientation.vflip)) {
2734         double angle = s->sei.display_orientation.anticlockwise_rotation * 360 / (double) (1 << 16);
2735         AVFrameSideData *rotation = av_frame_new_side_data(out,
2736                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2737                                                            sizeof(int32_t) * 9);
2738         if (!rotation)
2739             return AVERROR(ENOMEM);
2740
2741         av_display_rotation_set((int32_t *)rotation->data, angle);
2742         av_display_matrix_flip((int32_t *)rotation->data,
2743                                s->sei.display_orientation.hflip,
2744                                s->sei.display_orientation.vflip);
2745     }
2746
2747     // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2748     // so the side data persists for the entire coded video sequence.
2749     if (s->sei.mastering_display.present > 0 &&
2750         IS_IRAP(s) && s->no_rasl_output_flag) {
2751         s->sei.mastering_display.present--;
2752     }
2753     if (s->sei.mastering_display.present) {
2754         // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
2755         const int mapping[3] = {2, 0, 1};
2756         const int chroma_den = 50000;
2757         const int luma_den = 10000;
2758         int i;
2759         AVMasteringDisplayMetadata *metadata =
2760             av_mastering_display_metadata_create_side_data(out);
2761         if (!metadata)
2762             return AVERROR(ENOMEM);
2763
2764         for (i = 0; i < 3; i++) {
2765             const int j = mapping[i];
2766             metadata->display_primaries[i][0].num = s->sei.mastering_display.display_primaries[j][0];
2767             metadata->display_primaries[i][0].den = chroma_den;
2768             metadata->display_primaries[i][1].num = s->sei.mastering_display.display_primaries[j][1];
2769             metadata->display_primaries[i][1].den = chroma_den;
2770         }
2771         metadata->white_point[0].num = s->sei.mastering_display.white_point[0];
2772         metadata->white_point[0].den = chroma_den;
2773         metadata->white_point[1].num = s->sei.mastering_display.white_point[1];
2774         metadata->white_point[1].den = chroma_den;
2775
2776         metadata->max_luminance.num = s->sei.mastering_display.max_luminance;
2777         metadata->max_luminance.den = luma_den;
2778         metadata->min_luminance.num = s->sei.mastering_display.min_luminance;
2779         metadata->min_luminance.den = luma_den;
2780         metadata->has_luminance = 1;
2781         metadata->has_primaries = 1;
2782
2783         av_log(s->avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
2784         av_log(s->avctx, AV_LOG_DEBUG,
2785                "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
2786                av_q2d(metadata->display_primaries[0][0]),
2787                av_q2d(metadata->display_primaries[0][1]),
2788                av_q2d(metadata->display_primaries[1][0]),
2789                av_q2d(metadata->display_primaries[1][1]),
2790                av_q2d(metadata->display_primaries[2][0]),
2791                av_q2d(metadata->display_primaries[2][1]),
2792                av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
2793         av_log(s->avctx, AV_LOG_DEBUG,
2794                "min_luminance=%f, max_luminance=%f\n",
2795                av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
2796     }
2797     // Decrement the content light level flag when IRAP frame has no_rasl_output_flag=1
2798     // so the side data persists for the entire coded video sequence.
2799     if (s->sei.content_light.present > 0 &&
2800         IS_IRAP(s) && s->no_rasl_output_flag) {
2801         s->sei.content_light.present--;
2802     }
2803     if (s->sei.content_light.present) {
2804         AVContentLightMetadata *metadata =
2805             av_content_light_metadata_create_side_data(out);
2806         if (!metadata)
2807             return AVERROR(ENOMEM);
2808         metadata->MaxCLL  = s->sei.content_light.max_content_light_level;
2809         metadata->MaxFALL = s->sei.content_light.max_pic_average_light_level;
2810
2811         av_log(s->avctx, AV_LOG_DEBUG, "Content Light Level Metadata:\n");
2812         av_log(s->avctx, AV_LOG_DEBUG, "MaxCLL=%d, MaxFALL=%d\n",
2813                metadata->MaxCLL, metadata->MaxFALL);
2814     }
2815
2816     if (s->sei.a53_caption.buf_ref) {
2817         HEVCSEIA53Caption *a53 = &s->sei.a53_caption;
2818
2819         AVFrameSideData *sd = av_frame_new_side_data_from_buf(out, AV_FRAME_DATA_A53_CC, a53->buf_ref);
2820         if (!sd)
2821             av_buffer_unref(&a53->buf_ref);
2822         a53->buf_ref = NULL;
2823     }
2824
2825     for (int i = 0; i < s->sei.unregistered.nb_buf_ref; i++) {
2826         HEVCSEIUnregistered *unreg = &s->sei.unregistered;
2827
2828         if (unreg->buf_ref[i]) {
2829             AVFrameSideData *sd = av_frame_new_side_data_from_buf(out,
2830                     AV_FRAME_DATA_SEI_UNREGISTERED,
2831                     unreg->buf_ref[i]);
2832             if (!sd)
2833                 av_buffer_unref(&unreg->buf_ref[i]);
2834             unreg->buf_ref[i] = NULL;
2835         }
2836     }
2837     s->sei.unregistered.nb_buf_ref = 0;
2838
2839     if (s->sei.timecode.present) {
2840         uint32_t *tc_sd;
2841         char tcbuf[AV_TIMECODE_STR_SIZE];
2842         AVFrameSideData *tcside = av_frame_new_side_data(out, AV_FRAME_DATA_S12M_TIMECODE,
2843                                                          sizeof(uint32_t) * 4);
2844         if (!tcside)
2845             return AVERROR(ENOMEM);
2846
2847         tc_sd = (uint32_t*)tcside->data;
2848         tc_sd[0] = s->sei.timecode.num_clock_ts;
2849
2850         for (int i = 0; i < tc_sd[0]; i++) {
2851             int drop = s->sei.timecode.cnt_dropped_flag[i];
2852             int   hh = s->sei.timecode.hours_value[i];
2853             int   mm = s->sei.timecode.minutes_value[i];
2854             int   ss = s->sei.timecode.seconds_value[i];
2855             int   ff = s->sei.timecode.n_frames[i];
2856
2857             tc_sd[i + 1] = av_timecode_get_smpte(s->avctx->framerate, drop, hh, mm, ss, ff);
2858             av_timecode_make_smpte_tc_string2(tcbuf, s->avctx->framerate, tc_sd[i + 1], 0, 0);
2859             av_dict_set(&out->metadata, "timecode", tcbuf, 0);
2860         }
2861
2862         s->sei.timecode.num_clock_ts = 0;
2863     }
2864
2865     return 0;
2866 }
2867
2868 static int hevc_frame_start(HEVCContext *s)
2869 {
2870     HEVCLocalContext *lc = s->HEVClc;
2871     int pic_size_in_ctb  = ((s->ps.sps->width  >> s->ps.sps->log2_min_cb_size) + 1) *
2872                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
2873     int ret;
2874
2875     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2876     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2877     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2878     memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2879     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2880
2881     s->is_decoded        = 0;
2882     s->first_nal_type    = s->nal_unit_type;
2883
2884     s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos);
2885
2886     if (s->ps.pps->tiles_enabled_flag)
2887         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2888
2889     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2890     if (ret < 0)
2891         goto fail;
2892
2893     ret = ff_hevc_frame_rps(s);
2894     if (ret < 0) {
2895         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2896         goto fail;
2897     }
2898
2899     s->ref->frame->key_frame = IS_IRAP(s);
2900
2901     ret = set_side_data(s);
2902     if (ret < 0)
2903         goto fail;
2904
2905     s->frame->pict_type = 3 - s->sh.slice_type;
2906
2907     if (!IS_IRAP(s))
2908         ff_hevc_bump_frame(s);
2909
2910     av_frame_unref(s->output_frame);
2911     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2912     if (ret < 0)
2913         goto fail;
2914
2915     if (!s->avctx->hwaccel)
2916         ff_thread_finish_setup(s->avctx);
2917
2918     return 0;
2919
2920 fail:
2921     if (s->ref)
2922         ff_hevc_unref_frame(s, s->ref, ~0);
2923     s->ref = NULL;
2924     return ret;
2925 }
2926
2927 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2928 {
2929     HEVCLocalContext *lc = s->HEVClc;
2930     GetBitContext *gb    = &lc->gb;
2931     int ctb_addr_ts, ret;
2932
2933     *gb              = nal->gb;
2934     s->nal_unit_type = nal->type;
2935     s->temporal_id   = nal->temporal_id;
2936
2937     switch (s->nal_unit_type) {
2938     case HEVC_NAL_VPS:
2939         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2940             ret = s->avctx->hwaccel->decode_params(s->avctx,
2941                                                    nal->type,
2942                                                    nal->raw_data,
2943                                                    nal->raw_size);
2944             if (ret < 0)
2945                 goto fail;
2946         }
2947         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2948         if (ret < 0)
2949             goto fail;
2950         break;
2951     case HEVC_NAL_SPS:
2952         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2953             ret = s->avctx->hwaccel->decode_params(s->avctx,
2954                                                    nal->type,
2955                                                    nal->raw_data,
2956                                                    nal->raw_size);
2957             if (ret < 0)
2958                 goto fail;
2959         }
2960         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2961                                      s->apply_defdispwin);
2962         if (ret < 0)
2963             goto fail;
2964         break;
2965     case HEVC_NAL_PPS:
2966         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2967             ret = s->avctx->hwaccel->decode_params(s->avctx,
2968                                                    nal->type,
2969                                                    nal->raw_data,
2970                                                    nal->raw_size);
2971             if (ret < 0)
2972                 goto fail;
2973         }
2974         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2975         if (ret < 0)
2976             goto fail;
2977         break;
2978     case HEVC_NAL_SEI_PREFIX:
2979     case HEVC_NAL_SEI_SUFFIX:
2980         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2981             ret = s->avctx->hwaccel->decode_params(s->avctx,
2982                                                    nal->type,
2983                                                    nal->raw_data,
2984                                                    nal->raw_size);
2985             if (ret < 0)
2986                 goto fail;
2987         }
2988         ret = ff_hevc_decode_nal_sei(gb, s->avctx, &s->sei, &s->ps, s->nal_unit_type);
2989         if (ret < 0)
2990             goto fail;
2991         break;
2992     case HEVC_NAL_TRAIL_R:
2993     case HEVC_NAL_TRAIL_N:
2994     case HEVC_NAL_TSA_N:
2995     case HEVC_NAL_TSA_R:
2996     case HEVC_NAL_STSA_N:
2997     case HEVC_NAL_STSA_R:
2998     case HEVC_NAL_BLA_W_LP:
2999     case HEVC_NAL_BLA_W_RADL:
3000     case HEVC_NAL_BLA_N_LP:
3001     case HEVC_NAL_IDR_W_RADL:
3002     case HEVC_NAL_IDR_N_LP:
3003     case HEVC_NAL_CRA_NUT:
3004     case HEVC_NAL_RADL_N:
3005     case HEVC_NAL_RADL_R:
3006     case HEVC_NAL_RASL_N:
3007     case HEVC_NAL_RASL_R:
3008         ret = hls_slice_header(s);
3009         if (ret < 0)
3010             return ret;
3011         if (ret == 1) {
3012             ret = AVERROR_INVALIDDATA;
3013             goto fail;
3014         }
3015
3016
3017         if (
3018             (s->avctx->skip_frame >= AVDISCARD_BIDIR && s->sh.slice_type == HEVC_SLICE_B) ||
3019             (s->avctx->skip_frame >= AVDISCARD_NONINTRA && s->sh.slice_type != HEVC_SLICE_I) ||
3020             (s->avctx->skip_frame >= AVDISCARD_NONKEY && !IS_IRAP(s))) {
3021             break;
3022         }
3023
3024         if (s->sh.first_slice_in_pic_flag) {
3025             if (s->max_ra == INT_MAX) {
3026                 if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
3027                     s->max_ra = s->poc;
3028                 } else {
3029                     if (IS_IDR(s))
3030                         s->max_ra = INT_MIN;
3031                 }
3032             }
3033
3034             if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
3035                 s->poc <= s->max_ra) {
3036                 s->is_decoded = 0;
3037                 break;
3038             } else {
3039                 if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
3040                     s->max_ra = INT_MIN;
3041             }
3042
3043             s->overlap ++;
3044             ret = hevc_frame_start(s);
3045             if (ret < 0)
3046                 return ret;
3047         } else if (!s->ref) {
3048             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
3049             goto fail;
3050         }
3051
3052         if (s->nal_unit_type != s->first_nal_type) {
3053             av_log(s->avctx, AV_LOG_ERROR,
3054                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
3055                    s->first_nal_type, s->nal_unit_type);
3056             return AVERROR_INVALIDDATA;
3057         }
3058
3059         if (!s->sh.dependent_slice_segment_flag &&
3060             s->sh.slice_type != HEVC_SLICE_I) {
3061             ret = ff_hevc_slice_rpl(s);
3062             if (ret < 0) {
3063                 av_log(s->avctx, AV_LOG_WARNING,
3064                        "Error constructing the reference lists for the current slice.\n");
3065                 goto fail;
3066             }
3067         }
3068
3069         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
3070             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
3071             if (ret < 0)
3072                 goto fail;
3073         }
3074
3075         if (s->avctx->hwaccel) {
3076             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
3077             if (ret < 0)
3078                 goto fail;
3079         } else {
3080             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
3081                 ctb_addr_ts = hls_slice_data_wpp(s, nal);
3082             else
3083                 ctb_addr_ts = hls_slice_data(s);
3084             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
3085                 s->is_decoded = 1;
3086             }
3087
3088             if (ctb_addr_ts < 0) {
3089                 ret = ctb_addr_ts;
3090                 goto fail;
3091             }
3092         }
3093         break;
3094     case HEVC_NAL_EOS_NUT:
3095     case HEVC_NAL_EOB_NUT:
3096         s->seq_decode = (s->seq_decode + 1) & 0xff;
3097         s->max_ra     = INT_MAX;
3098         break;
3099     case HEVC_NAL_AUD:
3100     case HEVC_NAL_FD_NUT:
3101         break;
3102     default:
3103         av_log(s->avctx, AV_LOG_INFO,
3104                "Skipping NAL unit %d\n", s->nal_unit_type);
3105     }
3106
3107     return 0;
3108 fail:
3109     if (s->avctx->err_recognition & AV_EF_EXPLODE)
3110         return ret;
3111     return 0;
3112 }
3113
3114 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
3115 {
3116     int i, ret = 0;
3117     int eos_at_start = 1;
3118
3119     s->ref = NULL;
3120     s->last_eos = s->eos;
3121     s->eos = 0;
3122     s->overlap = 0;
3123
3124     /* split the input packet into NAL units, so we know the upper bound on the
3125      * number of slices in the frame */
3126     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
3127                                 s->nal_length_size, s->avctx->codec_id, 1, 0);
3128     if (ret < 0) {
3129         av_log(s->avctx, AV_LOG_ERROR,
3130                "Error splitting the input into NAL units.\n");
3131         return ret;
3132     }
3133
3134     for (i = 0; i < s->pkt.nb_nals; i++) {
3135         if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
3136             s->pkt.nals[i].type == HEVC_NAL_EOS_NUT) {
3137             if (eos_at_start) {
3138                 s->last_eos = 1;
3139             } else {
3140                 s->eos = 1;
3141             }
3142         } else {
3143             eos_at_start = 0;
3144         }
3145     }
3146
3147     /* decode the NAL units */
3148     for (i = 0; i < s->pkt.nb_nals; i++) {
3149         H2645NAL *nal = &s->pkt.nals[i];
3150
3151         if (s->avctx->skip_frame >= AVDISCARD_ALL ||
3152             (s->avctx->skip_frame >= AVDISCARD_NONREF
3153             && ff_hevc_nal_is_nonref(nal->type)) || nal->nuh_layer_id > 0)
3154             continue;
3155
3156         ret = decode_nal_unit(s, nal);
3157         if (ret >= 0 && s->overlap > 2)
3158             ret = AVERROR_INVALIDDATA;
3159         if (ret < 0) {
3160             av_log(s->avctx, AV_LOG_WARNING,
3161                    "Error parsing NAL unit #%d.\n", i);
3162             goto fail;
3163         }
3164     }
3165
3166 fail:
3167     if (s->ref && s->threads_type == FF_THREAD_FRAME)
3168         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
3169
3170     return ret;
3171 }
3172
/**
 * Log a 16-byte MD5 digest as 32 lowercase hex digits, without a trailing
 * newline.
 *
 * @param log_ctx context passed through to av_log()
 * @param level   log level passed through to av_log()
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte      = md5;
    const uint8_t *const end = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
3179
3180 static int verify_md5(HEVCContext *s, AVFrame *frame)
3181 {
3182     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3183     int pixel_shift;
3184     int i, j;
3185
3186     if (!desc)
3187         return AVERROR(EINVAL);
3188
3189     pixel_shift = desc->comp[0].depth > 8;
3190
3191     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3192            s->poc);
3193
3194     /* the checksums are LE, so we have to byteswap for >8bpp formats
3195      * on BE arches */
3196 #if HAVE_BIGENDIAN
3197     if (pixel_shift && !s->checksum_buf) {
3198         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3199                        FFMAX3(frame->linesize[0], frame->linesize[1],
3200                               frame->linesize[2]));
3201         if (!s->checksum_buf)
3202             return AVERROR(ENOMEM);
3203     }
3204 #endif
3205
3206     for (i = 0; frame->data[i]; i++) {
3207         int width  = s->avctx->coded_width;
3208         int height = s->avctx->coded_height;
3209         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
3210         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3211         uint8_t md5[16];
3212
3213         av_md5_init(s->md5_ctx);
3214         for (j = 0; j < h; j++) {
3215             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3216 #if HAVE_BIGENDIAN
3217             if (pixel_shift) {
3218                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3219                                     (const uint16_t *) src, w);
3220                 src = s->checksum_buf;
3221             }
3222 #endif
3223             av_md5_update(s->md5_ctx, src, w << pixel_shift);
3224         }
3225         av_md5_final(s->md5_ctx, md5);
3226
3227         if (!memcmp(md5, s->sei.picture_hash.md5[i], 16)) {
3228             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3229             print_md5(s->avctx, AV_LOG_DEBUG, md5);
3230             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
3231         } else {
3232             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3233             print_md5(s->avctx, AV_LOG_ERROR, md5);
3234             av_log   (s->avctx, AV_LOG_ERROR, " != ");
3235             print_md5(s->avctx, AV_LOG_ERROR, s->sei.picture_hash.md5[i]);
3236             av_log   (s->avctx, AV_LOG_ERROR, "\n");
3237             return AVERROR_INVALIDDATA;
3238         }
3239     }
3240
3241     av_log(s->avctx, AV_LOG_DEBUG, "\n");
3242
3243     return 0;
3244 }
3245
3246 static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length, int first)
3247 {
3248     int ret, i;
3249
3250     ret = ff_hevc_decode_extradata(buf, length, &s->ps, &s->sei, &s->is_nalff,
3251                                    &s->nal_length_size, s->avctx->err_recognition,
3252                                    s->apply_defdispwin, s->avctx);
3253     if (ret < 0)
3254         return ret;
3255
3256     /* export stream parameters from the first SPS */
3257     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3258         if (first && s->ps.sps_list[i]) {
3259             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3260             export_stream_params(s, sps);
3261             break;
3262         }
3263     }
3264
3265     /* export stream parameters from SEI */
3266     ret = export_stream_params_from_sei(s);
3267     if (ret < 0)
3268         return ret;
3269
3270     return 0;
3271 }
3272
3273 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3274                              AVPacket *avpkt)
3275 {
3276     int ret;
3277     int new_extradata_size;
3278     uint8_t *new_extradata;
3279     HEVCContext *s = avctx->priv_data;
3280
3281     if (!avpkt->size) {
3282         ret = ff_hevc_output_frame(s, data, 1);
3283         if (ret < 0)
3284             return ret;
3285
3286         *got_output = ret;
3287         return 0;
3288     }
3289
3290     new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
3291                                             &new_extradata_size);
3292     if (new_extradata && new_extradata_size > 0) {
3293         ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0);
3294         if (ret < 0)
3295             return ret;
3296     }
3297
3298     s->ref = NULL;
3299     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
3300     if (ret < 0)
3301         return ret;
3302
3303     if (avctx->hwaccel) {
3304         if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
3305             av_log(avctx, AV_LOG_ERROR,
3306                    "hardware accelerator failed to decode picture\n");
3307             ff_hevc_unref_frame(s, s->ref, ~0);
3308             return ret;
3309         }
3310     } else {
3311         /* verify the SEI checksum */
3312         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3313             s->sei.picture_hash.is_md5) {
3314             ret = verify_md5(s, s->ref->frame);
3315             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3316                 ff_hevc_unref_frame(s, s->ref, ~0);
3317                 return ret;
3318             }
3319         }
3320     }
3321     s->sei.picture_hash.is_md5 = 0;
3322
3323     if (s->is_decoded) {
3324         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3325         s->is_decoded = 0;
3326     }
3327
3328     if (s->output_frame->buf[0]) {
3329         av_frame_move_ref(data, s->output_frame);
3330         *got_output = 1;
3331     }
3332
3333     return avpkt->size;
3334 }
3335
3336 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3337 {
3338     int ret;
3339
3340     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3341     if (ret < 0)
3342         return ret;
3343
3344     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3345     if (!dst->tab_mvf_buf)
3346         goto fail;
3347     dst->tab_mvf = src->tab_mvf;
3348
3349     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3350     if (!dst->rpl_tab_buf)
3351         goto fail;
3352     dst->rpl_tab = src->rpl_tab;
3353
3354     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3355     if (!dst->rpl_buf)
3356         goto fail;
3357
3358     dst->poc        = src->poc;
3359     dst->ctb_count  = src->ctb_count;
3360     dst->flags      = src->flags;
3361     dst->sequence   = src->sequence;
3362
3363     if (src->hwaccel_picture_private) {
3364         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3365         if (!dst->hwaccel_priv_buf)
3366             goto fail;
3367         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
3368     }
3369
3370     return 0;
3371 fail:
3372     ff_hevc_unref_frame(s, dst, ~0);
3373     return AVERROR(ENOMEM);
3374 }
3375
3376 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3377 {
3378     HEVCContext       *s = avctx->priv_data;
3379     int i;
3380
3381     pic_arrays_free(s);
3382
3383     av_freep(&s->md5_ctx);
3384
3385     av_freep(&s->cabac_state);
3386
3387     for (i = 0; i < 3; i++) {
3388         av_freep(&s->sao_pixel_buffer_h[i]);
3389         av_freep(&s->sao_pixel_buffer_v[i]);
3390     }
3391     av_frame_free(&s->output_frame);
3392
3393     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3394         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3395         av_frame_free(&s->DPB[i].frame);
3396     }
3397
3398     ff_hevc_ps_uninit(&s->ps);
3399
3400     av_freep(&s->sh.entry_point_offset);
3401     av_freep(&s->sh.offset);
3402     av_freep(&s->sh.size);
3403
3404     for (i = 1; i < s->threads_number; i++) {
3405         HEVCLocalContext *lc = s->HEVClcList[i];
3406         if (lc) {
3407             av_freep(&s->HEVClcList[i]);
3408             av_freep(&s->sList[i]);
3409         }
3410     }
3411     if (s->HEVClc == s->HEVClcList[0])
3412         s->HEVClc = NULL;
3413     av_freep(&s->HEVClcList[0]);
3414
3415     ff_h2645_packet_uninit(&s->pkt);
3416
3417     ff_hevc_reset_sei(&s->sei);
3418
3419     return 0;
3420 }
3421
3422 static av_cold int hevc_init_context(AVCodecContext *avctx)
3423 {
3424     HEVCContext *s = avctx->priv_data;
3425     int i;
3426
3427     s->avctx = avctx;
3428
3429     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3430     if (!s->HEVClc)
3431         goto fail;
3432     s->HEVClcList[0] = s->HEVClc;
3433     s->sList[0] = s;
3434
3435     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3436     if (!s->cabac_state)
3437         goto fail;
3438
3439     s->output_frame = av_frame_alloc();
3440     if (!s->output_frame)
3441         goto fail;
3442
3443     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3444         s->DPB[i].frame = av_frame_alloc();
3445         if (!s->DPB[i].frame)
3446             goto fail;
3447         s->DPB[i].tf.f = s->DPB[i].frame;
3448     }
3449
3450     s->max_ra = INT_MAX;
3451
3452     s->md5_ctx = av_md5_alloc();
3453     if (!s->md5_ctx)
3454         goto fail;
3455
3456     ff_bswapdsp_init(&s->bdsp);
3457
3458     s->context_initialized = 1;
3459     s->eos = 0;
3460
3461     ff_hevc_reset_sei(&s->sei);
3462
3463     return 0;
3464
3465 fail:
3466     hevc_decode_free(avctx);
3467     return AVERROR(ENOMEM);
3468 }
3469
#if HAVE_THREADS
/**
 * Replace each entry of dst with a new reference to the corresponding entry
 * of src; entries whose source is NULL end up NULL.
 *
 * @return 0 on success, AVERROR(ENOMEM) if a reference cannot be created
 */
static int hevc_sync_buf_list(AVBufferRef **dst, AVBufferRef **src, int nb)
{
    for (int n = 0; n < nb; n++) {
        av_buffer_unref(&dst[n]);
        if (!src[n])
            continue;

        dst[n] = av_buffer_ref(src[n]);
        if (!dst[n])
            return AVERROR(ENOMEM);
    }
    return 0;
}

/**
 * Frame-threading callback: copy the decoder state of the source thread
 * context into the destination one.
 *
 * Transfers the DPB, the parameter-set lists, the active SPS, sequence
 * counters, NAL framing settings, threading settings and the SEI fields
 * listed below, then re-exports the SEI-derived stream parameters.
 *
 * @return 0 on success, a negative error code otherwise
 */
static int hevc_update_thread_context(AVCodecContext *dst,
                                      const AVCodecContext *src)
{
    HEVCContext *s  = dst->priv_data;
    HEVCContext *s0 = src->priv_data;
    int n, ret;

    if (!s->context_initialized) {
        ret = hevc_init_context(dst);
        if (ret < 0)
            return ret;
    }

    /* mirror the decoded picture buffer */
    for (n = 0; n < FF_ARRAY_ELEMS(s->DPB); n++) {
        ff_hevc_unref_frame(s, &s->DPB[n], ~0);
        if (!s0->DPB[n].frame->buf[0])
            continue;

        ret = hevc_ref_frame(s, &s->DPB[n], &s0->DPB[n]);
        if (ret < 0)
            return ret;
    }

    /* forget a stale active SPS before the lists it points into are replaced */
    if (s->ps.sps != s0->ps.sps)
        s->ps.sps = NULL;

    ret = hevc_sync_buf_list(s->ps.vps_list, s0->ps.vps_list,
                             FF_ARRAY_ELEMS(s->ps.vps_list));
    if (ret < 0)
        return ret;

    ret = hevc_sync_buf_list(s->ps.sps_list, s0->ps.sps_list,
                             FF_ARRAY_ELEMS(s->ps.sps_list));
    if (ret < 0)
        return ret;

    ret = hevc_sync_buf_list(s->ps.pps_list, s0->ps.pps_list,
                             FF_ARRAY_ELEMS(s->ps.pps_list));
    if (ret < 0)
        return ret;

    if (s->ps.sps != s0->ps.sps) {
        ret = set_sps(s, s0->ps.sps, src->pix_fmt);
        if (ret < 0)
            return ret;
    }

    s->seq_decode          = s0->seq_decode;
    s->seq_output          = s0->seq_output;
    s->pocTid0             = s0->pocTid0;
    s->max_ra              = s0->max_ra;
    s->eos                 = s0->eos;
    s->no_rasl_output_flag = s0->no_rasl_output_flag;

    s->is_nalff            = s0->is_nalff;
    s->nal_length_size     = s0->nal_length_size;

    s->threads_number      = s0->threads_number;
    s->threads_type        = s0->threads_type;

    /* same bookkeeping as the EOS/EOB case of decode_nal_unit() */
    if (s0->eos) {
        s->seq_decode = (s->seq_decode + 1) & 0xff;
        s->max_ra     = INT_MAX;
    }

    av_buffer_unref(&s->sei.a53_caption.buf_ref);
    if (s0->sei.a53_caption.buf_ref) {
        s->sei.a53_caption.buf_ref = av_buffer_ref(s0->sei.a53_caption.buf_ref);
        if (!s->sei.a53_caption.buf_ref)
            return AVERROR(ENOMEM);
    }

    s->sei.frame_packing        = s0->sei.frame_packing;
    s->sei.display_orientation  = s0->sei.display_orientation;
    s->sei.mastering_display    = s0->sei.mastering_display;
    s->sei.content_light        = s0->sei.content_light;
    s->sei.alternative_transfer = s0->sei.alternative_transfer;

    ret = export_stream_params_from_sei(s);

    return ret < 0 ? ret : 0;
}
#endif
3564
3565 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3566 {
3567     HEVCContext *s = avctx->priv_data;
3568     int ret;
3569
3570     ret = hevc_init_context(avctx);
3571     if (ret < 0)
3572         return ret;
3573
3574     s->enable_parallel_tiles = 0;
3575     s->sei.picture_timing.picture_struct = 0;
3576     s->eos = 1;
3577
3578     atomic_init(&s->wpp_err, 0);
3579
3580     if(avctx->active_thread_type & FF_THREAD_SLICE)
3581         s->threads_number = avctx->thread_count;
3582     else
3583         s->threads_number = 1;
3584
3585     if (!avctx->internal->is_copy) {
3586         if (avctx->extradata_size > 0 && avctx->extradata) {
3587             ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size, 1);
3588             if (ret < 0) {
3589                 hevc_decode_free(avctx);
3590                 return ret;
3591             }
3592         }
3593     }
3594
3595     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3596             s->threads_type = FF_THREAD_FRAME;
3597         else
3598             s->threads_type = FF_THREAD_SLICE;
3599
3600     return 0;
3601 }
3602
3603 static void hevc_decode_flush(AVCodecContext *avctx)
3604 {
3605     HEVCContext *s = avctx->priv_data;
3606     ff_hevc_flush_dpb(s);
3607     ff_hevc_reset_sei(&s->sei);
3608     s->max_ra = INT_MAX;
3609     s->eos = 1;
3610 }
3611
3612 #define OFFSET(x) offsetof(HEVCContext, x)
3613 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3614
3615 static const AVOption options[] = {
3616     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3617         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3618     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3619         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3620     { NULL },
3621 };
3622
3623 static const AVClass hevc_decoder_class = {
3624     .class_name = "HEVC decoder",
3625     .item_name  = av_default_item_name,
3626     .option     = options,
3627     .version    = LIBAVUTIL_VERSION_INT,
3628 };
3629
3630 AVCodec ff_hevc_decoder = {
3631     .name                  = "hevc",
3632     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3633     .type                  = AVMEDIA_TYPE_VIDEO,
3634     .id                    = AV_CODEC_ID_HEVC,
3635     .priv_data_size        = sizeof(HEVCContext),
3636     .priv_class            = &hevc_decoder_class,
3637     .init                  = hevc_decode_init,
3638     .close                 = hevc_decode_free,
3639     .decode                = hevc_decode_frame,
3640     .flush                 = hevc_decode_flush,
3641     .update_thread_context = ONLY_IF_THREADS_ENABLED(hevc_update_thread_context),
3642     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3643                              AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3644     .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING |
3645                              FF_CODEC_CAP_ALLOCATE_PROGRESS,
3646     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3647     .hw_configs            = (const AVCodecHWConfigInternal*[]) {
3648 #if CONFIG_HEVC_DXVA2_HWACCEL
3649                                HWACCEL_DXVA2(hevc),
3650 #endif
3651 #if CONFIG_HEVC_D3D11VA_HWACCEL
3652                                HWACCEL_D3D11VA(hevc),
3653 #endif
3654 #if CONFIG_HEVC_D3D11VA2_HWACCEL
3655                                HWACCEL_D3D11VA2(hevc),
3656 #endif
3657 #if CONFIG_HEVC_NVDEC_HWACCEL
3658                                HWACCEL_NVDEC(hevc),
3659 #endif
3660 #if CONFIG_HEVC_VAAPI_HWACCEL
3661                                HWACCEL_VAAPI(hevc),
3662 #endif
3663 #if CONFIG_HEVC_VDPAU_HWACCEL
3664                                HWACCEL_VDPAU(hevc),
3665 #endif
3666 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
3667                                HWACCEL_VIDEOTOOLBOX(hevc),
3668 #endif
3669                                NULL
3670                            },
3671 };