]> git.sesse.net Git - ffmpeg/blob - libavcodec/hevcdec.c
Merge commit '7e929dac100916fc45cb95e231025f3439c20156'
[ffmpeg] / libavcodec / hevcdec.c
1 /*
2  * HEVC video Decoder
3  *
4  * Copyright (C) 2012 - 2013 Guillaume Martres
5  * Copyright (C) 2012 - 2013 Mickael Raulet
6  * Copyright (C) 2012 - 2013 Gildas Cocherel
7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/mastering_display_metadata.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
35
36 #include "bswapdsp.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
39 #include "golomb.h"
40 #include "hevc.h"
41 #include "hevc_data.h"
42 #include "hevc_parse.h"
43 #include "hevcdec.h"
44 #include "hwaccel.h"
45 #include "profiles.h"
46
/*
 * Lookup table indexed by a block width in pixels; only the widths listed
 * below carry a meaningful entry, every other slot is zero.
 * NOTE(review): presumably the value selects a width-specific DSP routine --
 * confirm against the users of this table.
 */
const uint8_t ff_hevc_pel_weight[65] = {
    [2]  = 0,
    [4]  = 1,
    [6]  = 2,
    [8]  = 3,
    [12] = 4,
    [16] = 5,
    [24] = 6,
    [32] = 7,
    [48] = 8,
    [64] = 9,
};
48
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
53
54 /**
55  * Section 5.7
56  */
57
58 /* free everything allocated  by pic_arrays_init() */
59 static void pic_arrays_free(HEVCContext *s)
60 {
61     av_freep(&s->sao);
62     av_freep(&s->deblock);
63
64     av_freep(&s->skip_flag);
65     av_freep(&s->tab_ct_depth);
66
67     av_freep(&s->tab_ipm);
68     av_freep(&s->cbf_luma);
69     av_freep(&s->is_pcm);
70
71     av_freep(&s->qp_y_tab);
72     av_freep(&s->tab_slice_address);
73     av_freep(&s->filter_slice_edges);
74
75     av_freep(&s->horizontal_bs);
76     av_freep(&s->vertical_bs);
77
78     av_freep(&s->sh.entry_point_offset);
79     av_freep(&s->sh.size);
80     av_freep(&s->sh.offset);
81
82     av_buffer_pool_uninit(&s->tab_mvf_pool);
83     av_buffer_pool_uninit(&s->rpl_tab_pool);
84 }
85
86 /* allocate arrays that depend on frame dimensions */
87 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
88 {
89     int log2_min_cb_size = sps->log2_min_cb_size;
90     int width            = sps->width;
91     int height           = sps->height;
92     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
93                            ((height >> log2_min_cb_size) + 1);
94     int ctb_count        = sps->ctb_width * sps->ctb_height;
95     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
96
97     s->bs_width  = (width  >> 2) + 1;
98     s->bs_height = (height >> 2) + 1;
99
100     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
101     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
102     if (!s->sao || !s->deblock)
103         goto fail;
104
105     s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
106     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
107     if (!s->skip_flag || !s->tab_ct_depth)
108         goto fail;
109
110     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
111     s->tab_ipm  = av_mallocz(min_pu_size);
112     s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
113     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
114         goto fail;
115
116     s->filter_slice_edges = av_mallocz(ctb_count);
117     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
118                                       sizeof(*s->tab_slice_address));
119     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
120                                       sizeof(*s->qp_y_tab));
121     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
122         goto fail;
123
124     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
125     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
126     if (!s->horizontal_bs || !s->vertical_bs)
127         goto fail;
128
129     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
130                                           av_buffer_allocz);
131     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
132                                           av_buffer_allocz);
133     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
134         goto fail;
135
136     return 0;
137
138 fail:
139     pic_arrays_free(s);
140     return AVERROR(ENOMEM);
141 }
142
143 static int pred_weight_table(HEVCContext *s, GetBitContext *gb)
144 {
145     int i = 0;
146     int j = 0;
147     uint8_t luma_weight_l0_flag[16];
148     uint8_t chroma_weight_l0_flag[16];
149     uint8_t luma_weight_l1_flag[16];
150     uint8_t chroma_weight_l1_flag[16];
151     int luma_log2_weight_denom;
152
153     luma_log2_weight_denom = get_ue_golomb_long(gb);
154     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7) {
155         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
156         return AVERROR_INVALIDDATA;
157     }
158     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
159     if (s->ps.sps->chroma_format_idc != 0) {
160         int64_t chroma_log2_weight_denom = luma_log2_weight_denom + (int64_t)get_se_golomb(gb);
161         if (chroma_log2_weight_denom < 0 || chroma_log2_weight_denom > 7) {
162             av_log(s->avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %"PRId64" is invalid\n", chroma_log2_weight_denom);
163             return AVERROR_INVALIDDATA;
164         }
165         s->sh.chroma_log2_weight_denom = chroma_log2_weight_denom;
166     }
167
168     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
169         luma_weight_l0_flag[i] = get_bits1(gb);
170         if (!luma_weight_l0_flag[i]) {
171             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
172             s->sh.luma_offset_l0[i] = 0;
173         }
174     }
175     if (s->ps.sps->chroma_format_idc != 0) {
176         for (i = 0; i < s->sh.nb_refs[L0]; i++)
177             chroma_weight_l0_flag[i] = get_bits1(gb);
178     } else {
179         for (i = 0; i < s->sh.nb_refs[L0]; i++)
180             chroma_weight_l0_flag[i] = 0;
181     }
182     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
183         if (luma_weight_l0_flag[i]) {
184             int delta_luma_weight_l0 = get_se_golomb(gb);
185             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
186             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
187         }
188         if (chroma_weight_l0_flag[i]) {
189             for (j = 0; j < 2; j++) {
190                 int delta_chroma_weight_l0 = get_se_golomb(gb);
191                 int delta_chroma_offset_l0 = get_se_golomb(gb);
192
193                 if (   (int8_t)delta_chroma_weight_l0 != delta_chroma_weight_l0
194                     || delta_chroma_offset_l0 < -(1<<17) || delta_chroma_offset_l0 > (1<<17)) {
195                     return AVERROR_INVALIDDATA;
196                 }
197
198                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
199                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
200                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
201             }
202         } else {
203             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
204             s->sh.chroma_offset_l0[i][0] = 0;
205             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
206             s->sh.chroma_offset_l0[i][1] = 0;
207         }
208     }
209     if (s->sh.slice_type == HEVC_SLICE_B) {
210         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
211             luma_weight_l1_flag[i] = get_bits1(gb);
212             if (!luma_weight_l1_flag[i]) {
213                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
214                 s->sh.luma_offset_l1[i] = 0;
215             }
216         }
217         if (s->ps.sps->chroma_format_idc != 0) {
218             for (i = 0; i < s->sh.nb_refs[L1]; i++)
219                 chroma_weight_l1_flag[i] = get_bits1(gb);
220         } else {
221             for (i = 0; i < s->sh.nb_refs[L1]; i++)
222                 chroma_weight_l1_flag[i] = 0;
223         }
224         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
225             if (luma_weight_l1_flag[i]) {
226                 int delta_luma_weight_l1 = get_se_golomb(gb);
227                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
228                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
229             }
230             if (chroma_weight_l1_flag[i]) {
231                 for (j = 0; j < 2; j++) {
232                     int delta_chroma_weight_l1 = get_se_golomb(gb);
233                     int delta_chroma_offset_l1 = get_se_golomb(gb);
234
235                     if (   (int8_t)delta_chroma_weight_l1 != delta_chroma_weight_l1
236                         || delta_chroma_offset_l1 < -(1<<17) || delta_chroma_offset_l1 > (1<<17)) {
237                         return AVERROR_INVALIDDATA;
238                     }
239
240                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
241                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
242                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
243                 }
244             } else {
245                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
246                 s->sh.chroma_offset_l1[i][0] = 0;
247                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
248                 s->sh.chroma_offset_l1[i][1] = 0;
249             }
250         }
251     }
252     return 0;
253 }
254
255 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
256 {
257     const HEVCSPS *sps = s->ps.sps;
258     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
259     int prev_delta_msb = 0;
260     unsigned int nb_sps = 0, nb_sh;
261     int i;
262
263     rps->nb_refs = 0;
264     if (!sps->long_term_ref_pics_present_flag)
265         return 0;
266
267     if (sps->num_long_term_ref_pics_sps > 0)
268         nb_sps = get_ue_golomb_long(gb);
269     nb_sh = get_ue_golomb_long(gb);
270
271     if (nb_sps > sps->num_long_term_ref_pics_sps)
272         return AVERROR_INVALIDDATA;
273     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
274         return AVERROR_INVALIDDATA;
275
276     rps->nb_refs = nb_sh + nb_sps;
277
278     for (i = 0; i < rps->nb_refs; i++) {
279         uint8_t delta_poc_msb_present;
280
281         if (i < nb_sps) {
282             uint8_t lt_idx_sps = 0;
283
284             if (sps->num_long_term_ref_pics_sps > 1)
285                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
286
287             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
288             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
289         } else {
290             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
291             rps->used[i] = get_bits1(gb);
292         }
293
294         delta_poc_msb_present = get_bits1(gb);
295         if (delta_poc_msb_present) {
296             int64_t delta = get_ue_golomb_long(gb);
297             int64_t poc;
298
299             if (i && i != nb_sps)
300                 delta += prev_delta_msb;
301
302             poc = rps->poc[i] + s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
303             if (poc != (int32_t)poc)
304                 return AVERROR_INVALIDDATA;
305             rps->poc[i] = poc;
306             prev_delta_msb = delta;
307         }
308     }
309
310     return 0;
311 }
312
313 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
314                                  const HEVCSPS *sps)
315 {
316     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
317     const HEVCWindow *ow = &sps->output_window;
318     unsigned int num = 0, den = 0;
319
320     avctx->pix_fmt             = sps->pix_fmt;
321     avctx->coded_width         = sps->width;
322     avctx->coded_height        = sps->height;
323     avctx->width               = sps->width  - ow->left_offset - ow->right_offset;
324     avctx->height              = sps->height - ow->top_offset  - ow->bottom_offset;
325     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
326     avctx->profile             = sps->ptl.general_ptl.profile_idc;
327     avctx->level               = sps->ptl.general_ptl.level_idc;
328
329     ff_set_sar(avctx, sps->vui.sar);
330
331     if (sps->vui.video_signal_type_present_flag)
332         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
333                                                             : AVCOL_RANGE_MPEG;
334     else
335         avctx->color_range = AVCOL_RANGE_MPEG;
336
337     if (sps->vui.colour_description_present_flag) {
338         avctx->color_primaries = sps->vui.colour_primaries;
339         avctx->color_trc       = sps->vui.transfer_characteristic;
340         avctx->colorspace      = sps->vui.matrix_coeffs;
341     } else {
342         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
343         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
344         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
345     }
346
347     if (vps->vps_timing_info_present_flag) {
348         num = vps->vps_num_units_in_tick;
349         den = vps->vps_time_scale;
350     } else if (sps->vui.vui_timing_info_present_flag) {
351         num = sps->vui.vui_num_units_in_tick;
352         den = sps->vui.vui_time_scale;
353     }
354
355     if (num != 0 && den != 0)
356         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
357                   num, den, 1 << 30);
358 }
359
360 static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
361 {
362 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
363                      CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
364                      CONFIG_HEVC_NVDEC_HWACCEL + \
365                      CONFIG_HEVC_VAAPI_HWACCEL + \
366                      CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
367                      CONFIG_HEVC_VDPAU_HWACCEL)
368     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
369
370     switch (sps->pix_fmt) {
371     case AV_PIX_FMT_YUV420P:
372     case AV_PIX_FMT_YUVJ420P:
373 #if CONFIG_HEVC_DXVA2_HWACCEL
374         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
375 #endif
376 #if CONFIG_HEVC_D3D11VA_HWACCEL
377         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
378         *fmt++ = AV_PIX_FMT_D3D11;
379 #endif
380 #if CONFIG_HEVC_VAAPI_HWACCEL
381         *fmt++ = AV_PIX_FMT_VAAPI;
382 #endif
383 #if CONFIG_HEVC_VDPAU_HWACCEL
384         *fmt++ = AV_PIX_FMT_VDPAU;
385 #endif
386 #if CONFIG_HEVC_NVDEC_HWACCEL
387         *fmt++ = AV_PIX_FMT_CUDA;
388 #endif
389 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
390         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
391 #endif
392         break;
393     case AV_PIX_FMT_YUV420P10:
394 #if CONFIG_HEVC_DXVA2_HWACCEL
395         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
396 #endif
397 #if CONFIG_HEVC_D3D11VA_HWACCEL
398         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
399         *fmt++ = AV_PIX_FMT_D3D11;
400 #endif
401 #if CONFIG_HEVC_VAAPI_HWACCEL
402         *fmt++ = AV_PIX_FMT_VAAPI;
403 #endif
404 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
405         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
406 #endif
407 #if CONFIG_HEVC_NVDEC_HWACCEL
408         *fmt++ = AV_PIX_FMT_CUDA;
409 #endif
410         break;
411     case AV_PIX_FMT_YUV420P12:
412 #if CONFIG_HEVC_NVDEC_HWACCEL
413         *fmt++ = AV_PIX_FMT_CUDA;
414 #endif
415         break;
416     }
417
418     *fmt++ = sps->pix_fmt;
419     *fmt = AV_PIX_FMT_NONE;
420
421     return ff_thread_get_format(s->avctx, pix_fmts);
422 }
423
424 static int set_sps(HEVCContext *s, const HEVCSPS *sps,
425                    enum AVPixelFormat pix_fmt)
426 {
427     int ret, i;
428
429     pic_arrays_free(s);
430     s->ps.sps = NULL;
431     s->ps.vps = NULL;
432
433     if (!sps)
434         return 0;
435
436     ret = pic_arrays_init(s, sps);
437     if (ret < 0)
438         goto fail;
439
440     export_stream_params(s->avctx, &s->ps, sps);
441
442     s->avctx->pix_fmt = pix_fmt;
443
444     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
445     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
446     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
447
448     for (i = 0; i < 3; i++) {
449         av_freep(&s->sao_pixel_buffer_h[i]);
450         av_freep(&s->sao_pixel_buffer_v[i]);
451     }
452
453     if (sps->sao_enabled && !s->avctx->hwaccel) {
454         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
455         int c_idx;
456
457         for(c_idx = 0; c_idx < c_count; c_idx++) {
458             int w = sps->width >> sps->hshift[c_idx];
459             int h = sps->height >> sps->vshift[c_idx];
460             s->sao_pixel_buffer_h[c_idx] =
461                 av_malloc((w * 2 * sps->ctb_height) <<
462                           sps->pixel_shift);
463             s->sao_pixel_buffer_v[c_idx] =
464                 av_malloc((h * 2 * sps->ctb_width) <<
465                           sps->pixel_shift);
466         }
467     }
468
469     s->ps.sps = sps;
470     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
471
472     return 0;
473
474 fail:
475     pic_arrays_free(s);
476     s->ps.sps = NULL;
477     return ret;
478 }
479
480 static int hls_slice_header(HEVCContext *s)
481 {
482     GetBitContext *gb = &s->HEVClc->gb;
483     SliceHeader *sh   = &s->sh;
484     int i, ret;
485
486     // Coded parameters
487     sh->first_slice_in_pic_flag = get_bits1(gb);
488     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
489         s->seq_decode = (s->seq_decode + 1) & 0xff;
490         s->max_ra     = INT_MAX;
491         if (IS_IDR(s))
492             ff_hevc_clear_refs(s);
493     }
494     sh->no_output_of_prior_pics_flag = 0;
495     if (IS_IRAP(s))
496         sh->no_output_of_prior_pics_flag = get_bits1(gb);
497
498     sh->pps_id = get_ue_golomb_long(gb);
499     if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
500         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
501         return AVERROR_INVALIDDATA;
502     }
503     if (!sh->first_slice_in_pic_flag &&
504         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
505         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
506         return AVERROR_INVALIDDATA;
507     }
508     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
509     if (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos == 1)
510         sh->no_output_of_prior_pics_flag = 1;
511
512     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
513         const HEVCSPS *sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
514         const HEVCSPS *last_sps = s->ps.sps;
515         enum AVPixelFormat pix_fmt;
516
517         if (last_sps && IS_IRAP(s) && s->nal_unit_type != HEVC_NAL_CRA_NUT) {
518             if (sps->width != last_sps->width || sps->height != last_sps->height ||
519                 sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering !=
520                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
521                 sh->no_output_of_prior_pics_flag = 0;
522         }
523         ff_hevc_clear_refs(s);
524
525         ret = set_sps(s, sps, sps->pix_fmt);
526         if (ret < 0)
527             return ret;
528
529         pix_fmt = get_format(s, sps);
530         if (pix_fmt < 0)
531             return pix_fmt;
532         s->avctx->pix_fmt = pix_fmt;
533
534         s->seq_decode = (s->seq_decode + 1) & 0xff;
535         s->max_ra     = INT_MAX;
536     }
537
538     sh->dependent_slice_segment_flag = 0;
539     if (!sh->first_slice_in_pic_flag) {
540         int slice_address_length;
541
542         if (s->ps.pps->dependent_slice_segments_enabled_flag)
543             sh->dependent_slice_segment_flag = get_bits1(gb);
544
545         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
546                                             s->ps.sps->ctb_height);
547         sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
548         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
549             av_log(s->avctx, AV_LOG_ERROR,
550                    "Invalid slice segment address: %u.\n",
551                    sh->slice_segment_addr);
552             return AVERROR_INVALIDDATA;
553         }
554
555         if (!sh->dependent_slice_segment_flag) {
556             sh->slice_addr = sh->slice_segment_addr;
557             s->slice_idx++;
558         }
559     } else {
560         sh->slice_segment_addr = sh->slice_addr = 0;
561         s->slice_idx           = 0;
562         s->slice_initialized   = 0;
563     }
564
565     if (!sh->dependent_slice_segment_flag) {
566         s->slice_initialized = 0;
567
568         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
569             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
570
571         sh->slice_type = get_ue_golomb_long(gb);
572         if (!(sh->slice_type == HEVC_SLICE_I ||
573               sh->slice_type == HEVC_SLICE_P ||
574               sh->slice_type == HEVC_SLICE_B)) {
575             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
576                    sh->slice_type);
577             return AVERROR_INVALIDDATA;
578         }
579         if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I) {
580             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
581             return AVERROR_INVALIDDATA;
582         }
583
584         // when flag is not present, picture is inferred to be output
585         sh->pic_output_flag = 1;
586         if (s->ps.pps->output_flag_present_flag)
587             sh->pic_output_flag = get_bits1(gb);
588
589         if (s->ps.sps->separate_colour_plane_flag)
590             sh->colour_plane_id = get_bits(gb, 2);
591
592         if (!IS_IDR(s)) {
593             int poc, pos;
594
595             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
596             poc = ff_hevc_compute_poc(s->ps.sps, s->pocTid0, sh->pic_order_cnt_lsb, s->nal_unit_type);
597             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
598                 av_log(s->avctx, AV_LOG_WARNING,
599                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
600                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
601                     return AVERROR_INVALIDDATA;
602                 poc = s->poc;
603             }
604             s->poc = poc;
605
606             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
607             pos = get_bits_left(gb);
608             if (!sh->short_term_ref_pic_set_sps_flag) {
609                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
610                 if (ret < 0)
611                     return ret;
612
613                 sh->short_term_rps = &sh->slice_rps;
614             } else {
615                 int numbits, rps_idx;
616
617                 if (!s->ps.sps->nb_st_rps) {
618                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
619                     return AVERROR_INVALIDDATA;
620                 }
621
622                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
623                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
624                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
625             }
626             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
627
628             pos = get_bits_left(gb);
629             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
630             if (ret < 0) {
631                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
632                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
633                     return AVERROR_INVALIDDATA;
634             }
635             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
636
637             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
638                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
639             else
640                 sh->slice_temporal_mvp_enabled_flag = 0;
641         } else {
642             s->sh.short_term_rps = NULL;
643             s->poc               = 0;
644         }
645
646         /* 8.3.1 */
647         if (sh->first_slice_in_pic_flag && s->temporal_id == 0 &&
648             s->nal_unit_type != HEVC_NAL_TRAIL_N &&
649             s->nal_unit_type != HEVC_NAL_TSA_N   &&
650             s->nal_unit_type != HEVC_NAL_STSA_N  &&
651             s->nal_unit_type != HEVC_NAL_RADL_N  &&
652             s->nal_unit_type != HEVC_NAL_RADL_R  &&
653             s->nal_unit_type != HEVC_NAL_RASL_N  &&
654             s->nal_unit_type != HEVC_NAL_RASL_R)
655             s->pocTid0 = s->poc;
656
657         if (s->ps.sps->sao_enabled) {
658             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
659             if (s->ps.sps->chroma_format_idc) {
660                 sh->slice_sample_adaptive_offset_flag[1] =
661                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
662             }
663         } else {
664             sh->slice_sample_adaptive_offset_flag[0] = 0;
665             sh->slice_sample_adaptive_offset_flag[1] = 0;
666             sh->slice_sample_adaptive_offset_flag[2] = 0;
667         }
668
669         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
670         if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
671             int nb_refs;
672
673             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
674             if (sh->slice_type == HEVC_SLICE_B)
675                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
676
677             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
678                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
679                 if (sh->slice_type == HEVC_SLICE_B)
680                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
681             }
682             if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
683                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
684                        sh->nb_refs[L0], sh->nb_refs[L1]);
685                 return AVERROR_INVALIDDATA;
686             }
687
688             sh->rpl_modification_flag[0] = 0;
689             sh->rpl_modification_flag[1] = 0;
690             nb_refs = ff_hevc_frame_nb_refs(s);
691             if (!nb_refs) {
692                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
693                 return AVERROR_INVALIDDATA;
694             }
695
696             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
697                 sh->rpl_modification_flag[0] = get_bits1(gb);
698                 if (sh->rpl_modification_flag[0]) {
699                     for (i = 0; i < sh->nb_refs[L0]; i++)
700                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
701                 }
702
703                 if (sh->slice_type == HEVC_SLICE_B) {
704                     sh->rpl_modification_flag[1] = get_bits1(gb);
705                     if (sh->rpl_modification_flag[1] == 1)
706                         for (i = 0; i < sh->nb_refs[L1]; i++)
707                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
708                 }
709             }
710
711             if (sh->slice_type == HEVC_SLICE_B)
712                 sh->mvd_l1_zero_flag = get_bits1(gb);
713
714             if (s->ps.pps->cabac_init_present_flag)
715                 sh->cabac_init_flag = get_bits1(gb);
716             else
717                 sh->cabac_init_flag = 0;
718
719             sh->collocated_ref_idx = 0;
720             if (sh->slice_temporal_mvp_enabled_flag) {
721                 sh->collocated_list = L0;
722                 if (sh->slice_type == HEVC_SLICE_B)
723                     sh->collocated_list = !get_bits1(gb);
724
725                 if (sh->nb_refs[sh->collocated_list] > 1) {
726                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
727                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
728                         av_log(s->avctx, AV_LOG_ERROR,
729                                "Invalid collocated_ref_idx: %d.\n",
730                                sh->collocated_ref_idx);
731                         return AVERROR_INVALIDDATA;
732                     }
733                 }
734             }
735
736             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == HEVC_SLICE_P) ||
737                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) {
738                 int ret = pred_weight_table(s, gb);
739                 if (ret < 0)
740                     return ret;
741             }
742
743             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
744             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
745                 av_log(s->avctx, AV_LOG_ERROR,
746                        "Invalid number of merging MVP candidates: %d.\n",
747                        sh->max_num_merge_cand);
748                 return AVERROR_INVALIDDATA;
749             }
750         }
751
752         sh->slice_qp_delta = get_se_golomb(gb);
753
754         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
755             sh->slice_cb_qp_offset = get_se_golomb(gb);
756             sh->slice_cr_qp_offset = get_se_golomb(gb);
757         } else {
758             sh->slice_cb_qp_offset = 0;
759             sh->slice_cr_qp_offset = 0;
760         }
761
762         if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
763             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
764         else
765             sh->cu_chroma_qp_offset_enabled_flag = 0;
766
767         if (s->ps.pps->deblocking_filter_control_present_flag) {
768             int deblocking_filter_override_flag = 0;
769
770             if (s->ps.pps->deblocking_filter_override_enabled_flag)
771                 deblocking_filter_override_flag = get_bits1(gb);
772
773             if (deblocking_filter_override_flag) {
774                 sh->disable_deblocking_filter_flag = get_bits1(gb);
775                 if (!sh->disable_deblocking_filter_flag) {
776                     int beta_offset_div2 = get_se_golomb(gb);
777                     int tc_offset_div2   = get_se_golomb(gb) ;
778                     if (beta_offset_div2 < -6 || beta_offset_div2 > 6 ||
779                         tc_offset_div2   < -6 || tc_offset_div2   > 6) {
780                         av_log(s->avctx, AV_LOG_ERROR,
781                             "Invalid deblock filter offsets: %d, %d\n",
782                             beta_offset_div2, tc_offset_div2);
783                         return AVERROR_INVALIDDATA;
784                     }
785                     sh->beta_offset = beta_offset_div2 * 2;
786                     sh->tc_offset   =   tc_offset_div2 * 2;
787                 }
788             } else {
789                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
790                 sh->beta_offset                    = s->ps.pps->beta_offset;
791                 sh->tc_offset                      = s->ps.pps->tc_offset;
792             }
793         } else {
794             sh->disable_deblocking_filter_flag = 0;
795             sh->beta_offset                    = 0;
796             sh->tc_offset                      = 0;
797         }
798
799         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
800             (sh->slice_sample_adaptive_offset_flag[0] ||
801              sh->slice_sample_adaptive_offset_flag[1] ||
802              !sh->disable_deblocking_filter_flag)) {
803             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
804         } else {
805             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
806         }
807     } else if (!s->slice_initialized) {
808         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
809         return AVERROR_INVALIDDATA;
810     }
811
812     sh->num_entry_point_offsets = 0;
813     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
814         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
815         // It would be possible to bound this tighter but this here is simpler
816         if (num_entry_point_offsets > get_bits_left(gb)) {
817             av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
818             return AVERROR_INVALIDDATA;
819         }
820
821         sh->num_entry_point_offsets = num_entry_point_offsets;
822         if (sh->num_entry_point_offsets > 0) {
823             int offset_len = get_ue_golomb_long(gb) + 1;
824
825             if (offset_len < 1 || offset_len > 32) {
826                 sh->num_entry_point_offsets = 0;
827                 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
828                 return AVERROR_INVALIDDATA;
829             }
830
831             av_freep(&sh->entry_point_offset);
832             av_freep(&sh->offset);
833             av_freep(&sh->size);
834             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned));
835             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
836             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
837             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
838                 sh->num_entry_point_offsets = 0;
839                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
840                 return AVERROR(ENOMEM);
841             }
842             for (i = 0; i < sh->num_entry_point_offsets; i++) {
843                 unsigned val = get_bits_long(gb, offset_len);
844                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
845             }
846             if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
847                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
848                 s->threads_number = 1;
849             } else
850                 s->enable_parallel_tiles = 0;
851         } else
852             s->enable_parallel_tiles = 0;
853     }
854
855     if (s->ps.pps->slice_header_extension_present_flag) {
856         unsigned int length = get_ue_golomb_long(gb);
857         if (length*8LL > get_bits_left(gb)) {
858             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
859             return AVERROR_INVALIDDATA;
860         }
861         for (i = 0; i < length; i++)
862             skip_bits(gb, 8);  // slice_header_extension_data_byte
863     }
864
865     // Inferred parameters
866     sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
867     if (sh->slice_qp > 51 ||
868         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
869         av_log(s->avctx, AV_LOG_ERROR,
870                "The slice_qp %d is outside the valid range "
871                "[%d, 51].\n",
872                sh->slice_qp,
873                -s->ps.sps->qp_bd_offset);
874         return AVERROR_INVALIDDATA;
875     }
876
877     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
878
879     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
880         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
881         return AVERROR_INVALIDDATA;
882     }
883
884     if (get_bits_left(gb) < 0) {
885         av_log(s->avctx, AV_LOG_ERROR,
886                "Overread slice header by %d bits\n", -get_bits_left(gb));
887         return AVERROR_INVALIDDATA;
888     }
889
890     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
891
892     if (!s->ps.pps->cu_qp_delta_enabled_flag)
893         s->HEVClc->qp_y = s->sh.slice_qp;
894
895     s->slice_initialized = 1;
896     s->HEVClc->tu.cu_qp_offset_cb = 0;
897     s->HEVClc->tu.cu_qp_offset_cr = 0;
898
899     return 0;
900 }
901
/*
 * Fetch the element of a per-CTB table at CTB column x and row y; consecutive
 * rows are s->ps.sps->ctb_width entries apart. Expects a variable named `s`
 * to be in scope at the point of use.
 */
902 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
903
    /*
     * Assign one field (sao->elem) of the current CTB's SAO parameters.
     * `value` is evaluated only when neither merge flag is set; otherwise the
     * field is copied from the CTB to the left (rx-1, ry) or, if only the up
     * flag is set, from the CTB above (rx, ry-1).
     * The final else branch is unreachable: it would need both flags to be
     * zero, which the first test already catches.
     * Expects sao, s, rx, ry and both merge flags to be in scope.
     */
904 #define SET_SAO(elem, value)                            \
905 do {                                                    \
906     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
907         sao->elem = value;                              \
908     else if (sao_merge_left_flag)                       \
909         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
910     else if (sao_merge_up_flag)                         \
911         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
912     else                                                \
913         sao->elem = 0;                                  \
914 } while (0)
915
    /*
     * Fill in the SAO parameters of the CTB at column rx, row ry of s->sao.
     * Merge flags and, when no merge applies, the per-component parameters
     * come from the ff_hevc_sao_*_decode() helpers; the signed and scaled
     * offsets are then derived into offset_val.
     */
916 static void hls_sao_param(HEVCContext *s, int rx, int ry)
917 {
918     HEVCLocalContext *lc    = s->HEVClc;
919     int sao_merge_left_flag = 0;
920     int sao_merge_up_flag   = 0;
921     SAOParams *sao          = &CTB(s->sao, rx, ry);
922     int c_idx, i;
923
        /* Merge flags are read only when SAO is enabled in the slice header for
         * component 0 or 1, and only for a neighbour whose lc->ctb_*_flag is
         * set. The up flag is not read once merge-left has been selected. */
924     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
925         s->sh.slice_sample_adaptive_offset_flag[1]) {
926         if (rx > 0) {
927             if (lc->ctb_left_flag)
928                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
929         }
930         if (ry > 0 && !sao_merge_left_flag) {
931             if (lc->ctb_up_flag)
932                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
933         }
934     }
935
        /* One component when chroma_format_idc is 0, three otherwise. */
936     for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
937         int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
938                                                  s->ps.pps->log2_sao_offset_scale_chroma;
939
940         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
941             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
942             continue;
943         }
944
            /* Component 2 reuses the type and edge class of component 1
             * instead of reading its own. */
945         if (c_idx == 2) {
946             sao->type_idx[2] = sao->type_idx[1];
947             sao->eo_class[2] = sao->eo_class[1];
948         } else {
949             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
950         }
951
952         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
953             continue;
954
955         for (i = 0; i < 4; i++)
956             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
957
            /* Band type: a sign is fetched only for non-zero magnitudes, then
             * the band position. Otherwise the edge class is fetched, except
             * for component 2 where it was copied above. */
958         if (sao->type_idx[c_idx] == SAO_BAND) {
959             for (i = 0; i < 4; i++) {
960                 if (sao->offset_abs[c_idx][i]) {
961                     SET_SAO(offset_sign[c_idx][i],
962                             ff_hevc_sao_offset_sign_decode(s));
963                 } else {
964                     sao->offset_sign[c_idx][i] = 0;
965                 }
966             }
967             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
968         } else if (c_idx != 2) {
969             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
970         }
971
972         // Inferred parameters
            /* offset_val[0] is always 0; entries 1..4 hold the magnitudes with
             * their sign applied. For SAO_EDGE the last two entries are
             * negated and offset_sign is not consulted; for other types
             * offset_sign decides. Each value is finally multiplied by
             * 2^log2_sao_offset_scale. */
973         sao->offset_val[c_idx][0] = 0;
974         for (i = 0; i < 4; i++) {
975             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
976             if (sao->type_idx[c_idx] == SAO_EDGE) {
977                 if (i > 1)
978                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
979             } else if (sao->offset_sign[c_idx][i]) {
980                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
981             }
982             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
983         }
984     }
985 }
986
987 #undef SET_SAO
988 #undef CTB
989
990 static int hls_cross_component_pred(HEVCContext *s, int idx) {
991     HEVCLocalContext *lc    = s->HEVClc;
992     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
993
994     if (log2_res_scale_abs_plus1 !=  0) {
995         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
996         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
997                                (1 - 2 * res_scale_sign_flag);
998     } else {
999         lc->tu.res_scale_val = 0;
1000     }
1001
1002
1003     return 0;
1004 }
1005
/*
 * Process one transform unit: run intra prediction where the CU is intra
 * coded, read the QP delta and chroma QP offset syntax when due, and invoke
 * residual decoding for every plane whose coded-block flag is set.
 *
 * x0, y0            position handed to the luma prediction/residual calls and
 *                   to the chroma calls in the regular chroma path
 * xBase, yBase      position used for chroma in the blk_idx == 3 paths, which
 *                   are taken when log2_trafo_size <= 2 and chroma_format_idc
 *                   is neither 0 nor 3
 * cb_xBase, cb_yBase, log2_cb_size
 *                   forwarded to ff_hevc_set_qPy()
 * log2_trafo_size   log2 of the luma transform size
 * blk_idx           index (0..3) given by the caller to this unit
 * cbf_luma          non-zero if luma residual is to be decoded
 * cbf_cb, cbf_cr    two-entry flag arrays; entry 1 is consulted only when
 *                   chroma_format_idc == 2
 *
 * Returns 0, or AVERROR_INVALIDDATA when the decoded cu_qp_delta is outside
 * the range checked below.
 */
1006 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1007                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1008                               int log2_cb_size, int log2_trafo_size,
1009                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
1010 {
1011     HEVCLocalContext *lc = s->HEVClc;
1012     const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
1013     int i;
1014
         /* Intra CUs: luma prediction is done before any residual is handled. */
1015     if (lc->cu.pred_mode == MODE_INTRA) {
1016         int trafo_size = 1 << log2_trafo_size;
1017         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1018
1019         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1020     }
1021
         /* Taken when at least one plane has its coded-block flag set. */
1022     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
1023         (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1024         int scan_idx   = SCAN_DIAG;
1025         int scan_idx_c = SCAN_DIAG;
1026         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
1027                          (s->ps.sps->chroma_format_idc == 2 &&
1028                          (cbf_cb[1] || cbf_cr[1]));
1029
             /* The QP delta is read only while is_cu_qp_delta_coded is unset;
              * the flag is set right after the read. */
1030         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1031             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1032             if (lc->tu.cu_qp_delta != 0)
1033                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1034                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1035             lc->tu.is_cu_qp_delta_coded = 1;
1036
1037             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1038                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1039                 av_log(s->avctx, AV_LOG_ERROR,
1040                        "The cu_qp_delta %d is outside the valid range "
1041                        "[%d, %d].\n",
1042                        lc->tu.cu_qp_delta,
1043                        -(26 + s->ps.sps->qp_bd_offset / 2),
1044                         (25 + s->ps.sps->qp_bd_offset / 2));
1045                 return AVERROR_INVALIDDATA;
1046             }
1047
1048             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
1049         }
1050
             /* Chroma QP offset: read only if enabled for the slice, some
              * chroma residual is present, the CU is not transquant-bypassed
              * and is_cu_chroma_qp_offset_coded is still unset. The list
              * index stays 0 unless the PPS list has more than one entry. */
1051         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
1052             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
1053             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
1054             if (cu_chroma_qp_offset_flag) {
1055                 int cu_chroma_qp_offset_idx  = 0;
1056                 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
1057                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
1058                     av_log(s->avctx, AV_LOG_ERROR,
1059                         "cu_chroma_qp_offset_idx not yet tested.\n");
1060                 }
1061                 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
1062                 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
1063             } else {
1064                 lc->tu.cu_qp_offset_cb = 0;
1065                 lc->tu.cu_qp_offset_cr = 0;
1066             }
1067             lc->tu.is_cu_chroma_qp_offset_coded = 1;
1068         }
1069
             /* For intra blocks with log2_trafo_size < 4 the scan order follows
              * the prediction mode: modes 6..14 select SCAN_VERT, modes 22..30
              * select SCAN_HORIZ, anything else keeps SCAN_DIAG. Luma and
              * chroma are decided independently. */
1070         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1071             if (lc->tu.intra_pred_mode >= 6 &&
1072                 lc->tu.intra_pred_mode <= 14) {
1073                 scan_idx = SCAN_VERT;
1074             } else if (lc->tu.intra_pred_mode >= 22 &&
1075                        lc->tu.intra_pred_mode <= 30) {
1076                 scan_idx = SCAN_HORIZ;
1077             }
1078
1079             if (lc->tu.intra_pred_mode_c >=  6 &&
1080                 lc->tu.intra_pred_mode_c <= 14) {
1081                 scan_idx_c = SCAN_VERT;
1082             } else if (lc->tu.intra_pred_mode_c >= 22 &&
1083                        lc->tu.intra_pred_mode_c <= 30) {
1084                 scan_idx_c = SCAN_HORIZ;
1085             }
1086         }
1087
1088         lc->tu.cross_pf = 0;
1089
1090         if (cbf_luma)
1091             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
             /* Regular chroma path: chroma is handled at this transform level
              * whenever log2_trafo_size > 2 or chroma_format_idc == 3. */
1092         if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1093             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1094             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
                 /* Cross-component prediction needs the PPS flag, coded luma
                  * residual, and either an inter CU or chroma_mode_c == 4.
                  * NOTE(review): meaning of the constant 4 is not visible
                  * here - confirm against the chroma mode derivation. */
1095             lc->tu.cross_pf  = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1096                                 (lc->cu.pred_mode == MODE_INTER ||
1097                                  (lc->tu.chroma_mode_c ==  4)));
1098
1099             if (lc->tu.cross_pf) {
1100                 hls_cross_component_pred(s, 0);
1101             }
                 /* Plane 1: two vertically stacked blocks when
                  * chroma_format_idc == 2, otherwise one. */
1102             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1103                 if (lc->cu.pred_mode == MODE_INTRA) {
1104                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1105                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1106                 }
1107                 if (cbf_cb[i])
1108                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1109                                                 log2_trafo_size_c, scan_idx_c, 1);
1110                 else
                         /* No residual coded for this block: with cross-component
                          * prediction the residual is built from the values in
                          * lc->edge_emu_buffer scaled by res_scale_val / 8
                          * (presumably the luma residual left there by the
                          * residual decoder - TODO confirm).
                          * NOTE(review): the inner loop reuses the outer loop
                          * counter i. After it finishes i == size * size, so the
                          * enclosing for loop always ends after this branch. */
1111                     if (lc->tu.cross_pf) {
1112                         ptrdiff_t stride = s->frame->linesize[1];
1113                         int hshift = s->ps.sps->hshift[1];
1114                         int vshift = s->ps.sps->vshift[1];
1115                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1116                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1117                         int size = 1 << log2_trafo_size_c;
1118
1119                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1120                                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1121                         for (i = 0; i < (size * size); i++) {
1122                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1123                         }
1124                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1125                     }
1126             }
1127
1128             if (lc->tu.cross_pf) {
1129                 hls_cross_component_pred(s, 1);
1130             }
                 /* Plane 2: same sequence as for plane 1 above, including the
                  * reuse of i by the inner loop. */
1131             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1132                 if (lc->cu.pred_mode == MODE_INTRA) {
1133                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1134                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1135                 }
1136                 if (cbf_cr[i])
1137                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1138                                                 log2_trafo_size_c, scan_idx_c, 2);
1139                 else
1140                     if (lc->tu.cross_pf) {
1141                         ptrdiff_t stride = s->frame->linesize[2];
1142                         int hshift = s->ps.sps->hshift[2];
1143                         int vshift = s->ps.sps->vshift[2];
1144                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1145                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1146                         int size = 1 << log2_trafo_size_c;
1147
1148                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1149                                                           ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1150                         for (i = 0; i < (size * size); i++) {
1151                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1152                         }
1153                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1154                     }
1155             }
             /* Deferred chroma path (log2_trafo_size <= 2, chroma_format_idc
              * neither 0 nor 3): chroma is processed once, together with the
              * unit whose blk_idx is 3, at (xBase, yBase) and with the luma
              * transform size. */
1156         } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1157             int trafo_size_h = 1 << (log2_trafo_size + 1);
1158             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1159             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1160                 if (lc->cu.pred_mode == MODE_INTRA) {
1161                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1162                                                     trafo_size_h, trafo_size_v);
1163                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1164                 }
1165                 if (cbf_cb[i])
1166                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1167                                                 log2_trafo_size, scan_idx_c, 1);
1168             }
1169             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1170                 if (lc->cu.pred_mode == MODE_INTRA) {
1171                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1172                                                 trafo_size_h, trafo_size_v);
1173                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1174                 }
1175                 if (cbf_cr[i])
1176                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1177                                                 log2_trafo_size, scan_idx_c, 2);
1178             }
1179         }
         /* No plane has residual: intra CUs still get their chroma prediction,
          * using the same regular / deferred split as above. */
1180     } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1181         if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1182             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1183             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1184             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1185             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1186             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1187             if (s->ps.sps->chroma_format_idc == 2) {
1188                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1189                                                 trafo_size_h, trafo_size_v);
1190                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1191                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1192             }
1193         } else if (blk_idx == 3) {
1194             int trafo_size_h = 1 << (log2_trafo_size + 1);
1195             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1196             ff_hevc_set_neighbour_available(s, xBase, yBase,
1197                                             trafo_size_h, trafo_size_v);
1198             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1199             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1200             if (s->ps.sps->chroma_format_idc == 2) {
1201                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1202                                                 trafo_size_h, trafo_size_v);
1203                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1204                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1205             }
1206         }
1207     }
1208
1209     return 0;
1210 }
1211
1212 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1213 {
1214     int cb_size          = 1 << log2_cb_size;
1215     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1216
1217     int min_pu_width     = s->ps.sps->min_pu_width;
1218     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1219     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1220     int i, j;
1221
1222     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1223         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1224             s->is_pcm[i + j * min_pu_width] = 2;
1225 }
1226
/*
 * Recursively walk one node of the transform tree.
 *
 * The node either splits into four half-size children (recursing with
 * trafo_depth + 1 and blk_idx 0..3, passing this node's position as the
 * children's xBase/yBase) or becomes a leaf that is handed to
 * hls_transform_unit().
 *
 * x0, y0            position of this node
 * xBase, yBase      position of the parent node, forwarded to the leaf
 * cb_xBase, cb_yBase, log2_cb_size
 *                   forwarded unchanged down to hls_transform_unit()
 * log2_trafo_size   log2 of this node's size
 * trafo_depth       depth of this node, 0 at the first call
 * blk_idx           index of this node among its siblings
 * base_cbf_cb, base_cbf_cr
 *                   two-entry chroma coded-block flags of the parent; copied
 *                   locally and possibly overwritten by freshly read flags
 *
 * Returns 0, or the negative value returned by a child call or by
 * hls_transform_unit().
 */
1227 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1228                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1229                               int log2_cb_size, int log2_trafo_size,
1230                               int trafo_depth, int blk_idx,
1231                               const int *base_cbf_cb, const int *base_cbf_cr)
1232 {
1233     HEVCLocalContext *lc = s->HEVClc;
1234     uint8_t split_transform_flag;
1235     int cbf_cb[2];
1236     int cbf_cr[2];
1237     int ret;
1238
1239     cbf_cb[0] = base_cbf_cb[0];
1240     cbf_cb[1] = base_cbf_cb[1];
1241     cbf_cr[0] = base_cbf_cr[0];
1242     cbf_cr[1] = base_cbf_cr[1];
1243
         /* Pick the intra modes used by this subtree. With intra_split_flag set
          * they are chosen at depth 1 from the entry for blk_idx; the chroma
          * entries are per-block only when chroma_format_idc == 3, otherwise
          * entry 0 is used. Without the flag entry 0 is used at every depth. */
1244     if (lc->cu.intra_split_flag) {
1245         if (trafo_depth == 1) {
1246             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1247             if (s->ps.sps->chroma_format_idc == 3) {
1248                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1249                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1250             } else {
1251                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1252                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1253             }
1254         }
1255     } else {
1256         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1257         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1258         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1259     }
1260
         /* The split flag is read from the bitstream only when the size lies
          * within (log2_min_tb_size, log2_max_trafo_size], the depth limit is
          * not reached, and the split is not already forced by
          * intra_split_flag at depth 0. Otherwise it is inferred: split when
          * the node is larger than the maximum transform size, when
          * intra_split_flag forces it at depth 0, or for the inter case
          * computed in inter_split. */
1261     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1262         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1263         trafo_depth     < lc->cu.max_trafo_depth       &&
1264         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1265         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1266     } else {
1267         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1268                           lc->cu.pred_mode == MODE_INTER &&
1269                           lc->cu.part_mode != PART_2Nx2N &&
1270                           trafo_depth == 0;
1271
1272         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1273                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1274                                inter_split;
1275     }
1276
         /* Chroma coded-block flags are read at depth 0, or deeper only if the
          * flag inherited from the parent is set. For chroma_format_idc == 2 a
          * second flag per plane is read unless this node splits further at a
          * size other than log2_trafo_size == 3. */
1277     if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1278         if (trafo_depth == 0 || cbf_cb[0]) {
1279             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1280             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1281                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1282             }
1283         }
1284
1285         if (trafo_depth == 0 || cbf_cr[0]) {
1286             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1287             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1288                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1289             }
1290         }
1291     }
1292
1293     if (split_transform_flag) {
1294         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1295         const int x1 = x0 + trafo_size_split;
1296         const int y1 = y0 + trafo_size_split;
1297
     /* Recurse into one quadrant; a negative result aborts the whole walk. */
1298 #define SUBDIVIDE(x, y, idx)                                                    \
1299 do {                                                                            \
1300     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1301                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1302                              cbf_cb, cbf_cr);                                   \
1303     if (ret < 0)                                                                \
1304         return ret;                                                             \
1305 } while (0)
1306
             /* Quadrant order: top-left, top-right, bottom-left, bottom-right. */
1307         SUBDIVIDE(x0, y0, 0);
1308         SUBDIVIDE(x1, y0, 1);
1309         SUBDIVIDE(x0, y1, 2);
1310         SUBDIVIDE(x1, y1, 3);
1311
1312 #undef SUBDIVIDE
1313     } else {
1314         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1315         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1316         int min_tu_width     = s->ps.sps->min_tb_width;
1317         int cbf_luma         = 1;
1318
             /* cbf_luma keeps its default of 1 only for an inter CU at depth 0
              * with no chroma flag set; in every other case it is read. */
1319         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1320             cbf_cb[0] || cbf_cr[0] ||
1321             (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1322             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1323         }
1324
1325         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1326                                  log2_cb_size, log2_trafo_size,
1327                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1328         if (ret < 0)
1329             return ret;
1330         // TODO: store cbf_luma somewhere else
             /* Mark every minimum-size transform block covered by this leaf in
              * s->cbf_luma (rows are min_tb_width entries apart). */
1331         if (cbf_luma) {
1332             int i, j;
1333             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1334                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1335                     int x_tu = (x0 + j) >> log2_min_tu_size;
1336                     int y_tu = (y0 + i) >> log2_min_tu_size;
1337                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1338                 }
1339         }
             /* Deblocking bookkeeping is skipped entirely when the slice
              * disables the deblocking filter. */
1340         if (!s->sh.disable_deblocking_filter_flag) {
1341             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1342             if (s->ps.pps->transquant_bypass_enable_flag &&
1343                 lc->cu.cu_transquant_bypass_flag)
1344                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1345         }
1346     }
1347     return 0;
1348 }
1349
1350 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1351 {
1352     HEVCLocalContext *lc = s->HEVClc;
1353     GetBitContext gb;
1354     int cb_size   = 1 << log2_cb_size;
1355     ptrdiff_t stride0 = s->frame->linesize[0];
1356     ptrdiff_t stride1 = s->frame->linesize[1];
1357     ptrdiff_t stride2 = s->frame->linesize[2];
1358     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1359     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1360     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1361
1362     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1363                          (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1364                           ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1365                           s->ps.sps->pcm.bit_depth_chroma;
1366     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1367     int ret;
1368
1369     if (!s->sh.disable_deblocking_filter_flag)
1370         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1371
1372     ret = init_get_bits(&gb, pcm, length);
1373     if (ret < 0)
1374         return ret;
1375
1376     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1377     if (s->ps.sps->chroma_format_idc) {
1378         s->hevcdsp.put_pcm(dst1, stride1,
1379                            cb_size >> s->ps.sps->hshift[1],
1380                            cb_size >> s->ps.sps->vshift[1],
1381                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1382         s->hevcdsp.put_pcm(dst2, stride2,
1383                            cb_size >> s->ps.sps->hshift[2],
1384                            cb_size >> s->ps.sps->vshift[2],
1385                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1386     }
1387
1388     return 0;
1389 }
1390
1391 /**
1392  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1393  *
1394  * @param s HEVC decoding context
1395  * @param dst target buffer for block data at block position
1396  * @param dststride stride of the dst buffer
1397  * @param ref reference picture buffer at origin (0, 0)
1398  * @param mv motion vector (relative to block position) to get pixel data from
1399  * @param x_off horizontal position of block from origin (0, 0)
1400  * @param y_off vertical position of block from origin (0, 0)
1401  * @param block_w width of block
1402  * @param block_h height of block
1403  * @param luma_weight weighting factor applied to the luma prediction
1404  * @param luma_offset additive offset applied to the luma prediction value
1405  */
1406
1407 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1408                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1409                         int block_w, int block_h, int luma_weight, int luma_offset)
1410 {
1411     HEVCLocalContext *lc = s->HEVClc;
1412     uint8_t *src         = ref->data[0];
1413     ptrdiff_t srcstride  = ref->linesize[0];
1414     int pic_width        = s->ps.sps->width;
1415     int pic_height       = s->ps.sps->height;
1416     int mx               = mv->x & 3;
1417     int my               = mv->y & 3;
1418     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1419                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1420     int idx              = ff_hevc_pel_weight[block_w];
1421
1422     x_off += mv->x >> 2;
1423     y_off += mv->y >> 2;
1424     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1425
1426     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1427         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1428         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1429         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1430         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1431         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1432
1433         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1434                                  edge_emu_stride, srcstride,
1435                                  block_w + QPEL_EXTRA,
1436                                  block_h + QPEL_EXTRA,
1437                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1438                                  pic_width, pic_height);
1439         src = lc->edge_emu_buffer + buf_offset;
1440         srcstride = edge_emu_stride;
1441     }
1442
1443     if (!weight_flag)
1444         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1445                                                       block_h, mx, my, block_w);
1446     else
1447         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1448                                                         block_h, s->sh.luma_log2_weight_denom,
1449                                                         luma_weight, luma_offset, mx, my, block_w);
1450 }
1451
1452 /**
1453  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1454  *
1455  * @param s HEVC decoding context
1456  * @param dst target buffer for block data at block position
1457  * @param dststride stride of the dst buffer
1458  * @param ref0 reference picture0 buffer at origin (0, 0)
1459  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1460  * @param x_off horizontal position of block from origin (0, 0)
1461  * @param y_off vertical position of block from origin (0, 0)
1462  * @param block_w width of block
1463  * @param block_h height of block
1464  * @param ref1 reference picture1 buffer at origin (0, 0)
1465  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1466  * @param current_mv current motion vector structure
1467  */
1468  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1469                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1470                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1471 {
1472     HEVCLocalContext *lc = s->HEVClc;
1473     ptrdiff_t src0stride  = ref0->linesize[0];
1474     ptrdiff_t src1stride  = ref1->linesize[0];
1475     int pic_width        = s->ps.sps->width;
1476     int pic_height       = s->ps.sps->height;
1477     int mx0              = mv0->x & 3;
1478     int my0              = mv0->y & 3;
1479     int mx1              = mv1->x & 3;
1480     int my1              = mv1->y & 3;
1481     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1482                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1483     int x_off0           = x_off + (mv0->x >> 2);
1484     int y_off0           = y_off + (mv0->y >> 2);
1485     int x_off1           = x_off + (mv1->x >> 2);
1486     int y_off1           = y_off + (mv1->y >> 2);
1487     int idx              = ff_hevc_pel_weight[block_w];
1488
1489     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1490     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1491
1492     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1493         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1494         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1495         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1496         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1497         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1498
1499         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1500                                  edge_emu_stride, src0stride,
1501                                  block_w + QPEL_EXTRA,
1502                                  block_h + QPEL_EXTRA,
1503                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1504                                  pic_width, pic_height);
1505         src0 = lc->edge_emu_buffer + buf_offset;
1506         src0stride = edge_emu_stride;
1507     }
1508
1509     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1510         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1511         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1512         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1513         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1514         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1515
1516         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1517                                  edge_emu_stride, src1stride,
1518                                  block_w + QPEL_EXTRA,
1519                                  block_h + QPEL_EXTRA,
1520                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1521                                  pic_width, pic_height);
1522         src1 = lc->edge_emu_buffer2 + buf_offset;
1523         src1stride = edge_emu_stride;
1524     }
1525
1526     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1527                                                 block_h, mx0, my0, block_w);
1528     if (!weight_flag)
1529         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1530                                                        block_h, mx1, my1, block_w);
1531     else
1532         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1533                                                          block_h, s->sh.luma_log2_weight_denom,
1534                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1535                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1536                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1537                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1538                                                          mx1, my1, block_w);
1539
1540 }
1541
1542 /**
1543  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1544  *
1545  * @param s HEVC decoding context
1546  * @param dst1 target buffer for block data at block position (U plane)
1547  * @param dst2 target buffer for block data at block position (V plane)
1548  * @param dststride stride of the dst1 and dst2 buffers
1549  * @param ref reference picture buffer at origin (0, 0)
1550  * @param mv motion vector (relative to block position) to get pixel data from
1551  * @param x_off horizontal position of block from origin (0, 0)
1552  * @param y_off vertical position of block from origin (0, 0)
1553  * @param block_w width of block
1554  * @param block_h height of block
1555  * @param chroma_weight weighting factor applied to the chroma prediction
1556  * @param chroma_offset additive offset applied to the chroma prediction value
1557  */
1558
1559 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1560                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1561                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1562 {
1563     HEVCLocalContext *lc = s->HEVClc;
1564     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1565     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1566     const Mv *mv         = &current_mv->mv[reflist];
1567     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1568                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1569     int idx              = ff_hevc_pel_weight[block_w];
1570     int hshift           = s->ps.sps->hshift[1];
1571     int vshift           = s->ps.sps->vshift[1];
1572     intptr_t mx          = av_mod_uintp2(mv->x, 2 + hshift);
1573     intptr_t my          = av_mod_uintp2(mv->y, 2 + vshift);
1574     intptr_t _mx         = mx << (1 - hshift);
1575     intptr_t _my         = my << (1 - vshift);
1576
1577     x_off += mv->x >> (2 + hshift);
1578     y_off += mv->y >> (2 + vshift);
1579     src0  += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1580
1581     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1582         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1583         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1584         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1585         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1586         int buf_offset0 = EPEL_EXTRA_BEFORE *
1587                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1588         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1589                                  edge_emu_stride, srcstride,
1590                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1591                                  x_off - EPEL_EXTRA_BEFORE,
1592                                  y_off - EPEL_EXTRA_BEFORE,
1593                                  pic_width, pic_height);
1594
1595         src0 = lc->edge_emu_buffer + buf_offset0;
1596         srcstride = edge_emu_stride;
1597     }
1598     if (!weight_flag)
1599         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1600                                                   block_h, _mx, _my, block_w);
1601     else
1602         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1603                                                         block_h, s->sh.chroma_log2_weight_denom,
1604                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1605 }
1606
1607 /**
1608  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1609  *
1610  * @param s HEVC decoding context
1611  * @param dst target buffer for block data at block position
1612  * @param dststride stride of the dst buffer
1613  * @param ref0 reference picture0 buffer at origin (0, 0)
1614  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1615  * @param x_off horizontal position of block from origin (0, 0)
1616  * @param y_off vertical position of block from origin (0, 0)
1617  * @param block_w width of block
1618  * @param block_h height of block
1619  * @param ref1 reference picture1 buffer at origin (0, 0)
1620  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1621  * @param current_mv current motion vector structure
1622  * @param cidx chroma component(cb, cr)
1623  */
1624 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1625                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1626 {
1627     HEVCLocalContext *lc = s->HEVClc;
1628     uint8_t *src1        = ref0->data[cidx+1];
1629     uint8_t *src2        = ref1->data[cidx+1];
1630     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1631     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1632     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1633                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1634     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1635     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1636     Mv *mv0              = &current_mv->mv[0];
1637     Mv *mv1              = &current_mv->mv[1];
1638     int hshift = s->ps.sps->hshift[1];
1639     int vshift = s->ps.sps->vshift[1];
1640
1641     intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1642     intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1643     intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1644     intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1645     intptr_t _mx0 = mx0 << (1 - hshift);
1646     intptr_t _my0 = my0 << (1 - vshift);
1647     intptr_t _mx1 = mx1 << (1 - hshift);
1648     intptr_t _my1 = my1 << (1 - vshift);
1649
1650     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1651     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1652     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1653     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1654     int idx = ff_hevc_pel_weight[block_w];
1655     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1656     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1657
1658     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1659         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1660         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1661         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1662         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1663         int buf_offset1 = EPEL_EXTRA_BEFORE *
1664                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1665
1666         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1667                                  edge_emu_stride, src1stride,
1668                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1669                                  x_off0 - EPEL_EXTRA_BEFORE,
1670                                  y_off0 - EPEL_EXTRA_BEFORE,
1671                                  pic_width, pic_height);
1672
1673         src1 = lc->edge_emu_buffer + buf_offset1;
1674         src1stride = edge_emu_stride;
1675     }
1676
1677     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1678         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1679         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1680         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1681         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1682         int buf_offset1 = EPEL_EXTRA_BEFORE *
1683                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1684
1685         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1686                                  edge_emu_stride, src2stride,
1687                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1688                                  x_off1 - EPEL_EXTRA_BEFORE,
1689                                  y_off1 - EPEL_EXTRA_BEFORE,
1690                                  pic_width, pic_height);
1691
1692         src2 = lc->edge_emu_buffer2 + buf_offset1;
1693         src2stride = edge_emu_stride;
1694     }
1695
1696     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1697                                                 block_h, _mx0, _my0, block_w);
1698     if (!weight_flag)
1699         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1700                                                        src2, src2stride, lc->tmp,
1701                                                        block_h, _mx1, _my1, block_w);
1702     else
1703         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1704                                                          src2, src2stride, lc->tmp,
1705                                                          block_h,
1706                                                          s->sh.chroma_log2_weight_denom,
1707                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1708                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1709                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1710                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1711                                                          _mx1, _my1, block_w);
1712 }
1713
1714 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1715                                 const Mv *mv, int y0, int height)
1716 {
1717     if (s->threads_type == FF_THREAD_FRAME ) {
1718         int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1719
1720         ff_thread_await_progress(&ref->tf, y, 0);
1721     }
1722 }
1723
1724 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1725                                   int nPbH, int log2_cb_size, int part_idx,
1726                                   int merge_idx, MvField *mv)
1727 {
1728     HEVCLocalContext *lc = s->HEVClc;
1729     enum InterPredIdc inter_pred_idc = PRED_L0;
1730     int mvp_flag;
1731
1732     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1733     mv->pred_flag = 0;
1734     if (s->sh.slice_type == HEVC_SLICE_B)
1735         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1736
1737     if (inter_pred_idc != PRED_L1) {
1738         if (s->sh.nb_refs[L0])
1739             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1740
1741         mv->pred_flag = PF_L0;
1742         ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1743         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1744         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1745                                  part_idx, merge_idx, mv, mvp_flag, 0);
1746         mv->mv[0].x += lc->pu.mvd.x;
1747         mv->mv[0].y += lc->pu.mvd.y;
1748     }
1749
1750     if (inter_pred_idc != PRED_L0) {
1751         if (s->sh.nb_refs[L1])
1752             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1753
1754         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1755             AV_ZERO32(&lc->pu.mvd);
1756         } else {
1757             ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1758         }
1759
1760         mv->pred_flag += PF_L1;
1761         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1762         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1763                                  part_idx, merge_idx, mv, mvp_flag, 1);
1764         mv->mv[1].x += lc->pu.mvd.x;
1765         mv->mv[1].y += lc->pu.mvd.y;
1766     }
1767 }
1768
1769 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1770                                 int nPbW, int nPbH,
1771                                 int log2_cb_size, int partIdx, int idx)
1772 {
1773 #define POS(c_idx, x, y)                                                              \
1774     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1775                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1776     HEVCLocalContext *lc = s->HEVClc;
1777     int merge_idx = 0;
1778     struct MvField current_mv = {{{ 0 }}};
1779
1780     int min_pu_width = s->ps.sps->min_pu_width;
1781
1782     MvField *tab_mvf = s->ref->tab_mvf;
1783     RefPicList  *refPicList = s->ref->refPicList;
1784     HEVCFrame *ref0 = NULL, *ref1 = NULL;
1785     uint8_t *dst0 = POS(0, x0, y0);
1786     uint8_t *dst1 = POS(1, x0, y0);
1787     uint8_t *dst2 = POS(2, x0, y0);
1788     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1789     int min_cb_width     = s->ps.sps->min_cb_width;
1790     int x_cb             = x0 >> log2_min_cb_size;
1791     int y_cb             = y0 >> log2_min_cb_size;
1792     int x_pu, y_pu;
1793     int i, j;
1794
1795     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1796
1797     if (!skip_flag)
1798         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1799
1800     if (skip_flag || lc->pu.merge_flag) {
1801         if (s->sh.max_num_merge_cand > 1)
1802             merge_idx = ff_hevc_merge_idx_decode(s);
1803         else
1804             merge_idx = 0;
1805
1806         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1807                                    partIdx, merge_idx, &current_mv);
1808     } else {
1809         hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1810                               partIdx, merge_idx, &current_mv);
1811     }
1812
1813     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1814     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1815
1816     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1817         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1818             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1819
1820     if (current_mv.pred_flag & PF_L0) {
1821         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1822         if (!ref0)
1823             return;
1824         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1825     }
1826     if (current_mv.pred_flag & PF_L1) {
1827         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1828         if (!ref1)
1829             return;
1830         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1831     }
1832
1833     if (current_mv.pred_flag == PF_L0) {
1834         int x0_c = x0 >> s->ps.sps->hshift[1];
1835         int y0_c = y0 >> s->ps.sps->vshift[1];
1836         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1837         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1838
1839         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1840                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1841                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1842                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1843
1844         if (s->ps.sps->chroma_format_idc) {
1845             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1846                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1847                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1848             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1849                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1850                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1851         }
1852     } else if (current_mv.pred_flag == PF_L1) {
1853         int x0_c = x0 >> s->ps.sps->hshift[1];
1854         int y0_c = y0 >> s->ps.sps->vshift[1];
1855         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1856         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1857
1858         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1859                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1860                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1861                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1862
1863         if (s->ps.sps->chroma_format_idc) {
1864             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1865                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1866                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1867
1868             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1869                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1870                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1871         }
1872     } else if (current_mv.pred_flag == PF_BI) {
1873         int x0_c = x0 >> s->ps.sps->hshift[1];
1874         int y0_c = y0 >> s->ps.sps->vshift[1];
1875         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1876         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1877
1878         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1879                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1880                    ref1->frame, &current_mv.mv[1], &current_mv);
1881
1882         if (s->ps.sps->chroma_format_idc) {
1883             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1884                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1885
1886             chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1887                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1888         }
1889     }
1890 }
1891
1892 /**
1893  * 8.4.1
1894  */
1895 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1896                                 int prev_intra_luma_pred_flag)
1897 {
1898     HEVCLocalContext *lc = s->HEVClc;
1899     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1900     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1901     int min_pu_width     = s->ps.sps->min_pu_width;
1902     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1903     int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1904     int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
1905
1906     int cand_up   = (lc->ctb_up_flag || y0b) ?
1907                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1908     int cand_left = (lc->ctb_left_flag || x0b) ?
1909                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1910
1911     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1912
1913     MvField *tab_mvf = s->ref->tab_mvf;
1914     int intra_pred_mode;
1915     int candidate[3];
1916     int i, j;
1917
1918     // intra_pred_mode prediction does not cross vertical CTB boundaries
1919     if ((y0 - 1) < y_ctb)
1920         cand_up = INTRA_DC;
1921
1922     if (cand_left == cand_up) {
1923         if (cand_left < 2) {
1924             candidate[0] = INTRA_PLANAR;
1925             candidate[1] = INTRA_DC;
1926             candidate[2] = INTRA_ANGULAR_26;
1927         } else {
1928             candidate[0] = cand_left;
1929             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1930             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1931         }
1932     } else {
1933         candidate[0] = cand_left;
1934         candidate[1] = cand_up;
1935         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1936             candidate[2] = INTRA_PLANAR;
1937         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1938             candidate[2] = INTRA_DC;
1939         } else {
1940             candidate[2] = INTRA_ANGULAR_26;
1941         }
1942     }
1943
1944     if (prev_intra_luma_pred_flag) {
1945         intra_pred_mode = candidate[lc->pu.mpm_idx];
1946     } else {
1947         if (candidate[0] > candidate[1])
1948             FFSWAP(uint8_t, candidate[0], candidate[1]);
1949         if (candidate[0] > candidate[2])
1950             FFSWAP(uint8_t, candidate[0], candidate[2]);
1951         if (candidate[1] > candidate[2])
1952             FFSWAP(uint8_t, candidate[1], candidate[2]);
1953
1954         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1955         for (i = 0; i < 3; i++)
1956             if (intra_pred_mode >= candidate[i])
1957                 intra_pred_mode++;
1958     }
1959
1960     /* write the intra prediction units into the mv array */
1961     if (!size_in_pus)
1962         size_in_pus = 1;
1963     for (i = 0; i < size_in_pus; i++) {
1964         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1965                intra_pred_mode, size_in_pus);
1966
1967         for (j = 0; j < size_in_pus; j++) {
1968             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1969         }
1970     }
1971
1972     return intra_pred_mode;
1973 }
1974
1975 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1976                                           int log2_cb_size, int ct_depth)
1977 {
1978     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1979     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
1980     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
1981     int y;
1982
1983     for (y = 0; y < length; y++)
1984         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
1985                ct_depth, length);
1986 }
1987
/* Mode remapping table, indexed by intra prediction mode (0..34); used to
 * derive the chroma prediction mode when chroma_format_idc is 2. */
static const uint8_t tab_mode_idx[] = {
    /*  0.. 6 */  0,  1,  2,  2,  2,  2,  3,
    /*  7..13 */  5,  7,  8, 10, 12, 13, 15,
    /* 14..20 */ 17, 18, 19, 20, 21, 22, 23,
    /* 21..27 */ 23, 24, 24, 25, 25, 26, 27,
    /* 28..34 */ 27, 28, 28, 29, 29, 30, 31,
};
1991
1992 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1993                                   int log2_cb_size)
1994 {
1995     HEVCLocalContext *lc = s->HEVClc;
1996     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1997     uint8_t prev_intra_luma_pred_flag[4];
1998     int split   = lc->cu.part_mode == PART_NxN;
1999     int pb_size = (1 << log2_cb_size) >> split;
2000     int side    = split + 1;
2001     int chroma_mode;
2002     int i, j;
2003
2004     for (i = 0; i < side; i++)
2005         for (j = 0; j < side; j++)
2006             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2007
2008     for (i = 0; i < side; i++) {
2009         for (j = 0; j < side; j++) {
2010             if (prev_intra_luma_pred_flag[2 * i + j])
2011                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2012             else
2013                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2014
2015             lc->pu.intra_pred_mode[2 * i + j] =
2016                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2017                                      prev_intra_luma_pred_flag[2 * i + j]);
2018         }
2019     }
2020
2021     if (s->ps.sps->chroma_format_idc == 3) {
2022         for (i = 0; i < side; i++) {
2023             for (j = 0; j < side; j++) {
2024                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2025                 if (chroma_mode != 4) {
2026                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
2027                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
2028                     else
2029                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
2030                 } else {
2031                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
2032                 }
2033             }
2034         }
2035     } else if (s->ps.sps->chroma_format_idc == 2) {
2036         int mode_idx;
2037         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2038         if (chroma_mode != 4) {
2039             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2040                 mode_idx = 34;
2041             else
2042                 mode_idx = intra_chroma_table[chroma_mode];
2043         } else {
2044             mode_idx = lc->pu.intra_pred_mode[0];
2045         }
2046         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
2047     } else if (s->ps.sps->chroma_format_idc != 0) {
2048         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2049         if (chroma_mode != 4) {
2050             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2051                 lc->pu.intra_pred_mode_c[0] = 34;
2052             else
2053                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
2054         } else {
2055             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
2056         }
2057     }
2058 }
2059
2060 static void intra_prediction_unit_default_value(HEVCContext *s,
2061                                                 int x0, int y0,
2062                                                 int log2_cb_size)
2063 {
2064     HEVCLocalContext *lc = s->HEVClc;
2065     int pb_size          = 1 << log2_cb_size;
2066     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2067     int min_pu_width     = s->ps.sps->min_pu_width;
2068     MvField *tab_mvf     = s->ref->tab_mvf;
2069     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2070     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2071     int j, k;
2072
2073     if (size_in_pus == 0)
2074         size_in_pus = 1;
2075     for (j = 0; j < size_in_pus; j++)
2076         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2077     if (lc->cu.pred_mode == MODE_INTRA)
2078         for (j = 0; j < size_in_pus; j++)
2079             for (k = 0; k < size_in_pus; k++)
2080                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
2081 }
2082
2083 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2084 {
2085     int cb_size          = 1 << log2_cb_size;
2086     HEVCLocalContext *lc = s->HEVClc;
2087     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2088     int length           = cb_size >> log2_min_cb_size;
2089     int min_cb_width     = s->ps.sps->min_cb_width;
2090     int x_cb             = x0 >> log2_min_cb_size;
2091     int y_cb             = y0 >> log2_min_cb_size;
2092     int idx              = log2_cb_size - 2;
2093     int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2094     int x, y, ret;
2095
2096     lc->cu.x                = x0;
2097     lc->cu.y                = y0;
2098     lc->cu.pred_mode        = MODE_INTRA;
2099     lc->cu.part_mode        = PART_2Nx2N;
2100     lc->cu.intra_split_flag = 0;
2101
2102     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2103     for (x = 0; x < 4; x++)
2104         lc->pu.intra_pred_mode[x] = 1;
2105     if (s->ps.pps->transquant_bypass_enable_flag) {
2106         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2107         if (lc->cu.cu_transquant_bypass_flag)
2108             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2109     } else
2110         lc->cu.cu_transquant_bypass_flag = 0;
2111
2112     if (s->sh.slice_type != HEVC_SLICE_I) {
2113         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2114
2115         x = y_cb * min_cb_width + x_cb;
2116         for (y = 0; y < length; y++) {
2117             memset(&s->skip_flag[x], skip_flag, length);
2118             x += min_cb_width;
2119         }
2120         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2121     } else {
2122         x = y_cb * min_cb_width + x_cb;
2123         for (y = 0; y < length; y++) {
2124             memset(&s->skip_flag[x], 0, length);
2125             x += min_cb_width;
2126         }
2127     }
2128
2129     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2130         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2131         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2132
2133         if (!s->sh.disable_deblocking_filter_flag)
2134             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2135     } else {
2136         int pcm_flag = 0;
2137
2138         if (s->sh.slice_type != HEVC_SLICE_I)
2139             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2140         if (lc->cu.pred_mode != MODE_INTRA ||
2141             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2142             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2143             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2144                                       lc->cu.pred_mode == MODE_INTRA;
2145         }
2146
2147         if (lc->cu.pred_mode == MODE_INTRA) {
2148             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2149                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2150                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2151                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2152             }
2153             if (pcm_flag) {
2154                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2155                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2156                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2157                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2158
2159                 if (ret < 0)
2160                     return ret;
2161             } else {
2162                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2163             }
2164         } else {
2165             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2166             switch (lc->cu.part_mode) {
2167             case PART_2Nx2N:
2168                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2169                 break;
2170             case PART_2NxN:
2171                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2172                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2173                 break;
2174             case PART_Nx2N:
2175                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2176                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2177                 break;
2178             case PART_2NxnU:
2179                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2180                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2181                 break;
2182             case PART_2NxnD:
2183                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2184                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2185                 break;
2186             case PART_nLx2N:
2187                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2188                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2189                 break;
2190             case PART_nRx2N:
2191                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2192                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2193                 break;
2194             case PART_NxN:
2195                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2196                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2197                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2198                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2199                 break;
2200             }
2201         }
2202
2203         if (!pcm_flag) {
2204             int rqt_root_cbf = 1;
2205
2206             if (lc->cu.pred_mode != MODE_INTRA &&
2207                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2208                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2209             }
2210             if (rqt_root_cbf) {
2211                 const static int cbf[2] = { 0 };
2212                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2213                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2214                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2215                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2216                                          log2_cb_size,
2217                                          log2_cb_size, 0, 0, cbf, cbf);
2218                 if (ret < 0)
2219                     return ret;
2220             } else {
2221                 if (!s->sh.disable_deblocking_filter_flag)
2222                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2223             }
2224         }
2225     }
2226
2227     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2228         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2229
2230     x = y_cb * min_cb_width + x_cb;
2231     for (y = 0; y < length; y++) {
2232         memset(&s->qp_y_tab[x], lc->qp_y, length);
2233         x += min_cb_width;
2234     }
2235
2236     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2237        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2238         lc->qPy_pred = lc->qp_y;
2239     }
2240
2241     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
2242
2243     return 0;
2244 }
2245
2246 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2247                                int log2_cb_size, int cb_depth)
2248 {
2249     HEVCLocalContext *lc = s->HEVClc;
2250     const int cb_size    = 1 << log2_cb_size;
2251     int ret;
2252     int split_cu;
2253
2254     lc->ct_depth = cb_depth;
2255     if (x0 + cb_size <= s->ps.sps->width  &&
2256         y0 + cb_size <= s->ps.sps->height &&
2257         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2258         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2259     } else {
2260         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2261     }
2262     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2263         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2264         lc->tu.is_cu_qp_delta_coded = 0;
2265         lc->tu.cu_qp_delta          = 0;
2266     }
2267
2268     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2269         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2270         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2271     }
2272
2273     if (split_cu) {
2274         int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2275         const int cb_size_split = cb_size >> 1;
2276         const int x1 = x0 + cb_size_split;
2277         const int y1 = y0 + cb_size_split;
2278
2279         int more_data = 0;
2280
2281         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2282         if (more_data < 0)
2283             return more_data;
2284
2285         if (more_data && x1 < s->ps.sps->width) {
2286             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2287             if (more_data < 0)
2288                 return more_data;
2289         }
2290         if (more_data && y1 < s->ps.sps->height) {
2291             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2292             if (more_data < 0)
2293                 return more_data;
2294         }
2295         if (more_data && x1 < s->ps.sps->width &&
2296             y1 < s->ps.sps->height) {
2297             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2298             if (more_data < 0)
2299                 return more_data;
2300         }
2301
2302         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2303             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2304             lc->qPy_pred = lc->qp_y;
2305
2306         if (more_data)
2307             return ((x1 + cb_size_split) < s->ps.sps->width ||
2308                     (y1 + cb_size_split) < s->ps.sps->height);
2309         else
2310             return 0;
2311     } else {
2312         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2313         if (ret < 0)
2314             return ret;
2315         if ((!((x0 + cb_size) %
2316                (1 << (s->ps.sps->log2_ctb_size))) ||
2317              (x0 + cb_size >= s->ps.sps->width)) &&
2318             (!((y0 + cb_size) %
2319                (1 << (s->ps.sps->log2_ctb_size))) ||
2320              (y0 + cb_size >= s->ps.sps->height))) {
2321             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2322             return !end_of_slice_flag;
2323         } else {
2324             return 1;
2325         }
2326     }
2327
2328     return 0;
2329 }
2330
2331 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2332                                  int ctb_addr_ts)
2333 {
2334     HEVCLocalContext *lc  = s->HEVClc;
2335     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2336     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2337     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2338
2339     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2340
2341     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2342         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2343             lc->first_qp_group = 1;
2344         lc->end_of_tiles_x = s->ps.sps->width;
2345     } else if (s->ps.pps->tiles_enabled_flag) {
2346         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2347             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2348             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2349             lc->first_qp_group   = 1;
2350         }
2351     } else {
2352         lc->end_of_tiles_x = s->ps.sps->width;
2353     }
2354
2355     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2356
2357     lc->boundary_flags = 0;
2358     if (s->ps.pps->tiles_enabled_flag) {
2359         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2360             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2361         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2362             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2363         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2364             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2365         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2366             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2367     } else {
2368         if (ctb_addr_in_slice <= 0)
2369             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2370         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2371             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2372     }
2373
2374     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2375     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2376     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2377     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2378 }
2379
2380 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2381 {
2382     HEVCContext *s  = avctxt->priv_data;
2383     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2384     int more_data   = 1;
2385     int x_ctb       = 0;
2386     int y_ctb       = 0;
2387     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2388     int ret;
2389
2390     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2391         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2392         return AVERROR_INVALIDDATA;
2393     }
2394
2395     if (s->sh.dependent_slice_segment_flag) {
2396         int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2397         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2398             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2399             return AVERROR_INVALIDDATA;
2400         }
2401     }
2402
2403     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2404         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2405
2406         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2407         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2408         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2409
2410         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2411         if (ret < 0) {
2412             s->tab_slice_address[ctb_addr_rs] = -1;
2413             return ret;
2414         }
2415
2416         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2417
2418         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2419         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2420         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2421
2422         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2423         if (more_data < 0) {
2424             s->tab_slice_address[ctb_addr_rs] = -1;
2425             return more_data;
2426         }
2427
2428
2429         ctb_addr_ts++;
2430         ff_hevc_save_states(s, ctb_addr_ts);
2431         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2432     }
2433
2434     if (x_ctb + ctb_size >= s->ps.sps->width &&
2435         y_ctb + ctb_size >= s->ps.sps->height)
2436         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2437
2438     return ctb_addr_ts;
2439 }
2440
2441 static int hls_slice_data(HEVCContext *s)
2442 {
2443     int arg[2];
2444     int ret[2];
2445
2446     arg[0] = 0;
2447     arg[1] = 1;
2448
2449     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2450     return ret[0];
2451 }
2452 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2453 {
2454     HEVCContext *s1  = avctxt->priv_data, *s;
2455     HEVCLocalContext *lc;
2456     int ctb_size    = 1<< s1->ps.sps->log2_ctb_size;
2457     int more_data   = 1;
2458     int *ctb_row_p    = input_ctb_row;
2459     int ctb_row = ctb_row_p[job];
2460     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2461     int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2462     int thread = ctb_row % s1->threads_number;
2463     int ret;
2464
2465     s = s1->sList[self_id];
2466     lc = s->HEVClc;
2467
2468     if(ctb_row) {
2469         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2470         if (ret < 0)
2471             goto error;
2472         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2473     }
2474
2475     while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2476         int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2477         int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2478
2479         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2480
2481         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2482
2483         if (atomic_load(&s1->wpp_err)) {
2484             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2485             return 0;
2486         }
2487
2488         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2489         if (ret < 0)
2490             goto error;
2491         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2492         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2493
2494         if (more_data < 0) {
2495             ret = more_data;
2496             goto error;
2497         }
2498
2499         ctb_addr_ts++;
2500
2501         ff_hevc_save_states(s, ctb_addr_ts);
2502         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2503         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2504
2505         if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2506             atomic_store(&s1->wpp_err, 1);
2507             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2508             return 0;
2509         }
2510
2511         if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2512             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2513             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2514             return ctb_addr_ts;
2515         }
2516         ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2517         x_ctb+=ctb_size;
2518
2519         if(x_ctb >= s->ps.sps->width) {
2520             break;
2521         }
2522     }
2523     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2524
2525     return 0;
2526 error:
2527     s->tab_slice_address[ctb_addr_rs] = -1;
2528     atomic_store(&s1->wpp_err, 1);
2529     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2530     return ret;
2531 }
2532
2533 static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
2534 {
2535     const uint8_t *data = nal->data;
2536     int length          = nal->size;
2537     HEVCLocalContext *lc = s->HEVClc;
2538     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2539     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2540     int64_t offset;
2541     int64_t startheader, cmpt = 0;
2542     int i, j, res = 0;
2543
2544     if (!ret || !arg) {
2545         av_free(ret);
2546         av_free(arg);
2547         return AVERROR(ENOMEM);
2548     }
2549
2550     if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
2551         av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
2552             s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
2553             s->ps.sps->ctb_width, s->ps.sps->ctb_height
2554         );
2555         res = AVERROR_INVALIDDATA;
2556         goto error;
2557     }
2558
2559     ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2560
2561     if (!s->sList[1]) {
2562         for (i = 1; i < s->threads_number; i++) {
2563             s->sList[i] = av_malloc(sizeof(HEVCContext));
2564             memcpy(s->sList[i], s, sizeof(HEVCContext));
2565             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2566             s->sList[i]->HEVClc = s->HEVClcList[i];
2567         }
2568     }
2569
2570     offset = (lc->gb.index >> 3);
2571
2572     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2573         if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2574             startheader--;
2575             cmpt++;
2576         }
2577     }
2578
2579     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2580         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2581         for (j = 0, cmpt = 0, startheader = offset
2582              + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2583             if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2584                 startheader--;
2585                 cmpt++;
2586             }
2587         }
2588         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2589         s->sh.offset[i - 1] = offset;
2590
2591     }
2592     if (s->sh.num_entry_point_offsets != 0) {
2593         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2594         if (length < offset) {
2595             av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
2596             res = AVERROR_INVALIDDATA;
2597             goto error;
2598         }
2599         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2600         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2601
2602     }
2603     s->data = data;
2604
2605     for (i = 1; i < s->threads_number; i++) {
2606         s->sList[i]->HEVClc->first_qp_group = 1;
2607         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2608         memcpy(s->sList[i], s, sizeof(HEVCContext));
2609         s->sList[i]->HEVClc = s->HEVClcList[i];
2610     }
2611
2612     atomic_store(&s->wpp_err, 0);
2613     ff_reset_entries(s->avctx);
2614
2615     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2616         arg[i] = i;
2617         ret[i] = 0;
2618     }
2619
2620     if (s->ps.pps->entropy_coding_sync_enabled_flag)
2621         s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2622
2623     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2624         res += ret[i];
2625 error:
2626     av_free(ret);
2627     av_free(arg);
2628     return res;
2629 }
2630
2631 static int set_side_data(HEVCContext *s)
2632 {
2633     AVFrame *out = s->ref->frame;
2634
2635     if (s->sei.frame_packing.present &&
2636         s->sei.frame_packing.arrangement_type >= 3 &&
2637         s->sei.frame_packing.arrangement_type <= 5 &&
2638         s->sei.frame_packing.content_interpretation_type > 0 &&
2639         s->sei.frame_packing.content_interpretation_type < 3) {
2640         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2641         if (!stereo)
2642             return AVERROR(ENOMEM);
2643
2644         switch (s->sei.frame_packing.arrangement_type) {
2645         case 3:
2646             if (s->sei.frame_packing.quincunx_subsampling)
2647                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2648             else
2649                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2650             break;
2651         case 4:
2652             stereo->type = AV_STEREO3D_TOPBOTTOM;
2653             break;
2654         case 5:
2655             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2656             break;
2657         }
2658
2659         if (s->sei.frame_packing.content_interpretation_type == 2)
2660             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2661
2662         if (s->sei.frame_packing.arrangement_type == 5) {
2663             if (s->sei.frame_packing.current_frame_is_frame0_flag)
2664                 stereo->view = AV_STEREO3D_VIEW_LEFT;
2665             else
2666                 stereo->view = AV_STEREO3D_VIEW_RIGHT;
2667         }
2668     }
2669
2670     if (s->sei.display_orientation.present &&
2671         (s->sei.display_orientation.anticlockwise_rotation ||
2672          s->sei.display_orientation.hflip || s->sei.display_orientation.vflip)) {
2673         double angle = s->sei.display_orientation.anticlockwise_rotation * 360 / (double) (1 << 16);
2674         AVFrameSideData *rotation = av_frame_new_side_data(out,
2675                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2676                                                            sizeof(int32_t) * 9);
2677         if (!rotation)
2678             return AVERROR(ENOMEM);
2679
2680         av_display_rotation_set((int32_t *)rotation->data, angle);
2681         av_display_matrix_flip((int32_t *)rotation->data,
2682                                s->sei.display_orientation.hflip,
2683                                s->sei.display_orientation.vflip);
2684     }
2685
2686     // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2687     // so the side data persists for the entire coded video sequence.
2688     if (s->sei.mastering_display.present > 0 &&
2689         IS_IRAP(s) && s->no_rasl_output_flag) {
2690         s->sei.mastering_display.present--;
2691     }
2692     if (s->sei.mastering_display.present) {
2693         // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
2694         const int mapping[3] = {2, 0, 1};
2695         const int chroma_den = 50000;
2696         const int luma_den = 10000;
2697         int i;
2698         AVMasteringDisplayMetadata *metadata =
2699             av_mastering_display_metadata_create_side_data(out);
2700         if (!metadata)
2701             return AVERROR(ENOMEM);
2702
2703         for (i = 0; i < 3; i++) {
2704             const int j = mapping[i];
2705             metadata->display_primaries[i][0].num = s->sei.mastering_display.display_primaries[j][0];
2706             metadata->display_primaries[i][0].den = chroma_den;
2707             metadata->display_primaries[i][1].num = s->sei.mastering_display.display_primaries[j][1];
2708             metadata->display_primaries[i][1].den = chroma_den;
2709         }
2710         metadata->white_point[0].num = s->sei.mastering_display.white_point[0];
2711         metadata->white_point[0].den = chroma_den;
2712         metadata->white_point[1].num = s->sei.mastering_display.white_point[1];
2713         metadata->white_point[1].den = chroma_den;
2714
2715         metadata->max_luminance.num = s->sei.mastering_display.max_luminance;
2716         metadata->max_luminance.den = luma_den;
2717         metadata->min_luminance.num = s->sei.mastering_display.min_luminance;
2718         metadata->min_luminance.den = luma_den;
2719         metadata->has_luminance = 1;
2720         metadata->has_primaries = 1;
2721
2722         av_log(s->avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
2723         av_log(s->avctx, AV_LOG_DEBUG,
2724                "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
2725                av_q2d(metadata->display_primaries[0][0]),
2726                av_q2d(metadata->display_primaries[0][1]),
2727                av_q2d(metadata->display_primaries[1][0]),
2728                av_q2d(metadata->display_primaries[1][1]),
2729                av_q2d(metadata->display_primaries[2][0]),
2730                av_q2d(metadata->display_primaries[2][1]),
2731                av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
2732         av_log(s->avctx, AV_LOG_DEBUG,
2733                "min_luminance=%f, max_luminance=%f\n",
2734                av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
2735     }
2736     // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2737     // so the side data persists for the entire coded video sequence.
2738     if (s->sei.content_light.present > 0 &&
2739         IS_IRAP(s) && s->no_rasl_output_flag) {
2740         s->sei.content_light.present--;
2741     }
2742     if (s->sei.content_light.present) {
2743         AVContentLightMetadata *metadata =
2744             av_content_light_metadata_create_side_data(out);
2745         if (!metadata)
2746             return AVERROR(ENOMEM);
2747         metadata->MaxCLL  = s->sei.content_light.max_content_light_level;
2748         metadata->MaxFALL = s->sei.content_light.max_pic_average_light_level;
2749
2750         av_log(s->avctx, AV_LOG_DEBUG, "Content Light Level Metadata:\n");
2751         av_log(s->avctx, AV_LOG_DEBUG, "MaxCLL=%d, MaxFALL=%d\n",
2752                metadata->MaxCLL, metadata->MaxFALL);
2753     }
2754
2755     if (s->sei.a53_caption.a53_caption) {
2756         AVFrameSideData* sd = av_frame_new_side_data(out,
2757                                                      AV_FRAME_DATA_A53_CC,
2758                                                      s->sei.a53_caption.a53_caption_size);
2759         if (sd)
2760             memcpy(sd->data, s->sei.a53_caption.a53_caption, s->sei.a53_caption.a53_caption_size);
2761         av_freep(&s->sei.a53_caption.a53_caption);
2762         s->sei.a53_caption.a53_caption_size = 0;
2763         s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
2764     }
2765
2766     if (s->sei.alternative_transfer.present &&
2767         av_color_transfer_name(s->sei.alternative_transfer.preferred_transfer_characteristics) &&
2768         s->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) {
2769         s->avctx->color_trc = out->color_trc = s->sei.alternative_transfer.preferred_transfer_characteristics;
2770     }
2771
2772     return 0;
2773 }
2774
2775 static int hevc_frame_start(HEVCContext *s)
2776 {
2777     HEVCLocalContext *lc = s->HEVClc;
2778     int pic_size_in_ctb  = ((s->ps.sps->width  >> s->ps.sps->log2_min_cb_size) + 1) *
2779                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
2780     int ret;
2781
2782     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2783     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2784     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2785     memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2786     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2787
2788     s->is_decoded        = 0;
2789     s->first_nal_type    = s->nal_unit_type;
2790
2791     s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos);
2792
2793     if (s->ps.pps->tiles_enabled_flag)
2794         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2795
2796     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2797     if (ret < 0)
2798         goto fail;
2799
2800     ret = ff_hevc_frame_rps(s);
2801     if (ret < 0) {
2802         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2803         goto fail;
2804     }
2805
2806     s->ref->frame->key_frame = IS_IRAP(s);
2807
2808     ret = set_side_data(s);
2809     if (ret < 0)
2810         goto fail;
2811
2812     s->frame->pict_type = 3 - s->sh.slice_type;
2813
2814     if (!IS_IRAP(s))
2815         ff_hevc_bump_frame(s);
2816
2817     av_frame_unref(s->output_frame);
2818     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2819     if (ret < 0)
2820         goto fail;
2821
2822     if (!s->avctx->hwaccel)
2823         ff_thread_finish_setup(s->avctx);
2824
2825     return 0;
2826
2827 fail:
2828     if (s->ref)
2829         ff_hevc_unref_frame(s, s->ref, ~0);
2830     s->ref = NULL;
2831     return ret;
2832 }
2833
2834 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2835 {
2836     HEVCLocalContext *lc = s->HEVClc;
2837     GetBitContext *gb    = &lc->gb;
2838     int ctb_addr_ts, ret;
2839
2840     *gb              = nal->gb;
2841     s->nal_unit_type = nal->type;
2842     s->temporal_id   = nal->temporal_id;
2843
2844     switch (s->nal_unit_type) {
2845     case HEVC_NAL_VPS:
2846         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2847             ret = s->avctx->hwaccel->decode_params(s->avctx,
2848                                                    nal->type,
2849                                                    nal->raw_data,
2850                                                    nal->raw_size);
2851             if (ret < 0)
2852                 goto fail;
2853         }
2854         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2855         if (ret < 0)
2856             goto fail;
2857         break;
2858     case HEVC_NAL_SPS:
2859         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2860             ret = s->avctx->hwaccel->decode_params(s->avctx,
2861                                                    nal->type,
2862                                                    nal->raw_data,
2863                                                    nal->raw_size);
2864             if (ret < 0)
2865                 goto fail;
2866         }
2867         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2868                                      s->apply_defdispwin);
2869         if (ret < 0)
2870             goto fail;
2871         break;
2872     case HEVC_NAL_PPS:
2873         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2874             ret = s->avctx->hwaccel->decode_params(s->avctx,
2875                                                    nal->type,
2876                                                    nal->raw_data,
2877                                                    nal->raw_size);
2878             if (ret < 0)
2879                 goto fail;
2880         }
2881         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2882         if (ret < 0)
2883             goto fail;
2884         break;
2885     case HEVC_NAL_SEI_PREFIX:
2886     case HEVC_NAL_SEI_SUFFIX:
2887         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2888             ret = s->avctx->hwaccel->decode_params(s->avctx,
2889                                                    nal->type,
2890                                                    nal->raw_data,
2891                                                    nal->raw_size);
2892             if (ret < 0)
2893                 goto fail;
2894         }
2895         ret = ff_hevc_decode_nal_sei(gb, s->avctx, &s->sei, &s->ps, s->nal_unit_type);
2896         if (ret < 0)
2897             goto fail;
2898         break;
2899     case HEVC_NAL_TRAIL_R:
2900     case HEVC_NAL_TRAIL_N:
2901     case HEVC_NAL_TSA_N:
2902     case HEVC_NAL_TSA_R:
2903     case HEVC_NAL_STSA_N:
2904     case HEVC_NAL_STSA_R:
2905     case HEVC_NAL_BLA_W_LP:
2906     case HEVC_NAL_BLA_W_RADL:
2907     case HEVC_NAL_BLA_N_LP:
2908     case HEVC_NAL_IDR_W_RADL:
2909     case HEVC_NAL_IDR_N_LP:
2910     case HEVC_NAL_CRA_NUT:
2911     case HEVC_NAL_RADL_N:
2912     case HEVC_NAL_RADL_R:
2913     case HEVC_NAL_RASL_N:
2914     case HEVC_NAL_RASL_R:
2915         ret = hls_slice_header(s);
2916         if (ret < 0)
2917             return ret;
2918
2919         if (
2920             (s->avctx->skip_frame >= AVDISCARD_BIDIR && s->sh.slice_type == HEVC_SLICE_B) ||
2921             (s->avctx->skip_frame >= AVDISCARD_NONINTRA && s->sh.slice_type != HEVC_SLICE_I) ||
2922             (s->avctx->skip_frame >= AVDISCARD_NONKEY && !IS_IRAP(s))) {
2923             break;
2924         }
2925
2926         if (s->sh.first_slice_in_pic_flag) {
2927             if (s->max_ra == INT_MAX) {
2928                 if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
2929                     s->max_ra = s->poc;
2930                 } else {
2931                     if (IS_IDR(s))
2932                         s->max_ra = INT_MIN;
2933                 }
2934             }
2935
2936             if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
2937                 s->poc <= s->max_ra) {
2938                 s->is_decoded = 0;
2939                 break;
2940             } else {
2941                 if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
2942                     s->max_ra = INT_MIN;
2943             }
2944
2945             ret = hevc_frame_start(s);
2946             if (ret < 0)
2947                 return ret;
2948         } else if (!s->ref) {
2949             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2950             goto fail;
2951         }
2952
2953         if (s->nal_unit_type != s->first_nal_type) {
2954             av_log(s->avctx, AV_LOG_ERROR,
2955                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2956                    s->first_nal_type, s->nal_unit_type);
2957             return AVERROR_INVALIDDATA;
2958         }
2959
2960         if (!s->sh.dependent_slice_segment_flag &&
2961             s->sh.slice_type != HEVC_SLICE_I) {
2962             ret = ff_hevc_slice_rpl(s);
2963             if (ret < 0) {
2964                 av_log(s->avctx, AV_LOG_WARNING,
2965                        "Error constructing the reference lists for the current slice.\n");
2966                 goto fail;
2967             }
2968         }
2969
2970         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2971             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2972             if (ret < 0)
2973                 goto fail;
2974         }
2975
2976         if (s->avctx->hwaccel) {
2977             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2978             if (ret < 0)
2979                 goto fail;
2980         } else {
2981             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2982                 ctb_addr_ts = hls_slice_data_wpp(s, nal);
2983             else
2984                 ctb_addr_ts = hls_slice_data(s);
2985             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2986                 s->is_decoded = 1;
2987             }
2988
2989             if (ctb_addr_ts < 0) {
2990                 ret = ctb_addr_ts;
2991                 goto fail;
2992             }
2993         }
2994         break;
2995     case HEVC_NAL_EOS_NUT:
2996     case HEVC_NAL_EOB_NUT:
2997         s->seq_decode = (s->seq_decode + 1) & 0xff;
2998         s->max_ra     = INT_MAX;
2999         break;
3000     case HEVC_NAL_AUD:
3001     case HEVC_NAL_FD_NUT:
3002         break;
3003     default:
3004         av_log(s->avctx, AV_LOG_INFO,
3005                "Skipping NAL unit %d\n", s->nal_unit_type);
3006     }
3007
3008     return 0;
3009 fail:
3010     if (s->avctx->err_recognition & AV_EF_EXPLODE)
3011         return ret;
3012     return 0;
3013 }
3014
3015 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
3016 {
3017     int i, ret = 0;
3018     int eos_at_start = 1;
3019
3020     s->ref = NULL;
3021     s->last_eos = s->eos;
3022     s->eos = 0;
3023
3024     /* split the input packet into NAL units, so we know the upper bound on the
3025      * number of slices in the frame */
3026     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
3027                                 s->nal_length_size, s->avctx->codec_id, 1);
3028     if (ret < 0) {
3029         av_log(s->avctx, AV_LOG_ERROR,
3030                "Error splitting the input into NAL units.\n");
3031         return ret;
3032     }
3033
3034     for (i = 0; i < s->pkt.nb_nals; i++) {
3035         if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
3036             s->pkt.nals[i].type == HEVC_NAL_EOS_NUT) {
3037             if (eos_at_start) {
3038                 s->last_eos = 1;
3039             } else {
3040                 s->eos = 1;
3041             }
3042         } else {
3043             eos_at_start = 0;
3044         }
3045     }
3046
3047     /* decode the NAL units */
3048     for (i = 0; i < s->pkt.nb_nals; i++) {
3049         H2645NAL *nal = &s->pkt.nals[i];
3050
3051         if (s->avctx->skip_frame >= AVDISCARD_ALL ||
3052             (s->avctx->skip_frame >= AVDISCARD_NONREF
3053             && ff_hevc_nal_is_nonref(nal->type)))
3054             continue;
3055
3056         ret = decode_nal_unit(s, nal);
3057         if (ret < 0) {
3058             av_log(s->avctx, AV_LOG_WARNING,
3059                    "Error parsing NAL unit #%d.\n", i);
3060             goto fail;
3061         }
3062     }
3063
3064 fail:
3065     if (s->ref && s->threads_type == FF_THREAD_FRAME)
3066         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
3067
3068     return ret;
3069 }
3070
/**
 * Log a 16-byte MD5 digest as 32 lowercase hex digits.
 *
 * @param log_ctx logging context handed to av_log()
 * @param level   log level used for every digit pair
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end) {
        av_log(log_ctx, level, "%02"PRIx8, *byte);
        byte++;
    }
}
3077
3078 static int verify_md5(HEVCContext *s, AVFrame *frame)
3079 {
3080     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3081     int pixel_shift;
3082     int i, j;
3083
3084     if (!desc)
3085         return AVERROR(EINVAL);
3086
3087     pixel_shift = desc->comp[0].depth > 8;
3088
3089     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3090            s->poc);
3091
3092     /* the checksums are LE, so we have to byteswap for >8bpp formats
3093      * on BE arches */
3094 #if HAVE_BIGENDIAN
3095     if (pixel_shift && !s->checksum_buf) {
3096         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3097                        FFMAX3(frame->linesize[0], frame->linesize[1],
3098                               frame->linesize[2]));
3099         if (!s->checksum_buf)
3100             return AVERROR(ENOMEM);
3101     }
3102 #endif
3103
3104     for (i = 0; frame->data[i]; i++) {
3105         int width  = s->avctx->coded_width;
3106         int height = s->avctx->coded_height;
3107         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
3108         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3109         uint8_t md5[16];
3110
3111         av_md5_init(s->md5_ctx);
3112         for (j = 0; j < h; j++) {
3113             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3114 #if HAVE_BIGENDIAN
3115             if (pixel_shift) {
3116                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3117                                     (const uint16_t *) src, w);
3118                 src = s->checksum_buf;
3119             }
3120 #endif
3121             av_md5_update(s->md5_ctx, src, w << pixel_shift);
3122         }
3123         av_md5_final(s->md5_ctx, md5);
3124
3125         if (!memcmp(md5, s->sei.picture_hash.md5[i], 16)) {
3126             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3127             print_md5(s->avctx, AV_LOG_DEBUG, md5);
3128             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
3129         } else {
3130             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3131             print_md5(s->avctx, AV_LOG_ERROR, md5);
3132             av_log   (s->avctx, AV_LOG_ERROR, " != ");
3133             print_md5(s->avctx, AV_LOG_ERROR, s->sei.picture_hash.md5[i]);
3134             av_log   (s->avctx, AV_LOG_ERROR, "\n");
3135             return AVERROR_INVALIDDATA;
3136         }
3137     }
3138
3139     av_log(s->avctx, AV_LOG_DEBUG, "\n");
3140
3141     return 0;
3142 }
3143
3144 static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length, int first)
3145 {
3146     int ret, i;
3147
3148     ret = ff_hevc_decode_extradata(buf, length, &s->ps, &s->sei, &s->is_nalff,
3149                                    &s->nal_length_size, s->avctx->err_recognition,
3150                                    s->apply_defdispwin, s->avctx);
3151     if (ret < 0)
3152         return ret;
3153
3154     /* export stream parameters from the first SPS */
3155     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3156         if (first && s->ps.sps_list[i]) {
3157             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3158             export_stream_params(s->avctx, &s->ps, sps);
3159             break;
3160         }
3161     }
3162
3163     return 0;
3164 }
3165
3166 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3167                              AVPacket *avpkt)
3168 {
3169     int ret;
3170     int new_extradata_size;
3171     uint8_t *new_extradata;
3172     HEVCContext *s = avctx->priv_data;
3173
3174     if (!avpkt->size) {
3175         ret = ff_hevc_output_frame(s, data, 1);
3176         if (ret < 0)
3177             return ret;
3178
3179         *got_output = ret;
3180         return 0;
3181     }
3182
3183     new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
3184                                             &new_extradata_size);
3185     if (new_extradata && new_extradata_size > 0) {
3186         ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0);
3187         if (ret < 0)
3188             return ret;
3189     }
3190
3191     s->ref = NULL;
3192     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
3193     if (ret < 0)
3194         return ret;
3195
3196     if (avctx->hwaccel) {
3197         if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
3198             av_log(avctx, AV_LOG_ERROR,
3199                    "hardware accelerator failed to decode picture\n");
3200             ff_hevc_unref_frame(s, s->ref, ~0);
3201             return ret;
3202         }
3203     } else {
3204         /* verify the SEI checksum */
3205         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3206             s->sei.picture_hash.is_md5) {
3207             ret = verify_md5(s, s->ref->frame);
3208             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3209                 ff_hevc_unref_frame(s, s->ref, ~0);
3210                 return ret;
3211             }
3212         }
3213     }
3214     s->sei.picture_hash.is_md5 = 0;
3215
3216     if (s->is_decoded) {
3217         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3218         s->is_decoded = 0;
3219     }
3220
3221     if (s->output_frame->buf[0]) {
3222         av_frame_move_ref(data, s->output_frame);
3223         *got_output = 1;
3224     }
3225
3226     return avpkt->size;
3227 }
3228
3229 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3230 {
3231     int ret;
3232
3233     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3234     if (ret < 0)
3235         return ret;
3236
3237     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3238     if (!dst->tab_mvf_buf)
3239         goto fail;
3240     dst->tab_mvf = src->tab_mvf;
3241
3242     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3243     if (!dst->rpl_tab_buf)
3244         goto fail;
3245     dst->rpl_tab = src->rpl_tab;
3246
3247     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3248     if (!dst->rpl_buf)
3249         goto fail;
3250
3251     dst->poc        = src->poc;
3252     dst->ctb_count  = src->ctb_count;
3253     dst->flags      = src->flags;
3254     dst->sequence   = src->sequence;
3255
3256     if (src->hwaccel_picture_private) {
3257         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3258         if (!dst->hwaccel_priv_buf)
3259             goto fail;
3260         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
3261     }
3262
3263     return 0;
3264 fail:
3265     ff_hevc_unref_frame(s, dst, ~0);
3266     return AVERROR(ENOMEM);
3267 }
3268
3269 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3270 {
3271     HEVCContext       *s = avctx->priv_data;
3272     int i;
3273
3274     pic_arrays_free(s);
3275
3276     av_freep(&s->md5_ctx);
3277
3278     av_freep(&s->cabac_state);
3279
3280     for (i = 0; i < 3; i++) {
3281         av_freep(&s->sao_pixel_buffer_h[i]);
3282         av_freep(&s->sao_pixel_buffer_v[i]);
3283     }
3284     av_frame_free(&s->output_frame);
3285
3286     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3287         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3288         av_frame_free(&s->DPB[i].frame);
3289     }
3290
3291     ff_hevc_ps_uninit(&s->ps);
3292
3293     av_freep(&s->sh.entry_point_offset);
3294     av_freep(&s->sh.offset);
3295     av_freep(&s->sh.size);
3296
3297     for (i = 1; i < s->threads_number; i++) {
3298         HEVCLocalContext *lc = s->HEVClcList[i];
3299         if (lc) {
3300             av_freep(&s->HEVClcList[i]);
3301             av_freep(&s->sList[i]);
3302         }
3303     }
3304     if (s->HEVClc == s->HEVClcList[0])
3305         s->HEVClc = NULL;
3306     av_freep(&s->HEVClcList[0]);
3307
3308     ff_h2645_packet_uninit(&s->pkt);
3309
3310     return 0;
3311 }
3312
3313 static av_cold int hevc_init_context(AVCodecContext *avctx)
3314 {
3315     HEVCContext *s = avctx->priv_data;
3316     int i;
3317
3318     s->avctx = avctx;
3319
3320     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3321     if (!s->HEVClc)
3322         goto fail;
3323     s->HEVClcList[0] = s->HEVClc;
3324     s->sList[0] = s;
3325
3326     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3327     if (!s->cabac_state)
3328         goto fail;
3329
3330     s->output_frame = av_frame_alloc();
3331     if (!s->output_frame)
3332         goto fail;
3333
3334     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3335         s->DPB[i].frame = av_frame_alloc();
3336         if (!s->DPB[i].frame)
3337             goto fail;
3338         s->DPB[i].tf.f = s->DPB[i].frame;
3339     }
3340
3341     s->max_ra = INT_MAX;
3342
3343     s->md5_ctx = av_md5_alloc();
3344     if (!s->md5_ctx)
3345         goto fail;
3346
3347     ff_bswapdsp_init(&s->bdsp);
3348
3349     s->context_initialized = 1;
3350     s->eos = 0;
3351
3352     ff_hevc_reset_sei(&s->sei);
3353
3354     return 0;
3355
3356 fail:
3357     hevc_decode_free(avctx);
3358     return AVERROR(ENOMEM);
3359 }
3360
#if HAVE_THREADS
/**
 * Bring the decoder context of dst in sync with the one of src.
 *
 * Re-references every DPB picture and every VPS/SPS/PPS buffer from the
 * source context, activates the source SPS if it differs from the current
 * one, and copies the sequence/POC state, the NAL framing parameters, the
 * threading configuration and the SEI state listed at the end of the body.
 *
 * @param dst codec context to update
 * @param src codec context to copy from
 * @return 0 on success, a negative error code otherwise
 */
static int hevc_update_thread_context(AVCodecContext *dst,
                                      const AVCodecContext *src)
{
    HEVCContext *s  = dst->priv_data;
    HEVCContext *s0 = src->priv_data;
    int n, err;

    if (!s->context_initialized) {
        err = hevc_init_context(dst);
        if (err < 0)
            return err;
    }

    /* mirror the DPB of the source context */
    for (n = 0; n < FF_ARRAY_ELEMS(s->DPB); n++) {
        ff_hevc_unref_frame(s, &s->DPB[n], ~0);
        if (!s0->DPB[n].frame->buf[0])
            continue;
        err = hevc_ref_frame(s, &s->DPB[n], &s0->DPB[n]);
        if (err < 0)
            return err;
    }

    /* forget the active SPS if it is not the one the source context uses */
    if (s->ps.sps != s0->ps.sps)
        s->ps.sps = NULL;

    for (n = 0; n < FF_ARRAY_ELEMS(s->ps.vps_list); n++) {
        av_buffer_unref(&s->ps.vps_list[n]);
        if (!s0->ps.vps_list[n])
            continue;
        s->ps.vps_list[n] = av_buffer_ref(s0->ps.vps_list[n]);
        if (!s->ps.vps_list[n])
            return AVERROR(ENOMEM);
    }

    for (n = 0; n < FF_ARRAY_ELEMS(s->ps.sps_list); n++) {
        av_buffer_unref(&s->ps.sps_list[n]);
        if (!s0->ps.sps_list[n])
            continue;
        s->ps.sps_list[n] = av_buffer_ref(s0->ps.sps_list[n]);
        if (!s->ps.sps_list[n])
            return AVERROR(ENOMEM);
    }

    for (n = 0; n < FF_ARRAY_ELEMS(s->ps.pps_list); n++) {
        av_buffer_unref(&s->ps.pps_list[n]);
        if (!s0->ps.pps_list[n])
            continue;
        s->ps.pps_list[n] = av_buffer_ref(s0->ps.pps_list[n]);
        if (!s->ps.pps_list[n])
            return AVERROR(ENOMEM);
    }

    /* activate the source SPS now that the lists are in place */
    if (s->ps.sps != s0->ps.sps) {
        err = set_sps(s, s0->ps.sps, src->pix_fmt);
        if (err < 0)
            return err;
    }

    s->seq_decode          = s0->seq_decode;
    s->seq_output          = s0->seq_output;
    s->pocTid0             = s0->pocTid0;
    s->max_ra              = s0->max_ra;
    s->eos                 = s0->eos;
    s->no_rasl_output_flag = s0->no_rasl_output_flag;

    s->is_nalff        = s0->is_nalff;
    s->nal_length_size = s0->nal_length_size;

    s->threads_number = s0->threads_number;
    s->threads_type   = s0->threads_type;

    /* the source context ended a sequence: start a new one here */
    if (s0->eos) {
        s->seq_decode = (s->seq_decode + 1) & 0xff;
        s->max_ra     = INT_MAX;
    }

    s->sei.frame_packing        = s0->sei.frame_packing;
    s->sei.display_orientation  = s0->sei.display_orientation;
    s->sei.mastering_display    = s0->sei.mastering_display;
    s->sei.content_light        = s0->sei.content_light;
    s->sei.alternative_transfer = s0->sei.alternative_transfer;

    return 0;
}
#endif
3444
3445 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3446 {
3447     HEVCContext *s = avctx->priv_data;
3448     int ret;
3449
3450     avctx->internal->allocate_progress = 1;
3451
3452     ret = hevc_init_context(avctx);
3453     if (ret < 0)
3454         return ret;
3455
3456     s->enable_parallel_tiles = 0;
3457     s->sei.picture_timing.picture_struct = 0;
3458     s->eos = 1;
3459
3460     atomic_init(&s->wpp_err, 0);
3461
3462     if(avctx->active_thread_type & FF_THREAD_SLICE)
3463         s->threads_number = avctx->thread_count;
3464     else
3465         s->threads_number = 1;
3466
3467     if (avctx->extradata_size > 0 && avctx->extradata) {
3468         ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size, 1);
3469         if (ret < 0) {
3470             hevc_decode_free(avctx);
3471             return ret;
3472         }
3473     }
3474
3475     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3476             s->threads_type = FF_THREAD_FRAME;
3477         else
3478             s->threads_type = FF_THREAD_SLICE;
3479
3480     return 0;
3481 }
3482
#if HAVE_THREADS
/**
 * Initialise the private context of a frame-thread copy: zero it and run
 * the common context initialisation.
 *
 * @param avctx codec context of the thread copy
 * @return 0 on success, a negative error code otherwise
 */
static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
{
    HEVCContext *s = avctx->priv_data;
    int err;

    /* start from a zeroed context before initialising it */
    memset(s, 0, sizeof(*s));

    err = hevc_init_context(avctx);
    return err < 0 ? err : 0;
}
#endif
3498
3499 static void hevc_decode_flush(AVCodecContext *avctx)
3500 {
3501     HEVCContext *s = avctx->priv_data;
3502     ff_hevc_flush_dpb(s);
3503     s->max_ra = INT_MAX;
3504     s->eos = 1;
3505 }
3506
3507 #define OFFSET(x) offsetof(HEVCContext, x)
3508 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3509
3510 static const AVOption options[] = {
3511     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3512         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3513     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3514         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3515     { NULL },
3516 };
3517
3518 static const AVClass hevc_decoder_class = {
3519     .class_name = "HEVC decoder",
3520     .item_name  = av_default_item_name,
3521     .option     = options,
3522     .version    = LIBAVUTIL_VERSION_INT,
3523 };
3524
3525 AVCodec ff_hevc_decoder = {
3526     .name                  = "hevc",
3527     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3528     .type                  = AVMEDIA_TYPE_VIDEO,
3529     .id                    = AV_CODEC_ID_HEVC,
3530     .priv_data_size        = sizeof(HEVCContext),
3531     .priv_class            = &hevc_decoder_class,
3532     .init                  = hevc_decode_init,
3533     .close                 = hevc_decode_free,
3534     .decode                = hevc_decode_frame,
3535     .flush                 = hevc_decode_flush,
3536     .update_thread_context = ONLY_IF_THREADS_ENABLED(hevc_update_thread_context),
3537     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(hevc_init_thread_copy),
3538     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3539                              AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3540     .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING,
3541     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3542     .hw_configs            = (const AVCodecHWConfigInternal*[]) {
3543 #if CONFIG_HEVC_DXVA2_HWACCEL
3544                                HWACCEL_DXVA2(hevc),
3545 #endif
3546 #if CONFIG_HEVC_D3D11VA_HWACCEL
3547                                HWACCEL_D3D11VA(hevc),
3548 #endif
3549 #if CONFIG_HEVC_D3D11VA2_HWACCEL
3550                                HWACCEL_D3D11VA2(hevc),
3551 #endif
3552 #if CONFIG_HEVC_NVDEC_HWACCEL
3553                                HWACCEL_NVDEC(hevc),
3554 #endif
3555 #if CONFIG_HEVC_VAAPI_HWACCEL
3556                                HWACCEL_VAAPI(hevc),
3557 #endif
3558 #if CONFIG_HEVC_VDPAU_HWACCEL
3559                                HWACCEL_VDPAU(hevc),
3560 #endif
3561 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
3562                                HWACCEL_VIDEOTOOLBOX(hevc),
3563 #endif
3564                                NULL
3565                            },
3566 };