4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 * Copyright (C) 2012 - 2013 Mickael Raulet
6 * Copyright (C) 2012 - 2013 Gildas Cocherel
7 * Copyright (C) 2012 - 2013 Wassim Hamidouche
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/atomic.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/common.h"
29 #include "libavutil/display.h"
30 #include "libavutil/internal.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
/* Maps a prediction-block width (valid values 2..64 listed below) to a
 * compact 0..9 index. NOTE(review): presumably used to index per-width
 * DSP/weighted-prediction function tables — confirm against hevcdsp;
 * unlisted widths implicitly map to 0. */
42 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
45 * NOTE: Each function hls_foo correspond to the function foo in the
46 * specification (HLS stands for High Level Syntax).
53 /* free everything allocated by pic_arrays_init() */
/* Free every per-frame array allocated by pic_arrays_init().
 * av_freep() also NULLs each pointer, so a later re-init (e.g. on a
 * resolution change) starts from a clean state. */
54 static void pic_arrays_free(HEVCContext *s)
57     av_freep(&s->deblock);
/* per-CB coding maps: skip flags and coding-tree depth */
59     av_freep(&s->skip_flag);
60     av_freep(&s->tab_ct_depth);
/* intra prediction modes and luma coded-block flags */
62     av_freep(&s->tab_ipm);
63     av_freep(&s->cbf_luma);
66     av_freep(&s->qp_y_tab);
67     av_freep(&s->tab_slice_address);
68     av_freep(&s->filter_slice_edges);
/* deblocking boundary-strength maps */
70     av_freep(&s->horizontal_bs);
71     av_freep(&s->vertical_bs);
/* slice-header entry-point bookkeeping (tiles / WPP) */
73     av_freep(&s->sh.entry_point_offset);
74     av_freep(&s->sh.size);
75     av_freep(&s->sh.offset);
/* buffer pools: uninit drops the pool; outstanding buffers are
 * released when their last reference goes away */
77     av_buffer_pool_uninit(&s->tab_mvf_pool);
78     av_buffer_pool_uninit(&s->rpl_tab_pool);
81 /* allocate arrays that depend on frame dimensions */
/* Allocate the arrays whose sizes depend on the SPS geometry (CTB grid,
 * minimum CB/TB/PU grids). Returns 0 on success or AVERROR(ENOMEM);
 * allocation failures route to a cleanup path that frees whatever was
 * already allocated. */
82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
84     int log2_min_cb_size = sps->log2_min_cb_size;
85     int width = sps->width;
86     int height = sps->height;
/* +1 in each dimension to cover pictures not aligned to the CB grid */
87     int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
88                           ((height >> log2_min_cb_size) + 1);
89     int ctb_count = sps->ctb_width * sps->ctb_height;
90     int min_pu_size = sps->min_pu_width * sps->min_pu_height;
/* boundary-strength maps live on an 8x8-sample grid */
92     s->bs_width = width >> 3;
93     s->bs_height = height >> 3;
/* one SAO / deblock parameter set per CTB; zeroed on allocation */
95     s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
96     s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
97     if (!s->sao || !s->deblock)
100     s->skip_flag = av_malloc(pic_size_in_ctb);
101     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
102     if (!s->skip_flag || !s->tab_ct_depth)
/* luma cbf per min-TB; intra mode and PCM flag per min-PU */
105     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
106     s->tab_ipm = av_mallocz(min_pu_size);
107     s->is_pcm = av_malloc(min_pu_size);
108     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
111     s->filter_slice_edges = av_malloc(ctb_count);
112     s->tab_slice_address = av_malloc_array(pic_size_in_ctb,
113                                           sizeof(*s->tab_slice_address));
114     s->qp_y_tab = av_malloc_array(pic_size_in_ctb,
115                                   sizeof(*s->qp_y_tab));
116     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
/* two boundary-strength entries per 8x8 edge position, zero-filled */
119     s->horizontal_bs = av_mallocz_array(2 * s->bs_width, (s->bs_height + 1));
120     s->vertical_bs = av_mallocz_array(2 * s->bs_width, (s->bs_height + 1));
121     if (!s->horizontal_bs || !s->vertical_bs)
/* pooled per-frame buffers, reused across decoded pictures */
124     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
126     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
128     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
135     return AVERROR(ENOMEM);
/* Parse the slice-header prediction weight table (pred_weight_table(),
 * HEVC 7.3.6.3): explicit luma/chroma weights and offsets for each
 * active reference in list L0, and in L1 for B slices. References
 * whose flag is 0 get the default weight (1 << log2 denom) and offset 0. */
138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
/* NOTE(review): these flag arrays hold 16 entries; assumes nb_refs[]
 * was already capped (MAX_REFS check in hls_slice_header) — confirm. */
142     uint8_t luma_weight_l0_flag[16];
143     uint8_t chroma_weight_l0_flag[16];
144     uint8_t luma_weight_l1_flag[16];
145     uint8_t chroma_weight_l1_flag[16];
147     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
/* chroma denom is coded as a signed delta against the luma denom */
148     if (s->sps->chroma_format_idc != 0) {
149         int delta = get_se_golomb(gb);
150         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
/* L0 luma flags; absent weights default to 1 << denom, offset 0 */
153     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
154         luma_weight_l0_flag[i] = get_bits1(gb);
155         if (!luma_weight_l0_flag[i]) {
156             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
157             s->sh.luma_offset_l0[i] = 0;
160     if (s->sps->chroma_format_idc != 0) {
161         for (i = 0; i < s->sh.nb_refs[L0]; i++)
162             chroma_weight_l0_flag[i] = get_bits1(gb);
164         for (i = 0; i < s->sh.nb_refs[L0]; i++)
165             chroma_weight_l0_flag[i] = 0;
/* L0 weight/offset payloads for the references flagged above */
167     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
168         if (luma_weight_l0_flag[i]) {
169             int delta_luma_weight_l0 = get_se_golomb(gb);
170             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
171             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
173         if (chroma_weight_l0_flag[i]) {
174             for (j = 0; j < 2; j++) {
175                 int delta_chroma_weight_l0 = get_se_golomb(gb);
176                 int delta_chroma_offset_l0 = get_se_golomb(gb);
177                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
/* chroma offset reconstructed per spec 7.4.7.3, clipped to [-128,127] */
178                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
179                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
182             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
183             s->sh.chroma_offset_l0[i][0] = 0;
184             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
185             s->sh.chroma_offset_l0[i][1] = 0;
/* L1: identical scheme, only present for B slices */
188     if (s->sh.slice_type == B_SLICE) {
189         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
190             luma_weight_l1_flag[i] = get_bits1(gb);
191             if (!luma_weight_l1_flag[i]) {
192                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
193                 s->sh.luma_offset_l1[i] = 0;
196         if (s->sps->chroma_format_idc != 0) {
197             for (i = 0; i < s->sh.nb_refs[L1]; i++)
198                 chroma_weight_l1_flag[i] = get_bits1(gb);
200             for (i = 0; i < s->sh.nb_refs[L1]; i++)
201                 chroma_weight_l1_flag[i] = 0;
203         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
204             if (luma_weight_l1_flag[i]) {
205                 int delta_luma_weight_l1 = get_se_golomb(gb);
206                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
207                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
209             if (chroma_weight_l1_flag[i]) {
210                 for (j = 0; j < 2; j++) {
211                     int delta_chroma_weight_l1 = get_se_golomb(gb);
212                     int delta_chroma_offset_l1 = get_se_golomb(gb);
213                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
214                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
215                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
218                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
219                 s->sh.chroma_offset_l1[i][0] = 0;
220                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
221                 s->sh.chroma_offset_l1[i][1] = 0;
/* Parse the long-term reference picture set from the slice header:
 * the first nb_sps entries index POCs pre-declared in the SPS LT-RPS
 * table, the following nb_sh entries are coded inline. Returns
 * AVERROR_INVALIDDATA if the combined count overflows rps->poc. */
227 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
229     const HEVCSPS *sps = s->sps;
230     int max_poc_lsb = 1 << sps->log2_max_poc_lsb;
231     int prev_delta_msb = 0;
232     unsigned int nb_sps = 0, nb_sh;
236     if (!sps->long_term_ref_pics_present_flag)
239     if (sps->num_long_term_ref_pics_sps > 0)
240         nb_sps = get_ue_golomb_long(gb);
241     nb_sh = get_ue_golomb_long(gb);
/* sum in 64 bits so the bounds check cannot be defeated by wraparound */
243     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
244         return AVERROR_INVALIDDATA;
246     rps->nb_refs = nb_sh + nb_sps;
248     for (i = 0; i < rps->nb_refs; i++) {
249         uint8_t delta_poc_msb_present;
/* entry taken from the SPS table (selected by lt_idx_sps) */
252             uint8_t lt_idx_sps = 0;
254             if (sps->num_long_term_ref_pics_sps > 1)
255                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
257             rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
258             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
/* entry coded inline in the slice header */
260             rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb);
261             rps->used[i] = get_bits1(gb);
264         delta_poc_msb_present = get_bits1(gb);
265         if (delta_poc_msb_present) {
266             int delta = get_ue_golomb_long(gb);
/* MSB deltas accumulate within each group; the accumulator restarts
 * at the first entry of each group (i == 0 and i == nb_sps) */
268             if (i && i != nb_sps)
269                 delta += prev_delta_msb;
/* turn the LSB value into a full POC relative to the current picture */
271             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
272             prev_delta_msb = delta;
/* Activate a new SPS: (re)allocate the frame-dependent arrays, export
 * geometry / colour / timing information to the AVCodecContext, and
 * reinitialize the bit-depth dependent DSP function tables. */
279 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
282     unsigned int num = 0, den = 0;
285     ret = pic_arrays_init(s, sps);
/* coded size may exceed the cropped output size */
289     s->avctx->coded_width = sps->width;
290     s->avctx->coded_height = sps->height;
291     s->avctx->width = sps->output_width;
292     s->avctx->height = sps->output_height;
293     s->avctx->pix_fmt = sps->pix_fmt;
294     s->avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
296     ff_set_sar(s->avctx, sps->vui.sar);
/* colour range / description from the VUI, with defaults otherwise */
298     if (sps->vui.video_signal_type_present_flag)
299         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
302         s->avctx->color_range = AVCOL_RANGE_MPEG;
304     if (sps->vui.colour_description_present_flag) {
305         s->avctx->color_primaries = sps->vui.colour_primaries;
306         s->avctx->color_trc = sps->vui.transfer_characteristic;
307         s->avctx->colorspace = sps->vui.matrix_coeffs;
309         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
310         s->avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
311         s->avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
/* function tables depend on the bit depth, so re-init on SPS change */
314     ff_hevc_pred_init(&s->hpc, sps->bit_depth);
315     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
316     ff_videodsp_init (&s->vdsp, sps->bit_depth);
/* SAO needs a scratch frame; acquire it once per SPS activation */
318     if (sps->sao_enabled) {
319         av_frame_unref(s->tmp_frame);
320         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
323         s->frame = s->tmp_frame;
/* NOTE(review): assumes vps_list[sps->vps_id] is populated — confirm
 * that SPS parsing validates the VPS id before we get here */
327     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
/* VPS timing info takes precedence over VUI timing info */
329     if (s->vps->vps_timing_info_present_flag) {
330         num = s->vps->vps_num_units_in_tick;
331         den = s->vps->vps_time_scale;
332     } else if (sps->vui.vui_timing_info_present_flag) {
333         num = sps->vui.vui_num_units_in_tick;
334         den = sps->vui.vui_time_scale;
337     if (num != 0 && den != 0)
338         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
/* Return whether last_sps is still one of the SPSes currently stored
 * in s->sps_list (comparison is by pointer identity of the parsed
 * SPS payload, not by content). */
349 static int is_sps_exist(HEVCContext *s, const HEVCSPS* last_sps)
353     for( i = 0; i < MAX_SPS_COUNT; i++)
355             if (last_sps == (HEVCSPS*)s->sps_list[i]->data)
/* Parse the slice segment header (HEVC 7.3.6.1), activate the PPS/SPS
 * it references, and derive the inferred per-slice parameters
 * (slice_qp, CTB address, QP-group state). Returns 0 on success or a
 * negative AVERROR code on invalid input. */
360 static int hls_slice_header(HEVCContext *s)
362     GetBitContext *gb = &s->HEVClc->gb;
363     SliceHeader *sh = &s->sh;
/* the first slice of an IDR/BLA picture starts a new decode sequence */
367     sh->first_slice_in_pic_flag = get_bits1(gb);
368     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
369         s->seq_decode = (s->seq_decode + 1) & 0xff;
372         ff_hevc_clear_refs(s);
374     sh->no_output_of_prior_pics_flag = 0;
376         sh->no_output_of_prior_pics_flag = get_bits1(gb);
/* a CRA right after an end-of-sequence must not output prior pictures */
377     if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
378         sh->no_output_of_prior_pics_flag = 1;
/* PPS activation; the PPS must not change between slices of a picture */
380     sh->pps_id = get_ue_golomb_long(gb);
381     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
382         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
383         return AVERROR_INVALIDDATA;
385     if (!sh->first_slice_in_pic_flag &&
386         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
387         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
388         return AVERROR_INVALIDDATA;
390     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
/* SPS switch: flush references and rebuild all size-dependent state */
392     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
393         const HEVCSPS* last_sps = s->sps;
394         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
396         if (is_sps_exist(s, last_sps)) {
/* geometry or DPB size changed: prior pictures cannot be kept */
397             if (s->sps->width != last_sps->width || s->sps->height != last_sps->height ||
398                 s->sps->temporal_layer[s->sps->max_sub_layers - 1].max_dec_pic_buffering != last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
399                 sh->no_output_of_prior_pics_flag = 0;
401             sh->no_output_of_prior_pics_flag = 0;
403         ff_hevc_clear_refs(s);
404         ret = set_sps(s, s->sps);
408         s->seq_decode = (s->seq_decode + 1) & 0xff;
412     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
413     s->avctx->level = s->sps->ptl.general_ptl.level_idc;
/* slice segment address (absent for the first slice of a picture) */
415     sh->dependent_slice_segment_flag = 0;
416     if (!sh->first_slice_in_pic_flag) {
417         int slice_address_length;
419         if (s->pps->dependent_slice_segments_enabled_flag)
420             sh->dependent_slice_segment_flag = get_bits1(gb);
422         slice_address_length = av_ceil_log2(s->sps->ctb_width *
424         sh->slice_segment_addr = get_bits(gb, slice_address_length);
425         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
426             av_log(s->avctx, AV_LOG_ERROR,
427                    "Invalid slice segment address: %u.\n",
428                    sh->slice_segment_addr);
429             return AVERROR_INVALIDDATA;
432         if (!sh->dependent_slice_segment_flag) {
433             sh->slice_addr = sh->slice_segment_addr;
437         sh->slice_segment_addr = sh->slice_addr = 0;
439         s->slice_initialized = 0;
/* independent slice segment: parse the full header body */
442     if (!sh->dependent_slice_segment_flag) {
443         s->slice_initialized = 0;
445         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
446             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
448         sh->slice_type = get_ue_golomb_long(gb);
449         if (!(sh->slice_type == I_SLICE ||
450               sh->slice_type == P_SLICE ||
451               sh->slice_type == B_SLICE)) {
452             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
454             return AVERROR_INVALIDDATA;
/* IRAP pictures must be intra-only */
456         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
457             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
458             return AVERROR_INVALIDDATA;
461         // when flag is not present, picture is inferred to be output
462         sh->pic_output_flag = 1;
463         if (s->pps->output_flag_present_flag)
464             sh->pic_output_flag = get_bits1(gb);
466         if (s->sps->separate_colour_plane_flag)
467             sh->colour_plane_id = get_bits(gb, 2);
/* POC and reference picture sets (non-IDR pictures only) */
470             int short_term_ref_pic_set_sps_flag, poc;
472             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
473             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
/* all slices of one picture must share the same POC */
474             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
475                 av_log(s->avctx, AV_LOG_WARNING,
476                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
477                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
478                     return AVERROR_INVALIDDATA;
/* short-term RPS: coded inline in the slice, or picked from the SPS */
483             short_term_ref_pic_set_sps_flag = get_bits1(gb);
484             if (!short_term_ref_pic_set_sps_flag) {
485                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
489                 sh->short_term_rps = &sh->slice_rps;
491                 int numbits, rps_idx;
493                 if (!s->sps->nb_st_rps) {
494                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
495                     return AVERROR_INVALIDDATA;
498                 numbits = av_ceil_log2(s->sps->nb_st_rps);
499                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
500                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
/* long-term RPS errors are tolerated unless AV_EF_EXPLODE is set */
503             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
505                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
506                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
507                     return AVERROR_INVALIDDATA;
510             if (s->sps->sps_temporal_mvp_enabled_flag)
511                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
513                 sh->slice_temporal_mvp_enabled_flag = 0;
515             s->sh.short_term_rps = NULL;
/* only TID0 pictures that are not sub-layer non-reference (…_N) or
 * leading (RADL/RASL) NAL types qualify for the guarded update below */
520         if (s->temporal_id == 0 &&
521             s->nal_unit_type != NAL_TRAIL_N &&
522             s->nal_unit_type != NAL_TSA_N &&
523             s->nal_unit_type != NAL_STSA_N &&
524             s->nal_unit_type != NAL_RADL_N &&
525             s->nal_unit_type != NAL_RADL_R &&
526             s->nal_unit_type != NAL_RASL_N &&
527             s->nal_unit_type != NAL_RASL_R)
/* per-slice SAO enables; one chroma flag is shared by Cb and Cr */
530         if (s->sps->sao_enabled) {
531             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
532             sh->slice_sample_adaptive_offset_flag[1] =
533             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
535             sh->slice_sample_adaptive_offset_flag[0] = 0;
536             sh->slice_sample_adaptive_offset_flag[1] = 0;
537             sh->slice_sample_adaptive_offset_flag[2] = 0;
/* active reference counts, defaulting to the PPS-declared values */
540         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
541         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
544             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
545             if (sh->slice_type == B_SLICE)
546                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
548             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
549                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
550                 if (sh->slice_type == B_SLICE)
551                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
553             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
554                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
555                        sh->nb_refs[L0], sh->nb_refs[L1]);
556                 return AVERROR_INVALIDDATA;
/* optional explicit reference-picture-list reordering */
559             sh->rpl_modification_flag[0] = 0;
560             sh->rpl_modification_flag[1] = 0;
561             nb_refs = ff_hevc_frame_nb_refs(s);
563                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
564                 return AVERROR_INVALIDDATA;
567             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
568                 sh->rpl_modification_flag[0] = get_bits1(gb);
569                 if (sh->rpl_modification_flag[0]) {
570                     for (i = 0; i < sh->nb_refs[L0]; i++)
571                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
574                 if (sh->slice_type == B_SLICE) {
575                     sh->rpl_modification_flag[1] = get_bits1(gb);
576                     if (sh->rpl_modification_flag[1] == 1)
577                         for (i = 0; i < sh->nb_refs[L1]; i++)
578                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
582             if (sh->slice_type == B_SLICE)
583                 sh->mvd_l1_zero_flag = get_bits1(gb);
585             if (s->pps->cabac_init_present_flag)
586                 sh->cabac_init_flag = get_bits1(gb);
588                 sh->cabac_init_flag = 0;
/* collocated picture selection for temporal MVP */
590             sh->collocated_ref_idx = 0;
591             if (sh->slice_temporal_mvp_enabled_flag) {
592                 sh->collocated_list = L0;
593                 if (sh->slice_type == B_SLICE)
594                     sh->collocated_list = !get_bits1(gb);
596                 if (sh->nb_refs[sh->collocated_list] > 1) {
597                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
598                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
599                         av_log(s->avctx, AV_LOG_ERROR,
600                                "Invalid collocated_ref_idx: %d.\n",
601                                sh->collocated_ref_idx);
602                         return AVERROR_INVALIDDATA;
607             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
608                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
609                 pred_weight_table(s, gb);
/* coded as five_minus_max_num_merge_cand */
612             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
613             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
614                 av_log(s->avctx, AV_LOG_ERROR,
615                        "Invalid number of merging MVP candidates: %d.\n",
616                        sh->max_num_merge_cand);
617                 return AVERROR_INVALIDDATA;
/* QP delta and per-slice chroma QP offsets */
621         sh->slice_qp_delta = get_se_golomb(gb);
623         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
624             sh->slice_cb_qp_offset = get_se_golomb(gb);
625             sh->slice_cr_qp_offset = get_se_golomb(gb);
627             sh->slice_cb_qp_offset = 0;
628             sh->slice_cr_qp_offset = 0;
/* deblocking filter: slice override, else PPS defaults */
631         if (s->pps->deblocking_filter_control_present_flag) {
632             int deblocking_filter_override_flag = 0;
634             if (s->pps->deblocking_filter_override_enabled_flag)
635                 deblocking_filter_override_flag = get_bits1(gb);
637             if (deblocking_filter_override_flag) {
638                 sh->disable_deblocking_filter_flag = get_bits1(gb);
639                 if (!sh->disable_deblocking_filter_flag) {
/* offsets are coded divided by two */
640                     sh->beta_offset = get_se_golomb(gb) * 2;
641                     sh->tc_offset   = get_se_golomb(gb) * 2;
644                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
645                 sh->beta_offset = s->pps->beta_offset;
646                 sh->tc_offset = s->pps->tc_offset;
649             sh->disable_deblocking_filter_flag = 0;
/* loop-filter across slice boundaries, only coded when a filter runs */
654         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
655             (sh->slice_sample_adaptive_offset_flag[0] ||
656              sh->slice_sample_adaptive_offset_flag[1] ||
657              !sh->disable_deblocking_filter_flag)) {
658             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
660             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
/* dependent segment with no preceding independent segment is invalid */
662     } else if (!s->slice_initialized) {
663         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
664         return AVERROR_INVALIDDATA;
/* entry points for tiles / wavefront parallel processing */
667     sh->num_entry_point_offsets = 0;
668     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
669         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
670         if (sh->num_entry_point_offsets > 0) {
671             int offset_len = get_ue_golomb_long(gb) + 1;
/* read each offset in 16-bit chunks plus a remainder, since
 * offset_len may exceed what a single get_bits() call allows */
672             int segments = offset_len >> 4;
673             int rest = (offset_len & 15);
674             av_freep(&sh->entry_point_offset);
675             av_freep(&sh->offset);
677             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
678             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
679             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
680             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
681                 sh->num_entry_point_offsets = 0;
682                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
683                 return AVERROR(ENOMEM);
685             for (i = 0; i < sh->num_entry_point_offsets; i++) {
687                 for (j = 0; j < segments; j++) {
689                     val += get_bits(gb, 16);
693                     val += get_bits(gb, rest);
695                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
697             if (s->threads_number > 1 && (s->pps->num_tile_rows > 1 || s->pps->num_tile_columns > 1)) {
698                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
699                 s->threads_number = 1;
701                 s->enable_parallel_tiles = 0;
703         s->enable_parallel_tiles = 0;
706     if (s->pps->slice_header_extension_present_flag) {
707         unsigned int length = get_ue_golomb_long(gb);
/* bound the declared extension length by the bits actually left */
708         if (length*8LL > get_bits_left(gb)) {
709             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
710             return AVERROR_INVALIDDATA;
712         for (i = 0; i < length; i++)
713             skip_bits(gb, 8);  // slice_header_extension_data_byte
716     // Inferred parameters
717     sh->slice_qp = 26U + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
718     if (sh->slice_qp > 51 ||
719         sh->slice_qp < -s->sps->qp_bd_offset) {
720         av_log(s->avctx, AV_LOG_ERROR,
721                "The slice_qp %d is outside the valid range "
724                -s->sps->qp_bd_offset);
725         return AVERROR_INVALIDDATA;
728     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
/* a dependent segment cannot be the very first segment of a picture */
730     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
731         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
732         return AVERROR_INVALIDDATA;
735     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
/* without per-CU QP deltas the slice QP applies everywhere */
737     if (!s->pps->cu_qp_delta_enabled_flag)
738         s->HEVClc->qp_y = s->sh.slice_qp;
740     s->slice_initialized = 1;
/* CTB(tab, x, y): index a per-CTB table at CTB raster position (x, y). */
745 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
/* SET_SAO(elem, value): store a freshly decoded SAO field, or — when a
 * merge flag is set — copy it from the left (rx-1) or above (ry-1)
 * neighbour CTB. Relies on sao_merge_left_flag / sao_merge_up_flag
 * and rx/ry being in scope at the expansion site (hls_sao_param). */
747 #define SET_SAO(elem, value) \
749     if (!sao_merge_up_flag && !sao_merge_left_flag) \
751     else if (sao_merge_left_flag) \
752         sao->elem = CTB(s->sao, rx-1, ry).elem; \
753     else if (sao_merge_up_flag) \
754         sao->elem = CTB(s->sao, rx, ry-1).elem; \
/* Decode the SAO parameters for the CTB at raster position (rx, ry)
 * (HEVC 7.3.8.3) and derive the signed, scaled offset values used by
 * the filter. Merge flags copy whole parameter sets from neighbours. */
759 static void hls_sao_param(HEVCContext *s, int rx, int ry)
761     HEVCLocalContext *lc = s->HEVClc;
762     int sao_merge_left_flag = 0;
763     int sao_merge_up_flag   = 0;
764     SAOParams *sao          = &CTB(s->sao, rx, ry);
/* merge flags are only coded when SAO is on for luma or chroma */
767     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
768         s->sh.slice_sample_adaptive_offset_flag[1]) {
770         if (lc->ctb_left_flag)
771             sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
/* up-merge is only coded when not already merging left */
773         if (ry > 0 && !sao_merge_left_flag) {
775                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
/* c_idx: 0 = luma, 1 = Cb, 2 = Cr */
779     for (c_idx = 0; c_idx < 3; c_idx++) {
780         int log2_sao_offset_scale = c_idx == 0 ? s->pps->log2_sao_offset_scale_luma :
781                                                  s->pps->log2_sao_offset_scale_chroma;
783         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
784             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
/* Cr reuses the type/EO class decoded for Cb */
789             sao->type_idx[2] = sao->type_idx[1];
790             sao->eo_class[2] = sao->eo_class[1];
792         SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
795         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
798         for (i = 0; i < 4; i++)
799             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
/* band offsets carry explicit signs; edge offsets derive signs below */
801         if (sao->type_idx[c_idx] == SAO_BAND) {
802             for (i = 0; i < 4; i++) {
803                 if (sao->offset_abs[c_idx][i]) {
804                     SET_SAO(offset_sign[c_idx][i],
805                             ff_hevc_sao_offset_sign_decode(s));
807                     sao->offset_sign[c_idx][i] = 0;
810             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
811         } else if (c_idx != 2) {
812             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
815         // Inferred parameters
816         sao->offset_val[c_idx][0] = 0;
817         for (i = 0; i < 4; i++) {
818             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
819             if (sao->type_idx[c_idx] == SAO_EDGE) {
821                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
822             } else if (sao->offset_sign[c_idx][i]) {
823                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
/* PPS-level scale shifts the final offset (range-extension tools) */
825             sao->offset_val[c_idx][i + 1] <<= log2_sao_offset_scale;
/* Decode one transform unit (HEVC 7.3.8.10): run intra prediction for
 * the covered block(s) and, when any coded-block flag is set, parse
 * cu_qp_delta and the luma/chroma residuals. For 4x4 luma TUs the
 * chroma residual is carried by the last sub-block (blk_idx == 3) and
 * covers the whole parent area; 4:2:2 streams code two vertically
 * stacked chroma blocks per TU. Returns 0 or a negative AVERROR. */
833 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
834                               int xBase, int yBase, int cb_xBase, int cb_yBase,
835                               int log2_cb_size, int log2_trafo_size,
836                               int trafo_depth, int blk_idx)
838     HEVCLocalContext *lc = s->HEVClc;
/* chroma TB size derives from the luma size and the subsampling shift */
839     const int log2_trafo_size_c = log2_trafo_size - s->sps->hshift[1];
842     if (lc->cu.pred_mode == MODE_INTRA) {
843         int trafo_size = 1 << log2_trafo_size;
844         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
846         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
/* any coded residual in this TU? (4:2:2 also checks the lower chroma half) */
849     if (lc->tt.cbf_luma ||
850         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
851         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) ||
852         (s->sps->chroma_format_idc == 2 &&
853          (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << log2_trafo_size_c)) ||
854           SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << log2_trafo_size_c))))) {
855         int scan_idx   = SCAN_DIAG;
856         int scan_idx_c = SCAN_DIAG;
857         int cbf_luma = lc->tt.cbf_luma;
858         int cbf_chroma = SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
859                          SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) ||
860                          (s->sps->chroma_format_idc == 2 &&
861                           (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << log2_trafo_size_c)) ||
862                            SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << log2_trafo_size_c))));
/* cu_qp_delta is coded at most once per quantization group */
864         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
865             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
866             if (lc->tu.cu_qp_delta != 0)
867                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
868                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
869             lc->tu.is_cu_qp_delta_coded = 1;
/* spec-mandated range check; bounds widen with the bit-depth offset */
871             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
872                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
873                 av_log(s->avctx, AV_LOG_ERROR,
874                        "The cu_qp_delta %d is outside the valid range "
877                        -(26 + s->sps->qp_bd_offset / 2),
878                         (25 + s->sps->qp_bd_offset / 2));
879                 return AVERROR_INVALIDDATA;
882             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
/* mode-dependent coefficient scan for small intra TUs (8.4.4.2):
 * near-horizontal modes scan vertically and vice versa */
885         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
886             if (lc->tu.intra_pred_mode >= 6 &&
887                 lc->tu.intra_pred_mode <= 14) {
888                 scan_idx = SCAN_VERT;
889             } else if (lc->tu.intra_pred_mode >= 22 &&
890                        lc->tu.intra_pred_mode <= 30) {
891                 scan_idx = SCAN_HORIZ;
894             if (lc->tu.intra_pred_mode_c >=  6 &&
895                 lc->tu.intra_pred_mode_c <= 14) {
896                 scan_idx_c = SCAN_VERT;
897             } else if (lc->tu.intra_pred_mode_c >= 22 &&
898                        lc->tu.intra_pred_mode_c <= 30) {
899                 scan_idx_c = SCAN_HORIZ;
905             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
/* chroma residual exists at this level unless the TU is 4x4 luma
 * (then chroma is handled by the blk_idx == 3 branch below) */
906         if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
907             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
908             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
/* 4:2:2 iterates twice for the two stacked chroma blocks */
910             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
911                 if (lc->cu.pred_mode == MODE_INTRA) {
912                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
913                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
915                 if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (i << log2_trafo_size_c)))
916                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
917                                                 log2_trafo_size_c, scan_idx_c, 1);
920             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
921                 if (lc->cu.pred_mode == MODE_INTRA) {
922                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
923                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
925                 if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (i << log2_trafo_size_c)))
926                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
927                                                 log2_trafo_size_c, scan_idx_c, 2);
/* 4x4 luma: the last sub-block carries chroma for the parent area,
 * addressed via xBase/yBase instead of x0/y0 */
929         } else if (blk_idx == 3) {
930             int trafo_size_h = 1 << (log2_trafo_size + 1);
931             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
932             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
933                 if (lc->cu.pred_mode == MODE_INTRA) {
934                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
935                                                     trafo_size_h, trafo_size_v);
936                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
938                 if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase + (i << log2_trafo_size_c)))
939                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
940                                                 log2_trafo_size, scan_idx_c, 1);
942             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
943                 if (lc->cu.pred_mode == MODE_INTRA) {
944                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
945                                                     trafo_size_h, trafo_size_v);
946                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
948                 if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase + (i << log2_trafo_size_c)))
949                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
950                                                 log2_trafo_size, scan_idx_c, 2);
/* no residual at all: chroma intra prediction still has to run */
953     } else if (lc->cu.pred_mode == MODE_INTRA) {
954         if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
955             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
956             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
957             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
958             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
959             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
960             if (s->sps->chroma_format_idc == 2) {
961                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
962                                                 trafo_size_h, trafo_size_v);
963                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
964                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
966         } else if (blk_idx == 3) {
967             int trafo_size_h = 1 << (log2_trafo_size + 1);
968             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
969             ff_hevc_set_neighbour_available(s, xBase, yBase,
970                                             trafo_size_h, trafo_size_v);
971             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
972             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
973             if (s->sps->chroma_format_idc == 2) {
974                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
975                                                 trafo_size_h, trafo_size_v);
976                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
977                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
/* Mark every min-PU covered by the CU at (x0, y0) in the is_pcm map
 * with value 2 so later filter stages can identify these samples.
 * NOTE(review): presumably value 2 flags transquant-bypass CUs as
 * distinct from PCM blocks in the same map — confirm against the
 * deblocking filter code that reads is_pcm. */
985 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
987     int cb_size          = 1 << log2_cb_size;
988     int log2_min_pu_size = s->sps->log2_min_pu_size;
990     int min_pu_width     = s->sps->min_pu_width;
/* clamp the CU footprint to the picture borders */
991     int x_end = FFMIN(x0 + cb_size, s->sps->width);
992     int y_end = FFMIN(y0 + cb_size, s->sps->height);
995     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
996         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
997             s->is_pcm[i + j * min_pu_width] = 2;
// Parse one node of the residual quad-tree (RQT), HLS transform_tree()
// (H.265 section 7.3.8.8): decode/inherit the chroma CBFs, decide whether
// this transform block splits into four, then either recurse or decode the
// leaf transform unit. Returns 0 or a negative error code.
// NOTE(review): this listing is elided — braces, "int ret;"/"int i, j;"
// declarations and the "if (ret < 0) return ret;" checks after each call
// are not shown here; verify against the full file before editing.
1000 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1001 int xBase, int yBase, int cb_xBase, int cb_yBase,
1002 int log2_cb_size, int log2_trafo_size,
1003 int trafo_depth, int blk_idx)
1005 HEVCLocalContext *lc = s->HEVClc;
1006 uint8_t split_transform_flag;
// 4x4 transform at depth > 0: chroma has no block of its own here, so the
// chroma CBFs are inherited from the parent node instead of being decoded.
1009 if (trafo_depth > 0 && log2_trafo_size == 2) {
1010 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1011 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1012 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1013 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
// 4:2:2 keeps a second chroma CBF for the lower half of the luma block.
1014 if (s->sps->chroma_format_idc == 2) {
1015 int xBase_cb = xBase & ((1 << log2_trafo_size) - 1);
1016 int yBase_cb = yBase & ((1 << log2_trafo_size) - 1);
1017 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
1018 SAMPLE_CBF2(lc->tt.cbf_cb[trafo_depth - 1], xBase_cb, yBase_cb + (1 << (log2_trafo_size)));
1019 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
1020 SAMPLE_CBF2(lc->tt.cbf_cr[trafo_depth - 1], xBase_cb, yBase_cb + (1 << (log2_trafo_size)));
// (else branch, brace elided) root/new node: clear the chroma CBFs.
1023 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1024 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1025 if (s->sps->chroma_format_idc == 2) {
1026 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
1027 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) = 0;
// Select the intra prediction modes for this TU. With PART_NxN intra split,
// depth-1 nodes select the per-PU luma mode via blk_idx; chroma follows the
// per-PU mode only in 4:4:4, otherwise PU 0's chroma mode applies.
1031 if (lc->cu.intra_split_flag) {
1032 if (trafo_depth == 1) {
1033 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1034 if (s->sps->chroma_format_idc == 3) {
1035 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1036 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[blk_idx];
1038 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1039 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
1043 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[0];
1044 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1045 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
// Default: assume the luma CBF is set; leaves may overwrite it below.
1048 lc->tt.cbf_luma = 1;
// Implicit split for inter CUs with no transform-hierarchy depth budget
// and a non-2Nx2N partition (condition continues on an elided line).
1050 lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1051 lc->cu.pred_mode == MODE_INTER &&
1052 lc->cu.part_mode != PART_2Nx2N &&
// split_transform_flag is decoded only when a real choice exists;
// otherwise it is inferred from the size limits / intra split / inter split.
1055 if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1056 log2_trafo_size > s->sps->log2_min_tb_size &&
1057 trafo_depth < lc->cu.max_trafo_depth &&
1058 !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1059 split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1061 split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1062 (lc->cu.intra_split_flag && trafo_depth == 0) ||
1063 lc->tt.inter_split_flag;
// Chroma CBFs are coded only while the chroma block is at least 4x4
// (always true in 4:4:4); a CBF is conditioned on the parent's CBF.
1066 if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
1067 if (trafo_depth == 0 ||
1068 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1069 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1070 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
// Second (lower-half) Cb CBF for 4:2:2.
1071 if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1072 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
1073 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1077 if (trafo_depth == 0 ||
1078 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1079 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1080 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1081 if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1082 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
1083 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
// Recurse into the four child transform blocks (error checks elided).
1088 if (split_transform_flag) {
1089 int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1090 int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1092 ret = hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase,
1093 log2_cb_size, log2_trafo_size - 1,
1094 trafo_depth + 1, 0);
1097 ret = hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase,
1098 log2_cb_size, log2_trafo_size - 1,
1099 trafo_depth + 1, 1);
1102 ret = hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase,
1103 log2_cb_size, log2_trafo_size - 1,
1104 trafo_depth + 1, 2);
1107 ret = hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase,
1108 log2_cb_size, log2_trafo_size - 1,
1109 trafo_depth + 1, 3);
// Leaf node: decode cbf_luma when required, then the transform unit itself.
1113 int min_tu_size = 1 << s->sps->log2_min_tb_size;
1114 int log2_min_tu_size = s->sps->log2_min_tb_size;
1115 int min_tu_width = s->sps->min_tb_width;
// cbf_luma is explicit for intra, non-root nodes, or when any chroma CBF
// (including the 4:2:2 lower-half ones) is set; otherwise it stays 1.
1117 if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1118 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1119 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) ||
1120 (s->sps->chroma_format_idc == 2 &&
1121 (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) ||
1122 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1)))))) {
1123 lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1126 ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1127 log2_cb_size, log2_trafo_size, trafo_depth,
// Record cbf_luma per min-TU cell for the in-loop filters.
1131 // TODO: store cbf_luma somewhere else
1132 if (lc->tt.cbf_luma) {
1134 for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1135 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1136 int x_tu = (x0 + j) >> log2_min_tu_size;
1137 int y_tu = (y0 + i) >> log2_min_tu_size;
1138 s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
// Compute deblocking boundary strengths for this TB unless deblocking
// is disabled; transquant-bypass blocks are additionally marked to skip.
1141 if (!s->sh.disable_deblocking_filter_flag) {
1142 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1143 if (s->pps->transquant_bypass_enable_flag &&
1144 lc->cu.cu_transquant_bypass_flag)
1145 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
// Decode a PCM (raw, uncompressed) coding block: compute the bit length of
// the IPCM payload, skip it in the CABAC bytestream, and copy the raw luma
// and chroma samples directly into the frame via hevcdsp.put_pcm.
// Returns 0 or a negative error code (return path elided from this view).
// NOTE(review): "GetBitContext gb;" / "int ret;" declarations and the
// "if (ret < 0)" check after init_get_bits are on elided lines.
1151 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1153 //TODO: non-4:2:0 support
1154 HEVCLocalContext *lc = s->HEVClc;
1156 int cb_size = 1 << log2_cb_size;
// Destination pointers into the three frame planes, with the chroma
// coordinates scaled by the per-plane subsampling shifts.
1157 int stride0 = s->frame->linesize[0];
1158 uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1159 int stride1 = s->frame->linesize[1];
1160 uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1161 int stride2 = s->frame->linesize[2];
1162 uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
// Total payload size in bits: luma samples at pcm.bit_depth plus both
// subsampled chroma planes at pcm.bit_depth_chroma.
1164 int length = cb_size * cb_size * s->sps->pcm.bit_depth +
1165 (((cb_size >> s->sps->hshift[1]) * (cb_size >> s->sps->vshift[1])) +
1166 ((cb_size >> s->sps->hshift[2]) * (cb_size >> s->sps->vshift[2]))) *
1167 s->sps->pcm.bit_depth_chroma;
// Advance the CABAC reader past the byte-aligned PCM payload.
1168 const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1171 if (!s->sh.disable_deblocking_filter_flag)
1172 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
// Re-read the skipped bytes as a bitstream of raw samples.
1174 ret = init_get_bits(&gb, pcm, length);
1178 s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size, &gb, s->sps->pcm.bit_depth);
1179 s->hevcdsp.put_pcm(dst1, stride1,
1180 cb_size >> s->sps->hshift[1],
1181 cb_size >> s->sps->vshift[1],
1182 &gb, s->sps->pcm.bit_depth_chroma);
1183 s->hevcdsp.put_pcm(dst2, stride2,
1184 cb_size >> s->sps->hshift[2],
1185 cb_size >> s->sps->vshift[2],
1186 &gb, s->sps->pcm.bit_depth_chroma);
1191 * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1193 * @param s HEVC decoding context
1194 * @param dst target buffer for block data at block position
1195 * @param dststride stride of the dst buffer
1196 * @param ref reference picture buffer at origin (0, 0)
1197 * @param mv motion vector (relative to block position) to get pixel data from
1198 * @param x_off horizontal position of block from origin (0, 0)
1199 * @param y_off vertical position of block from origin (0, 0)
1200 * @param block_w width of block
1201 * @param block_h height of block
1202 * @param luma_weight weighting factor applied to the luma prediction
1203 * @param luma_offset additive offset applied to the luma prediction value
// Unidirectional luma motion compensation (spec 8.5.3.2.2.1): integer part
// of the MV offsets the source position, the fractional part (quarter-pel)
// selects the qpel interpolation filter. Falls back to emulated_edge_mc
// when the filter footprint would read outside the picture.
// NOTE(review): elided lines include the "int mx = mv->x & 3; int my =
// mv->y & 3;" declarations and the "if (!weight_flag) ... else" wrapper
// around the two put_hevc_qpel_uni* calls below.
1206 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1207 AVFrame *ref, const Mv *mv, int x_off, int y_off,
1208 int block_w, int block_h, int luma_weight, int luma_offset)
1210 HEVCLocalContext *lc = s->HEVClc;
1211 uint8_t *src = ref->data[0];
1212 ptrdiff_t srcstride = ref->linesize[0];
1213 int pic_width = s->sps->width;
1214 int pic_height = s->sps->height;
// Explicit weighted prediction applies for P slices with weighted_pred_flag
// or B slices with weighted_bipred_flag.
1217 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1218 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1219 int idx = ff_hevc_pel_weight[block_w];
// MVs are in quarter-pel units: >> 2 gives the integer displacement.
1221 x_off += mv->x >> 2;
1222 y_off += mv->y >> 2;
1223 src += y_off * srcstride + (x_off << s->sps->pixel_shift);
// If the QPEL_EXTRA filter margin crosses a picture edge, replicate the
// border into the edge_emu_buffer and interpolate from there instead.
1225 if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1226 x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1227 y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1228 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1229 int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1230 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1232 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1233 edge_emu_stride, srcstride,
1234 block_w + QPEL_EXTRA,
1235 block_h + QPEL_EXTRA,
1236 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1237 pic_width, pic_height);
1238 src = lc->edge_emu_buffer + buf_offset;
1239 srcstride = edge_emu_stride;
// Unweighted vs. explicitly weighted qpel interpolation (branch elided).
1243 s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1244 block_h, mx, my, block_w);
1246 s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1247 block_h, s->sh.luma_log2_weight_denom,
1248 luma_weight, luma_offset, mx, my, block_w);
1252 * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1254 * @param s HEVC decoding context
1255 * @param dst target buffer for block data at block position
1256 * @param dststride stride of the dst buffer
1257 * @param ref0 reference picture0 buffer at origin (0, 0)
1258 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1259 * @param x_off horizontal position of block from origin (0, 0)
1260 * @param y_off vertical position of block from origin (0, 0)
1261 * @param block_w width of block
1262 * @param block_h height of block
1263 * @param ref1 reference picture1 buffer at origin (0, 0)
1264 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1265 * @param current_mv current motion vector structure
// Bidirectional luma motion compensation (spec 8.5.3.2.2.1): the L0
// prediction is interpolated into the 16-bit tmp buffer, then the L1
// interpolation is averaged (or explicitly weighted) with tmp into dst.
// Each reference gets its own edge-emulation fallback and fractional
// (quarter-pel) filter phase.
1267 static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1268 AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1269 int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1271 HEVCLocalContext *lc = s->HEVClc;
// Intermediate 16-bit plane for the L0 prediction.
1272 DECLARE_ALIGNED(16, int16_t, tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1273 ptrdiff_t src0stride = ref0->linesize[0];
1274 ptrdiff_t src1stride = ref1->linesize[0];
1275 int pic_width = s->sps->width;
1276 int pic_height = s->sps->height;
// Fractional (quarter-pel) parts of each MV select the filter phase.
1277 int mx0 = mv0->x & 3;
1278 int my0 = mv0->y & 3;
1279 int mx1 = mv1->x & 3;
1280 int my1 = mv1->y & 3;
1281 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1282 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
// Integer displacements for each list.
1283 int x_off0 = x_off + (mv0->x >> 2);
1284 int y_off0 = y_off + (mv0->y >> 2);
1285 int x_off1 = x_off + (mv1->x >> 2);
1286 int y_off1 = y_off + (mv1->y >> 2);
1287 int idx = ff_hevc_pel_weight[block_w];
// Cast through unsigned to keep the left shift well-defined for negative
// offsets before pointer arithmetic.
1289 uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1290 uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
// L0: border-replicate into edge_emu_buffer when the filter footprint
// crosses the picture boundary.
1292 if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1293 x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1294 y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1295 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1296 int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1297 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1299 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1300 edge_emu_stride, src0stride,
1301 block_w + QPEL_EXTRA,
1302 block_h + QPEL_EXTRA,
1303 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1304 pic_width, pic_height);
1305 src0 = lc->edge_emu_buffer + buf_offset;
1306 src0stride = edge_emu_stride;
// L1: same fallback using the second emulation buffer so both sources
// can be emulated simultaneously.
1309 if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1310 x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1311 y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1312 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1313 int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1314 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1316 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1317 edge_emu_stride, src1stride,
1318 block_w + QPEL_EXTRA,
1319 block_h + QPEL_EXTRA,
1320 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1321 pic_width, pic_height);
1322 src1 = lc->edge_emu_buffer2 + buf_offset;
1323 src1stride = edge_emu_stride;
// L0 into tmp, then combine with L1: plain bi-average or explicit
// weighted bi-prediction (the if/else wrapper is on elided lines, and the
// final argument line of the weighted call is also elided).
1326 s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](tmp, MAX_PB_SIZE, src0, src0stride,
1327 block_h, mx0, my0, block_w);
1329 s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE,
1330 block_h, mx1, my1, block_w);
1332 s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE,
1333 block_h, s->sh.luma_log2_weight_denom,
1334 s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1335 s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1336 s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1337 s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1343 * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1345 * @param s HEVC decoding context
1346 * @param dst0 target buffer for block data at block position (one chroma plane)
1347 * @param dststride stride of the dst0 buffer
1348 * @param src0 reference chroma plane buffer at origin (0, 0)
1349 * @param srcstride stride of the src0 buffer
1350 * @param reflist reference list index (0 = L0, 1 = L1) selecting the motion vector in current_mv
1351 * @param x_off horizontal position of block from origin (0, 0)
1352 * @param y_off vertical position of block from origin (0, 0)
1353 * @param block_w width of block
1354 * @param block_h height of block
1355 * @param chroma_weight weighting factor applied to the chroma prediction
1356 * @param chroma_offset additive offset applied to the chroma prediction value
// Unidirectional chroma motion compensation (spec 8.5.3.2.2.2) for a single
// chroma plane: the MV fractional precision depends on the chroma
// subsampling (eighth-pel for 4:2:0), and epel (4-tap) filters are used.
// NOTE(review): "¤t_mv" below is mojibake for "&current_mv" (an
// "&curren...;"-style HTML-entity corruption) — restore before compiling.
// The "if (!weight_flag) ... else" wrapper around the two epel calls is
// on elided lines.
1359 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1360 ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1361 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1363 HEVCLocalContext *lc = s->HEVClc;
// Chroma picture dimensions after subsampling.
1364 int pic_width = s->sps->width >> s->sps->hshift[1];
1365 int pic_height = s->sps->height >> s->sps->vshift[1];
1366 const Mv *mv = ¤t_mv->mv[reflist];
1367 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1368 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1369 int idx = ff_hevc_pel_weight[block_w];
1370 int hshift = s->sps->hshift[1];
1371 int vshift = s->sps->vshift[1];
// Fractional MV part: 2+shift bits per axis; normalized (_mx/_my) to the
// eighth-pel phase expected by the epel filters.
1372 intptr_t mx = mv->x & ((1 << (2 + hshift)) - 1);
1373 intptr_t my = mv->y & ((1 << (2 + vshift)) - 1);
1374 intptr_t _mx = mx << (1 - hshift);
1375 intptr_t _my = my << (1 - vshift);
// Integer displacement in chroma samples.
1377 x_off += mv->x >> (2 + hshift);
1378 y_off += mv->y >> (2 + vshift);
1379 src0 += y_off * srcstride + (x_off << s->sps->pixel_shift);
// Border replication when the EPEL_EXTRA filter margin leaves the picture.
1381 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1382 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1383 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1384 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1385 int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->sps->pixel_shift));
1386 int buf_offset0 = EPEL_EXTRA_BEFORE *
1387 (edge_emu_stride + (1 << s->sps->pixel_shift));
1388 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1389 edge_emu_stride, srcstride,
1390 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1391 x_off - EPEL_EXTRA_BEFORE,
1392 y_off - EPEL_EXTRA_BEFORE,
1393 pic_width, pic_height);
1395 src0 = lc->edge_emu_buffer + buf_offset0;
1396 srcstride = edge_emu_stride;
// Unweighted vs. explicitly weighted epel interpolation (branch elided).
1399 s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1400 block_h, _mx, _my, block_w);
1402 s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1403 block_h, s->sh.chroma_log2_weight_denom,
1404 chroma_weight, chroma_offset, _mx, _my, block_w);
1408 * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1410 * @param s HEVC decoding context
1411 * @param dst target buffer for block data at block position
1412 * @param dststride stride of the dst buffer
1413 * @param ref0 reference picture0 buffer at origin (0, 0)
1414 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1415 * @param x_off horizontal position of block from origin (0, 0)
1416 * @param y_off vertical position of block from origin (0, 0)
1417 * @param block_w width of block
1418 * @param block_h height of block
1419 * @param ref1 reference picture1 buffer at origin (0, 0)
1420 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1421 * @param current_mv current motion vector structure
1422 * @param cidx chroma component(cb, cr)
// Bidirectional chroma motion compensation (spec 8.5.3.2.2.2) for plane
// cidx (0 = Cb, 1 = Cr): L0 is interpolated into the 16-bit tmp buffer,
// then combined with the L1 interpolation (plain average or explicit
// weighted bi-prediction) into dst0.
// NOTE(review): "¤t_mv" below is mojibake for "&current_mv" — restore
// before compiling. The "if (!weight_flag) ... else" wrapper around the
// two combining calls is on elided lines.
1424 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1425 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1427 DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1428 int tmpstride = MAX_PB_SIZE;
1429 HEVCLocalContext *lc = s->HEVClc;
// Source plane cidx+1 (frame planes: 0 = Y, 1 = Cb, 2 = Cr).
1430 uint8_t *src1 = ref0->data[cidx+1];
1431 uint8_t *src2 = ref1->data[cidx+1];
1432 ptrdiff_t src1stride = ref0->linesize[cidx+1];
1433 ptrdiff_t src2stride = ref1->linesize[cidx+1];
1434 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1435 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1436 int pic_width = s->sps->width >> s->sps->hshift[1];
1437 int pic_height = s->sps->height >> s->sps->vshift[1];
1438 Mv *mv0 = ¤t_mv->mv[0];
1439 Mv *mv1 = ¤t_mv->mv[1];
1440 int hshift = s->sps->hshift[1];
1441 int vshift = s->sps->vshift[1];
// Fractional MV parts per list, normalized to the epel filter phase.
1443 intptr_t mx0 = mv0->x & ((1 << (2 + hshift)) - 1);
1444 intptr_t my0 = mv0->y & ((1 << (2 + vshift)) - 1);
1445 intptr_t mx1 = mv1->x & ((1 << (2 + hshift)) - 1);
1446 intptr_t my1 = mv1->y & ((1 << (2 + vshift)) - 1);
1447 intptr_t _mx0 = mx0 << (1 - hshift);
1448 intptr_t _my0 = my0 << (1 - vshift);
1449 intptr_t _mx1 = mx1 << (1 - hshift);
1450 intptr_t _my1 = my1 << (1 - vshift);
// Integer displacements in chroma samples.
1452 int x_off0 = x_off + (mv0->x >> (2 + hshift));
1453 int y_off0 = y_off + (mv0->y >> (2 + vshift));
1454 int x_off1 = x_off + (mv1->x >> (2 + hshift));
1455 int y_off1 = y_off + (mv1->y >> (2 + vshift));
1456 int idx = ff_hevc_pel_weight[block_w];
1457 src1 += y_off0 * src1stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1458 src2 += y_off1 * src2stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
// L0 edge emulation when the epel footprint crosses the picture border.
1460 if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1461 x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1462 y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1463 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1464 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1465 int buf_offset1 = EPEL_EXTRA_BEFORE *
1466 (edge_emu_stride + (1 << s->sps->pixel_shift));
1468 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1469 edge_emu_stride, src1stride,
1470 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1471 x_off0 - EPEL_EXTRA_BEFORE,
1472 y_off0 - EPEL_EXTRA_BEFORE,
1473 pic_width, pic_height);
1475 src1 = lc->edge_emu_buffer + buf_offset1;
1476 src1stride = edge_emu_stride;
// L1 edge emulation into the second buffer.
1479 if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1480 x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1481 y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1482 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1483 int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1484 int buf_offset1 = EPEL_EXTRA_BEFORE *
1485 (edge_emu_stride + (1 << s->sps->pixel_shift));
1487 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1488 edge_emu_stride, src2stride,
1489 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1490 x_off1 - EPEL_EXTRA_BEFORE,
1491 y_off1 - EPEL_EXTRA_BEFORE,
1492 pic_width, pic_height);
1494 src2 = lc->edge_emu_buffer2 + buf_offset1;
1495 src2stride = edge_emu_stride;
// L0 into tmp, then combine with L1 (unweighted vs. weighted, elided if/else).
1498 s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](tmp, tmpstride, src1, src1stride,
1499 block_h, _mx0, _my0, block_w);
1501 s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1502 src2, src2stride, tmp, tmpstride,
1503 block_h, _mx1, _my1, block_w);
1505 s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1506 src2, src2stride, tmp, tmpstride,
1508 s->sh.chroma_log2_weight_denom,
1509 s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1510 s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1511 s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1512 s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1513 _mx1, _my1, block_w);
1516 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1517 const Mv *mv, int y0, int height)
1519 int y = (mv->y >> 2) + y0 + height + 9;
1521 if (s->threads_type == FF_THREAD_FRAME )
1522 ff_thread_await_progress(&ref->tf, y, 0);
// Decode one prediction unit: parse merge/AMVP motion data (or reuse the
// skip-mode merge candidate), write the resulting MvField into tab_mvf for
// every covered min-PU cell, then run luma + chroma motion compensation for
// the active list(s) (L0-only, L1-only, or bi-prediction).
// NOTE(review): "¤t_mv" throughout is mojibake for "&current_mv".
// Declarations of nPbW/nPbH, merge_idx, ref_idx[], mvp_flag[], x_pu/y_pu,
// i/j and several closing braces are on elided lines.
1525 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1527 int log2_cb_size, int partIdx, int idx)
// Pointer into plane c_idx at luma position (x, y), applying chroma
// subsampling shifts and the high-bit-depth pixel shift.
1529 #define POS(c_idx, x, y) \
1530 &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1531 (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1532 HEVCLocalContext *lc = s->HEVClc;
1534 struct MvField current_mv = {{{ 0 }}};
1536 int min_pu_width = s->sps->min_pu_width;
1538 MvField *tab_mvf = s->ref->tab_mvf;
1539 RefPicList *refPicList = s->ref->refPicList;
1540 HEVCFrame *ref0, *ref1;
1541 uint8_t *dst0 = POS(0, x0, y0);
1542 uint8_t *dst1 = POS(1, x0, y0);
1543 uint8_t *dst2 = POS(2, x0, y0);
1544 int log2_min_cb_size = s->sps->log2_min_cb_size;
1545 int min_cb_width = s->sps->min_cb_width;
1546 int x_cb = x0 >> log2_min_cb_size;
1547 int y_cb = y0 >> log2_min_cb_size;
// Skip mode: only a merge index is coded; motion comes from the merge list.
1553 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1554 if (s->sh.max_num_merge_cand > 1)
1555 merge_idx = ff_hevc_merge_idx_decode(s);
1559 ff_hevc_luma_mv_merge_mode(s, x0, y0,
1562 log2_cb_size, partIdx,
1563 merge_idx, ¤t_mv);
// Broadcast the motion field over every min-PU cell of this PU.
1564 x_pu = x0 >> s->sps->log2_min_pu_size;
1565 y_pu = y0 >> s->sps->log2_min_pu_size;
1567 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1568 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1569 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1570 } else { /* MODE_INTER */
1571 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
// Explicit merge mode (non-skip).
1572 if (lc->pu.merge_flag) {
1573 if (s->sh.max_num_merge_cand > 1)
1574 merge_idx = ff_hevc_merge_idx_decode(s);
1578 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1579 partIdx, merge_idx, ¤t_mv);
1580 x_pu = x0 >> s->sps->log2_min_pu_size;
1581 y_pu = y0 >> s->sps->log2_min_pu_size;
1583 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1584 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1585 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
// AMVP path: decode inter_pred_idc, then per-list ref_idx, MVD and MVP
// flag; the predictor plus MVD gives the final motion vector.
1587 enum InterPredIdc inter_pred_idc = PRED_L0;
1588 ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1589 current_mv.pred_flag = 0;
1590 if (s->sh.slice_type == B_SLICE)
1591 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
// List 0 (used for PRED_L0 and PRED_BI).
1593 if (inter_pred_idc != PRED_L1) {
1594 if (s->sh.nb_refs[L0]) {
1595 ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1596 current_mv.ref_idx[0] = ref_idx[0];
1598 current_mv.pred_flag = PF_L0;
1599 ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1600 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1601 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1602 partIdx, merge_idx, ¤t_mv,
1604 current_mv.mv[0].x += lc->pu.mvd.x;
1605 current_mv.mv[0].y += lc->pu.mvd.y;
// List 1 (used for PRED_L1 and PRED_BI).
1608 if (inter_pred_idc != PRED_L0) {
1609 if (s->sh.nb_refs[L1]) {
1610 ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1611 current_mv.ref_idx[1] = ref_idx[1];
// mvd_l1_zero_flag: the L1 MVD is inferred to zero for bi-prediction
// (the zeroing statements are on elided lines).
1614 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1618 ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1621 current_mv.pred_flag += PF_L1;
1622 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1623 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1624 partIdx, merge_idx, ¤t_mv,
1626 current_mv.mv[1].x += lc->pu.mvd.x;
1627 current_mv.mv[1].y += lc->pu.mvd.y;
1630 x_pu = x0 >> s->sps->log2_min_pu_size;
1631 y_pu = y0 >> s->sps->log2_min_pu_size;
1633 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1634 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1635 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
// Resolve the reference frames and, under frame threading, wait until the
// rows this PU reads have been decoded in the reference.
1639 if (current_mv.pred_flag & PF_L0) {
1640 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1643 hevc_await_progress(s, ref0, ¤t_mv.mv[0], y0, nPbH);
1645 if (current_mv.pred_flag & PF_L1) {
1646 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1649 hevc_await_progress(s, ref1, ¤t_mv.mv[1], y0, nPbH);
// Motion compensation proper: uni L0, uni L1, or bi-prediction, with
// chroma coordinates/sizes derived from the subsampling shifts.
1652 if (current_mv.pred_flag == PF_L0) {
1653 int x0_c = x0 >> s->sps->hshift[1];
1654 int y0_c = y0 >> s->sps->vshift[1];
1655 int nPbW_c = nPbW >> s->sps->hshift[1];
1656 int nPbH_c = nPbH >> s->sps->vshift[1];
1658 luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1659 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1660 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1661 s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1663 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1664 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1665 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1666 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1667 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1668 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1669 } else if (current_mv.pred_flag == PF_L1) {
1670 int x0_c = x0 >> s->sps->hshift[1];
1671 int y0_c = y0 >> s->sps->vshift[1];
1672 int nPbW_c = nPbW >> s->sps->hshift[1];
1673 int nPbH_c = nPbH >> s->sps->vshift[1];
1675 luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1676 ¤t_mv.mv[1], x0, y0, nPbW, nPbH,
1677 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1678 s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1680 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1681 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1682 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1684 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1685 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1686 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1687 } else if (current_mv.pred_flag == PF_BI) {
1688 int x0_c = x0 >> s->sps->hshift[1];
1689 int y0_c = y0 >> s->sps->vshift[1];
1690 int nPbW_c = nPbW >> s->sps->hshift[1];
1691 int nPbH_c = nPbH >> s->sps->vshift[1];
1693 luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1694 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1695 ref1->frame, ¤t_mv.mv[1], ¤t_mv);
1697 chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1698 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 0);
1700 chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1701 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 1);
// Derive the luma intra prediction mode for one PU (spec 8.4.2): build the
// 3-entry MPM candidate list from the left/up neighbour modes, then either
// select by mpm_idx or remap rem_intra_luma_pred_mode past the sorted
// candidates. Also writes the mode into s->tab_ipm and marks the covered
// min-PU cells PF_INTRA in tab_mvf. Returns the derived mode.
// NOTE(review): the "uint8_t candidate[3];" / "int i, j;" declarations,
// the "cand_up = INTRA_DC;" body of the CTB-boundary check, several else
// lines and the "intra_pred_mode++;" remap increment are elided here.
1708 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1709 int prev_intra_luma_pred_flag)
1711 HEVCLocalContext *lc = s->HEVClc;
1712 int x_pu = x0 >> s->sps->log2_min_pu_size;
1713 int y_pu = y0 >> s->sps->log2_min_pu_size;
1714 int min_pu_width = s->sps->min_pu_width;
1715 int size_in_pus = pu_size >> s->sps->log2_min_pu_size;
// Position within the CTB, used to test neighbour availability.
1716 int x0b = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1717 int y0b = y0 & ((1 << s->sps->log2_ctb_size) - 1);
// Neighbour modes; unavailable neighbours default to INTRA_DC.
1719 int cand_up = (lc->ctb_up_flag || y0b) ?
1720 s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1721 int cand_left = (lc->ctb_left_flag || x0b) ?
1722 s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;
1724 int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1726 MvField *tab_mvf = s->ref->tab_mvf;
1727 int intra_pred_mode;
1731 // intra_pred_mode prediction does not cross vertical CTB boundaries
1732 if ((y0 - 1) < y_ctb)
// MPM list construction: equal angular neighbours generate +/-1 angular
// neighbours; otherwise the third entry is PLANAR, DC or ANGULAR_26.
1735 if (cand_left == cand_up) {
1736 if (cand_left < 2) {
1737 candidate[0] = INTRA_PLANAR;
1738 candidate[1] = INTRA_DC;
1739 candidate[2] = INTRA_ANGULAR_26;
1741 candidate[0] = cand_left;
1742 candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1743 candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1746 candidate[0] = cand_left;
1747 candidate[1] = cand_up;
1748 if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1749 candidate[2] = INTRA_PLANAR;
1750 } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1751 candidate[2] = INTRA_DC;
1753 candidate[2] = INTRA_ANGULAR_26;
1757 if (prev_intra_luma_pred_flag) {
1758 intra_pred_mode = candidate[lc->pu.mpm_idx];
// Non-MPM: sort candidates, then bump the remainder mode past each
// candidate <= it (increment statement elided).
1760 if (candidate[0] > candidate[1])
1761 FFSWAP(uint8_t, candidate[0], candidate[1]);
1762 if (candidate[0] > candidate[2])
1763 FFSWAP(uint8_t, candidate[0], candidate[2]);
1764 if (candidate[1] > candidate[2])
1765 FFSWAP(uint8_t, candidate[1], candidate[2]);
1767 intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1768 for (i = 0; i < 3; i++)
1769 if (intra_pred_mode >= candidate[i])
1773 /* write the intra prediction units into the mv array */
1776 for (i = 0; i < size_in_pus; i++) {
1777 memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1778 intra_pred_mode, size_in_pus);
1780 for (j = 0; j < size_in_pus; j++) {
1781 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1785 return intra_pred_mode;
1788 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1789 int log2_cb_size, int ct_depth)
1791 int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1792 int x_cb = x0 >> s->sps->log2_min_cb_size;
1793 int y_cb = y0 >> s->sps->log2_min_cb_size;
1796 for (y = 0; y < length; y++)
1797 memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
// Remap table for 4:2:2 chroma intra prediction: maps a luma-style intra
// mode (0..34) to the adjusted chroma mode index (H.265 Table 8-3),
// compensating for the non-square chroma sample grid.
1801 static const uint8_t tab_mode_idx[] = {
1802 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20,
1803 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
// Parse the intra prediction info for a CU: for each luma PU (1 for
// 2Nx2N, 4 for NxN) decode prev_intra_luma_pred_flag plus either mpm_idx
// or rem_intra_luma_pred_mode, derive the luma mode, then decode the
// chroma mode(s) according to chroma_format_idc.
// NOTE(review): the "int log2_cb_size" parameter continuation, the
// "int chroma_mode;" / "int i, j;" declarations, the "mode_idx = 34;"
// branch of the 4:2:2 path and various closing braces are elided here.
1805 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1808 HEVCLocalContext *lc = s->HEVClc;
// intra_chroma_pred_mode values 0..3 map to these luma-style modes; a
// collision with the luma mode is replaced by mode 34 below.
1809 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1810 uint8_t prev_intra_luma_pred_flag[4];
// PART_NxN splits the CU into a 2x2 grid of PUs of half size.
1811 int split = lc->cu.part_mode == PART_NxN;
1812 int pb_size = (1 << log2_cb_size) >> split;
1813 int side = split + 1;
// First pass: all prev_intra_luma_pred_flag bits (CABAC ordering).
1817 for (i = 0; i < side; i++)
1818 for (j = 0; j < side; j++)
1819 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
// Second pass: per-PU mpm_idx / remainder mode, then luma mode derivation.
1821 for (i = 0; i < side; i++) {
1822 for (j = 0; j < side; j++) {
1823 if (prev_intra_luma_pred_flag[2 * i + j])
1824 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1826 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1828 lc->pu.intra_pred_mode[2 * i + j] =
1829 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1830 prev_intra_luma_pred_flag[2 * i + j]);
// 4:4:4: one chroma mode per PU; value 4 means "same as luma (DM)",
// and a table collision with the luma mode yields mode 34.
1834 if (s->sps->chroma_format_idc == 3) {
1835 for (i = 0; i < side; i++) {
1836 for (j = 0; j < side; j++) {
1837 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1838 if (chroma_mode != 4) {
1839 if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1840 lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1842 lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1844 lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
// 4:2:2: a single chroma mode, additionally remapped via tab_mode_idx.
1848 } else if (s->sps->chroma_format_idc == 2) {
1850 lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1851 if (chroma_mode != 4) {
1852 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1855 mode_idx = intra_chroma_table[chroma_mode];
1857 mode_idx = lc->pu.intra_pred_mode[0];
1859 lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
// 4:2:0 (non-monochrome): single chroma mode, no remap table.
1860 } else if (s->sps->chroma_format_idc != 0) {
1861 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1862 if (chroma_mode != 4) {
1863 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1864 lc->pu.intra_pred_mode_c[0] = 34;
1866 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1868 lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
// Fill default intra state for a CB that carries no decoded intra modes
// (e.g. PCM): set every covered min-PU cell of tab_ipm to INTRA_DC, and —
// for intra CUs — mark the cells PF_INTRA in tab_mvf so MV prediction
// treats them as intra.
// NOTE(review): the parameter-list continuation ("int x0, int y0,
// int log2_cb_size)"), the opening brace and the early-return body of the
// size_in_pus == 0 check are elided here.
1873 static void intra_prediction_unit_default_value(HEVCContext *s,
1877 HEVCLocalContext *lc = s->HEVClc;
1878 int pb_size = 1 << log2_cb_size;
1879 int size_in_pus = pb_size >> s->sps->log2_min_pu_size;
1880 int min_pu_width = s->sps->min_pu_width;
1881 MvField *tab_mvf = s->ref->tab_mvf;
1882 int x_pu = x0 >> s->sps->log2_min_pu_size;
1883 int y_pu = y0 >> s->sps->log2_min_pu_size;
// Blocks smaller than the min-PU grid have nothing to record.
1886 if (size_in_pus == 0)
1888 for (j = 0; j < size_in_pus; j++)
1889 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1890 if (lc->cu.pred_mode == MODE_INTRA)
1891 for (j = 0; j < size_in_pus; j++)
1892 for (k = 0; k < size_in_pus; k++)
1893 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
/* Decode one coding unit (CU) — syntax per H.265 coding_unit().
 * Parses cu_transquant_bypass/skip flags, prediction mode, partition
 * mode, the intra or inter PU syntax (including PCM), the residual
 * transform tree, then records QP and CT-depth bookkeeping for the CB.
 * NOTE(review): lossy extract — braces, some local declarations,
 * else/return lines and error checks are missing from this span; the
 * control flow must be read with that in mind. */
1896 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1898     int cb_size = 1 << log2_cb_size;
1899     HEVCLocalContext *lc = s->HEVClc;
1900     int log2_min_cb_size = s->sps->log2_min_cb_size;
         /* CB extent measured in minimum-CB units */
1901     int length = cb_size >> log2_min_cb_size;
1902     int min_cb_width = s->sps->min_cb_width;
1903     int x_cb = x0 >> log2_min_cb_size;
1904     int y_cb = y0 >> log2_min_cb_size;
1905     int idx = log2_cb_size - 2;
         /* mask used to detect crossing of a QP-group boundary */
1906     int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
         /* --- reset per-CU state to defaults before parsing --- */
1911     lc->cu.rqt_root_cbf = 1;
1912     lc->cu.pred_mode = MODE_INTRA;
1913     lc->cu.part_mode = PART_2Nx2N;
1914     lc->cu.intra_split_flag = 0;
1915     lc->cu.pcm_flag = 0;
1917     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
1918     for (x = 0; x < 4; x++)
1919         lc->pu.intra_pred_mode[x] = 1;
1920     if (s->pps->transquant_bypass_enable_flag) {
1921         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
1922         if (lc->cu.cu_transquant_bypass_flag)
1923             set_deblocking_bypass(s, x0, y0, log2_cb_size);
1925         lc->cu.cu_transquant_bypass_flag = 0;
         /* --- skip flag (non-I slices only), fanned out over the CB --- */
1927     if (s->sh.slice_type != I_SLICE) {
1928         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
1930         x = y_cb * min_cb_width + x_cb;
1931         for (y = 0; y < length; y++) {
1932             memset(&s->skip_flag[x], skip_flag, length);
1935         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
         /* skipped CU: single merge PU, default intra tables, no residual */
1938     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1939         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
1940         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
1942         if (!s->sh.disable_deblocking_filter_flag)
1943             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1945         if (s->sh.slice_type != I_SLICE)
1946             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
         /* part_mode is only signalled for inter CUs or minimum-size CUs */
1947         if (lc->cu.pred_mode != MODE_INTRA ||
1948             log2_cb_size == s->sps->log2_min_cb_size) {
1949             lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
1950             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
1951                                       lc->cu.pred_mode == MODE_INTRA;
1954         if (lc->cu.pred_mode == MODE_INTRA) {
             /* PCM is allowed only for 2Nx2N within the configured size range */
1955             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
1956                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
1957                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
1958                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
1960             if (lc->cu.pcm_flag) {
1961                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
1962                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
1963                 if (s->sps->pcm.loop_filter_disable_flag)
1964                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
1969                 intra_prediction_unit(s, x0, y0, log2_cb_size);
1972             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
             /* inter CU: one hls_prediction_unit() call per PU partition */
1973             switch (lc->cu.part_mode) {
1975                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
1978                 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0, idx);
1979                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
1982                 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
1983                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
1986                 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0, idx);
1987                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
1990                 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
1991                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1, idx);
1994                 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0, idx - 2);
1995                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
1998                 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
1999                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1, idx - 2);
2002                 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2003                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2004                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2005                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
         /* --- residual: rqt_root_cbf and the transform tree (non-PCM) --- */
2010         if (!lc->cu.pcm_flag) {
2011             if (lc->cu.pred_mode != MODE_INTRA &&
2012                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2013                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2015             if (lc->cu.rqt_root_cbf) {
2016                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2017                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2018                                          s->sps->max_transform_hierarchy_depth_inter;
2019                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2021                                          log2_cb_size, 0, 0);
2025                 if (!s->sh.disable_deblocking_filter_flag)
2026                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
         /* resolve QP if no cu_qp_delta was coded, then fan it out over the CB */
2031     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2032         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2034     x = y_cb * min_cb_width + x_cb;
2035     for (y = 0; y < length; y++) {
2036         memset(&s->qp_y_tab[x], lc->qp_y, length);
         /* update the QP predictor when a QP-group boundary is crossed */
2040     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2041        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2042         lc->qPy_pred = lc->qp_y;
2045     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
/* Recursively decode the coding quadtree — H.265 coding_quadtree().
 * Either splits the current block into four quadrants (recursing into
 * each one that lies inside the picture) or decodes a coding unit at
 * this depth, then evaluates end_of_slice at CTB boundaries.
 * Return value: >0 means more CTB data follows, 0 end of slice segment,
 * <0 error (per the callers' checks on more_data).
 * NOTE(review): lossy extract — braces, some declarations and error
 * propagation lines are not visible in this span. */
2050 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2051                                int log2_cb_size, int cb_depth)
2053     HEVCLocalContext *lc = s->HEVClc;
2054     const int cb_size = 1 << log2_cb_size;
2056     int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
2059     lc->ct.depth = cb_depth;
         /* split_cu_flag is only coded when the block fits in the picture
          * and is above minimum CB size; otherwise it is inferred */
2060     if (x0 + cb_size <= s->sps->width &&
2061         y0 + cb_size <= s->sps->height &&
2062         log2_cb_size > s->sps->log2_min_cb_size) {
2063         split_cu_flag = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2065         split_cu_flag = (log2_cb_size > s->sps->log2_min_cb_size);
         /* entering a new QP group: reset delta-QP state */
2067     if (s->pps->cu_qp_delta_enabled_flag &&
2068         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2069         lc->tu.is_cu_qp_delta_coded = 0;
2070         lc->tu.cu_qp_delta = 0;
2073     if (split_cu_flag) {
2074         const int cb_size_split = cb_size >> 1;
2075         const int x1 = x0 + cb_size_split;
2076         const int y1 = y0 + cb_size_split;
         /* recurse into the four quadrants; the lower/right ones are only
          * visited while they start inside the picture */
2080         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2084         if (more_data && x1 < s->sps->width) {
2085             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2089         if (more_data && y1 < s->sps->height) {
2090             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2094         if (more_data && x1 < s->sps->width &&
2095             y1 < s->sps->height) {
2096             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
         /* QP predictor update at QP-group boundaries */
2101         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2102            ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2103             lc->qPy_pred = lc->qp_y;
         /* more data follows if any part of the split block is inside
          * the picture beyond this quadrant */
2106         return ((x1 + cb_size_split) < s->sps->width ||
2107                 (y1 + cb_size_split) < s->sps->height);
2111         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
         /* end_of_slice_flag is evaluated at the last CB of a CTB
          * (or at the right/bottom picture border) */
2114         if ((!((x0 + cb_size) %
2115                (1 << (s->sps->log2_ctb_size))) ||
2116              (x0 + cb_size >= s->sps->width)) &&
2118                (1 << (s->sps->log2_ctb_size))) ||
2119              (y0 + cb_size >= s->sps->height))) {
2120             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2121             return !end_of_slice_flag;
/* Compute neighbour-availability state for the CTB at (x_ctb, y_ctb):
 * slice/tile boundary flags and the left/up/up-left/up-right CTB
 * availability flags used by CABAC context derivation and prediction.
 * NOTE(review): lossy extract — braces and some else lines are missing. */
2130 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2133     HEVCLocalContext *lc = s->HEVClc;
2134     int ctb_size = 1 << s->sps->log2_ctb_size;
2135     int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
         /* CTB address relative to the start of the current slice */
2136     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2138     int tile_left_boundary, tile_up_boundary;
2139     int slice_left_boundary, slice_up_boundary;
         /* record which slice owns this CTB (used for boundary tests below) */
2141     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2143     if (s->pps->entropy_coding_sync_enabled_flag) {
             /* WPP: each CTB row restarts the QP group at its first CTB */
2144         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2145             lc->first_qp_group = 1;
2146         lc->end_of_tiles_x = s->sps->width;
2147     } else if (s->pps->tiles_enabled_flag) {
             /* entering a new tile: recompute its right edge in samples */
2148         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2149             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2150             lc->end_of_tiles_x = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2151             lc->first_qp_group = 1;
2154         lc->end_of_tiles_x = s->sps->width;
2157     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
         /* boundary flags: compare tile ids / slice addresses of this CTB
          * with its left and upper neighbours */
2159     if (s->pps->tiles_enabled_flag) {
2160         tile_left_boundary = x_ctb > 0 &&
2161                              s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]];
2162         slice_left_boundary = x_ctb > 0 &&
2163                               s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1];
2164         tile_up_boundary = y_ctb > 0 &&
2165                            s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2166         slice_up_boundary = y_ctb > 0 &&
2167                             s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2169         tile_left_boundary =
2170         tile_up_boundary = 0;
2171         slice_left_boundary = ctb_addr_in_slice <= 0;
2172         slice_up_boundary = ctb_addr_in_slice < s->sps->ctb_width;
         /* pack into bit 0 = slice boundary, bit 1 = tile boundary */
2174     lc->slice_or_tiles_left_boundary = slice_left_boundary + (tile_left_boundary << 1);
2175     lc->slice_or_tiles_up_boundary = slice_up_boundary + (tile_up_boundary << 1);
         /* neighbour CTB availability for prediction/CABAC */
2176     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !tile_left_boundary);
2177     lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !tile_up_boundary);
2178     lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2179     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
/* Single-threaded slice-data worker (run via avctx->execute): decodes
 * all CTBs of the current slice segment in tile-scan order, running the
 * in-loop filters behind the decode position.
 * NOTE(review): lossy extract — braces, declarations of x_ctb/y_ctb/
 * more_data and the final return are not visible. */
2182 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2184     HEVCContext *s = avctxt->priv_data;
2185     int ctb_size = 1 << s->sps->log2_ctb_size;
2189     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
         /* a dependent slice segment cannot be the first segment */
2191     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2192         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2193         return AVERROR_INVALIDDATA;
         /* a dependent segment must continue the preceding slice */
2196     if (s->sh.dependent_slice_segment_flag) {
2197         int prev_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2198         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2199             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2200             return AVERROR_INVALIDDATA;
         /* main CTB loop, tile-scan order */
2204     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2205         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2207         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2208         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2209         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2211         ff_hevc_cabac_init(s, ctb_addr_ts);
2213         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
             /* per-CTB deblocking parameters come from the slice header */
2215         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2216         s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2217         s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2219         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2220         if (more_data < 0) {
                 /* mark the CTB as unowned so later error concealment /
                  * boundary checks skip it */
2221             s->tab_slice_address[ctb_addr_rs] = -1;
2227         ff_hevc_save_states(s, ctb_addr_ts);
2228         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
         /* filter the last CTB once the bottom-right corner is reached */
2231     if (x_ctb + ctb_size >= s->sps->width &&
2232         y_ctb + ctb_size >= s->sps->height)
2233         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
/* Decode the current slice's CTB data on a single thread by dispatching
 * hls_decode_entry through avctx->execute.
 * NOTE(review): lossy extract — the declarations of arg/ret, their
 * initialization and the return statement are not visible here;
 * presumably the return is derived from ret[0] — TODO confirm. */
2238 static int hls_slice_data(HEVCContext *s)
2246     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
/* WPP (wavefront) worker: decodes one CTB row of the slice using a
 * per-thread HEVCContext copy (s1->sList[self_id]). Synchronizes with
 * the row above via ff_thread_await_progress2/report_progress2 and
 * aborts all rows through the shared wpp_err atomic on error.
 * NOTE(review): lossy extract — braces, some declarations (lc/more_data/
 * ret) and several return statements are not visible in this span. */
2249 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2251     HEVCContext *s1 = avctxt->priv_data, *s;
2252     HEVCLocalContext *lc;
2253     int ctb_size = 1<< s1->sps->log2_ctb_size;
2255     int *ctb_row_p = input_ctb_row;
2256     int ctb_row = ctb_row_p[job];
2257     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->sps->width + ctb_size - 1) >> s1->sps->log2_ctb_size);
2258     int ctb_addr_ts = s1->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2259     int thread = ctb_row % s1->threads_number;
2262     s = s1->sList[self_id];
         /* non-first rows start at their slice-header entry point offset */
2266         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2270         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2273     while(more_data && ctb_addr_ts < s->sps->ctb_size) {
2274         int x_ctb = (ctb_addr_rs % s->sps->ctb_width) << s->sps->log2_ctb_size;
2275         int y_ctb = (ctb_addr_rs / s->sps->ctb_width) << s->sps->log2_ctb_size;
2277         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
             /* wait until the row above is at least SHIFT_CTB_WPP ahead */
2279         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
             /* another row hit an error: release waiters and bail out */
2281         if (avpriv_atomic_int_get(&s1->wpp_err)){
2282             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2286         ff_hevc_cabac_init(s, ctb_addr_ts);
2287         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2288         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2290         if (more_data < 0) {
2291             s->tab_slice_address[ctb_addr_rs] = -1;
2297         ff_hevc_save_states(s, ctb_addr_ts);
             /* publish progress so the row below may proceed */
2298         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2299         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
             /* slice data ended before the row did: flag a wavefront error */
2301         if (!more_data && (x_ctb+ctb_size) < s->sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2302             avpriv_atomic_int_set(&s1->wpp_err, 1);
2303             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
             /* bottom-right CTB of the picture: run the final filter pass */
2307         if ((x_ctb+ctb_size) >= s->sps->width && (y_ctb+ctb_size) >= s->sps->height ) {
2308             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2309             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2312         ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
             /* end of this row reached */
2315         if(x_ctb >= s->sps->width) {
2319     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/* Set up and run wavefront-parallel slice decoding: clones one
 * HEVCContext per extra thread, converts the slice-header entry point
 * offsets into per-row (offset, size) pairs while compensating for
 * emulation-prevention bytes skipped during RBSP extraction, then runs
 * hls_decode_entry_wpp over all rows via avctx->execute2.
 * NOTE(review): lossy extract — allocation checks, the arg/ret fill
 * loop, the return-value reduction and the frees are not visible. */
2324 static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
2326     HEVCLocalContext *lc = s->HEVClc;
2327     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2328     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2330     int startheader, cmpt = 0;
2335     ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
         /* lazily create per-thread context clones on first use */
2338     for (i = 1; i < s->threads_number; i++) {
2339         s->sList[i] = av_malloc(sizeof(HEVCContext));
2340         memcpy(s->sList[i], s, sizeof(HEVCContext));
2341         s->HEVClcList[i] = av_malloc(sizeof(HEVCLocalContext));
2342         s->sList[i]->HEVClc = s->HEVClcList[i];
         /* byte offset of the current bitstream position in the NAL */
2346     offset = (lc->gb.index >> 3);
         /* count emulation-prevention bytes (skipped_bytes_pos) inside each
          * entry-point span; entry point offsets refer to the escaped NAL,
          * while we index into the unescaped RBSP, hence the cmpt correction */
2348     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < s->skipped_bytes; j++) {
2349         if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2355     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2356         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2357         for (j = 0, cmpt = 0, startheader = offset
2358              + s->sh.entry_point_offset[i]; j < s->skipped_bytes; j++) {
2359             if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2364         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2365         s->sh.offset[i - 1] = offset;
         /* the last row runs to the end of the slice data */
2368     if (s->sh.num_entry_point_offsets != 0) {
2369         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2370         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2371         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
         /* refresh the clones with the current frame state, but keep their
          * own local contexts */
2376     for (i = 1; i < s->threads_number; i++) {
2377         s->sList[i]->HEVClc->first_qp_group = 1;
2378         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2379         memcpy(s->sList[i], s, sizeof(HEVCContext));
2380         s->sList[i]->HEVClc = s->HEVClcList[i];
2383     avpriv_atomic_int_set(&s->wpp_err, 0);
2384     ff_reset_entries(s->avctx);
2386     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2391     if (s->pps->entropy_coding_sync_enabled_flag)
2392         s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2394     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2402 * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2403 * 0 if the unit should be skipped, 1 otherwise
2405 static int hls_nal_unit(HEVCContext *s)
2407 GetBitContext *gb = &s->HEVClc->gb;
2410 if (get_bits1(gb) != 0)
2411 return AVERROR_INVALIDDATA;
2413 s->nal_unit_type = get_bits(gb, 6);
2415 nuh_layer_id = get_bits(gb, 6);
2416 s->temporal_id = get_bits(gb, 3) - 1;
2417 if (s->temporal_id < 0)
2418 return AVERROR_INVALIDDATA;
2420 av_log(s->avctx, AV_LOG_DEBUG,
2421 "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2422 s->nal_unit_type, nuh_layer_id, s->temporal_id);
2424 return nuh_layer_id == 0;
/* Attach frame side data derived from SEI messages to the current
 * output frame: stereo 3D packing info (frame packing arrangement SEI)
 * and a display transformation matrix (display orientation SEI).
 * Returns 0 on success or AVERROR(ENOMEM).
 * NOTE(review): lossy extract — braces, case labels and NULL checks on
 * the side-data allocations are not visible in this span. */
2427 static int set_side_data(HEVCContext *s)
2429     AVFrame *out = s->ref->frame;
         /* frame-packing SEI: only arrangement types 3..5 with a valid
          * content_interpretation_type are exported */
2431     if (s->sei_frame_packing_present &&
2432         s->frame_packing_arrangement_type >= 3 &&
2433         s->frame_packing_arrangement_type <= 5 &&
2434         s->content_interpretation_type > 0 &&
2435         s->content_interpretation_type < 3) {
2436         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2438             return AVERROR(ENOMEM);
2440         switch (s->frame_packing_arrangement_type) {
2442             if (s->quincunx_subsampling)
2443                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2445                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2448             stereo->type = AV_STEREO3D_TOPBOTTOM;
2451             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
             /* content_interpretation_type == 2 means right view first */
2455         if (s->content_interpretation_type == 2)
2456             stereo->flags = AV_STEREO3D_FLAG_INVERT;
         /* display-orientation SEI: rotation is a 16.16 fixed-point count
          * of anticlockwise turns, converted here to degrees */
2459     if (s->sei_display_orientation_present &&
2460         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2461         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2462         AVFrameSideData *rotation = av_frame_new_side_data(out,
2463                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2464                                                            sizeof(int32_t) * 9);
2466             return AVERROR(ENOMEM);
2468         av_display_rotation_set((int32_t *)rotation->data, angle);
2469         av_display_matrix_flip((int32_t *)rotation->data,
2470                                s->sei_vflip, s->sei_hflip);
/* Per-frame initialization: clears the deblocking/CBF/PCM/slice-address
 * tables, sets up the new reference frame, builds the frame RPS,
 * attaches side data and outputs any frame that became ready.
 * NOTE(review): lossy extract — braces, several error-path lines
 * (goto fail / return ret) and some declarations are not visible. */
2476 static int hevc_frame_start(HEVCContext *s)
2478     HEVCLocalContext *lc = s->HEVClc;
2479     int pic_size_in_ctb = ((s->sps->width >> s->sps->log2_min_cb_size) + 1) *
2480                           ((s->sps->height >> s->sps->log2_min_cb_size) + 1);
         /* reset per-frame tables; tab_slice_address = -1 marks "no slice" */
2484     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2485     memset(s->vertical_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2486     memset(s->cbf_luma, 0, s->sps->min_tb_width * s->sps->min_tb_height);
2487     memset(s->is_pcm, 0, s->sps->min_pu_width * s->sps->min_pu_height);
2488     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
         /* remembered so later VCL NALUs of the frame can be type-checked */
2491     s->first_nal_type = s->nal_unit_type;
2493     if (s->pps->tiles_enabled_flag)
2494         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
         /* decode into the SAO frame when SAO is active, else directly */
2496     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2501     ret = ff_hevc_frame_rps(s);
2503         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2507     s->ref->frame->key_frame = IS_IRAP(s);
2509     ret = set_side_data(s);
2513     cur_frame = s->sps->sao_enabled ? s->sao_frame : s->frame;
2514     cur_frame->pict_type = 3 - s->sh.slice_type;
         /* bump any frame that is now ready for output */
2516     av_frame_unref(s->output_frame);
2517     ret = ff_hevc_output_frame(s, s->output_frame, 0);
         /* frame-threading: setup is finished, other threads may proceed */
2521     ff_thread_finish_setup(s->avctx);
         /* (error path) unblock consumers waiting on this frame */
2526     if (s->ref && s->threads_type == FF_THREAD_FRAME)
2527         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/* Decode one NAL unit: parses the NAL header, then dispatches on
 * nal_unit_type to the parameter-set/SEI parsers or, for VCL NAL units,
 * parses the slice header, handles RASL/BLA/CRA recovery-point logic,
 * starts the frame if needed and decodes the slice data.
 * NOTE(review): lossy extract — braces, case labels, goto fail lines
 * and several returns are not visible in this span. */
2532 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2534     HEVCLocalContext *lc = s->HEVClc;
2535     GetBitContext *gb = &lc->gb;
2536     int ctb_addr_ts, ret;
2538     ret = init_get_bits8(gb, nal, length);
2542     ret = hls_nal_unit(s);
2544         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
         /* non-VCL units: parameter sets and SEI */
2550     switch (s->nal_unit_type) {
2552         ret = ff_hevc_decode_nal_vps(s);
2557         ret = ff_hevc_decode_nal_sps(s);
2562         ret = ff_hevc_decode_nal_pps(s);
2566     case NAL_SEI_PREFIX:
2567     case NAL_SEI_SUFFIX:
2568         ret = ff_hevc_decode_nal_sei(s);
         /* VCL units (slices) */
2579     case NAL_BLA_W_RADL:
2581     case NAL_IDR_W_RADL:
2588         ret = hls_slice_header(s);
         /* first random access point: decide whether leading pictures
          * must be dropped (CRA/BLA) */
2592         if (s->max_ra == INT_MAX) {
2593             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2597                 s->max_ra = INT_MIN;
         /* RASL pictures preceding the recovery point are skipped */
2601         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2602             s->poc <= s->max_ra) {
2606             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2607                 s->max_ra = INT_MIN;
2610         if (s->sh.first_slice_in_pic_flag) {
2611             ret = hevc_frame_start(s);
2614         } else if (!s->ref) {
2615             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
         /* all VCL units of one frame must share the same NAL type */
2619         if (s->nal_unit_type != s->first_nal_type) {
2620             av_log(s->avctx, AV_LOG_ERROR,
2621                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2622                    s->first_nal_type, s->nal_unit_type);
2623             return AVERROR_INVALIDDATA;
2626         if (!s->sh.dependent_slice_segment_flag &&
2627             s->sh.slice_type != I_SLICE) {
2628             ret = ff_hevc_slice_rpl(s);
2630                 av_log(s->avctx, AV_LOG_WARNING,
2631                        "Error constructing the reference lists for the current slice.\n");
         /* wavefront decoding only when entry points are available */
2636         if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2637             ctb_addr_ts = hls_slice_data_wpp(s, nal, length);
2639             ctb_addr_ts = hls_slice_data(s);
         /* the whole picture is decoded once all CTBs were consumed */
2640         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2644         if (ctb_addr_ts < 0) {
         /* EOS/EOB: next picture starts a new coded sequence */
2651         s->seq_decode = (s->seq_decode + 1) & 0xff;
2652         s->max_ra = INT_MAX;
2658         av_log(s->avctx, AV_LOG_INFO,
2659                "Skipping NAL unit %d\n", s->nal_unit_type);
         /* (fail path) honor explode-on-error policy */
2664     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2669 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2670 * between these functions would be nice. */
/* Extract the RBSP from an escaped NAL unit: strips 00 00 03 emulation
 * prevention bytes into nal->rbsp_buffer and records the positions of
 * the removed bytes in s->skipped_bytes_pos (needed later to fix up the
 * WPP entry point offsets). Fast vectorized scan for the first zero run
 * when unaligned loads are available.
 * NOTE(review): lossy extract — braces, the no-escape shortcut body,
 * nal->data/size assignments and the return are not visible here. */
2671 int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
2677     s->skipped_bytes = 0;
2678 #define STARTCODE_TEST                                                  \
2679         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2680             if (src[i + 2] != 3) {                                      \
2681                 /* startcode, so we must be past the end */             \
2686 #if HAVE_FAST_UNALIGNED
2687 #define FIND_FIRST_ZERO                                                 \
2688         if (i > 0 && !src[i])                                           \
         /* 64-bit stride: bit tricks detect a zero byte in each word */
2693     for (i = 0; i + 1 < length; i += 9) {
2694         if (!((~AV_RN64A(src + i) &
2695                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2696                0x8000800080008080ULL))
         /* 32-bit fallback of the same zero-byte scan */
2703     for (i = 0; i + 1 < length; i += 5) {
2704         if (!((~AV_RN32A(src + i) &
2705                (AV_RN32A(src + i) - 0x01000101U)) &
2712 #endif /* HAVE_FAST_64BIT */
         /* plain byte-wise scan when unaligned loads are unavailable */
2714     for (i = 0; i + 1 < length; i += 2) {
2717         if (i > 0 && src[i - 1] == 0)
2721 #endif /* HAVE_FAST_UNALIGNED */
2723     if (i >= length - 1) { // no escaped 0
2729     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2730                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2731     if (!nal->rbsp_buffer)
2732         return AVERROR(ENOMEM);
2734     dst = nal->rbsp_buffer;
         /* copy the clean prefix, then de-escape the remainder */
2736     memcpy(dst, src, i);
2738     while (si + 2 < length) {
2739         // remove escapes (very rare 1:2^22)
2740         if (src[si + 2] > 3) {
2741             dst[di++] = src[si++];
2742             dst[di++] = src[si++];
2743         } else if (src[si] == 0 && src[si + 1] == 0) {
2744             if (src[si + 2] == 3) { // escape
                 /* remember where each emulation byte was removed */
2750                 if (s->skipped_bytes_pos_size < s->skipped_bytes) {
2751                     s->skipped_bytes_pos_size *= 2;
2752                     av_reallocp_array(&s->skipped_bytes_pos,
2753                                       s->skipped_bytes_pos_size,
2754                                       sizeof(*s->skipped_bytes_pos));
2755                     if (!s->skipped_bytes_pos)
2756                         return AVERROR(ENOMEM);
2758                 if (s->skipped_bytes_pos)
2759                     s->skipped_bytes_pos[s->skipped_bytes-1] = di - 1;
2761             } else // next start code
2765         dst[di++] = src[si++];
2768         dst[di++] = src[si++];
         /* zero padding so downstream bit readers can overread safely */
2771     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
/* Split one input packet into NAL units (handling both length-prefixed
 * "NALFF" and Annex-B start-code framing), unescape each into s->nals,
 * then decode them in order. Per-NAL skipped-byte bookkeeping is
 * swapped in and out around each unit.
 * NOTE(review): lossy extract — braces, goto fail lines, buf/length
 * advancement and the return are not visible in this span. */
2778 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2780     int i, consumed, ret = 0;
2783     s->last_eos = s->eos;
2786     /* split the input packet into NAL units, so we know the upper bound on the
2787      * number of slices in the frame */
2789     while (length >= 4) {
2791         int extract_length = 0;
             /* NALFF framing: big-endian length prefix of nal_length_size bytes */
2795             for (i = 0; i < s->nal_length_size; i++)
2796                 extract_length = (extract_length << 8) | buf[i];
2797             buf += s->nal_length_size;
2798             length -= s->nal_length_size;
2800             if (extract_length > length) {
2801                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2802                 ret = AVERROR_INVALIDDATA;
2806             /* search start code */
2807             while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2811                     av_log(s->avctx, AV_LOG_ERROR, "No start code is found.\n");
2812                     ret = AVERROR_INVALIDDATA;
2822             extract_length = length;
             /* grow the NAL array and its skipped-bytes side arrays in sync */
2824         if (s->nals_allocated < s->nb_nals + 1) {
2825             int new_size = s->nals_allocated + 1;
2826             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2828                 ret = AVERROR(ENOMEM);
2832             memset(s->nals + s->nals_allocated, 0,
2833                    (new_size - s->nals_allocated) * sizeof(*tmp));
2834             av_reallocp_array(&s->skipped_bytes_nal, new_size, sizeof(*s->skipped_bytes_nal));
2835             av_reallocp_array(&s->skipped_bytes_pos_size_nal, new_size, sizeof(*s->skipped_bytes_pos_size_nal));
2836             av_reallocp_array(&s->skipped_bytes_pos_nal, new_size, sizeof(*s->skipped_bytes_pos_nal));
2837             s->skipped_bytes_pos_size_nal[s->nals_allocated] = 1024; // initial buffer size
2838             s->skipped_bytes_pos_nal[s->nals_allocated] = av_malloc_array(s->skipped_bytes_pos_size_nal[s->nals_allocated], sizeof(*s->skipped_bytes_pos))
;
             /* swap this NAL's skipped-bytes state into the context */
2839             s->nals_allocated = new_size;
2841         s->skipped_bytes_pos_size = s->skipped_bytes_pos_size_nal[s->nb_nals];
2842         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[s->nb_nals];
2843         nal = &s->nals[s->nb_nals];
2845         consumed = ff_hevc_extract_rbsp(s, buf, extract_length, nal);
             /* and swap the (possibly reallocated) state back out */
2847         s->skipped_bytes_nal[s->nb_nals] = s->skipped_bytes;
2848         s->skipped_bytes_pos_size_nal[s->nb_nals] = s->skipped_bytes_pos_size;
2849         s->skipped_bytes_pos_nal[s->nb_nals++] = s->skipped_bytes_pos;
             /* peek at the NAL header to detect end-of-sequence/bitstream */
2857         ret = init_get_bits8(&s->HEVClc->gb, nal->data, nal->size);
2862         if (s->nal_unit_type == NAL_EOB_NUT ||
2863             s->nal_unit_type == NAL_EOS_NUT)
2870     /* parse the NAL units */
2871     for (i = 0; i < s->nb_nals; i++) {
2873         s->skipped_bytes = s->skipped_bytes_nal[i];
2874         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
2876         ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2878             av_log(s->avctx, AV_LOG_WARNING,
2879                    "Error parsing NAL unit #%d.\n", i);
         /* (fail path) unblock frame-threading consumers on error */
2885     if (s->ref && s->threads_type == FF_THREAD_FRAME)
2886         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/**
 * Log a 16-byte MD5 digest as 32 lowercase hex characters at the given
 * log level, with no trailing newline.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
/* Verify the decoded frame against the per-plane MD5 checksums carried
 * in the picture-hash SEI (s->md5). For >8bpp formats the samples are
 * byteswapped into a scratch buffer first, since the SEI checksums are
 * computed over little-endian sample data.
 * Returns 0 on match, AVERROR_INVALIDDATA on mismatch, other AVERROR
 * on setup failure.
 * NOTE(review): lossy extract — braces and some declarations (i, j,
 * pixel_shift, md5[16]) are not visible in this span. */
2898 static int verify_md5(HEVCContext *s, AVFrame *frame)
2900     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2905         return AVERROR(EINVAL);
         /* 1 when samples are wider than 8 bits (2 bytes per sample) */
2907     pixel_shift = desc->comp[0].depth_minus1 > 7;
2909     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2912     /* the checksums are LE, so we have to byteswap for >8bpp formats
2915     if (pixel_shift && !s->checksum_buf) {
2916         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2917                        FFMAX3(frame->linesize[0], frame->linesize[1],
2918                               frame->linesize[2]));
2919         if (!s->checksum_buf)
2920             return AVERROR(ENOMEM);
         /* one MD5 per plane, over coded (not cropped) dimensions */
2924     for (i = 0; frame->data[i]; i++) {
2925         int width = s->avctx->coded_width;
2926         int height = s->avctx->coded_height;
2927         int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
2928         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2931         av_md5_init(s->md5_ctx);
2932         for (j = 0; j < h; j++) {
2933             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2936                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2937                                     (const uint16_t *) src, w);
2938                 src = s->checksum_buf;
2941             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2943         av_md5_final(s->md5_ctx, md5);
2945         if (!memcmp(md5, s->md5[i], 16)) {
2946             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2947             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2948             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2950             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2951             print_md5(s->avctx, AV_LOG_ERROR, md5);
2952             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2953             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2954             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2955             return AVERROR_INVALIDDATA;
2959     av_log(s->avctx, AV_LOG_DEBUG, "\n");
/* Decoder entry point (AVCodec.decode): a NULL/empty packet flushes one
 * buffered frame; otherwise the packet's NAL units are decoded, the
 * optional SEI MD5 checksum is verified, and a completed output frame
 * (if any) is moved into *data with *got_output set.
 * NOTE(review): lossy extract — braces, the flush branch details,
 * got_output assignments and the "return avpkt->size" are not visible. */
2964 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2968     HEVCContext *s = avctx->priv_data;
         /* flush path: drain a pending frame from the DPB */
2971         ret = ff_hevc_output_frame(s, data, 1);
2980     ret = decode_nal_units(s, avpkt->data, avpkt->size);
2984     /* verify the SEI checksum */
2985     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2987         ret = verify_md5(s, s->ref->frame);
2988         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
             /* corrupt frame: drop the reference before erroring out */
2989             ff_hevc_unref_frame(s, s->ref, ~0);
2995     if (s->is_decoded) {
2996         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
         /* hand a ready frame to the caller */
3000     if (s->output_frame->buf[0]) {
3001         av_frame_move_ref(data, s->output_frame);
/* Make dst a new reference to src: refs the ThreadFrame and the
 * tab_mvf/rpl_tab/rpl buffers, then copies the plain metadata fields.
 * On any allocation failure everything acquired so far is released via
 * ff_hevc_unref_frame and AVERROR(ENOMEM) is returned.
 * NOTE(review): lossy extract — braces, "goto fail" lines and the
 * "return 0" are not visible in this span. */
3008 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3012     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3016     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3017     if (!dst->tab_mvf_buf)
3019     dst->tab_mvf = src->tab_mvf;
3021     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3022     if (!dst->rpl_tab_buf)
3024     dst->rpl_tab = src->rpl_tab;
3026     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
         /* shallow-copied metadata (not reference counted) */
3030     dst->poc = src->poc;
3031     dst->ctb_count = src->ctb_count;
3032     dst->window = src->window;
3033     dst->flags = src->flags;
3034     dst->sequence = src->sequence;
         /* (fail path) undo partial referencing */
3038     ff_hevc_unref_frame(s, dst, ~0);
3039     return AVERROR(ENOMEM);
/* Decoder teardown (AVCodec.close): frees all per-context allocations —
 * skipped-bytes arrays, CABAC state, frames, DPB, parameter-set lists,
 * slice-header arrays, per-thread contexts and the NAL RBSP buffers.
 * NOTE(review): lossy extract — braces, pic_arrays_free() and some
 * unref/free calls between the visible lines are not shown. */
3042 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3044     HEVCContext *s = avctx->priv_data;
3045     HEVCLocalContext *lc = s->HEVClc;
3050     av_freep(&s->md5_ctx);
3052     for(i=0; i < s->nals_allocated; i++) {
3053         av_freep(&s->skipped_bytes_pos_nal[i]);
3055     av_freep(&s->skipped_bytes_pos_size_nal);
3056     av_freep(&s->skipped_bytes_nal);
3057     av_freep(&s->skipped_bytes_pos_nal);
3059     av_freep(&s->cabac_state);
3061     av_frame_free(&s->tmp_frame);
3062     av_frame_free(&s->output_frame);
         /* release every DPB slot before freeing its frame */
3064     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3065         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3066         av_frame_free(&s->DPB[i].frame);
3069     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3070         av_buffer_unref(&s->vps_list[i]);
3071     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3072         av_buffer_unref(&s->sps_list[i]);
3073     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3074         av_buffer_unref(&s->pps_list[i]);
3079     av_buffer_unref(&s->current_sps);
3081     av_freep(&s->sh.entry_point_offset);
3082     av_freep(&s->sh.offset);
3083     av_freep(&s->sh.size);
         /* per-thread clones created by hls_slice_data_wpp */
3085     for (i = 1; i < s->threads_number; i++) {
3086         lc = s->HEVClcList[i];
3088             av_freep(&s->HEVClcList[i]);
3089             av_freep(&s->sList[i]);
         /* HEVClc may alias HEVClcList[0]; avoid a double free */
3092     if (s->HEVClc == s->HEVClcList[0])
3094     av_freep(&s->HEVClcList[0]);
3096     for (i = 0; i < s->nals_allocated; i++)
3097         av_freep(&s->nals[i].rbsp_buffer);
3099     s->nals_allocated = 0;
/* Allocate the per-context state shared by init and frame-thread
 * update: local context, CABAC state, scratch/output frames, DPB frame
 * structs, MD5 context. Any failure tears everything down via
 * hevc_decode_free and returns AVERROR(ENOMEM).
 * NOTE(review): lossy extract — braces, some NULL checks and
 * "goto fail" lines are not visible in this span. */
3104 static av_cold int hevc_init_context(AVCodecContext *avctx)
3106     HEVCContext *s = avctx->priv_data;
3111     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
         /* slot 0 of the per-thread list is the main local context */
3114     s->HEVClcList[0] = s->HEVClc;
3117     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3118     if (!s->cabac_state)
3121     s->tmp_frame = av_frame_alloc();
3125     s->output_frame = av_frame_alloc();
3126     if (!s->output_frame)
3129     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3130         s->DPB[i].frame = av_frame_alloc();
3131         if (!s->DPB[i].frame)
3133         s->DPB[i].tf.f = s->DPB[i].frame;
         /* INT_MAX = "no random access point seen yet" */
3136     s->max_ra = INT_MAX;
3138     s->md5_ctx = av_md5_alloc();
3142     ff_bswapdsp_init(&s->bdsp);
3144     s->context_initialized = 1;
         /* (fail path) free everything allocated so far */
3150     hevc_decode_free(avctx);
3151     return AVERROR(ENOMEM);
/* Frame-threading hook: copies decoding state from the source thread's
 * context (s0) into this thread's context (s) before the next frame is
 * decoded -- DPB references, parameter-set buffers, sequence counters and
 * threading configuration.
 * NOTE(review): elided excerpt -- error checks after hevc_init_context()/
 * hevc_ref_frame()/set_sps(), several closing braces and the final return
 * fall in the listing gaps. */
3154 static int hevc_update_thread_context(AVCodecContext *dst,
3155 const AVCodecContext *src)
3157 HEVCContext *s = dst->priv_data;
3158 HEVCContext *s0 = src->priv_data;
/* Lazily initialize this thread's context on first use. */
3161 if (!s->context_initialized) {
3162 ret = hevc_init_context(dst);
/* Re-reference the source DPB: drop our old refs, then take new ones for
 * every slot the source has populated. */
3167 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3168 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3169 if (s0->DPB[i].frame->buf[0]) {
3170 ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3176 if (s->sps != s0->sps)
/* Mirror the VPS/SPS/PPS lists by unref'ing ours and ref'ing the
 * source's buffers slot by slot. */
3178 for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3179 av_buffer_unref(&s->vps_list[i]);
3180 if (s0->vps_list[i]) {
3181 s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3182 if (!s->vps_list[i])
3183 return AVERROR(ENOMEM);
3187 for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3188 av_buffer_unref(&s->sps_list[i]);
3189 if (s0->sps_list[i]) {
3190 s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3191 if (!s->sps_list[i])
3192 return AVERROR(ENOMEM);
3196 for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3197 av_buffer_unref(&s->pps_list[i]);
3198 if (s0->pps_list[i]) {
3199 s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3200 if (!s->pps_list[i])
3201 return AVERROR(ENOMEM);
/* Drop current_sps only when s->sps still points into it; otherwise a
 * live SPS would be freed under us. */
3205 if (s->current_sps && s->sps == (HEVCSPS*)s->current_sps->data)
3207 av_buffer_unref(&s->current_sps);
/* Re-derive geometry-dependent state if the active SPS changed. */
3209 if (s->sps != s0->sps)
3210 ret = set_sps(s, s0->sps);
3212 s->seq_decode = s0->seq_decode;
3213 s->seq_output = s0->seq_output;
3214 s->pocTid0 = s0->pocTid0;
3215 s->max_ra = s0->max_ra;
3218 s->is_nalff = s0->is_nalff;
3219 s->nal_length_size = s0->nal_length_size;
3221 s->threads_number = s0->threads_number;
3222 s->threads_type = s0->threads_type;
/* (Guarded by an elided eos check:) bump the sequence id mod 256 and
 * reset the RASL/RADL skip threshold after an end-of-sequence. */
3225 s->seq_decode = (s->seq_decode + 1) & 0xff;
3226 s->max_ra = INT_MAX;
/* Parses codec extradata at init time.  Two layouts are recognized: the
 * MP4-style hvcC configuration record (detected by the heuristic below)
 * or plain Annex-B NAL units, which are handed to decode_nal_units()
 * verbatim.
 * NOTE(review): elided excerpt -- GetByteContext/ret declarations, some
 * closing braces and the final return are in the listing gaps. */
3232 static int hevc_decode_extradata(HEVCContext *s)
3234 AVCodecContext *avctx = s->avctx;
3238 bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
/* hvcC heuristic: an Annex-B start code begins 00 00 01 / 00 00 00 01,
 * so any other leading byte pattern is treated as hvcC. */
3240 if (avctx->extradata_size > 3 &&
3241 (avctx->extradata[0] || avctx->extradata[1] ||
3242 avctx->extradata[2] > 1)) {
3243 /* It seems the extradata is encoded as hvcC format.
3244 * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3245 * is finalized. When finalized, configurationVersion will be 1 and we
3246 * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3247 int i, j, num_arrays, nal_len_size;
/* Skip the 21 fixed bytes of HEVCDecoderConfigurationRecord preceding
 * lengthSizeMinusOne; keep only its low 2 bits. */
3251 bytestream2_skip(&gb, 21);
3252 nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3253 num_arrays = bytestream2_get_byte(&gb);
3255 /* nal units in the hvcC always have length coded with 2 bytes,
3256 * so put a fake nal_length_size = 2 while parsing them */
3257 s->nal_length_size = 2;
3259 /* Decode nal units from hvcC. */
3260 for (i = 0; i < num_arrays; i++) {
3261 int type = bytestream2_get_byte(&gb) & 0x3f;
3262 int cnt = bytestream2_get_be16(&gb);
3264 for (j = 0; j < cnt; j++) {
3265 // +2 for the nal size field
3266 int nalsize = bytestream2_peek_be16(&gb) + 2;
/* Bounds check before handing the (length-prefixed) unit onward. */
3267 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3268 av_log(s->avctx, AV_LOG_ERROR,
3269 "Invalid NAL unit size in extradata.\n");
3270 return AVERROR_INVALIDDATA;
3273 ret = decode_nal_units(s, gb.buffer, nalsize);
3275 av_log(avctx, AV_LOG_ERROR,
3276 "Decoding nal unit %d %d from hvcC failed\n",
3280 bytestream2_skip(&gb, nalsize);
/* Restore the real length-field size for subsequent packet parsing. */
3284 /* Now store right nal length size, that will be used to parse
3286 s->nal_length_size = nal_len_size;
/* Annex-B branch: feed the raw extradata straight to the NAL parser. */
3289 ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
/* Codec .init callback: sets up CABAC tables, allocates the context,
 * configures slice/frame threading, and (if present) parses extradata.
 * NOTE(review): elided excerpt -- error-return checks after
 * hevc_init_context()/hevc_decode_extradata(), the else keywords and the
 * final return are in the listing gaps. */
3296 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3298 HEVCContext *s = avctx->priv_data;
/* Global, idempotent CABAC table initialization. */
3301 ff_init_cabac_states();
3303 avctx->internal->allocate_progress = 1;
3305 ret = hevc_init_context(avctx);
3309 s->enable_parallel_tiles = 0;
3310 s->picture_struct = 0;
/* Use the caller's thread count only when slice threading is enabled. */
3312 if(avctx->active_thread_type & FF_THREAD_SLICE)
3313 s->threads_number = avctx->thread_count;
3315 s->threads_number = 1;
3317 if (avctx->extradata_size > 0 && avctx->extradata) {
3318 ret = hevc_decode_extradata(s);
/* On extradata failure the context is torn down before returning. */
3320 hevc_decode_free(avctx);
/* Frame threading wins over slice threading when both are possible. */
3325 if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3326 s->threads_type = FF_THREAD_FRAME;
3328 s->threads_type = FF_THREAD_SLICE;
/* Frame-threading hook: initializes a worker thread's private copy of the
 * codec context.  The priv_data was memcpy'd from the main context, so it
 * is zeroed first and rebuilt from scratch via hevc_init_context().
 * NOTE(review): elided excerpt -- the error check on ret and the final
 * return are in the listing gaps. */
3333 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3335 HEVCContext *s = avctx->priv_data;
3338 memset(s, 0, sizeof(*s));
3340 ret = hevc_init_context(avctx);
/* Codec .flush callback (seek/reset): drops all DPB references and
 * re-arms the random-access skip threshold so decoding can restart
 * cleanly at the next RAP. */
3347 static void hevc_decode_flush(AVCodecContext *avctx)
3349 HEVCContext *s = avctx->priv_data;
3350 ff_hevc_flush_dpb(s);
3351 s->max_ra = INT_MAX;
/* AVOption-table helpers: OFFSET() locates a field inside HEVCContext,
 * PAR marks an option as a video decoding parameter. */
3354 #define OFFSET(x) offsetof(HEVCContext, x)
3355 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* Profiles advertised through AVCodec.profiles; the FF_PROFILE_UNKNOWN
 * entry is the list sentinel.
 * NOTE(review): the closing "};" of this array is not visible in this
 * elided listing. */
3357 static const AVProfile profiles[] = {
3358 { FF_PROFILE_HEVC_MAIN, "Main" },
3359 { FF_PROFILE_HEVC_MAIN_10, "Main 10" },
3360 { FF_PROFILE_HEVC_MAIN_STILL_PICTURE, "Main Still Picture" },
3361 { FF_PROFILE_HEVC_REXT, "Rext" },
3362 { FF_PROFILE_UNKNOWN },
3365 static const AVOption options[] = {
3366 { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3367 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3368 { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3369 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
/* AVClass wiring the private options into the decoder (enables
 * av_opt_* access and per-context logging names).
 * NOTE(review): the ".option = options," member and the closing "};" are
 * not visible in this elided listing. */
3373 static const AVClass hevc_decoder_class = {
3374 .class_name = "HEVC decoder",
3375 .item_name = av_default_item_name,
3377 .version = LIBAVUTIL_VERSION_INT,
3380 AVCodec ff_hevc_decoder = {
3382 .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3383 .type = AVMEDIA_TYPE_VIDEO,
3384 .id = AV_CODEC_ID_HEVC,
3385 .priv_data_size = sizeof(HEVCContext),
3386 .priv_class = &hevc_decoder_class,
3387 .init = hevc_decode_init,
3388 .close = hevc_decode_free,
3389 .decode = hevc_decode_frame,
3390 .flush = hevc_decode_flush,
3391 .update_thread_context = hevc_update_thread_context,
3392 .init_thread_copy = hevc_init_thread_copy,
3393 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3394 CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
3395 .profiles = NULL_IF_CONFIG_SMALL(profiles),