4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 * Copyright (C) 2012 - 2013 Mickael Raulet
6 * Copyright (C) 2012 - 2013 Gildas Cocherel
7 * Copyright (C) 2012 - 2013 Wassim Hamidouche
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/atomic.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/common.h"
29 #include "libavutil/display.h"
30 #include "libavutil/internal.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
/* Map a prediction-block width in luma samples to the index of the
 * corresponding pel/weighted-prediction function. Only the listed HEVC
 * block widths are meaningful; all other entries remain zero. */
const uint8_t ff_hevc_pel_weight[65] = {
    [64] = 9, [48] = 8, [32] = 7, [24] = 6, [16] = 5,
    [12] = 4, [8]  = 3, [6]  = 2, [4]  = 1, [2]  = 0,
};
45 * NOTE: Each function hls_foo correspond to the function foo in the
46 * specification (HLS stands for High Level Syntax).
53 /* free everything allocated by pic_arrays_init() */
54 static void pic_arrays_free(HEVCContext *s)
/* Per-CTB and per-CU lookup tables; av_freep() also NULLs the pointers,
 * so calling this twice (or on a partially-initialized context) is safe. */
57     av_freep(&s->deblock);
59     av_freep(&s->skip_flag);
60     av_freep(&s->tab_ct_depth);
62     av_freep(&s->tab_ipm);
63     av_freep(&s->cbf_luma);
66     av_freep(&s->qp_y_tab);
67     av_freep(&s->tab_slice_address);
68     av_freep(&s->filter_slice_edges);
/* Deblocking boundary-strength maps (4-sample granularity, see pic_arrays_init). */
70     av_freep(&s->horizontal_bs);
71     av_freep(&s->vertical_bs);
/* Entry-point arrays are allocated during slice-header parsing
 * (hls_slice_header), not in pic_arrays_init, but are owned here too. */
73     av_freep(&s->sh.entry_point_offset);
74     av_freep(&s->sh.size);
75     av_freep(&s->sh.offset);
/* Buffer pools for per-frame motion-vector fields and ref-pic-list tabs. */
77     av_buffer_pool_uninit(&s->tab_mvf_pool);
78     av_buffer_pool_uninit(&s->rpl_tab_pool);
81 /* allocate arrays that depend on frame dimensions */
82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
/* All sizes are derived from the active SPS; on any allocation failure the
 * function returns AVERROR(ENOMEM) (cleanup of partial allocations is
 * handled by pic_arrays_free, which tolerates NULL pointers). */
84 int log2_min_cb_size = sps->log2_min_cb_size;
85 int width = sps->width;
86 int height = sps->height;
87 int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
88 ((height >> log2_min_cb_size) + 1);
89 int ctb_count = sps->ctb_width * sps->ctb_height;
90 int min_pu_size = sps->min_pu_width * sps->min_pu_height;
/* Boundary-strength maps use a 4-sample grid (hence width >> 2). */
92 s->bs_width = (width >> 2) + 1;
93 s->bs_height = (height >> 2) + 1;
/* Per-CTB SAO and deblocking parameter arrays (zero-initialized). */
95 s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
96 s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
97 if (!s->sao || !s->deblock)
/* Per-minimum-CB maps: skip flags and coding-tree depth. */
100 s->skip_flag = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
101 s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
102 if (!s->skip_flag || !s->tab_ct_depth)
/* Per-minimum-TB / minimum-PU maps: luma CBF, intra pred modes, PCM flags. */
105 s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
106 s->tab_ipm = av_mallocz(min_pu_size);
107 s->is_pcm = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
108 if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
111 s->filter_slice_edges = av_mallocz(ctb_count);
112 s->tab_slice_address = av_malloc_array(pic_size_in_ctb,
113 sizeof(*s->tab_slice_address))
114 s->qp_y_tab = av_malloc_array(pic_size_in_ctb,
115 sizeof(*s->qp_y_tab));
116 if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
119 s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
120 s->vertical_bs = av_mallocz_array(s->bs_width, s->bs_height);
121 if (!s->horizontal_bs || !s->vertical_bs)
/* Pools so per-frame MV / ref-list storage can be recycled across frames. */
124 s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
126 s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
128 if (!s->tab_mvf_pool || !s->rpl_tab_pool)
135 return AVERROR(ENOMEM);
138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
/* Parse the weighted-prediction table (spec section pred_weight_table()):
 * per-reference luma/chroma weights and offsets for list L0, and for L1 on
 * B slices. References with the flag unset get the identity weight
 * (1 << denom) and offset 0. Arrays are sized 16, matching MAX_REFS as
 * validated earlier in hls_slice_header. */
142 uint8_t luma_weight_l0_flag[16];
143 uint8_t chroma_weight_l0_flag[16];
144 uint8_t luma_weight_l1_flag[16];
145 uint8_t chroma_weight_l1_flag[16];
146 int luma_log2_weight_denom;
/* Denominator is logged if out of [0,7] but then clipped rather than
 * rejected, so parsing continues with a sane value. */
148 luma_log2_weight_denom = get_ue_golomb_long(gb);
149 if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
150 av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
151 s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
152 if (s->sps->chroma_format_idc != 0) {
/* Chroma denom is coded as a signed delta relative to the luma denom. */
153 int delta = get_se_golomb(gb);
154 s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
/* L0 luma flags; absent weights default to the identity transform. */
157 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
158 luma_weight_l0_flag[i] = get_bits1(gb);
159 if (!luma_weight_l0_flag[i]) {
160 s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
161 s->sh.luma_offset_l0[i] = 0;
164 if (s->sps->chroma_format_idc != 0) {
165 for (i = 0; i < s->sh.nb_refs[L0]; i++)
166 chroma_weight_l0_flag[i] = get_bits1(gb);
168 for (i = 0; i < s->sh.nb_refs[L0]; i++)
169 chroma_weight_l0_flag[i] = 0;
171 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
172 if (luma_weight_l0_flag[i]) {
173 int delta_luma_weight_l0 = get_se_golomb(gb);
174 s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
175 s->sh.luma_offset_l0[i] = get_se_golomb(gb);
177 if (chroma_weight_l0_flag[i]) {
/* j = 0 is Cb, j = 1 is Cr; the chroma offset is reconstructed per the
 * spec formula and clipped to the 8-bit signed range. */
178 for (j = 0; j < 2; j++) {
179 int delta_chroma_weight_l0 = get_se_golomb(gb);
180 int delta_chroma_offset_l0 = get_se_golomb(gb);
181 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
182 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
183 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
186 s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
187 s->sh.chroma_offset_l0[i][0] = 0;
188 s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
189 s->sh.chroma_offset_l0[i][1] = 0;
/* B slices repeat the same parsing for reference list L1. */
192 if (s->sh.slice_type == B_SLICE) {
193 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
194 luma_weight_l1_flag[i] = get_bits1(gb);
195 if (!luma_weight_l1_flag[i]) {
196 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
197 s->sh.luma_offset_l1[i] = 0;
200 if (s->sps->chroma_format_idc != 0) {
201 for (i = 0; i < s->sh.nb_refs[L1]; i++)
202 chroma_weight_l1_flag[i] = get_bits1(gb);
204 for (i = 0; i < s->sh.nb_refs[L1]; i++)
205 chroma_weight_l1_flag[i] = 0;
207 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
208 if (luma_weight_l1_flag[i]) {
209 int delta_luma_weight_l1 = get_se_golomb(gb);
210 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
211 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
213 if (chroma_weight_l1_flag[i]) {
214 for (j = 0; j < 2; j++) {
215 int delta_chroma_weight_l1 = get_se_golomb(gb);
216 int delta_chroma_offset_l1 = get_se_golomb(gb);
217 s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
218 s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
219 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
222 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
223 s->sh.chroma_offset_l1[i][0] = 0;
224 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
225 s->sh.chroma_offset_l1[i][1] = 0;
231 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
/* Decode the long-term reference picture set for the current slice:
 * nb_sps entries referenced from the SPS candidate list followed by nb_sh
 * entries coded explicitly in the slice header. Returns 0 on success or
 * AVERROR_INVALIDDATA when the combined count overflows rps->poc. */
233 const HEVCSPS *sps = s->sps;
234 int max_poc_lsb = 1 << sps->log2_max_poc_lsb;
235 int prev_delta_msb = 0;
236 unsigned int nb_sps = 0, nb_sh;
240 if (!sps->long_term_ref_pics_present_flag)
243 if (sps->num_long_term_ref_pics_sps > 0)
244 nb_sps = get_ue_golomb_long(gb);
245 nb_sh = get_ue_golomb_long(gb);
/* uint64_t cast avoids wraparound when both counts are near UINT_MAX. */
247 if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
248 return AVERROR_INVALIDDATA;
250 rps->nb_refs = nb_sh + nb_sps;
252 for (i = 0; i < rps->nb_refs; i++) {
253 uint8_t delta_poc_msb_present;
/* Entries i < nb_sps come from the SPS candidates (indexed by
 * lt_idx_sps); the rest carry their POC LSB and used flag inline. */
256 uint8_t lt_idx_sps = 0;
258 if (sps->num_long_term_ref_pics_sps > 1)
259 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
261 rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
262 rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
264 rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb);
265 rps->used[i] = get_bits1(gb);
268 delta_poc_msb_present = get_bits1(gb);
269 if (delta_poc_msb_present) {
/* delta_poc_msb_cycle_lt is coded differentially within each of the two
 * groups (SPS-derived vs slice-coded), hence the i != nb_sps reset. */
270 int delta = get_ue_golomb_long(gb);
272 if (i && i != nb_sps)
273 delta += prev_delta_msb;
275 rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
276 prev_delta_msb = delta;
283 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
/* Activate a new SPS: (re)allocate frame-size-dependent arrays, negotiate
 * the output pixel format (offering hwaccel formats first), export coded
 * dimensions / color properties to the AVCodecContext, re-init the DSP
 * function tables, and derive the framerate from VPS or VUI timing info. */
285 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL)
286 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
288 unsigned int num = 0, den = 0;
291 ret = pic_arrays_init(s, sps);
295 s->avctx->coded_width = sps->width;
296 s->avctx->coded_height = sps->height;
297 s->avctx->width = sps->output_width;
298 s->avctx->height = sps->output_height;
299 s->avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
/* DXVA2 hwaccel is only offered for 8-bit 4:2:0 content. */
301 if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
302 #if CONFIG_HEVC_DXVA2_HWACCEL
303 *fmt++ = AV_PIX_FMT_DXVA2_VLD;
307 *fmt++ = sps->pix_fmt;
308 *fmt = AV_PIX_FMT_NONE;
/* Let the user/hwaccel negotiation pick from the candidate list. */
310 ret = ff_thread_get_format(s->avctx, pix_fmts);
313 s->avctx->pix_fmt = ret;
315 ff_set_sar(s->avctx, sps->vui.sar);
317 if (sps->vui.video_signal_type_present_flag)
318 s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
321 s->avctx->color_range = AVCOL_RANGE_MPEG;
323 if (sps->vui.colour_description_present_flag) {
324 s->avctx->color_primaries = sps->vui.colour_primaries;
325 s->avctx->color_trc = sps->vui.transfer_characteristic;
326 s->avctx->colorspace = sps->vui.matrix_coeffs;
328 s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
329 s->avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
330 s->avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
/* Bit-depth-specific prediction / DSP dispatch tables. */
333 ff_hevc_pred_init(&s->hpc, sps->bit_depth);
334 ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
335 ff_videodsp_init (&s->vdsp, sps->bit_depth);
/* Drop any SAO line buffers sized for the previous SPS before realloc. */
337 for (i = 0; i < 3; i++) {
338 av_freep(&s->sao_pixel_buffer_h[i]);
339 av_freep(&s->sao_pixel_buffer_v[i]);
342 if (sps->sao_enabled && !s->avctx->hwaccel) {
343 int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
346 for(c_idx = 0; c_idx < c_count; c_idx++) {
347 int w = sps->width >> sps->hshift[c_idx];
348 int h = sps->height >> sps->vshift[c_idx];
349 s->sao_pixel_buffer_h[c_idx] =
350 av_malloc((w * 2 * sps->ctb_height) <<
352 s->sao_pixel_buffer_v[c_idx] =
353 av_malloc((h * 2 * sps->ctb_width) <<
359 s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
/* VPS timing info takes precedence over VUI timing info. */
361 if (s->vps->vps_timing_info_present_flag) {
362 num = s->vps->vps_num_units_in_tick;
363 den = s->vps->vps_time_scale;
364 } else if (sps->vui.vui_timing_info_present_flag) {
365 num = sps->vui.vui_num_units_in_tick;
366 den = sps->vui.vui_time_scale;
/* num/den are tick duration, so they land swapped into framerate. */
369 if (num != 0 && den != 0)
370 av_reduce(&s->avctx->framerate.den, &s->avctx->framerate.num,
381 static int hls_slice_header(HEVCContext *s)
/* Parse the HEVC slice segment header (spec 7.3.6) from the NAL currently
 * loaded in the local context's GetBitContext, activating PPS/SPS as
 * needed and filling s->sh. Returns 0 on success, AVERROR_INVALIDDATA on
 * malformed headers, AVERROR(ENOMEM) on allocation failure. Parsing order
 * strictly mirrors the bitstream syntax and must not be reordered. */
383 GetBitContext *gb = &s->HEVClc->gb;
384 SliceHeader *sh = &s->sh;
388 sh->first_slice_in_pic_flag = get_bits1(gb);
/* A first slice of an IDR/BLA picture starts a new decoding sequence. */
389 if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
390 s->seq_decode = (s->seq_decode + 1) & 0xff;
393 ff_hevc_clear_refs(s);
395 sh->no_output_of_prior_pics_flag = 0;
397 sh->no_output_of_prior_pics_flag = get_bits1(gb);
399 sh->pps_id = get_ue_golomb_long(gb);
400 if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
401 av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
402 return AVERROR_INVALIDDATA;
/* All slices of one picture must reference the same PPS. */
404 if (!sh->first_slice_in_pic_flag &&
405 s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
406 av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
407 return AVERROR_INVALIDDATA;
409 s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
410 if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
411 sh->no_output_of_prior_pics_flag = 1;
/* SPS switch: re-activate via set_sps() and bump the sequence counter. */
413 if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
414 const HEVCSPS* last_sps = s->sps;
415 s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
416 if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
417 if (s->sps->width != last_sps->width || s->sps->height != last_sps->height ||
418 s->sps->temporal_layer[s->sps->max_sub_layers - 1].max_dec_pic_buffering !=
419 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
420 sh->no_output_of_prior_pics_flag = 0;
422 ff_hevc_clear_refs(s);
423 ret = set_sps(s, s->sps);
427 s->seq_decode = (s->seq_decode + 1) & 0xff;
431 s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
432 s->avctx->level = s->sps->ptl.general_ptl.level_idc;
434 sh->dependent_slice_segment_flag = 0;
435 if (!sh->first_slice_in_pic_flag) {
436 int slice_address_length;
438 if (s->pps->dependent_slice_segments_enabled_flag)
439 sh->dependent_slice_segment_flag = get_bits1(gb);
/* slice_segment_address is coded with just enough bits to address any
 * CTB of the picture. */
441 slice_address_length = av_ceil_log2(s->sps->ctb_width *
443 sh->slice_segment_addr = get_bits(gb, slice_address_length);
444 if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
445 av_log(s->avctx, AV_LOG_ERROR,
446 "Invalid slice segment address: %u.\n",
447 sh->slice_segment_addr);
448 return AVERROR_INVALIDDATA;
451 if (!sh->dependent_slice_segment_flag) {
452 sh->slice_addr = sh->slice_segment_addr;
456 sh->slice_segment_addr = sh->slice_addr = 0;
458 s->slice_initialized = 0;
/* Independent slice segments carry the full header; dependent segments
 * inherit most fields from the previous independent one. */
461 if (!sh->dependent_slice_segment_flag) {
462 s->slice_initialized = 0;
464 for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
465 skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
467 sh->slice_type = get_ue_golomb_long(gb);
468 if (!(sh->slice_type == I_SLICE ||
469 sh->slice_type == P_SLICE ||
470 sh->slice_type == B_SLICE)) {
471 av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
473 return AVERROR_INVALIDDATA;
475 if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
476 av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
477 return AVERROR_INVALIDDATA;
480 // when flag is not present, picture is inferred to be output
481 sh->pic_output_flag = 1;
482 if (s->pps->output_flag_present_flag)
483 sh->pic_output_flag = get_bits1(gb);
485 if (s->sps->separate_colour_plane_flag)
486 sh->colour_plane_id = get_bits(gb, 2);
/* POC LSB + reference picture sets (not present for IDR pictures;
 * NOTE(review): the IDR guard lines appear outside this excerpt). */
491 sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
492 poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
493 if (!sh->first_slice_in_pic_flag && poc != s->poc) {
494 av_log(s->avctx, AV_LOG_WARNING,
495 "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
496 if (s->avctx->err_recognition & AV_EF_EXPLODE)
497 return AVERROR_INVALIDDATA;
502 sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
503 if (!sh->short_term_ref_pic_set_sps_flag) {
/* Slice-local short-term RPS; its coded size in bits is recorded for
 * hwaccels that need to re-locate it in the bitstream. */
504 int pos = get_bits_left(gb);
505 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
509 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
510 sh->short_term_rps = &sh->slice_rps;
512 int numbits, rps_idx;
514 if (!s->sps->nb_st_rps) {
515 av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
516 return AVERROR_INVALIDDATA;
519 numbits = av_ceil_log2(s->sps->nb_st_rps);
520 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
521 sh->short_term_rps = &s->sps->st_rps[rps_idx];
524 ret = decode_lt_rps(s, &sh->long_term_rps, gb);
526 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
527 if (s->avctx->err_recognition & AV_EF_EXPLODE)
528 return AVERROR_INVALIDDATA;
531 if (s->sps->sps_temporal_mvp_enabled_flag)
532 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
534 sh->slice_temporal_mvp_enabled_flag = 0;
536 s->sh.short_term_rps = NULL;
/* Sub-layer non-reference NAL types are excluded when tracking the most
 * recent temporal-id-0 reference picture. */
541 if (s->temporal_id == 0 &&
542 s->nal_unit_type != NAL_TRAIL_N &&
543 s->nal_unit_type != NAL_TSA_N &&
544 s->nal_unit_type != NAL_STSA_N &&
545 s->nal_unit_type != NAL_RADL_N &&
546 s->nal_unit_type != NAL_RADL_R &&
547 s->nal_unit_type != NAL_RASL_N &&
548 s->nal_unit_type != NAL_RASL_R)
/* SAO enable flags: one for luma, one shared by both chroma planes. */
551 if (s->sps->sao_enabled) {
552 sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
553 if (s->sps->chroma_format_idc) {
554 sh->slice_sample_adaptive_offset_flag[1] =
555 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
558 sh->slice_sample_adaptive_offset_flag[0] = 0;
559 sh->slice_sample_adaptive_offset_flag[1] = 0;
560 sh->slice_sample_adaptive_offset_flag[2] = 0;
/* Reference list sizes, optionally overridden per slice. */
563 sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
564 if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
567 sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
568 if (sh->slice_type == B_SLICE)
569 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
571 if (get_bits1(gb)) { // num_ref_idx_active_override_flag
572 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
573 if (sh->slice_type == B_SLICE)
574 sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
576 if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
577 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
578 sh->nb_refs[L0], sh->nb_refs[L1]);
579 return AVERROR_INVALIDDATA;
582 sh->rpl_modification_flag[0] = 0;
583 sh->rpl_modification_flag[1] = 0;
584 nb_refs = ff_hevc_frame_nb_refs(s);
586 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
587 return AVERROR_INVALIDDATA;
/* Optional explicit reordering of the reference picture lists. */
590 if (s->pps->lists_modification_present_flag && nb_refs > 1) {
591 sh->rpl_modification_flag[0] = get_bits1(gb);
592 if (sh->rpl_modification_flag[0]) {
593 for (i = 0; i < sh->nb_refs[L0]; i++)
594 sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
597 if (sh->slice_type == B_SLICE) {
598 sh->rpl_modification_flag[1] = get_bits1(gb);
599 if (sh->rpl_modification_flag[1] == 1)
600 for (i = 0; i < sh->nb_refs[L1]; i++)
601 sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
605 if (sh->slice_type == B_SLICE)
606 sh->mvd_l1_zero_flag = get_bits1(gb);
608 if (s->pps->cabac_init_present_flag)
609 sh->cabac_init_flag = get_bits1(gb);
611 sh->cabac_init_flag = 0;
/* Collocated picture selection for temporal MVP. */
613 sh->collocated_ref_idx = 0;
614 if (sh->slice_temporal_mvp_enabled_flag) {
615 sh->collocated_list = L0;
616 if (sh->slice_type == B_SLICE)
617 sh->collocated_list = !get_bits1(gb);
619 if (sh->nb_refs[sh->collocated_list] > 1) {
620 sh->collocated_ref_idx = get_ue_golomb_long(gb);
621 if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
622 av_log(s->avctx, AV_LOG_ERROR,
623 "Invalid collocated_ref_idx: %d.\n",
624 sh->collocated_ref_idx);
625 return AVERROR_INVALIDDATA;
630 if ((s->pps->weighted_pred_flag && sh->slice_type == P_SLICE) ||
631 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
632 pred_weight_table(s, gb);
/* five_minus_max_num_merge_cand: valid merge candidate count is 1..5. */
635 sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
636 if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
637 av_log(s->avctx, AV_LOG_ERROR,
638 "Invalid number of merging MVP candidates: %d.\n",
639 sh->max_num_merge_cand);
640 return AVERROR_INVALIDDATA;
644 sh->slice_qp_delta = get_se_golomb(gb);
646 if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
647 sh->slice_cb_qp_offset = get_se_golomb(gb);
648 sh->slice_cr_qp_offset = get_se_golomb(gb);
650 sh->slice_cb_qp_offset = 0;
651 sh->slice_cr_qp_offset = 0;
654 if (s->pps->chroma_qp_offset_list_enabled_flag)
655 sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
657 sh->cu_chroma_qp_offset_enabled_flag = 0;
/* Deblocking parameters: slice override, else PPS defaults.
 * beta/tc offsets are signalled halved, hence the * 2. */
659 if (s->pps->deblocking_filter_control_present_flag) {
660 int deblocking_filter_override_flag = 0;
662 if (s->pps->deblocking_filter_override_enabled_flag)
663 deblocking_filter_override_flag = get_bits1(gb);
665 if (deblocking_filter_override_flag) {
666 sh->disable_deblocking_filter_flag = get_bits1(gb);
667 if (!sh->disable_deblocking_filter_flag) {
668 sh->beta_offset = get_se_golomb(gb) * 2;
669 sh->tc_offset = get_se_golomb(gb) * 2;
672 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
673 sh->beta_offset = s->pps->beta_offset;
674 sh->tc_offset = s->pps->tc_offset;
677 sh->disable_deblocking_filter_flag = 0;
682 if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
683 (sh->slice_sample_adaptive_offset_flag[0] ||
684 sh->slice_sample_adaptive_offset_flag[1] ||
685 !sh->disable_deblocking_filter_flag)) {
686 sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
688 sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
/* Dependent slice segment with no preceding independent one: invalid. */
690 } else if (!s->slice_initialized) {
691 av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
692 return AVERROR_INVALIDDATA;
/* Entry point offsets for tiles / WPP substreams. */
695 sh->num_entry_point_offsets = 0;
696 if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
697 sh->num_entry_point_offsets = get_ue_golomb_long(gb);
698 if (sh->num_entry_point_offsets > 0) {
699 int offset_len = get_ue_golomb_long(gb) + 1;
700 int segments = offset_len >> 4;
701 int rest = (offset_len & 15);
702 av_freep(&sh->entry_point_offset);
703 av_freep(&sh->offset);
705 sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
706 sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
707 sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
708 if (!sh->entry_point_offset || !sh->offset || !sh->size) {
709 sh->num_entry_point_offsets = 0;
710 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
711 return AVERROR(ENOMEM);
/* Each offset is read in 16-bit chunks plus a remainder, since
 * offset_len may exceed the 25-bit limit of a single get_bits(). */
713 for (i = 0; i < sh->num_entry_point_offsets; i++) {
715 for (j = 0; j < segments; j++) {
717 val += get_bits(gb, 16);
721 val += get_bits(gb, rest);
723 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
725 if (s->threads_number > 1 && (s->pps->num_tile_rows > 1 || s->pps->num_tile_columns > 1)) {
726 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
727 s->threads_number = 1;
729 s->enable_parallel_tiles = 0;
731 s->enable_parallel_tiles = 0;
734 if (s->pps->slice_header_extension_present_flag) {
735 unsigned int length = get_ue_golomb_long(gb);
/* 8LL keeps the bit-count comparison in 64 bits, avoiding overflow for
 * huge declared extension lengths. */
736 if (length*8LL > get_bits_left(gb)) {
737 av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
738 return AVERROR_INVALIDDATA;
740 for (i = 0; i < length; i++)
741 skip_bits(gb, 8); // slice_header_extension_data_byte
744 // Inferred parameters
745 sh->slice_qp = 26U + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
746 if (sh->slice_qp > 51 ||
747 sh->slice_qp < -s->sps->qp_bd_offset) {
748 av_log(s->avctx, AV_LOG_ERROR,
749 "The slice_qp %d is outside the valid range "
752 -s->sps->qp_bd_offset);
753 return AVERROR_INVALIDDATA;
756 sh->slice_ctb_addr_rs = sh->slice_segment_addr;
/* The first slice segment of a picture cannot be dependent. */
758 if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
759 av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
760 return AVERROR_INVALIDDATA;
763 if (get_bits_left(gb) < 0) {
764 av_log(s->avctx, AV_LOG_ERROR,
765 "Overread slice header by %d bits\n", -get_bits_left(gb));
766 return AVERROR_INVALIDDATA;
769 s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
771 if (!s->pps->cu_qp_delta_enabled_flag)
772 s->HEVClc->qp_y = s->sh.slice_qp;
774 s->slice_initialized = 1;
775 s->HEVClc->tu.cu_qp_offset_cb = 0;
776 s->HEVClc->tu.cu_qp_offset_cr = 0;
781 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
783 #define SET_SAO(elem, value) \
785 if (!sao_merge_up_flag && !sao_merge_left_flag) \
787 else if (sao_merge_left_flag) \
788 sao->elem = CTB(s->sao, rx-1, ry).elem; \
789 else if (sao_merge_up_flag) \
790 sao->elem = CTB(s->sao, rx, ry-1).elem; \
795 static void hls_sao_param(HEVCContext *s, int rx, int ry)
/* Decode the SAO parameters for the CTB at (rx, ry) into s->sao via the
 * CTB()/SET_SAO macros. Merge flags allow copying all parameters from the
 * left or above neighbour; Cr (c_idx == 2) reuses the Cb type/EO class. */
797 HEVCLocalContext *lc = s->HEVClc;
798 int sao_merge_left_flag = 0;
799 int sao_merge_up_flag = 0;
800 SAOParams *sao = &CTB(s->sao, rx, ry);
803 if (s->sh.slice_sample_adaptive_offset_flag[0] ||
804 s->sh.slice_sample_adaptive_offset_flag[1]) {
/* Merge flags are only read when the respective neighbour exists. */
806 if (lc->ctb_left_flag)
807 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
809 if (ry > 0 && !sao_merge_left_flag) {
811 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
/* c_idx: 0 = luma, 1 = Cb, 2 = Cr; chroma skipped for monochrome. */
815 for (c_idx = 0; c_idx < (s->sps->chroma_format_idc ? 3 : 1); c_idx++) {
816 int log2_sao_offset_scale = c_idx == 0 ? s->pps->log2_sao_offset_scale_luma :
817 s->pps->log2_sao_offset_scale_chroma;
819 if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
820 sao->type_idx[c_idx] = SAO_NOT_APPLIED;
/* Cr inherits type and EO class from Cb (only one set is coded). */
825 sao->type_idx[2] = sao->type_idx[1];
826 sao->eo_class[2] = sao->eo_class[1];
828 SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
831 if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
834 for (i = 0; i < 4; i++)
835 SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
/* Band offsets carry explicit signs; edge offsets derive the sign from
 * the offset index (see the loop at the bottom). */
837 if (sao->type_idx[c_idx] == SAO_BAND) {
838 for (i = 0; i < 4; i++) {
839 if (sao->offset_abs[c_idx][i]) {
840 SET_SAO(offset_sign[c_idx][i],
841 ff_hevc_sao_offset_sign_decode(s));
843 sao->offset_sign[c_idx][i] = 0;
846 SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
847 } else if (c_idx != 2) {
848 SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
851 // Inferred parameters
852 sao->offset_val[c_idx][0] = 0;
853 for (i = 0; i < 4; i++) {
854 sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
855 if (sao->type_idx[c_idx] == SAO_EDGE) {
857 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
858 } else if (sao->offset_sign[c_idx][i]) {
859 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
/* Range-extension offset scaling (log2_sao_offset_scale_*). */
861 sao->offset_val[c_idx][i + 1] <<= log2_sao_offset_scale;
869 static int hls_cross_component_pred(HEVCContext *s, int idx) {
/* Decode the cross-component prediction scale for chroma plane idx
 * (0 = Cb, 1 = Cr) and store it in lc->tu.res_scale_val. A coded value of
 * 0 disables the prediction (scale 0); otherwise the scale is
 * +-2^(log2_res_scale_abs_plus1 - 1) depending on the sign flag. */
870 HEVCLocalContext *lc = s->HEVClc;
871 int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
873 if (log2_res_scale_abs_plus1 != 0) {
874 int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
/* (1 - 2*flag) maps the sign flag {0,1} to a multiplier {+1,-1}. */
875 lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
876 (1 - 2 * res_scale_sign_flag);
878 lc->tu.res_scale_val = 0;
885 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
886 int xBase, int yBase, int cb_xBase, int cb_yBase,
887 int log2_cb_size, int log2_trafo_size,
888 int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
/* Decode one transform unit: intra prediction for the covered planes,
 * CU QP delta / chroma QP offset parsing, and residual coding for luma
 * and chroma. (x0, y0) is the TU position, (xBase, yBase) the parent TB,
 * (cb_xBase, cb_yBase) the CU. cbf_cb/cbf_cr carry two entries each to
 * support the two chroma TBs of 4:2:2 content. Returns 0 on success or
 * AVERROR_INVALIDDATA for an out-of-range cu_qp_delta. */
890 HEVCLocalContext *lc = s->HEVClc;
/* For 4:2:0 and 4:2:2 the chroma TB is half the luma width. */
891 const int log2_trafo_size_c = log2_trafo_size - s->sps->hshift[1];
894 if (lc->cu.pred_mode == MODE_INTRA) {
895 int trafo_size = 1 << log2_trafo_size;
896 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
898 s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
/* Any coded residual in this TU? (cbf_cb[1]/cbf_cr[1] only for 4:2:2.) */
901 if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
902 (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
903 int scan_idx = SCAN_DIAG;
904 int scan_idx_c = SCAN_DIAG;
905 int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
906 (s->sps->chroma_format_idc == 2 &&
907 (cbf_cb[1] || cbf_cr[1]));
/* cu_qp_delta is coded at most once per quantization group. */
909 if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
910 lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
911 if (lc->tu.cu_qp_delta != 0)
912 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
913 lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
914 lc->tu.is_cu_qp_delta_coded = 1;
916 if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
917 lc->tu.cu_qp_delta > (25 + s->sps->qp_bd_offset / 2)) {
918 av_log(s->avctx, AV_LOG_ERROR,
919 "The cu_qp_delta %d is outside the valid range "
922 -(26 + s->sps->qp_bd_offset / 2),
923 (25 + s->sps->qp_bd_offset / 2));
924 return AVERROR_INVALIDDATA;
927 ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
/* Optional per-CU chroma QP offset (PPS range extension tool). */
930 if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
931 !lc->cu.cu_transquant_bypass_flag && !lc->tu.is_cu_chroma_qp_offset_coded) {
932 int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
933 if (cu_chroma_qp_offset_flag) {
934 int cu_chroma_qp_offset_idx = 0;
935 if (s->pps->chroma_qp_offset_list_len_minus1 > 0) {
936 cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
937 av_log(s->avctx, AV_LOG_ERROR,
938 "cu_chroma_qp_offset_idx not yet tested.\n");
940 lc->tu.cu_qp_offset_cb = s->pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
941 lc->tu.cu_qp_offset_cr = s->pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
943 lc->tu.cu_qp_offset_cb = 0;
944 lc->tu.cu_qp_offset_cr = 0;
946 lc->tu.is_cu_chroma_qp_offset_coded = 1;
/* Mode-dependent coefficient scan for small intra TUs (4x4/8x8):
 * near-horizontal prediction modes scan vertically and vice versa. */
949 if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
950 if (lc->tu.intra_pred_mode >= 6 &&
951 lc->tu.intra_pred_mode <= 14) {
952 scan_idx = SCAN_VERT;
953 } else if (lc->tu.intra_pred_mode >= 22 &&
954 lc->tu.intra_pred_mode <= 30) {
955 scan_idx = SCAN_HORIZ;
958 if (lc->tu.intra_pred_mode_c >= 6 &&
959 lc->tu.intra_pred_mode_c <= 14) {
960 scan_idx_c = SCAN_VERT;
961 } else if (lc->tu.intra_pred_mode_c >= 22 &&
962 lc->tu.intra_pred_mode_c <= 30) {
963 scan_idx_c = SCAN_HORIZ;
970 ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
/* Chroma residual at this TU level: only when the chroma TB is at least
 * 4x4 (log2 > 2) or for 4:4:4; otherwise chroma is handled at blk_idx 3. */
971 if (s->sps->chroma_format_idc && (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3)) {
972 int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
973 int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
/* Cross-component prediction needs a coded luma residual and either an
 * inter CU or intra chroma mode 4 (derived-from-luma). */
974 lc->tu.cross_pf = (s->pps->cross_component_prediction_enabled_flag && cbf_luma &&
975 (lc->cu.pred_mode == MODE_INTER ||
976 (lc->tu.chroma_mode_c == 4)));
978 if (lc->tu.cross_pf) {
979 hls_cross_component_pred(s, 0);
/* 4:2:2 has two vertically stacked chroma TBs per luma TB (i = 0, 1). */
981 for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
982 if (lc->cu.pred_mode == MODE_INTRA) {
983 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
984 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
987 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
988 log2_trafo_size_c, scan_idx_c, 1);
/* Cross-component prediction: add scaled luma residual (from
 * edge_emu_buffer) to the Cb plane; res_scale_val has a 3-bit
 * fractional shift per the spec. */
990 if (lc->tu.cross_pf) {
991 ptrdiff_t stride = s->frame->linesize[1];
992 int hshift = s->sps->hshift[1];
993 int vshift = s->sps->vshift[1];
994 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
995 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
996 int size = 1 << log2_trafo_size_c;
998 uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
999 ((x0 >> hshift) << s->sps->pixel_shift)];
1000 for (i = 0; i < (size * size); i++) {
1001 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1003 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
/* Same sequence for Cr (plane 2). */
1007 if (lc->tu.cross_pf) {
1008 hls_cross_component_pred(s, 1);
1010 for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1011 if (lc->cu.pred_mode == MODE_INTRA) {
1012 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1013 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1016 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1017 log2_trafo_size_c, scan_idx_c, 2);
1019 if (lc->tu.cross_pf) {
1020 ptrdiff_t stride = s->frame->linesize[2];
1021 int hshift = s->sps->hshift[2];
1022 int vshift = s->sps->vshift[2];
1023 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1024 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1025 int size = 1 << log2_trafo_size_c;
1027 uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1028 ((x0 >> hshift) << s->sps->pixel_shift)];
1029 for (i = 0; i < (size * size); i++) {
1030 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1032 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
/* Chroma TB smaller than 4x4: residual is coded with the last (fourth)
 * luma TB of the parent, at the parent TB size (xBase/yBase). */
1035 } else if (s->sps->chroma_format_idc && blk_idx == 3) {
1036 int trafo_size_h = 1 << (log2_trafo_size + 1);
1037 int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1038 for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1039 if (lc->cu.pred_mode == MODE_INTRA) {
1040 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1041 trafo_size_h, trafo_size_v);
1042 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1045 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1046 log2_trafo_size, scan_idx_c, 1);
1048 for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1049 if (lc->cu.pred_mode == MODE_INTRA) {
1050 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1051 trafo_size_h, trafo_size_v);
1052 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1055 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1056 log2_trafo_size, scan_idx_c, 2);
/* No residual at all, intra CU: still run chroma intra prediction. */
1059 } else if (s->sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1060 if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
1061 int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
1062 int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
1063 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1064 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1065 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1066 if (s->sps->chroma_format_idc == 2) {
1067 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1068 trafo_size_h, trafo_size_v);
1069 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1070 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1072 } else if (blk_idx == 3) {
1073 int trafo_size_h = 1 << (log2_trafo_size + 1);
1074 int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1075 ff_hevc_set_neighbour_available(s, xBase, yBase,
1076 trafo_size_h, trafo_size_v);
1077 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1078 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1079 if (s->sps->chroma_format_idc == 2) {
1080 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1081 trafo_size_h, trafo_size_v);
1082 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1083 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
/* Mark every minimum-size PU covered by the CB at (x0, y0) in s->is_pcm
 * with the value 2 so that the in-loop filters treat the block as
 * transquant-bypass. (Excerpt note: the "{", "int i, j;" and closing
 * brace lines were elided from this extract — see index gaps.) */
1091 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1093 int cb_size = 1 << log2_cb_size;
1094 int log2_min_pu_size = s->sps->log2_min_pu_size;
1096 int min_pu_width = s->sps->min_pu_width;
/* clip the CB to the picture so the table writes stay in bounds */
1097 int x_end = FFMIN(x0 + cb_size, s->sps->width);
1098 int y_end = FFMIN(y0 + cb_size, s->sps->height);
1101 for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1102 for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1103 s->is_pcm[i + j * min_pu_width] = 2;
/* Recursively parse one transform_tree() syntax element (HLS): decide
 * whether to split the current transform block, decode the chroma CBFs,
 * recurse into the four quadrants on split, otherwise decode the luma CBF
 * and hand off to hls_transform_unit(). Returns 0 or a negative error code.
 * (Excerpt note: several original lines — braces, local declarations such
 * as cbf_cb/cbf_cr/ret, and else lines — are elided; see index gaps.) */
1106 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1107 int xBase, int yBase, int cb_xBase, int cb_yBase,
1108 int log2_cb_size, int log2_trafo_size,
1109 int trafo_depth, int blk_idx,
1110 const int *base_cbf_cb, const int *base_cbf_cr)
1112 HEVCLocalContext *lc = s->HEVClc;
1113 uint8_t split_transform_flag;
/* inherit the parent's chroma coded-block flags */
1118 cbf_cb[0] = base_cbf_cb[0];
1119 cbf_cb[1] = base_cbf_cb[1];
1120 cbf_cr[0] = base_cbf_cr[0];
1121 cbf_cr[1] = base_cbf_cr[1];
/* select the intra prediction modes for this depth of an NxN intra split */
1123 if (lc->cu.intra_split_flag) {
1124 if (trafo_depth == 1) {
1125 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1126 if (s->sps->chroma_format_idc == 3) {
1127 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1128 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[blk_idx];
1130 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1131 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
1135 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[0];
1136 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1137 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
/* split_transform_flag is coded only when a split is actually optional;
 * otherwise it is inferred below */
1140 if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1141 log2_trafo_size > s->sps->log2_min_tb_size &&
1142 trafo_depth < lc->cu.max_trafo_depth &&
1143 !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1144 split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1146 int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1147 lc->cu.pred_mode == MODE_INTER &&
1148 lc->cu.part_mode != PART_2Nx2N &&
1151 split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1152 (lc->cu.intra_split_flag && trafo_depth == 0) ||
/* chroma CBFs exist only when this TB has chroma (not 4x4 luma in 4:2:0/4:2:2);
 * the second flag per component covers the lower field in 4:2:2 */
1156 if (s->sps->chroma_format_idc && (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3)) {
1157 if (trafo_depth == 0 || cbf_cb[0]) {
1158 cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1159 if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1160 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1164 if (trafo_depth == 0 || cbf_cr[0]) {
1165 cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1166 if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1167 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
/* recurse into the four half-size quadrants */
1172 if (split_transform_flag) {
1173 const int trafo_size_split = 1 << (log2_trafo_size - 1);
1174 const int x1 = x0 + trafo_size_split;
1175 const int y1 = y0 + trafo_size_split;
1177 #define SUBDIVIDE(x, y, idx) \
1179 ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1180 log2_trafo_size - 1, trafo_depth + 1, idx, \
1186 SUBDIVIDE(x0, y0, 0);
1187 SUBDIVIDE(x1, y0, 1);
1188 SUBDIVIDE(x0, y1, 2);
1189 SUBDIVIDE(x1, y1, 3);
1193 int min_tu_size = 1 << s->sps->log2_min_tb_size;
1194 int log2_min_tu_size = s->sps->log2_min_tb_size;
1195 int min_tu_width = s->sps->min_tb_width;
/* leaf TB: cbf_luma is coded unless it is inferred to 1 (inter root TB
 * with no chroma residual) */
1198 if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1199 cbf_cb[0] || cbf_cr[0] ||
1200 (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1201 cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1204 ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1205 log2_cb_size, log2_trafo_size,
1206 blk_idx, cbf_luma, cbf_cb, cbf_cr);
1209 // TODO: store cbf_luma somewhere else
1212 for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1213 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1214 int x_tu = (x0 + j) >> log2_min_tu_size;
1215 int y_tu = (y0 + i) >> log2_min_tu_size;
1216 s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
/* record deblocking boundary strengths for this TB */
1219 if (!s->sh.disable_deblocking_filter_flag) {
1220 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1221 if (s->pps->transquant_bypass_enable_flag &&
1222 lc->cu.cu_transquant_bypass_flag)
1223 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
/* Parse one pcm_sample() payload: read the raw (IPCM) luma and chroma
 * samples for the CB at (x0, y0) straight from the bitstream and write
 * them into the current frame. Returns 0 or a negative error code.
 * (Excerpt note: the GetBitContext/ret declarations, the "{", and the
 * error-check/return lines are elided from this extract.) */
1229 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1231 HEVCLocalContext *lc = s->HEVClc;
1233 int cb_size = 1 << log2_cb_size;
1234 int stride0 = s->frame->linesize[0];
1235 uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1236 int stride1 = s->frame->linesize[1];
1237 uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1238 int stride2 = s->frame->linesize[2];
1239 uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
/* total payload size in bits: luma samples plus both subsampled chroma planes */
1241 int length = cb_size * cb_size * s->sps->pcm.bit_depth +
1242 (((cb_size >> s->sps->hshift[1]) * (cb_size >> s->sps->vshift[1])) +
1243 ((cb_size >> s->sps->hshift[2]) * (cb_size >> s->sps->vshift[2]))) *
1244 s->sps->pcm.bit_depth_chroma;
/* advance the CABAC reader past the byte-aligned PCM payload */
1245 const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1248 if (!s->sh.disable_deblocking_filter_flag)
1249 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1251 ret = init_get_bits(&gb, pcm, length);
1255 s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size, &gb, s->sps->pcm.bit_depth);
1256 if (s->sps->chroma_format_idc) {
1257 s->hevcdsp.put_pcm(dst1, stride1,
1258 cb_size >> s->sps->hshift[1],
1259 cb_size >> s->sps->vshift[1],
1260 &gb, s->sps->pcm.bit_depth_chroma);
1261 s->hevcdsp.put_pcm(dst2, stride2,
1262 cb_size >> s->sps->hshift[2],
1263 cb_size >> s->sps->vshift[2],
1264 &gb, s->sps->pcm.bit_depth_chroma);
1271 * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1273 * @param s HEVC decoding context
1274 * @param dst target buffer for block data at block position
1275 * @param dststride stride of the dst buffer
1276 * @param ref reference picture buffer at origin (0, 0)
1277 * @param mv motion vector (relative to block position) to get pixel data from
1278 * @param x_off horizontal position of block from origin (0, 0)
1279 * @param y_off vertical position of block from origin (0, 0)
1280 * @param block_w width of block
1281 * @param block_h height of block
1282 * @param luma_weight weighting factor applied to the luma prediction
1283 * @param luma_offset additive offset applied to the luma prediction value
/* Luma uniprediction motion compensation (spec 8.5.3.2.2.1): interpolate
 * one quarter-pel luma block from a single reference frame, optionally
 * applying explicit weighted prediction. */
1286 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1287 AVFrame *ref, const Mv *mv, int x_off, int y_off,
1288 int block_w, int block_h, int luma_weight, int luma_offset)
1290 HEVCLocalContext *lc = s->HEVClc;
1291 uint8_t *src = ref->data[0];
1292 ptrdiff_t srcstride = ref->linesize[0];
1293 int pic_width = s->sps->width;
1294 int pic_height = s->sps->height;
/* explicit weighting applies for weighted P slices or weighted-bipred B slices */
1297 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1298 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1299 int idx = ff_hevc_pel_weight[block_w];
/* integer-pel part of the MV shifts the source origin */
1301 x_off += mv->x >> 2;
1302 y_off += mv->y >> 2;
1303 src += y_off * srcstride + (x_off << s->sps->pixel_shift);
/* if the qpel interpolation window would read outside the picture,
 * replicate the picture edges into the emulation buffer first */
1305 if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1306 x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1307 y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1308 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1309 int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1310 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1312 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1313 edge_emu_stride, srcstride,
1314 block_w + QPEL_EXTRA,
1315 block_h + QPEL_EXTRA,
1316 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1317 pic_width, pic_height);
1318 src = lc->edge_emu_buffer + buf_offset;
1319 srcstride = edge_emu_stride;
/* NOTE(review): mx/my (fractional MV components) and the
 * if (!weight_flag) / else lines are elided from this excerpt */
1323 s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1324 block_h, mx, my, block_w);
1326 s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1327 block_h, s->sh.luma_log2_weight_denom,
1328 luma_weight, luma_offset, mx, my, block_w);
1332 * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1334 * @param s HEVC decoding context
1335 * @param dst target buffer for block data at block position
1336 * @param dststride stride of the dst buffer
1337 * @param ref0 reference picture0 buffer at origin (0, 0)
1338 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1339 * @param x_off horizontal position of block from origin (0, 0)
1340 * @param y_off vertical position of block from origin (0, 0)
1341 * @param block_w width of block
1342 * @param block_h height of block
1343 * @param ref1 reference picture1 buffer at origin (0, 0)
1344 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1345 * @param current_mv current motion vector structure
/* Luma bidirectional motion compensation (spec 8.5.3.2.2.1): interpolate
 * the L0 block into lc->tmp, then combine it with the interpolated L1
 * block (averaged, or explicitly weighted when weighted bipred is on). */
1347 static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1348 AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1349 int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1351 HEVCLocalContext *lc = s->HEVClc;
1352 ptrdiff_t src0stride = ref0->linesize[0];
1353 ptrdiff_t src1stride = ref1->linesize[0];
1354 int pic_width = s->sps->width;
1355 int pic_height = s->sps->height;
/* quarter-pel fractional parts of each MV */
1356 int mx0 = mv0->x & 3;
1357 int my0 = mv0->y & 3;
1358 int mx1 = mv1->x & 3;
1359 int my1 = mv1->y & 3;
1360 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1361 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
/* integer-pel offsets of each prediction block */
1362 int x_off0 = x_off + (mv0->x >> 2);
1363 int y_off0 = y_off + (mv0->y >> 2);
1364 int x_off1 = x_off + (mv1->x >> 2);
1365 int y_off1 = y_off + (mv1->y >> 2);
1366 int idx = ff_hevc_pel_weight[block_w];
1368 uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1369 uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
/* pad the L0 source through the edge-emulation buffer when the
 * interpolation window crosses the picture border */
1371 if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1372 x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1373 y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1374 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1375 int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1376 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1378 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1379 edge_emu_stride, src0stride,
1380 block_w + QPEL_EXTRA,
1381 block_h + QPEL_EXTRA,
1382 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1383 pic_width, pic_height);
1384 src0 = lc->edge_emu_buffer + buf_offset;
1385 src0stride = edge_emu_stride;
/* same edge handling for the L1 source, using the second emulation buffer */
1388 if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1389 x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1390 y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1391 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1392 int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1393 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1395 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1396 edge_emu_stride, src1stride,
1397 block_w + QPEL_EXTRA,
1398 block_h + QPEL_EXTRA,
1399 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1400 pic_width, pic_height);
1401 src1 = lc->edge_emu_buffer2 + buf_offset;
1402 src1stride = edge_emu_stride;
/* L0 prediction into the intermediate buffer, then bi combination;
 * NOTE(review): the if (!weight_flag) / else lines are elided here */
1405 s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1406 block_h, mx0, my0, block_w);
1408 s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1409 block_h, mx1, my1, block_w);
1411 s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1412 block_h, s->sh.luma_log2_weight_denom,
1413 s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1414 s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1415 s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1416 s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1422 * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1424 * @param s HEVC decoding context
1425 * @param dst1 target buffer for block data at block position (U plane)
1426 * @param dst2 target buffer for block data at block position (V plane)
1427 * @param dststride stride of the dst1 and dst2 buffers
1428 * @param ref reference picture buffer at origin (0, 0)
1429 * @param mv motion vector (relative to block position) to get pixel data from
1430 * @param x_off horizontal position of block from origin (0, 0)
1431 * @param y_off vertical position of block from origin (0, 0)
1432 * @param block_w width of block
1433 * @param block_h height of block
1434 * @param chroma_weight weighting factor applied to the chroma prediction
1435 * @param chroma_offset additive offset applied to the chroma prediction value
1438 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1439 ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1440 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1442 HEVCLocalContext *lc = s->HEVClc;
1443 int pic_width = s->sps->width >> s->sps->hshift[1];
1444 int pic_height = s->sps->height >> s->sps->vshift[1];
1445 const Mv *mv = ¤t_mv->mv[reflist];
1446 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1447 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1448 int idx = ff_hevc_pel_weight[block_w];
1449 int hshift = s->sps->hshift[1];
1450 int vshift = s->sps->vshift[1];
1451 intptr_t mx = mv->x & ((1 << (2 + hshift)) - 1);
1452 intptr_t my = mv->y & ((1 << (2 + vshift)) - 1);
1453 intptr_t _mx = mx << (1 - hshift);
1454 intptr_t _my = my << (1 - vshift);
1456 x_off += mv->x >> (2 + hshift);
1457 y_off += mv->y >> (2 + vshift);
1458 src0 += y_off * srcstride + (x_off << s->sps->pixel_shift);
1460 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1461 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1462 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1463 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1464 int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->sps->pixel_shift));
1465 int buf_offset0 = EPEL_EXTRA_BEFORE *
1466 (edge_emu_stride + (1 << s->sps->pixel_shift));
1467 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1468 edge_emu_stride, srcstride,
1469 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1470 x_off - EPEL_EXTRA_BEFORE,
1471 y_off - EPEL_EXTRA_BEFORE,
1472 pic_width, pic_height);
1474 src0 = lc->edge_emu_buffer + buf_offset0;
1475 srcstride = edge_emu_stride;
1478 s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1479 block_h, _mx, _my, block_w);
1481 s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1482 block_h, s->sh.chroma_log2_weight_denom,
1483 chroma_weight, chroma_offset, _mx, _my, block_w);
1487 * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1489 * @param s HEVC decoding context
1490 * @param dst target buffer for block data at block position
1491 * @param dststride stride of the dst buffer
1492 * @param ref0 reference picture0 buffer at origin (0, 0)
1493 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1494 * @param x_off horizontal position of block from origin (0, 0)
1495 * @param y_off vertical position of block from origin (0, 0)
1496 * @param block_w width of block
1497 * @param block_h height of block
1498 * @param ref1 reference picture1 buffer at origin (0, 0)
1499 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1500 * @param current_mv current motion vector structure
1501 * @param cidx chroma component index (0 = Cb, 1 = Cr)
1503 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1504 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1506 HEVCLocalContext *lc = s->HEVClc;
1507 uint8_t *src1 = ref0->data[cidx+1];
1508 uint8_t *src2 = ref1->data[cidx+1];
1509 ptrdiff_t src1stride = ref0->linesize[cidx+1];
1510 ptrdiff_t src2stride = ref1->linesize[cidx+1];
1511 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1512 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1513 int pic_width = s->sps->width >> s->sps->hshift[1];
1514 int pic_height = s->sps->height >> s->sps->vshift[1];
1515 Mv *mv0 = ¤t_mv->mv[0];
1516 Mv *mv1 = ¤t_mv->mv[1];
1517 int hshift = s->sps->hshift[1];
1518 int vshift = s->sps->vshift[1];
1520 intptr_t mx0 = mv0->x & ((1 << (2 + hshift)) - 1);
1521 intptr_t my0 = mv0->y & ((1 << (2 + vshift)) - 1);
1522 intptr_t mx1 = mv1->x & ((1 << (2 + hshift)) - 1);
1523 intptr_t my1 = mv1->y & ((1 << (2 + vshift)) - 1);
1524 intptr_t _mx0 = mx0 << (1 - hshift);
1525 intptr_t _my0 = my0 << (1 - vshift);
1526 intptr_t _mx1 = mx1 << (1 - hshift);
1527 intptr_t _my1 = my1 << (1 - vshift);
1529 int x_off0 = x_off + (mv0->x >> (2 + hshift));
1530 int y_off0 = y_off + (mv0->y >> (2 + vshift));
1531 int x_off1 = x_off + (mv1->x >> (2 + hshift));
1532 int y_off1 = y_off + (mv1->y >> (2 + vshift));
1533 int idx = ff_hevc_pel_weight[block_w];
1534 src1 += y_off0 * src1stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1535 src2 += y_off1 * src2stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1537 if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1538 x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1539 y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1540 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1541 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1542 int buf_offset1 = EPEL_EXTRA_BEFORE *
1543 (edge_emu_stride + (1 << s->sps->pixel_shift));
1545 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1546 edge_emu_stride, src1stride,
1547 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1548 x_off0 - EPEL_EXTRA_BEFORE,
1549 y_off0 - EPEL_EXTRA_BEFORE,
1550 pic_width, pic_height);
1552 src1 = lc->edge_emu_buffer + buf_offset1;
1553 src1stride = edge_emu_stride;
1556 if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1557 x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1558 y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1559 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1560 int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1561 int buf_offset1 = EPEL_EXTRA_BEFORE *
1562 (edge_emu_stride + (1 << s->sps->pixel_shift));
1564 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1565 edge_emu_stride, src2stride,
1566 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1567 x_off1 - EPEL_EXTRA_BEFORE,
1568 y_off1 - EPEL_EXTRA_BEFORE,
1569 pic_width, pic_height);
1571 src2 = lc->edge_emu_buffer2 + buf_offset1;
1572 src2stride = edge_emu_stride;
1575 s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1576 block_h, _mx0, _my0, block_w);
1578 s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1579 src2, src2stride, lc->tmp,
1580 block_h, _mx1, _my1, block_w);
1582 s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1583 src2, src2stride, lc->tmp,
1585 s->sh.chroma_log2_weight_denom,
1586 s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1587 s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1588 s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1589 s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1590 _mx1, _my1, block_w);
1593 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1594 const Mv *mv, int y0, int height)
1596 int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1598 if (s->threads_type == FF_THREAD_FRAME )
1599 ff_thread_await_progress(&ref->tf, y, 0);
/* Decode the explicit (non-merge) motion information for one PU: the
 * inter prediction direction, per-list reference indices, MVD, and MVP
 * flag, then build the final MV for each used list.
 * (Excerpt note: the "int mvp_flag;" declaration, braces and else lines
 * are elided from this extract — see index gaps.) */
1602 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1603 int nPbH, int log2_cb_size, int part_idx,
1604 int merge_idx, MvField *mv)
1606 HEVCLocalContext *lc = s->HEVClc;
1607 enum InterPredIdc inter_pred_idc = PRED_L0;
1610 ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
/* P slices are implicitly L0-only; the direction is only coded for B slices */
1612 if (s->sh.slice_type == B_SLICE)
1613 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
/* list 0: ref idx, MVD and MV predictor selection */
1615 if (inter_pred_idc != PRED_L1) {
1616 if (s->sh.nb_refs[L0])
1617 mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1619 mv->pred_flag = PF_L0;
1620 ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1621 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1622 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1623 part_idx, merge_idx, mv, mvp_flag, 0);
1624 mv->mv[0].x += lc->pu.mvd.x;
1625 mv->mv[0].y += lc->pu.mvd.y;
/* list 1: same, but the MVD may be forced to zero for bi-pred */
1628 if (inter_pred_idc != PRED_L0) {
1629 if (s->sh.nb_refs[L1])
1630 mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1632 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1633 AV_ZERO32(&lc->pu.mvd);
1635 ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1638 mv->pred_flag += PF_L1;
1639 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1640 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1641 part_idx, merge_idx, mv, mvp_flag, 1);
1642 mv->mv[1].x += lc->pu.mvd.x;
1643 mv->mv[1].y += lc->pu.mvd.y;
/* Decode and perform inter prediction for one PU: obtain its motion data
 * (merge/skip or explicit), store it into the per-PU MV field table, wait
 * for the referenced rows (frame threading), then run luma and chroma MC
 * for the L0-only, L1-only or bidirectional case.
 * NOTE(review): several "&current_mv" tokens below appear mis-decoded as
 * "¤t_mv" (HTML entity &curren -> U+00A4) — restore before compiling.
 * (Excerpt note: some declarations, braces and else lines are elided.) */
1647 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1649 int log2_cb_size, int partIdx, int idx)
1651 #define POS(c_idx, x, y) \
1652 &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1653 (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1654 HEVCLocalContext *lc = s->HEVClc;
1656 struct MvField current_mv = {{{ 0 }}};
1658 int min_pu_width = s->sps->min_pu_width;
1660 MvField *tab_mvf = s->ref->tab_mvf;
1661 RefPicList *refPicList = s->ref->refPicList;
1662 HEVCFrame *ref0 = NULL, *ref1 = NULL;
1663 uint8_t *dst0 = POS(0, x0, y0);
1664 uint8_t *dst1 = POS(1, x0, y0);
1665 uint8_t *dst2 = POS(2, x0, y0);
1666 int log2_min_cb_size = s->sps->log2_min_cb_size;
1667 int min_cb_width = s->sps->min_cb_width;
1668 int x_cb = x0 >> log2_min_cb_size;
1669 int y_cb = y0 >> log2_min_cb_size;
1673 int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1676 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
/* merge/skip: copy motion from a merge candidate; otherwise decode it explicitly */
1678 if (skip_flag || lc->pu.merge_flag) {
1679 if (s->sh.max_num_merge_cand > 1)
1680 merge_idx = ff_hevc_merge_idx_decode(s);
1684 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1685 partIdx, merge_idx, ¤t_mv);
1687 hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1688 partIdx, merge_idx, ¤t_mv);
/* store the motion field for every min-PU covered by this PU */
1691 x_pu = x0 >> s->sps->log2_min_pu_size;
1692 y_pu = y0 >> s->sps->log2_min_pu_size;
1694 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1695 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1696 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
/* resolve the reference frames and wait until they are decoded far enough */
1698 if (current_mv.pred_flag & PF_L0) {
1699 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1702 hevc_await_progress(s, ref0, ¤t_mv.mv[0], y0, nPbH);
1704 if (current_mv.pred_flag & PF_L1) {
1705 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1708 hevc_await_progress(s, ref1, ¤t_mv.mv[1], y0, nPbH);
/* uniprediction from list 0 */
1711 if (current_mv.pred_flag == PF_L0) {
1712 int x0_c = x0 >> s->sps->hshift[1];
1713 int y0_c = y0 >> s->sps->vshift[1];
1714 int nPbW_c = nPbW >> s->sps->hshift[1];
1715 int nPbH_c = nPbH >> s->sps->vshift[1];
1717 luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1718 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1719 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1720 s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1722 if (s->sps->chroma_format_idc) {
1723 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1724 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1725 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1726 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1727 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1728 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
/* uniprediction from list 1 */
1730 } else if (current_mv.pred_flag == PF_L1) {
1731 int x0_c = x0 >> s->sps->hshift[1];
1732 int y0_c = y0 >> s->sps->vshift[1];
1733 int nPbW_c = nPbW >> s->sps->hshift[1];
1734 int nPbH_c = nPbH >> s->sps->vshift[1];
1736 luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1737 ¤t_mv.mv[1], x0, y0, nPbW, nPbH,
1738 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1739 s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1741 if (s->sps->chroma_format_idc) {
1742 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1743 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1744 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1746 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1747 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1748 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
/* bidirectional prediction */
1750 } else if (current_mv.pred_flag == PF_BI) {
1751 int x0_c = x0 >> s->sps->hshift[1];
1752 int y0_c = y0 >> s->sps->vshift[1];
1753 int nPbW_c = nPbW >> s->sps->hshift[1];
1754 int nPbH_c = nPbH >> s->sps->vshift[1];
1756 luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1757 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1758 ref1->frame, ¤t_mv.mv[1], ¤t_mv);
1760 if (s->sps->chroma_format_idc) {
1761 chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1762 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 0);
1764 chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1765 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 1);
/* Derive the luma intra prediction mode for one PU from the three most
 * probable mode (MPM) candidates built from the left and above neighbour
 * modes, record it in tab_ipm and mark the PU intra in the MV field
 * table; returns the derived mode.
 * (Excerpt note: the candidate[] declaration, some braces and else lines
 * are elided from this extract.) */
1773 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1774 int prev_intra_luma_pred_flag)
1776 HEVCLocalContext *lc = s->HEVClc;
1777 int x_pu = x0 >> s->sps->log2_min_pu_size;
1778 int y_pu = y0 >> s->sps->log2_min_pu_size;
1779 int min_pu_width = s->sps->min_pu_width;
1780 int size_in_pus = pu_size >> s->sps->log2_min_pu_size;
/* position within the CTB; a zero offset means the neighbour lies in another CTB */
1781 int x0b = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1782 int y0b = y0 & ((1 << s->sps->log2_ctb_size) - 1);
/* unavailable neighbours default to DC */
1784 int cand_up = (lc->ctb_up_flag || y0b) ?
1785 s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1786 int cand_left = (lc->ctb_left_flag || x0b) ?
1787 s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;
1789 int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1791 MvField *tab_mvf = s->ref->tab_mvf;
1792 int intra_pred_mode;
1796 // intra_pred_mode prediction does not cross vertical CTB boundaries
1797 if ((y0 - 1) < y_ctb)
/* build the three MPM candidates (spec 8.4.2) */
1800 if (cand_left == cand_up) {
1801 if (cand_left < 2) {
1802 candidate[0] = INTRA_PLANAR;
1803 candidate[1] = INTRA_DC;
1804 candidate[2] = INTRA_ANGULAR_26;
/* equal angular neighbours: use the mode and its two angular neighbours */
1806 candidate[0] = cand_left;
1807 candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1808 candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1811 candidate[0] = cand_left;
1812 candidate[1] = cand_up;
1813 if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1814 candidate[2] = INTRA_PLANAR;
1815 } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1816 candidate[2] = INTRA_DC;
1818 candidate[2] = INTRA_ANGULAR_26;
1822 if (prev_intra_luma_pred_flag) {
1823 intra_pred_mode = candidate[lc->pu.mpm_idx];
/* non-MPM path: sort the candidates, then remap rem_intra_luma_pred_mode
 * past every candidate that is <= it */
1825 if (candidate[0] > candidate[1])
1826 FFSWAP(uint8_t, candidate[0], candidate[1]);
1827 if (candidate[0] > candidate[2])
1828 FFSWAP(uint8_t, candidate[0], candidate[2]);
1829 if (candidate[1] > candidate[2])
1830 FFSWAP(uint8_t, candidate[1], candidate[2]);
1832 intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1833 for (i = 0; i < 3; i++)
1834 if (intra_pred_mode >= candidate[i])
1838 /* write the intra prediction units into the mv array */
1841 for (i = 0; i < size_in_pus; i++) {
1842 memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1843 intra_pred_mode, size_in_pus);
1845 for (j = 0; j < size_in_pus; j++) {
1846 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1850 return intra_pred_mode;
/* Record the coding-tree depth for every min-CB covered by the CB at
 * (x0, y0); read back later for CABAC context derivation of neighbours.
 * (Excerpt note: the "int y;" declaration, the memset's trailing
 * "ct_depth, length);" arguments and the closing brace are elided.) */
1853 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1854 int log2_cb_size, int ct_depth)
1856 int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1857 int x_cb = x0 >> s->sps->log2_min_cb_size;
1858 int y_cb = y0 >> s->sps->log2_min_cb_size;
1861 for (y = 0; y < length; y++)
1862 memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
/* Intra mode remapping table, indexed by a derived mode; used in the
 * 4:2:2 (chroma_format_idc == 2) branch of intra_prediction_unit() to
 * map the luma-derived mode to the chroma prediction mode. */
1866 static const uint8_t tab_mode_idx[] = {
1867 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20,
1868 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
/* Decode the intra prediction modes of a CU: the luma mode for each PB
 * (one, or four when PART_NxN), then the chroma mode(s) according to the
 * chroma format. intra_chroma_table maps the coded chroma_mode symbol to
 * a prediction mode; symbol 4 means "same as luma" (derived mode).
 * (Excerpt note: declarations (i, j, chroma_mode, mode_idx), braces and
 * else lines are elided from this extract.) */
1870 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1873 HEVCLocalContext *lc = s->HEVClc;
1874 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1875 uint8_t prev_intra_luma_pred_flag[4];
/* PART_NxN splits the CU into a 2x2 grid of luma PBs */
1876 int split = lc->cu.part_mode == PART_NxN;
1877 int pb_size = (1 << log2_cb_size) >> split;
1878 int side = split + 1;
/* first pass: all prev_intra_luma_pred_flag bits, then the mode payloads */
1882 for (i = 0; i < side; i++)
1883 for (j = 0; j < side; j++)
1884 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1886 for (i = 0; i < side; i++) {
1887 for (j = 0; j < side; j++) {
1888 if (prev_intra_luma_pred_flag[2 * i + j])
1889 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1891 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1893 lc->pu.intra_pred_mode[2 * i + j] =
1894 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1895 prev_intra_luma_pred_flag[2 * i + j]);
/* 4:4:4: one chroma mode per luma PB; mode 34 substitutes for a collision
 * between the table mode and the luma mode */
1899 if (s->sps->chroma_format_idc == 3) {
1900 for (i = 0; i < side; i++) {
1901 for (j = 0; j < side; j++) {
1902 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1903 if (chroma_mode != 4) {
1904 if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1905 lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1907 lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1909 lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
/* 4:2:2: single chroma mode, remapped through tab_mode_idx */
1913 } else if (s->sps->chroma_format_idc == 2) {
1915 lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1916 if (chroma_mode != 4) {
1917 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1920 mode_idx = intra_chroma_table[chroma_mode];
1922 mode_idx = lc->pu.intra_pred_mode[0];
1924 lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
/* 4:2:0: single chroma mode, no remap table */
1925 } else if (s->sps->chroma_format_idc != 0) {
1926 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1927 if (chroma_mode != 4) {
1928 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1929 lc->pu.intra_pred_mode_c[0] = 34;
1931 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1933 lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
/* Fill default intra metadata for a CU that carries no explicit intra
 * syntax (skip/PCM/inter): mark every covered min-PU cell as INTRA_DC in
 * tab_ipm, and — for intra CUs — tag the motion-vector fields PF_INTRA so
 * neighbouring MV prediction treats them as unavailable. */
1938 static void intra_prediction_unit_default_value(HEVCContext *s,
1942 HEVCLocalContext *lc = s->HEVClc;
1943 int pb_size = 1 << log2_cb_size;
/* CU edge length in minimum-prediction-unit cells. */
1944 int size_in_pus = pb_size >> s->sps->log2_min_pu_size;
1945 int min_pu_width = s->sps->min_pu_width;
1946 MvField *tab_mvf = s->ref->tab_mvf;
1947 int x_pu = x0 >> s->sps->log2_min_pu_size;
1948 int y_pu = y0 >> s->sps->log2_min_pu_size;
/* CU smaller than a min PU: nothing to record. */
1951 if (size_in_pus == 0)
1953 for (j = 0; j < size_in_pus; j++)
1954 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1955 if (lc->cu.pred_mode == MODE_INTRA)
1956 for (j = 0; j < size_in_pus; j++)
1957 for (k = 0; k < size_in_pus; k++)
1958 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
/* Parse one coding unit (spec 7.3.8.5 coding_unit()): skip flag, prediction
 * mode, partitioning, PCM, prediction units per partition, and the residual
 * transform tree; then propagate qp and ct_depth bookkeeping.
 * NOTE(review): extraction dropped many lines (case labels, braces, else
 * arms); the syntax-element decode ORDER below is normative and must not be
 * reordered. */
1961 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1963 int cb_size = 1 << log2_cb_size;
1964 HEVCLocalContext *lc = s->HEVClc;
1965 int log2_min_cb_size = s->sps->log2_min_cb_size;
/* CU edge length in min-CB units, used for the per-row memsets below. */
1966 int length = cb_size >> log2_min_cb_size;
1967 int min_cb_width = s->sps->min_cb_width;
1968 int x_cb = x0 >> log2_min_cb_size;
1969 int y_cb = y0 >> log2_min_cb_size;
/* Index into size-dependent DSP function tables (log2 size 2..6 -> 0..4). */
1970 int idx = log2_cb_size - 2;
/* Granularity mask of the quantization-group grid for qPy prediction. */
1971 int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
/* Defaults before any syntax is read. */
1976 lc->cu.pred_mode = MODE_INTRA;
1977 lc->cu.part_mode = PART_2Nx2N;
1978 lc->cu.intra_split_flag = 0;
1980 SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
1981 for (x = 0; x < 4; x++)
1982 lc->pu.intra_pred_mode[x] = 1;
1983 if (s->pps->transquant_bypass_enable_flag) {
1984 lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
1985 if (lc->cu.cu_transquant_bypass_flag)
1986 set_deblocking_bypass(s, x0, y0, log2_cb_size);
1988 lc->cu.cu_transquant_bypass_flag = 0;
/* cu_skip_flag exists only in P/B slices; record it for the whole CU area
 * so that neighbour CUs can use it as CABAC context. */
1990 if (s->sh.slice_type != I_SLICE) {
1991 uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
1993 x = y_cb * min_cb_width + x_cb;
1994 for (y = 0; y < length; y++) {
1995 memset(&s->skip_flag[x], skip_flag, length);
1998 lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2000 x = y_cb * min_cb_width + x_cb;
2001 for (y = 0; y < length; y++) {
2002 memset(&s->skip_flag[x], 0, length);
/* Skipped CU: single merge-mode PU, no residual, default intra metadata. */
2007 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2008 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2009 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2011 if (!s->sh.disable_deblocking_filter_flag)
2012 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2016 if (s->sh.slice_type != I_SLICE)
2017 lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
/* part_mode is only signalled for inter CUs or min-size intra CUs;
 * otherwise it stays at the PART_2Nx2N default. */
2018 if (lc->cu.pred_mode != MODE_INTRA ||
2019 log2_cb_size == s->sps->log2_min_cb_size) {
2020 lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
2021 lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2022 lc->cu.pred_mode == MODE_INTRA;
2025 if (lc->cu.pred_mode == MODE_INTRA) {
/* pcm_flag is possible only for 2Nx2N intra CUs within the SPS PCM
 * size range. */
2026 if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2027 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2028 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2029 pcm_flag = ff_hevc_pcm_flag_decode(s);
2032 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2033 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2034 if (s->sps->pcm.loop_filter_disable_flag)
2035 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2040 intra_prediction_unit(s, x0, y0, log2_cb_size);
2043 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
/* Inter CU: dispatch one hls_prediction_unit() call per partition with
 * that partition's geometry (AMP partitions use 1/4 + 3/4 splits). */
2044 switch (lc->cu.part_mode) {
2046 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2049 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0, idx);
2050 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2053 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2054 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2057 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0, idx);
2058 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2061 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2062 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1, idx);
2065 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0, idx - 2);
2066 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2069 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2070 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1, idx - 2);
2073 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2074 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2075 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2076 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
/* rqt_root_cbf: whether any residual is present; implicit 1 except for
 * non-merge inter 2Nx2N CUs where it is signalled. */
2082 int rqt_root_cbf = 1;
2084 if (lc->cu.pred_mode != MODE_INTRA &&
2085 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2086 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
/* Zero parent-cbf array passed down the transform tree.
 * (style nit: "static const" is the conventional specifier order) */
2089 const static int cbf[2] = { 0 };
2090 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2091 s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2092 s->sps->max_transform_hierarchy_depth_inter;
2093 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2095 log2_cb_size, 0, 0, cbf, cbf);
2099 if (!s->sh.disable_deblocking_filter_flag)
2100 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
/* If no cu_qp_delta was coded inside this CU, derive qPy now. */
2105 if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2106 ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2108 x = y_cb * min_cb_width + x_cb;
2109 for (y = 0; y < length; y++) {
2110 memset(&s->qp_y_tab[x], lc->qp_y, length);
/* At the end of a quantization group, latch qp_y as predictor for the
 * next group. */
2114 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2115 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2116 lc->qPy_pred = lc->qp_y;
2119 set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
/* Recursively parse the coding quadtree (spec 7.3.8.4): decide whether the
 * current block splits into four quadrants (explicitly coded, or implied
 * when the block overhangs the picture edge), recurse or parse a CU, and
 * handle end_of_slice_segment_flag at CTB completion.
 * Returns >0 if more CTB data follows, 0 at end of slice, <0 on error.
 * NOTE(review): extraction dropped lines (error checks after each recursive
 * call, some braces); visible code kept byte-identical. */
2124 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2125 int log2_cb_size, int cb_depth)
2127 HEVCLocalContext *lc = s->HEVClc;
2128 const int cb_size = 1 << log2_cb_size;
2132 lc->ct_depth = cb_depth;
/* split_cu_flag is coded only when the block lies fully inside the
 * picture and is above minimum size ... */
2133 if (x0 + cb_size <= s->sps->width &&
2134 y0 + cb_size <= s->sps->height &&
2135 log2_cb_size > s->sps->log2_min_cb_size) {
2136 split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
/* ... otherwise the split is inferred (forced while above min size). */
2138 split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
/* Entering a new quantization group resets the cu_qp_delta state. */
2140 if (s->pps->cu_qp_delta_enabled_flag &&
2141 log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2142 lc->tu.is_cu_qp_delta_coded = 0;
2143 lc->tu.cu_qp_delta = 0;
2146 if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2147 log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_chroma_qp_offset_depth) {
2148 lc->tu.is_cu_chroma_qp_offset_coded = 0;
/* Split: recurse into the four quadrants, skipping those that fall
 * entirely outside the picture. */
2152 int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
2153 const int cb_size_split = cb_size >> 1;
2154 const int x1 = x0 + cb_size_split;
2155 const int y1 = y0 + cb_size_split;
2159 more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2163 if (more_data && x1 < s->sps->width) {
2164 more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2168 if (more_data && y1 < s->sps->height) {
2169 more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2173 if (more_data && x1 < s->sps->width &&
2174 y1 < s->sps->height) {
2175 more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
/* Quantization-group boundary: latch the qPy predictor. */
2180 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2181 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2182 lc->qPy_pred = lc->qp_y;
/* More data follows unless this quadrant reached the bottom-right
 * picture corner. */
2185 return ((x1 + cb_size_split) < s->sps->width ||
2186 (y1 + cb_size_split) < s->sps->height);
/* Leaf: parse the coding unit itself. */
2190 ret = hls_coding_unit(s, x0, y0, log2_cb_size);
/* At the last CU of a CTB row/column-aligned position, the slice may
 * signal its end. */
2193 if ((!((x0 + cb_size) %
2194 (1 << (s->sps->log2_ctb_size))) ||
2195 (x0 + cb_size >= s->sps->width)) &&
2197 (1 << (s->sps->log2_ctb_size))) ||
2198 (y0 + cb_size >= s->sps->height))) {
2199 int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2200 return !end_of_slice_flag;
2209 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2212 HEVCLocalContext *lc = s->HEVClc;
2213 int ctb_size = 1 << s->sps->log2_ctb_size;
2214 int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2215 int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2217 s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2219 if (s->pps->entropy_coding_sync_enabled_flag) {
2220 if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2221 lc->first_qp_group = 1;
2222 lc->end_of_tiles_x = s->sps->width;
2223 } else if (s->pps->tiles_enabled_flag) {
2224 if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2225 int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2226 lc->end_of_tiles_x = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2227 lc->first_qp_group = 1;
2230 lc->end_of_tiles_x = s->sps->width;
2233 lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2235 lc->boundary_flags = 0;
2236 if (s->pps->tiles_enabled_flag) {
2237 if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2238 lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2239 if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2240 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2241 if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
2242 lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2243 if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
2244 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2246 if (!ctb_addr_in_slice > 0)
2247 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2248 if (ctb_addr_in_slice < s->sps->ctb_width)
2249 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2252 lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2253 lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2254 lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2255 lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
/* Single-threaded slice decoding entry point (run via avctx->execute):
 * walk CTBs in tile-scan order from the slice's first CTB, initializing
 * CABAC per CTB, parsing SAO parameters and the coding quadtree, then
 * running the in-loop filters as rows complete.
 * NOTE(review): extraction dropped the tail of this function (loop close,
 * error return and final "return ctb_addr_ts"). */
2258 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2260 HEVCContext *s = avctxt->priv_data;
2261 int ctb_size = 1 << s->sps->log2_ctb_size;
2265 int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
/* A dependent slice segment cannot be the very first segment. */
2267 if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2268 av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2269 return AVERROR_INVALIDDATA;
/* A dependent segment needs its predecessor already decoded. */
2272 if (s->sh.dependent_slice_segment_flag) {
2273 int prev_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2274 if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2275 av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2276 return AVERROR_INVALIDDATA;
2280 while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2281 int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* Raster address -> pixel coordinates of the CTB top-left corner. */
2283 x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2284 y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2285 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2287 ff_hevc_cabac_init(s, ctb_addr_ts);
2289 hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
/* Per-CTB deblocking parameters come from the current slice header. */
2291 s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2292 s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2293 s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2295 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2296 if (more_data < 0) {
/* Invalidate this CTB so a later dependent segment cannot chain off it. */
2297 s->tab_slice_address[ctb_addr_rs] = -1;
2303 ff_hevc_save_states(s, ctb_addr_ts);
2304 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* Bottom-right CTB of the picture: run filters on the final block. */
2307 if (x_ctb + ctb_size >= s->sps->width &&
2308 y_ctb + ctb_size >= s->sps->height)
2309 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
/* Decode the current slice single-threaded by dispatching hls_decode_entry
 * through avctx->execute with a single job.
 * NOTE(review): extraction dropped this function's locals and return. */
2314 static int hls_slice_data(HEVCContext *s)
2322 s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
/* Wavefront (WPP) per-row decoding job, run via avctx->execute2: each job
 * decodes one CTB row using its own HEVCContext clone (s1->sList[self_id]),
 * synchronizing with the row above through ff_thread_await_progress2 /
 * ff_thread_report_progress2 and aborting all rows via the shared wpp_err
 * atomic on error.
 * NOTE(review): extraction dropped lines (braces, error paths, some locals). */
2325 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2327 HEVCContext *s1 = avctxt->priv_data, *s;
2328 HEVCLocalContext *lc;
2329 int ctb_size = 1<< s1->sps->log2_ctb_size;
2331 int *ctb_row_p = input_ctb_row;
2332 int ctb_row = ctb_row_p[job];
/* First CTB of this row, in raster then tile-scan addressing. */
2333 int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->sps->width + ctb_size - 1) >> s1->sps->log2_ctb_size);
2334 int ctb_addr_ts = s1->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
/* Rows are striped across the worker threads round-robin. */
2335 int thread = ctb_row % s1->threads_number;
2338 s = s1->sList[self_id];
/* Rows after the first start at their slice-header entry-point offset;
 * note the bitstream reader and CABAC engine are both (re)positioned. */
2342 ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2346 ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2349 while(more_data && ctb_addr_ts < s->sps->ctb_size) {
2350 int x_ctb = (ctb_addr_rs % s->sps->ctb_width) << s->sps->log2_ctb_size;
2351 int y_ctb = (ctb_addr_rs / s->sps->ctb_width) << s->sps->log2_ctb_size;
2353 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
/* Wait until the row above is SHIFT_CTB_WPP CTBs ahead (CABAC context
 * inheritance needs the top-right neighbour decoded). */
2355 ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
/* Another row already failed: release waiters and bail out. */
2357 if (avpriv_atomic_int_get(&s1->wpp_err)){
2358 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2362 ff_hevc_cabac_init(s, ctb_addr_ts);
2363 hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2364 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2366 if (more_data < 0) {
2367 s->tab_slice_address[ctb_addr_rs] = -1;
2373 ff_hevc_save_states(s, ctb_addr_ts);
2374 ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2375 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* Row ended early although it is not the last entry point: bitstream is
 * inconsistent — flag the error for all rows. */
2377 if (!more_data && (x_ctb+ctb_size) < s->sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2378 avpriv_atomic_int_set(&s1->wpp_err, 1);
2379 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/* Bottom-right CTB of the picture: final filter pass. */
2383 if ((x_ctb+ctb_size) >= s->sps->width && (y_ctb+ctb_size) >= s->sps->height ) {
2384 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2385 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2388 ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* Reached the right picture edge: this row's job is done. */
2391 if(x_ctb >= s->sps->width) {
2395 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/* Set up and run wavefront-parallel decoding of one slice: clone the
 * decoder context per thread, compute each CTB row's byte offset/size from
 * the slice-header entry points (correcting for emulation-prevention bytes
 * recorded in skipped_bytes_pos), then dispatch hls_decode_entry_wpp over
 * num_entry_point_offsets + 1 row jobs.
 * NOTE(review): extraction dropped lines (braces, some cleanup/return). */
2400 static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
2402 HEVCLocalContext *lc = s->HEVClc;
2403 int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2404 int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2406 int startheader, cmpt = 0;
2412 return AVERROR(ENOMEM);
2417 ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
/* First call: allocate one HEVCContext clone per extra thread.
 * NOTE(review): these av_malloc/av_mallocz results are not NULL-checked
 * before use — potential crash on OOM; confirm against upstream. */
2420 for (i = 1; i < s->threads_number; i++) {
2421 s->sList[i] = av_malloc(sizeof(HEVCContext));
2422 memcpy(s->sList[i], s, sizeof(HEVCContext));
2423 s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2424 s->sList[i]->HEVClc = s->HEVClcList[i];
/* Byte offset of the slice data within the NAL (gb.index is in bits). */
2428 offset = (lc->gb.index >> 3);
/* cmpt counts emulation-prevention bytes inside each entry-point span;
 * entry_point_offset values are in pre-unescaping units and must be
 * shrunk accordingly. */
2430 for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < s->skipped_bytes; j++) {
2431 if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2437 for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2438 offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2439 for (j = 0, cmpt = 0, startheader = offset
2440 + s->sh.entry_point_offset[i]; j < s->skipped_bytes; j++) {
2441 if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2446 s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2447 s->sh.offset[i - 1] = offset;
/* Last row runs to the end of the NAL payload. */
2450 if (s->sh.num_entry_point_offsets != 0) {
2451 offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2452 s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2453 s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
/* Refresh the clones with the current slice state (keep their own lc). */
2458 for (i = 1; i < s->threads_number; i++) {
2459 s->sList[i]->HEVClc->first_qp_group = 1;
2460 s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2461 memcpy(s->sList[i], s, sizeof(HEVCContext));
2462 s->sList[i]->HEVClc = s->HEVClcList[i];
2465 avpriv_atomic_int_set(&s->wpp_err, 0);
2466 ff_reset_entries(s->avctx);
2468 for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2473 if (s->pps->entropy_coding_sync_enabled_flag)
2474 s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2476 for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
/* Parse the two-byte NAL unit header (spec 7.3.1.2):
 * forbidden_zero_bit, nal_unit_type(6), nuh_layer_id(6),
 * nuh_temporal_id_plus1(3). */
2484 * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2485 * 0 if the unit should be skipped, 1 otherwise
2487 static int hls_nal_unit(HEVCContext *s)
2489 GetBitContext *gb = &s->HEVClc->gb;
/* forbidden_zero_bit must be 0. */
2492 if (get_bits1(gb) != 0)
2493 return AVERROR_INVALIDDATA;
2495 s->nal_unit_type = get_bits(gb, 6);
2497 nuh_layer_id = get_bits(gb, 6);
/* temporal_id = nuh_temporal_id_plus1 - 1; plus1 == 0 is forbidden. */
2498 s->temporal_id = get_bits(gb, 3) - 1;
2499 if (s->temporal_id < 0)
2500 return AVERROR_INVALIDDATA;
2502 av_log(s->avctx, AV_LOG_DEBUG,
2503 "nal_unit_type: %d, nuh_layer_id: %d, temporal_id: %d\n",
2504 s->nal_unit_type, nuh_layer_id, s->temporal_id);
/* Only the base layer (layer id 0) is decoded; others are skipped. */
2506 return nuh_layer_id == 0;
/* Attach SEI-derived side data to the output frame: stereo 3D packing
 * (frame-packing-arrangement SEI) and a display-orientation matrix
 * (display-orientation SEI).  Returns 0 or AVERROR(ENOMEM). */
2509 static int set_side_data(HEVCContext *s)
2511 AVFrame *out = s->ref->frame;
/* Only arrangement types 3..5 with a meaningful interpretation map to
 * AVStereo3D. */
2513 if (s->sei_frame_packing_present &&
2514 s->frame_packing_arrangement_type >= 3 &&
2515 s->frame_packing_arrangement_type <= 5 &&
2516 s->content_interpretation_type > 0 &&
2517 s->content_interpretation_type < 3) {
2518 AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2520 return AVERROR(ENOMEM);
2522 switch (s->frame_packing_arrangement_type) {
/* Type 3: side-by-side (quincunx variant when subsampled). */
2524 if (s->quincunx_subsampling)
2525 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2527 stereo->type = AV_STEREO3D_SIDEBYSIDE;
/* Type 4: top-bottom. */
2530 stereo->type = AV_STEREO3D_TOPBOTTOM;
/* Type 5: temporal frame sequence. */
2533 stereo->type = AV_STEREO3D_FRAMESEQUENCE;
/* content_interpretation_type 2 means right view first. */
2537 if (s->content_interpretation_type == 2)
2538 stereo->flags = AV_STEREO3D_FLAG_INVERT;
2541 if (s->sei_display_orientation_present &&
2542 (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
/* SEI rotation is in 1/65536-turn units; convert to degrees. */
2543 double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2544 AVFrameSideData *rotation = av_frame_new_side_data(out,
2545 AV_FRAME_DATA_DISPLAYMATRIX,
2546 sizeof(int32_t) * 9);
2548 return AVERROR(ENOMEM);
2550 av_display_rotation_set((int32_t *)rotation->data, angle);
2551 av_display_matrix_flip((int32_t *)rotation->data,
2552 s->sei_hflip, s->sei_vflip);
/* Begin decoding a new picture: clear per-picture metadata tables,
 * allocate the reference frame, build the reference picture sets, attach
 * side data, possibly output a delayed frame, and unblock frame-threading.
 * NOTE(review): extraction dropped lines including the success return;
 * the final visible statement is the failure-path unref. */
2558 static int hevc_frame_start(HEVCContext *s)
2560 HEVCLocalContext *lc = s->HEVClc;
/* +1 in each dimension: the tables carry one guard row/column. */
2561 int pic_size_in_ctb = ((s->sps->width >> s->sps->log2_min_cb_size) + 1) *
2562 ((s->sps->height >> s->sps->log2_min_cb_size) + 1);
/* Reset deblocking strengths, cbf, PCM and slice-address maps. */
2565 memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2566 memset(s->vertical_bs, 0, s->bs_width * s->bs_height);
2567 memset(s->cbf_luma, 0, s->sps->min_tb_width * s->sps->min_tb_height);
2568 memset(s->is_pcm, 0, (s->sps->min_pu_width + 1) * (s->sps->min_pu_height + 1));
/* -1 marks "no slice decoded here yet" for dependent-segment checks. */
2569 memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2572 s->first_nal_type = s->nal_unit_type;
2574 if (s->pps->tiles_enabled_flag)
2575 lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2577 ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2581 ret = ff_hevc_frame_rps(s);
2583 av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2587 s->ref->frame->key_frame = IS_IRAP(s);
2589 ret = set_side_data(s);
/* slice_type: 0=B,1=P,2=I -> AV_PICTURE_TYPE_{B,P,I} via 3 - type. */
2593 s->frame->pict_type = 3 - s->sh.slice_type;
2596 ff_hevc_bump_frame(s);
2598 av_frame_unref(s->output_frame);
2599 ret = ff_hevc_output_frame(s, s->output_frame, 0);
/* Frame-threading: per-frame setup is complete, workers may proceed. */
2603 ff_thread_finish_setup(s->avctx);
/* Error path: drop the newly created reference frame. */
2609 ff_hevc_unref_frame(s, s->ref, ~0);
/* Decode one NAL unit: parse its header, then dispatch on nal_unit_type to
 * the parameter-set / SEI parsers or to the slice decoding path (header,
 * RASL/CRA gating, frame start, hwaccel or software slice data).
 * NOTE(review): extraction dropped case labels, braces and gotos; the
 * visible lines are kept byte-identical. */
2614 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2616 HEVCLocalContext *lc = s->HEVClc;
2617 GetBitContext *gb = &lc->gb;
2618 int ctb_addr_ts, ret;
2620 ret = init_get_bits8(gb, nal->data, nal->size);
2624 ret = hls_nal_unit(s);
2626 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2632 switch (s->nal_unit_type) {
2634 ret = ff_hevc_decode_nal_vps(s);
2639 ret = ff_hevc_decode_nal_sps(s);
2644 ret = ff_hevc_decode_nal_pps(s);
2648 case NAL_SEI_PREFIX:
2649 case NAL_SEI_SUFFIX:
2650 ret = ff_hevc_decode_nal_sei(s);
/* VCL NAL types (slice segments) funnel into the shared path below. */
2661 case NAL_BLA_W_RADL:
2663 case NAL_IDR_W_RADL:
2670 ret = hls_slice_header(s);
/* First random-access point: remember the POC so RASL pictures that
 * reference discarded data can be skipped. */
2674 if (s->max_ra == INT_MAX) {
2675 if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2679 s->max_ra = INT_MIN;
/* RASL pictures preceding the recovery point are not decodable. */
2683 if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2684 s->poc <= s->max_ra) {
2688 if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2689 s->max_ra = INT_MIN;
2692 if (s->sh.first_slice_in_pic_flag) {
2693 ret = hevc_frame_start(s);
2696 } else if (!s->ref) {
2697 av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
/* All VCL NALUs of one picture must share the same type. */
2701 if (s->nal_unit_type != s->first_nal_type) {
2702 av_log(s->avctx, AV_LOG_ERROR,
2703 "Non-matching NAL types of the VCL NALUs: %d %d\n",
2704 s->first_nal_type, s->nal_unit_type);
2705 return AVERROR_INVALIDDATA;
2708 if (!s->sh.dependent_slice_segment_flag &&
2709 s->sh.slice_type != I_SLICE) {
2710 ret = ff_hevc_slice_rpl(s);
2712 av_log(s->avctx, AV_LOG_WARNING,
2713 "Error constructing the reference lists for the current slice.\n");
2718 if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2719 ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2724 if (s->avctx->hwaccel) {
2725 ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
/* Software path: WPP when entry points exist and threads allow it. */
2729 if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2730 ctb_addr_ts = hls_slice_data_wpp(s, nal->data, nal->size);
2732 ctb_addr_ts = hls_slice_data(s);
/* All CTBs consumed -> the picture is fully decoded. */
2733 if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2737 if (ctb_addr_ts < 0) {
/* EOS/EOB: next IRAP starts a new coded video sequence. */
2745 s->seq_decode = (s->seq_decode + 1) & 0xff;
2746 s->max_ra = INT_MAX;
2752 av_log(s->avctx, AV_LOG_INFO,
2753 "Skipping NAL unit %d\n", s->nal_unit_type);
2758 if (s->avctx->err_recognition & AV_EF_EXPLODE)
/* Extract the RBSP from a NAL unit: strip the 0x000003 emulation-prevention
 * bytes, recording where each one was removed (skipped_bytes_pos) so the
 * WPP entry-point offsets can later be corrected.  Fast path: if no escape
 * sequence exists, point nal at the input without copying.
 * NOTE(review): extraction dropped lines (goto labels, loop closings);
 * visible lines kept byte-identical. */
2763 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2764 * between these functions would be nice. */
2765 int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
2771 s->skipped_bytes = 0;
/* Detects 00 00 0x at position i (x <= 3): either a start code (error,
 * we ran past the NAL end) or an escape to remove. */
2772 #define STARTCODE_TEST \
2773 if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) { \
2774 if (src[i + 2] != 3) { \
2775 /* startcode, so we must be past the end */ \
2780 #if HAVE_FAST_UNALIGNED
2781 #define FIND_FIRST_ZERO \
2782 if (i > 0 && !src[i]) \
/* Scan 8 bytes at a time for a zero byte using bit tricks. */
2787 for (i = 0; i + 1 < length; i += 9) {
2788 if (!((~AV_RN64A(src + i) &
2789 (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2790 0x8000800080008080ULL))
/* 32-bit variant of the same zero-byte scan. */
2797 for (i = 0; i + 1 < length; i += 5) {
2798 if (!((~AV_RN32A(src + i) &
2799 (AV_RN32A(src + i) - 0x01000101U)) &
2806 #endif /* HAVE_FAST_64BIT */
/* Portable byte-wise scan. */
2808 for (i = 0; i + 1 < length; i += 2) {
2811 if (i > 0 && src[i - 1] == 0)
2815 #endif /* HAVE_FAST_UNALIGNED */
/* No escaped zero found: return the input buffer directly. */
2817 if (i >= length - 1) { // no escaped 0
2819 nal->raw_data = src;
2821 nal->raw_size = length;
2825 av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2826 length + FF_INPUT_BUFFER_PADDING_SIZE);
2827 if (!nal->rbsp_buffer)
2828 return AVERROR(ENOMEM);
2830 dst = nal->rbsp_buffer;
/* Copy the escape-free prefix verbatim, then filter the rest. */
2832 memcpy(dst, src, i);
2834 while (si + 2 < length) {
2835 // remove escapes (very rare 1:2^22)
2836 if (src[si + 2] > 3) {
2837 dst[di++] = src[si++];
2838 dst[di++] = src[si++];
2839 } else if (src[si] == 0 && src[si + 1] == 0) {
2840 if (src[si + 2] == 3) { // escape
/* Grow the skipped-byte position array on demand. */
2846 if (s->skipped_bytes_pos_size < s->skipped_bytes) {
2847 s->skipped_bytes_pos_size *= 2;
2848 av_reallocp_array(&s->skipped_bytes_pos,
2849 s->skipped_bytes_pos_size,
2850 sizeof(*s->skipped_bytes_pos));
2851 if (!s->skipped_bytes_pos)
2852 return AVERROR(ENOMEM);
2854 if (s->skipped_bytes_pos)
2855 s->skipped_bytes_pos[s->skipped_bytes-1] = di - 1;
2857 } else // next start code
2861 dst[di++] = src[si++];
/* Flush the final trailing bytes. */
2864 dst[di++] = src[si++];
2867 memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2871 nal->raw_data = src;
/* Split an input packet into NAL units (length-prefixed AVCC or Annex-B
 * start codes), unescape each into s->nals[], then decode them in order.
 * NOTE(review): extraction dropped lines (loop bodies, fail labels);
 * visible lines kept byte-identical. */
2876 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2878 int i, consumed, ret = 0;
2881 s->last_eos = s->eos;
2884 /* split the input packet into NAL units, so we know the upper bound on the
2885 * number of slices in the frame */
2887 while (length >= 4) {
2889 int extract_length = 0;
/* AVCC: big-endian NAL size of nal_length_size bytes. */
2893 for (i = 0; i < s->nal_length_size; i++)
2894 extract_length = (extract_length << 8) | buf[i];
2895 buf += s->nal_length_size;
2896 length -= s->nal_length_size;
2898 if (extract_length > length) {
2899 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2900 ret = AVERROR_INVALIDDATA;
2904 /* search start code */
2905 while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2909 av_log(s->avctx, AV_LOG_ERROR, "No start code is found.\n");
2910 ret = AVERROR_INVALIDDATA;
2920 extract_length = length;
/* Grow the NAL array and the per-NAL skipped-bytes bookkeeping by one. */
2922 if (s->nals_allocated < s->nb_nals + 1) {
2923 int new_size = s->nals_allocated + 1;
2924 void *tmp = av_realloc_array(s->nals, new_size, sizeof(*s->nals));
2925 ret = AVERROR(ENOMEM);
2930 memset(s->nals + s->nals_allocated, 0,
2931 (new_size - s->nals_allocated) * sizeof(*s->nals));
2933 tmp = av_realloc_array(s->skipped_bytes_nal, new_size, sizeof(*s->skipped_bytes_nal));
2936 s->skipped_bytes_nal = tmp;
2938 tmp = av_realloc_array(s->skipped_bytes_pos_size_nal, new_size, sizeof(*s->skipped_bytes_pos_size_nal));
2941 s->skipped_bytes_pos_size_nal = tmp;
2943 tmp = av_realloc_array(s->skipped_bytes_pos_nal, new_size, sizeof(*s->skipped_bytes_pos_nal));
2946 s->skipped_bytes_pos_nal = tmp;
2948 s->skipped_bytes_pos_size_nal[s->nals_allocated] = 1024; // initial buffer size
2949 s->skipped_bytes_pos_nal[s->nals_allocated] = av_malloc_array(s->skipped_bytes_pos_size_nal[s->nals_allocated], sizeof(*s->skipped_bytes_pos));
2950 if (!s->skipped_bytes_pos_nal[s->nals_allocated])
2952 s->nals_allocated = new_size;
/* Point the working skipped-bytes buffers at this NAL's slot. */
2954 s->skipped_bytes_pos_size = s->skipped_bytes_pos_size_nal[s->nb_nals];
2955 s->skipped_bytes_pos = s->skipped_bytes_pos_nal[s->nb_nals];
2956 nal = &s->nals[s->nb_nals];
2958 consumed = ff_hevc_extract_rbsp(s, buf, extract_length, nal);
/* Save back what ff_hevc_extract_rbsp may have reallocated. */
2960 s->skipped_bytes_nal[s->nb_nals] = s->skipped_bytes;
2961 s->skipped_bytes_pos_size_nal[s->nb_nals] = s->skipped_bytes_pos_size;
2962 s->skipped_bytes_pos_nal[s->nb_nals++] = s->skipped_bytes_pos;
/* Peek the NAL header during the split pass to spot EOS/EOB early. */
2970 ret = init_get_bits8(&s->HEVClc->gb, nal->data, nal->size);
2975 if (s->nal_unit_type == NAL_EOB_NUT ||
2976 s->nal_unit_type == NAL_EOS_NUT)
2983 /* parse the NAL units */
2984 for (i = 0; i < s->nb_nals; i++) {
2986 s->skipped_bytes = s->skipped_bytes_nal[i];
2987 s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
2989 ret = decode_nal_unit(s, &s->nals[i]);
2991 av_log(s->avctx, AV_LOG_WARNING,
2992 "Error parsing NAL unit #%d.\n", i);
/* Frame threading: mark the picture fully decoded for consumers. */
2998 if (s->ref && s->threads_type == FF_THREAD_FRAME)
2999 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/* Log a 16-byte MD5 digest as 32 lowercase hex characters (no newline). */
3004 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
3007 for (i = 0; i < 16; i++)
3008 av_log(log_ctx, level, "%02"PRIx8, md5[i]);
/* Verify the decoded frame against the per-plane MD5 digests carried in the
 * picture-hash SEI (s->md5).  Returns 0 on match, AVERROR_INVALIDDATA on a
 * mismatch, AVERROR(EINVAL)/AVERROR(ENOMEM) on setup failure. */
3011 static int verify_md5(HEVCContext *s, AVFrame *frame)
3013 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3018 return AVERROR(EINVAL);
/* >8-bit formats store 2 bytes per sample. */
3020 pixel_shift = desc->comp[0].depth_minus1 > 7;
3022 av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3025 /* the checksums are LE, so we have to byteswap for >8bpp formats
/* Scratch row for the byte swap, sized to the widest plane stride. */
3028 if (pixel_shift && !s->checksum_buf) {
3029 av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3030 FFMAX3(frame->linesize[0], frame->linesize[1],
3031 frame->linesize[2]));
3032 if (!s->checksum_buf)
3033 return AVERROR(ENOMEM);
/* Hash each plane row by row; chroma planes use subsampled dimensions. */
3037 for (i = 0; frame->data[i]; i++) {
3038 int width = s->avctx->coded_width;
3039 int height = s->avctx->coded_height;
3040 int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
3041 int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3044 av_md5_init(s->md5_ctx);
3045 for (j = 0; j < h; j++) {
3046 const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3049 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3050 (const uint16_t *) src, w);
3051 src = s->checksum_buf;
3054 av_md5_update(s->md5_ctx, src, w << pixel_shift);
3056 av_md5_final(s->md5_ctx, md5);
3058 if (!memcmp(md5, s->md5[i], 16)) {
3059 av_log (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3060 print_md5(s->avctx, AV_LOG_DEBUG, md5);
3061 av_log (s->avctx, AV_LOG_DEBUG, "; ");
3063 av_log (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3064 print_md5(s->avctx, AV_LOG_ERROR, md5);
3065 av_log (s->avctx, AV_LOG_ERROR, " != ");
3066 print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
3067 av_log (s->avctx, AV_LOG_ERROR, "\n");
3068 return AVERROR_INVALIDDATA;
3072 av_log(s->avctx, AV_LOG_DEBUG, "\n");
/* Public decode entry (AVCodec.decode): an empty packet flushes a delayed
 * frame; otherwise decode all NAL units in the packet, optionally verify
 * the SEI MD5, and return any frame ready for output via *got_output. */
3077 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3081 HEVCContext *s = avctx->priv_data;
/* Flush path (empty packet): drain one buffered output frame. */
3084 ret = ff_hevc_output_frame(s, data, 1);
3093 ret = decode_nal_units(s, avpkt->data, avpkt->size);
3097 if (avctx->hwaccel) {
/* hwaccel end_frame failure is logged but not fatal here. */
3098 if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
3099 av_log(avctx, AV_LOG_ERROR,
3100 "hardware accelerator failed to decode picture\n");
3102 /* verify the SEI checksum */
3103 if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3105 ret = verify_md5(s, s->ref->frame);
3106 if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3107 ff_hevc_unref_frame(s, s->ref, ~0);
3114 if (s->is_decoded) {
3115 av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
/* A reordered frame became ready during decoding: hand it out. */
3119 if (s->output_frame->buf[0]) {
3120 av_frame_move_ref(data, s->output_frame);
/* Create a new reference to src in dst: ref-count the frame buffer and all
 * per-frame metadata buffers, copy the scalar fields, and ref the hwaccel
 * private buffer if present.  On any failure the partially built dst is
 * fully unreferenced and AVERROR(ENOMEM) is returned (fail label below). */
3127 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3131 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3135 dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3136 if (!dst->tab_mvf_buf)
3138 dst->tab_mvf = src->tab_mvf;
3140 dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3141 if (!dst->rpl_tab_buf)
3143 dst->rpl_tab = src->rpl_tab;
3145 dst->rpl_buf = av_buffer_ref(src->rpl_buf);
/* Plain-value fields are copied directly. */
3149 dst->poc = src->poc;
3150 dst->ctb_count = src->ctb_count;
3151 dst->window = src->window;
3152 dst->flags = src->flags;
3153 dst->sequence = src->sequence;
3155 if (src->hwaccel_picture_private) {
3156 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3157 if (!dst->hwaccel_priv_buf)
3159 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
/* fail: undo everything referenced so far. */
3164 ff_hevc_unref_frame(s, dst, ~0);
3165 return AVERROR(ENOMEM);
/* Free everything owned by the HEVCContext.  Also used as the failure path
 * of hevc_init_context(), so every free below must tolerate NULL/zeroed
 * fields (av_freep/av_buffer_unref/av_frame_free all do).
 * NOTE(review): listing is elided — pic_arrays_free() and some closing
 * braces are among the missing lines. */
3168 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3170     HEVCContext *s = avctx->priv_data;
3175     av_freep(&s->md5_ctx);
/* Per-NAL skipped-bytes position arrays must be freed before the
 * top-level arrays that hold their pointers. */
3177     for(i=0; i < s->nals_allocated; i++) {
3178         av_freep(&s->skipped_bytes_pos_nal[i]);
3180     av_freep(&s->skipped_bytes_pos_size_nal);
3181     av_freep(&s->skipped_bytes_nal);
3182     av_freep(&s->skipped_bytes_pos_nal);
3184     av_freep(&s->cabac_state);
/* SAO edge buffers: one horizontal + one vertical buffer per plane. */
3186     for (i = 0; i < 3; i++) {
3187         av_freep(&s->sao_pixel_buffer_h[i]);
3188         av_freep(&s->sao_pixel_buffer_v[i]);
3190     av_frame_free(&s->output_frame);
/* Release every DPB slot: drop all references, then free the frame itself. */
3192     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3193         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3194         av_frame_free(&s->DPB[i].frame);
/* Parameter-set buffers (VPS/SPS/PPS) are refcounted AVBufferRefs. */
3197     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3198         av_buffer_unref(&s->vps_list[i]);
3199     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3200         av_buffer_unref(&s->sps_list[i]);
3201     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3202         av_buffer_unref(&s->pps_list[i]);
3207     av_buffer_unref(&s->current_sps);
/* Slice-header side arrays. */
3209     av_freep(&s->sh.entry_point_offset);
3210     av_freep(&s->sh.offset);
3211     av_freep(&s->sh.size);
/* Slice-threading contexts: index 0 is handled separately below because
 * s->HEVClc may alias s->HEVClcList[0]. */
3213     for (i = 1; i < s->threads_number; i++) {
3214         HEVCLocalContext *lc = s->HEVClcList[i];
3216         av_freep(&s->HEVClcList[i]);
3217         av_freep(&s->sList[i]);
/* Avoid a double free: only clear s->HEVClc when it is the same
 * allocation as HEVClcList[0] (elided line presumably NULLs s->HEVClc). */
3220     if (s->HEVClc == s->HEVClcList[0])
3222     av_freep(&s->HEVClcList[0]);
/* RBSP buffers of the parsed NAL units, then reset the bookkeeping count. */
3224     for (i = 0; i < s->nals_allocated; i++)
3225         av_freep(&s->nals[i].rbsp_buffer);
3227     s->nals_allocated = 0;
/* Allocate the long-lived pieces of the HEVCContext (local context, CABAC
 * state, output frame, DPB frames, MD5 context).  On any allocation failure
 * control reaches the tail below, which tears everything down via
 * hevc_decode_free() and returns ENOMEM — so this function never leaks.
 * NOTE(review): listing is elided — the `goto fail` statements and the
 * `fail:` label between line 3268 and 3274 are among the missing lines. */
3232 static av_cold int hevc_init_context(AVCodecContext *avctx)
3234     HEVCContext *s = avctx->priv_data;
/* Main-thread local context; slot 0 of the per-slice-thread list aliases it. */
3239     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3242     s->HEVClcList[0] = s->HEVClc;
3245     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3246     if (!s->cabac_state)
3249     s->output_frame = av_frame_alloc();
3250     if (!s->output_frame)
/* One AVFrame per DPB slot; tf.f wires the ThreadFrame to that AVFrame. */
3253     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3254         s->DPB[i].frame = av_frame_alloc();
3255         if (!s->DPB[i].frame)
3257         s->DPB[i].tf.f = s->DPB[i].frame;
3260     s->max_ra = INT_MAX;
/* MD5 context for the optional SEI picture-hash verification. */
3262     s->md5_ctx = av_md5_alloc();
3266     ff_bswapdsp_init(&s->bdsp);
3268     s->context_initialized = 1;
/* Failure tail: free partial allocations and report OOM. */
3274     hevc_decode_free(avctx);
3275     return AVERROR(ENOMEM);
/* Frame-threading state sync: copy decoder state from the source thread's
 * context (`s0`) into this thread's context (`s`).  Refcounted objects
 * (DPB frames, VPS/SPS/PPS lists, current_sps) get fresh references;
 * scalars are copied by value.
 * NOTE(review): listing is elided — error handling after hevc_ref_frame(),
 * the guarding condition around the seq_decode bump at 3353, and several
 * closing braces are among the missing lines. */
3278 static int hevc_update_thread_context(AVCodecContext *dst,
3279                                       const AVCodecContext *src)
3281     HEVCContext *s  = dst->priv_data;
3282     HEVCContext *s0 = src->priv_data;
/* Lazily initialize this thread's context on first sync. */
3285     if (!s->context_initialized) {
3286         ret = hevc_init_context(dst);
/* Mirror the source DPB: drop our old refs, then re-reference every slot
 * the source actually has a frame in. */
3291     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3292         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3293         if (s0->DPB[i].frame->buf[0]) {
3294             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
/* (elided statement guarded by an SPS change — presumably invalidates
 * state derived from the old SPS; confirm against full source) */
3300     if (s->sps != s0->sps)
/* Re-reference the three parameter-set lists entry by entry. */
3302     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3303         av_buffer_unref(&s->vps_list[i]);
3304         if (s0->vps_list[i]) {
3305             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3306             if (!s->vps_list[i])
3307                 return AVERROR(ENOMEM);
3311     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3312         av_buffer_unref(&s->sps_list[i]);
3313         if (s0->sps_list[i]) {
3314             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3315             if (!s->sps_list[i])
3316                 return AVERROR(ENOMEM);
3320     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3321         av_buffer_unref(&s->pps_list[i]);
3322         if (s0->pps_list[i]) {
3323             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3324             if (!s->pps_list[i])
3325                 return AVERROR(ENOMEM);
3329     av_buffer_unref(&s->current_sps);
3330     if (s0->current_sps) {
3331         s->current_sps = av_buffer_ref(s0->current_sps);
3332         if (!s->current_sps)
3333             return AVERROR(ENOMEM);
/* Activate the source's SPS on this context if it changed. */
3336     if (s->sps != s0->sps)
3337         if ((ret = set_sps(s, s0->sps)) < 0)
/* Plain-value decoder state. */
3340     s->seq_decode = s0->seq_decode;
3341     s->seq_output = s0->seq_output;
3342     s->pocTid0    = s0->pocTid0;
3343     s->max_ra     = s0->max_ra;
3346     s->is_nalff        = s0->is_nalff;
3347     s->nal_length_size = s0->nal_length_size;
3349     s->threads_number  = s0->threads_number;
3350     s->threads_type    = s0->threads_type;
/* Under an elided condition (presumably a pending EOS/flush in s0):
 * advance the sequence counter (mod 256) and reset the RASL gate. */
3353     s->seq_decode = (s->seq_decode + 1) & 0xff;
3354     s->max_ra =     INT_MAX;
/* Parse codec extradata: either an hvcC configuration record (MP4-style)
 * or raw Annex-B NAL units, and feed the contained parameter-set NALs
 * through decode_nal_units().
 * NOTE(review): listing is elided — minimum-size checks and some error
 * returns are among the missing lines. */
3360 static int hevc_decode_extradata(HEVCContext *s)
3362     AVCodecContext *avctx = s->avctx;
3366     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
/* hvcC detection: a leading 0/0/<=1 prefix cannot be Annex-B start-code
 * data, so treat it as an hvcC record. */
3368     if (avctx->extradata_size > 3 &&
3369         (avctx->extradata[0] || avctx->extradata[1] ||
3370          avctx->extradata[2] > 1)) {
3371         /* It seems the extradata is encoded as hvcC format.
3372          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3373          * is finalized. When finalized, configurationVersion will be 1 and we
3374          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3375         int i, j, num_arrays, nal_len_size;
/* Skip the 21 fixed header bytes of the HEVCDecoderConfigurationRecord,
 * then read lengthSizeMinusOne (low 2 bits) and the array count. */
3379         bytestream2_skip(&gb, 21);
3380         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3381         num_arrays   = bytestream2_get_byte(&gb);
3383         /* nal units in the hvcC always have length coded with 2 bytes,
3384          * so put a fake nal_length_size = 2 while parsing them */
3385         s->nal_length_size = 2;
3387         /* Decode nal units from hvcC. */
3388         for (i = 0; i < num_arrays; i++) {
3389             int type = bytestream2_get_byte(&gb) & 0x3f;
3390             int cnt  = bytestream2_get_be16(&gb);
3392             for (j = 0; j < cnt; j++) {
3393                 // +2 for the nal size field
3394                 int nalsize = bytestream2_peek_be16(&gb) + 2;
/* Reject a declared NAL size that overruns the remaining extradata. */
3395                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3396                     av_log(s->avctx, AV_LOG_ERROR,
3397                            "Invalid NAL unit size in extradata.\n");
3398                     return AVERROR_INVALIDDATA;
/* Decode this NAL (size prefix included) and advance past it. */
3401                 ret = decode_nal_units(s, gb.buffer, nalsize);
3403                     av_log(avctx, AV_LOG_ERROR,
3404                            "Decoding nal unit %d %d from hvcC failed\n",
3408                 bytestream2_skip(&gb, nalsize);
3412         /* Now store right nal length size, that will be used to parse
/* ...all future packets (the real lengthSizeMinusOne+1 from the record). */
3414         s->nal_length_size = nal_len_size;
/* Annex-B branch: the extradata is plain start-code-delimited NAL units. */
3417         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
/* Decoder init callback: set up CABAC tables, allocate the context,
 * configure threading, and pre-parse any extradata (parameter sets). */
3424 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3426     HEVCContext *s = avctx->priv_data;
/* Global, idempotent CABAC state-table initialization. */
3429     ff_init_cabac_states();
3431     avctx->internal->allocate_progress = 1;
3433     ret = hevc_init_context(avctx);
3437     s->enable_parallel_tiles = 0;
3438     s->picture_struct = 0;
/* Slice threading uses the caller's thread count; otherwise single thread. */
3440     if(avctx->active_thread_type & FF_THREAD_SLICE)
3441         s->threads_number = avctx->thread_count;
3443         s->threads_number = 1;
/* Parse parameter sets from extradata up front; a failure here frees
 * the partially-initialized context before returning. */
3445     if (avctx->extradata_size > 0 && avctx->extradata) {
3446         ret = hevc_decode_extradata(s);
3448             hevc_decode_free(avctx);
/* Frame threading wins over slice threading when both are possible. */
3453     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3454         s->threads_type = FF_THREAD_FRAME;
3456         s->threads_type = FF_THREAD_SLICE;
/* Frame-thread worker init: start from a zeroed context (the priv_data was
 * memcpy'd from the main thread, so clear it first) and allocate fresh
 * per-thread state via hevc_init_context(). */
3461 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3463     HEVCContext *s = avctx->priv_data;
3466     memset(s, 0, sizeof(*s));
3468     ret = hevc_init_context(avctx);
/* Flush callback (seek): drop all DPB references and re-arm the RASL
 * output gate so pre-RAP leading pictures are skipped after the seek. */
3475 static void hevc_decode_flush(AVCodecContext *avctx)
3477     HEVCContext *s = avctx->priv_data;
3478     ff_hevc_flush_dpb(s);
3479     s->max_ra = INT_MAX;
/* Helpers for the AVOption table below: field offset into HEVCContext,
 * and the common decoding/video option flags. */
3482 #define OFFSET(x) offsetof(HEVCContext, x)
3483 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* Profiles reported by this decoder; FF_PROFILE_UNKNOWN terminates the list. */
3485 static const AVProfile profiles[] = {
3486     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3487     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3488     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3489     { FF_PROFILE_HEVC_REXT,                 "Rext"                },
3490     { FF_PROFILE_UNKNOWN },
/* User-visible decoder options.  Note both entries write the same field
 * (apply_defdispwin); the NULL terminator line is elided from this listing. */
3493 static const AVOption options[] = {
3494     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3495         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3496     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3497         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
/* AVClass wiring the options table (elided .option member) into avoptions. */
3501 static const AVClass hevc_decoder_class = {
3502     .class_name = "HEVC decoder",
3503     .item_name  = av_default_item_name,
3505     .version    = LIBAVUTIL_VERSION_INT,
3508 AVCodec ff_hevc_decoder = {
3510 .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3511 .type = AVMEDIA_TYPE_VIDEO,
3512 .id = AV_CODEC_ID_HEVC,
3513 .priv_data_size = sizeof(HEVCContext),
3514 .priv_class = &hevc_decoder_class,
3515 .init = hevc_decode_init,
3516 .close = hevc_decode_free,
3517 .decode = hevc_decode_frame,
3518 .flush = hevc_decode_flush,
3519 .update_thread_context = hevc_update_thread_context,
3520 .init_thread_copy = hevc_init_thread_copy,
3521 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3522 CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
3523 .profiles = NULL_IF_CONFIG_SMALL(profiles),