4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 * Copyright (C) 2012 - 2013 Mickael Raulet
6 * Copyright (C) 2012 - 2013 Gildas Cocherel
7 * Copyright (C) 2012 - 2013 Wassim Hamidouche
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/atomic.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/common.h"
29 #include "libavutil/display.h"
30 #include "libavutil/internal.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
/* Maps a prediction-block width (only the widths listed below are legal
 * HEVC PU widths) to a compact 0..9 index; presumably used to select the
 * per-width pel/weighted-prediction DSP function — TODO confirm against
 * callers. Entries for unlisted widths are implicitly 0. */
42 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
45 * NOTE: Each function hls_foo correspond to the function foo in the
46 * specification (HLS stands for High Level Syntax).
53 /* free everything allocated by pic_arrays_init() */
54 static void pic_arrays_free(HEVCContext *s)
/* av_freep() is NULL-safe and resets each pointer to NULL, so this is
 * safe to call on a partially-initialized or already-freed context. */
57     av_freep(&s->deblock);
59     av_freep(&s->skip_flag);
60     av_freep(&s->tab_ct_depth);
62     av_freep(&s->tab_ipm);
63     av_freep(&s->cbf_luma);
66     av_freep(&s->qp_y_tab);
67     av_freep(&s->tab_slice_address);
68     av_freep(&s->filter_slice_edges);
/* deblocking-filter boundary-strength maps */
70     av_freep(&s->horizontal_bs);
71     av_freep(&s->vertical_bs);
/* per-slice entry-point bookkeeping allocated in hls_slice_header() */
73     av_freep(&s->sh.entry_point_offset);
74     av_freep(&s->sh.size);
75     av_freep(&s->sh.offset);
/* buffer pools for per-frame MV fields and reference-picture-list tabs */
77     av_buffer_pool_uninit(&s->tab_mvf_pool);
78     av_buffer_pool_uninit(&s->rpl_tab_pool);
81 /* allocate arrays that depend on frame dimensions */
/* Returns 0 on success, AVERROR(ENOMEM) on any allocation failure.
 * NOTE(review): the failure path between the last check and the final
 * return (presumably a `fail:` label calling pic_arrays_free()) is not
 * visible in this view — confirm cleanup happens before returning. */
82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
84     int log2_min_cb_size = sps->log2_min_cb_size;
85     int width = sps->width;
86     int height = sps->height;
/* +1 in each dimension: the tables are indexed one past the last CB */
87     int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
88                           ((height >> log2_min_cb_size) + 1);
89     int ctb_count = sps->ctb_width * sps->ctb_height;
90     int min_pu_size = sps->min_pu_width * sps->min_pu_height;
/* boundary-strength grid has one entry per 4x4 luma block, plus 1 */
92     s->bs_width = (width >> 2) + 1;
93     s->bs_height = (height >> 2) + 1;
/* per-CTB SAO and deblock parameter arrays (zeroed) */
95     s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
96     s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
97     if (!s->sao || !s->deblock)
/* per-minimum-CB maps: skip flags and coding-tree depth */
100     s->skip_flag = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
101     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
102     if (!s->skip_flag || !s->tab_ct_depth)
/* per-TB luma CBF, per-PU intra prediction mode and PCM flags */
105     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
106     s->tab_ipm = av_mallocz(min_pu_size);
107     s->is_pcm = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
108     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
111     s->filter_slice_edges = av_mallocz(ctb_count);
112     s->tab_slice_address = av_malloc_array(pic_size_in_ctb,
113                                           sizeof(*s->tab_slice_address));
114     s->qp_y_tab = av_malloc_array(pic_size_in_ctb,
115                                           sizeof(*s->qp_y_tab));
116     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
/* deblocking boundary-strength maps, zero-initialized (0 == no filtering) */
119     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
120     s->vertical_bs = av_mallocz_array(s->bs_width, s->bs_height);
121     if (!s->horizontal_bs || !s->vertical_bs)
/* pools so per-frame MV/RPL buffers can be recycled across frames */
124     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
126     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
128     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
135     return AVERROR(ENOMEM);
/* Parse the weighted-prediction table (HEVC spec 7.3.6.3) from the slice
 * header into s->sh. The local flag arrays are sized 16; this relies on
 * sh->nb_refs[] having been validated against MAX_REFS by the caller
 * (hls_slice_header) — TODO confirm MAX_REFS <= 16. */
138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
142     uint8_t luma_weight_l0_flag[16];
143     uint8_t chroma_weight_l0_flag[16];
144     uint8_t luma_weight_l1_flag[16];
145     uint8_t chroma_weight_l1_flag[16];
146     int luma_log2_weight_denom;
148     luma_log2_weight_denom = get_ue_golomb_long(gb);
/* out-of-range denom is only logged, then clamped by av_clip_uintp2 below */
149     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
150         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
151     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
152     if (s->ps.sps->chroma_format_idc != 0) {
/* chroma denom is coded as a signed delta against the luma denom */
153         int delta = get_se_golomb(gb);
154         s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
/* L0 luma weight flags; absent weights get the spec-inferred defaults
 * (weight = 1 << denom, offset = 0) */
157     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
158         luma_weight_l0_flag[i] = get_bits1(gb);
159         if (!luma_weight_l0_flag[i]) {
160             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
161             s->sh.luma_offset_l0[i] = 0;
164     if (s->ps.sps->chroma_format_idc != 0) {
165         for (i = 0; i < s->sh.nb_refs[L0]; i++)
166             chroma_weight_l0_flag[i] = get_bits1(gb);
/* monochrome: no chroma weights are coded */
168         for (i = 0; i < s->sh.nb_refs[L0]; i++)
169             chroma_weight_l0_flag[i] = 0;
171     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
172         if (luma_weight_l0_flag[i]) {
173             int delta_luma_weight_l0 = get_se_golomb(gb);
174             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
175             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
177         if (chroma_weight_l0_flag[i]) {
178             for (j = 0; j < 2; j++) {
179                 int delta_chroma_weight_l0 = get_se_golomb(gb);
180                 int delta_chroma_offset_l0 = get_se_golomb(gb);
/* chroma offset reconstruction per spec eq. for ChromaOffsetL0 */
181                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
182                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
183                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
/* inferred defaults for chroma when the per-ref flag is 0 */
186             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
187             s->sh.chroma_offset_l0[i][0] = 0;
188             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
189             s->sh.chroma_offset_l0[i][1] = 0;
/* B slices additionally carry an L1 table; structure mirrors L0 above */
192     if (s->sh.slice_type == B_SLICE) {
193         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
194             luma_weight_l1_flag[i] = get_bits1(gb);
195             if (!luma_weight_l1_flag[i]) {
196                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
197                 s->sh.luma_offset_l1[i] = 0;
200         if (s->ps.sps->chroma_format_idc != 0) {
201             for (i = 0; i < s->sh.nb_refs[L1]; i++)
202                 chroma_weight_l1_flag[i] = get_bits1(gb);
204             for (i = 0; i < s->sh.nb_refs[L1]; i++)
205                 chroma_weight_l1_flag[i] = 0;
207         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
208             if (luma_weight_l1_flag[i]) {
209                 int delta_luma_weight_l1 = get_se_golomb(gb);
210                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
211                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
213             if (chroma_weight_l1_flag[i]) {
214                 for (j = 0; j < 2; j++) {
215                     int delta_chroma_weight_l1 = get_se_golomb(gb);
216                     int delta_chroma_offset_l1 = get_se_golomb(gb);
217                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
218                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
219                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
222                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
223                 s->sh.chroma_offset_l1[i][0] = 0;
224                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
225                 s->sh.chroma_offset_l1[i][1] = 0;
/* Parse the long-term reference picture set from the slice header
 * (HEVC spec 7.3.6.1 / 7.4.7.1). Fills rps->poc[]/used[]/nb_refs.
 * Returns 0 on success, AVERROR_INVALIDDATA if the declared counts
 * exceed the rps->poc array. */
231 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
233     const HEVCSPS *sps = s->ps.sps;
234     int max_poc_lsb = 1 << sps->log2_max_poc_lsb;
235     int prev_delta_msb = 0;
/* nb_sps: entries reused from the SPS; nb_sh: entries coded in this header */
236     unsigned int nb_sps = 0, nb_sh;
240     if (!sps->long_term_ref_pics_present_flag)
243     if (sps->num_long_term_ref_pics_sps > 0)
244         nb_sps = get_ue_golomb_long(gb);
245     nb_sh = get_ue_golomb_long(gb);
/* uint64_t addition avoids overflow of the two unsigned counts */
247     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
248         return AVERROR_INVALIDDATA;
250     rps->nb_refs = nb_sh + nb_sps;
252     for (i = 0; i < rps->nb_refs; i++) {
253         uint8_t delta_poc_msb_present;
256             uint8_t lt_idx_sps = 0;
258             if (sps->num_long_term_ref_pics_sps > 1)
259                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
/* SPS-sourced entry: copy POC LSB and used flag from the SPS tables */
261             rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
262             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
/* header-coded entry: POC LSB and used flag are read directly */
264             rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb);
265             rps->used[i] = get_bits1(gb);
268         delta_poc_msb_present = get_bits1(gb);
269         if (delta_poc_msb_present) {
270             int delta = get_ue_golomb_long(gb);
/* deltas are cumulative within each of the two entry groups
 * (spec: DeltaPocMsbCycleLt) */
272             if (i && i != nb_sps)
273                 delta += prev_delta_msb;
/* reconstruct the full POC from the current POC and the MSB cycle */
275             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
276             prev_delta_msb = delta;
/* Propagate stream-level parameters from the active SPS/VPS into the
 * AVCodecContext: geometry, profile/level, SAR, color properties and
 * frame rate. Called whenever the active SPS changes. */
283 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
286     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
287     unsigned int num = 0, den = 0;
289     avctx->pix_fmt = sps->pix_fmt;
290     avctx->coded_width = sps->width;
291     avctx->coded_height = sps->height;
292     avctx->width = sps->output_width;
293     avctx->height = sps->output_height;
/* reorder depth of the highest temporal layer bounds the B-frame delay */
294     avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
295     avctx->profile = sps->ptl.general_ptl.profile_idc;
296     avctx->level = sps->ptl.general_ptl.level_idc;
298     ff_set_sar(avctx, sps->vui.sar);
300     if (sps->vui.video_signal_type_present_flag)
301         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
/* default to limited range when the VUI does not say otherwise */
304         avctx->color_range = AVCOL_RANGE_MPEG;
306     if (sps->vui.colour_description_present_flag) {
307         avctx->color_primaries = sps->vui.colour_primaries;
308         avctx->color_trc = sps->vui.transfer_characteristic;
309         avctx->colorspace = sps->vui.matrix_coeffs;
311         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
312         avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
313         avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
/* VPS timing info takes precedence over VUI timing info */
316     if (vps->vps_timing_info_present_flag) {
317         num = vps->vps_num_units_in_tick;
318         den = vps->vps_time_scale;
319     } else if (sps->vui.vui_timing_info_present_flag) {
320         num = sps->vui.vui_num_units_in_tick;
321         den = sps->vui.vui_time_scale;
/* num/den is the tick duration, so framerate is stored inverted */
324     if (num != 0 && den != 0)
325         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
/* Activate a new SPS: (re)allocate the picture arrays, export stream
 * parameters, negotiate the output pixel format (offering hwaccel
 * formats when the stream is 8-bit 4:2:0), re-init the DSP contexts and
 * allocate the SAO line buffers. Returns 0 on success or a negative
 * AVERROR code; error-path cleanup is not visible in this view — TODO
 * confirm a fail path frees partially-allocated buffers. */
329 static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fmt)
/* +2: one slot for the software format, one for the AV_PIX_FMT_NONE terminator */
331 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VAAPI_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
332     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
342     ret = pic_arrays_init(s, sps);
346     export_stream_params(s->avctx, &s->ps, sps);
/* hwaccels are only offered for 8-bit 4:2:0 content */
348     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
349 #if CONFIG_HEVC_DXVA2_HWACCEL
350         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
352 #if CONFIG_HEVC_D3D11VA_HWACCEL
353         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
355 #if CONFIG_HEVC_VAAPI_HWACCEL
356         *fmt++ = AV_PIX_FMT_VAAPI;
358 #if CONFIG_HEVC_VDPAU_HWACCEL
359         *fmt++ = AV_PIX_FMT_VDPAU;
/* pix_fmt == NONE: let ff_thread_get_format() pick from the candidate list;
 * otherwise the caller has already chosen a format */
363     if (pix_fmt == AV_PIX_FMT_NONE) {
364         *fmt++ = sps->pix_fmt;
365         *fmt = AV_PIX_FMT_NONE;
367         ret = ff_thread_get_format(s->avctx, pix_fmts);
370         s->avctx->pix_fmt = ret;
373         s->avctx->pix_fmt = pix_fmt;
/* bit-depth-dependent DSP function tables */
376     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
377     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
378     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
/* drop SAO buffers sized for the previous SPS before reallocating */
380     for (i = 0; i < 3; i++) {
381         av_freep(&s->sao_pixel_buffer_h[i]);
382         av_freep(&s->sao_pixel_buffer_v[i]);
385     if (sps->sao_enabled && !s->avctx->hwaccel) {
386         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
389         for(c_idx = 0; c_idx < c_count; c_idx++) {
390             int w = sps->width >> sps->hshift[c_idx];
391             int h = sps->height >> sps->vshift[c_idx];
/* 2 saved lines/columns per CTB row/column; shift accounts for >8-bit depth.
 * NOTE(review): these av_malloc() results do not appear to be checked in
 * this view — verify allocation-failure handling. */
392             s->sao_pixel_buffer_h[c_idx] =
393                 av_malloc((w * 2 * sps->ctb_height) <<
395             s->sao_pixel_buffer_v[c_idx] =
396                 av_malloc((h * 2 * sps->ctb_width) <<
402     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
/* Parse a slice segment header (HEVC spec 7.3.6) into s->sh and activate
 * the referenced PPS/SPS. Returns 0 on success or a negative AVERROR
 * code. This is the central per-slice entry point: it also derives the
 * slice QP, entry-point offsets for tile/WPP parallelism, and marks the
 * slice as initialized. */
412 static int hls_slice_header(HEVCContext *s)
414     GetBitContext *gb = &s->HEVClc->gb;
415     SliceHeader *sh = &s->sh;
419     sh->first_slice_in_pic_flag = get_bits1(gb);
/* a new IDR/BLA picture starts a new coded video sequence */
420     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
421         s->seq_decode = (s->seq_decode + 1) & 0xff;
424         ff_hevc_clear_refs(s);
426     sh->no_output_of_prior_pics_flag = 0;
428         sh->no_output_of_prior_pics_flag = get_bits1(gb);
430     sh->pps_id = get_ue_golomb_long(gb);
431     if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
432         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
433         return AVERROR_INVALIDDATA;
/* all slices of one picture must reference the same PPS */
435     if (!sh->first_slice_in_pic_flag &&
436         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
437         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
438         return AVERROR_INVALIDDATA;
440     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
441     if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
442         sh->no_output_of_prior_pics_flag = 1;
/* SPS change: re-run set_sps() and bump the decode sequence counter */
444     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
445         const HEVCSPS* last_sps = s->ps.sps;
446         s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
447         if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
448             if (s->ps.sps->width !=  last_sps->width || s->ps.sps->height != last_sps->height ||
449                 s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].max_dec_pic_buffering !=
450                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
451                 sh->no_output_of_prior_pics_flag = 0;
453         ff_hevc_clear_refs(s);
454         ret = set_sps(s, s->ps.sps, AV_PIX_FMT_NONE);
458         s->seq_decode = (s->seq_decode + 1) & 0xff;
462     sh->dependent_slice_segment_flag = 0;
463     if (!sh->first_slice_in_pic_flag) {
464         int slice_address_length;
466         if (s->ps.pps->dependent_slice_segments_enabled_flag)
467             sh->dependent_slice_segment_flag = get_bits1(gb);
/* address is coded with ceil(log2(#CTBs)) bits */
469         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
470                                             s->ps.sps->ctb_height);
471         sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
472         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
473             av_log(s->avctx, AV_LOG_ERROR,
474                    "Invalid slice segment address: %u.\n",
475                    sh->slice_segment_addr);
476             return AVERROR_INVALIDDATA;
479         if (!sh->dependent_slice_segment_flag) {
480             sh->slice_addr = sh->slice_segment_addr;
484         sh->slice_segment_addr = sh->slice_addr = 0;
486         s->slice_initialized = 0;
/* independent slice segment: parse the full header */
489     if (!sh->dependent_slice_segment_flag) {
490         s->slice_initialized = 0;
492         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
493             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
495         sh->slice_type = get_ue_golomb_long(gb);
496         if (!(sh->slice_type == I_SLICE ||
497               sh->slice_type == P_SLICE ||
498               sh->slice_type == B_SLICE)) {
499             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
501             return AVERROR_INVALIDDATA;
503         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
504             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
505             return AVERROR_INVALIDDATA;
508         // when flag is not present, picture is inferred to be output
509         sh->pic_output_flag = 1;
510         if (s->ps.pps->output_flag_present_flag)
511             sh->pic_output_flag = get_bits1(gb);
513         if (s->ps.sps->separate_colour_plane_flag)
514             sh->colour_plane_id = get_bits(gb, 2);
/* POC and reference picture sets (not present for IDR) */
519             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
520             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
521             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
522                 av_log(s->avctx, AV_LOG_WARNING,
523                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
524                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
525                     return AVERROR_INVALIDDATA;
530             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
/* pos bookkeeping measures the RPS size in bits, needed by hwaccels */
531             pos = get_bits_left(gb);
532             if (!sh->short_term_ref_pic_set_sps_flag) {
533                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
537                 sh->short_term_rps = &sh->slice_rps;
539                 int numbits, rps_idx;
541                 if (!s->ps.sps->nb_st_rps) {
542                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
543                     return AVERROR_INVALIDDATA;
546                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
547                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
548                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
550             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
552             pos = get_bits_left(gb);
553             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
555                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
556                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
557                     return AVERROR_INVALIDDATA;
559             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
561             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
562                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
564                 sh->slice_temporal_mvp_enabled_flag = 0;
/* IDR path: no short-term RPS */
566             s->sh.short_term_rps = NULL;
/* pocTid0: POC of the most recent temporal-layer-0 reference picture —
 * the sub-layer non-reference NAL types are excluded */
571         if (s->temporal_id == 0 &&
572             s->nal_unit_type != NAL_TRAIL_N &&
573             s->nal_unit_type != NAL_TSA_N   &&
574             s->nal_unit_type != NAL_STSA_N  &&
575             s->nal_unit_type != NAL_RADL_N  &&
576             s->nal_unit_type != NAL_RADL_R  &&
577             s->nal_unit_type != NAL_RASL_N  &&
578             s->nal_unit_type != NAL_RASL_R)
/* SAO enable flags: one for luma, one shared by both chroma planes */
581         if (s->ps.sps->sao_enabled) {
582             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
583             if (s->ps.sps->chroma_format_idc) {
584                 sh->slice_sample_adaptive_offset_flag[1] =
585                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
588             sh->slice_sample_adaptive_offset_flag[0] = 0;
589             sh->slice_sample_adaptive_offset_flag[1] = 0;
590             sh->slice_sample_adaptive_offset_flag[2] = 0;
/* reference list sizes: PPS defaults, optionally overridden per slice */
593         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
594         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
597             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
598             if (sh->slice_type == B_SLICE)
599                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
601             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
602                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
603                 if (sh->slice_type == B_SLICE)
604                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
606             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
607                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
608                        sh->nb_refs[L0], sh->nb_refs[L1]);
609                 return AVERROR_INVALIDDATA;
612             sh->rpl_modification_flag[0] = 0;
613             sh->rpl_modification_flag[1] = 0;
614             nb_refs = ff_hevc_frame_nb_refs(s);
616                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
617                 return AVERROR_INVALIDDATA;
/* explicit reference-picture-list reordering */
620             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
621                 sh->rpl_modification_flag[0] = get_bits1(gb);
622                 if (sh->rpl_modification_flag[0]) {
623                     for (i = 0; i < sh->nb_refs[L0]; i++)
624                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
627                 if (sh->slice_type == B_SLICE) {
628                     sh->rpl_modification_flag[1] = get_bits1(gb);
629                     if (sh->rpl_modification_flag[1] == 1)
630                         for (i = 0; i < sh->nb_refs[L1]; i++)
631                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
635             if (sh->slice_type == B_SLICE)
636                 sh->mvd_l1_zero_flag = get_bits1(gb);
638             if (s->ps.pps->cabac_init_present_flag)
639                 sh->cabac_init_flag = get_bits1(gb);
641                 sh->cabac_init_flag = 0;
/* collocated picture for temporal MVP; list L0 unless signalled otherwise */
643             sh->collocated_ref_idx = 0;
644             if (sh->slice_temporal_mvp_enabled_flag) {
645                 sh->collocated_list = L0;
646                 if (sh->slice_type == B_SLICE)
647                     sh->collocated_list = !get_bits1(gb);
649                 if (sh->nb_refs[sh->collocated_list] > 1) {
650                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
651                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
652                         av_log(s->avctx, AV_LOG_ERROR,
653                                "Invalid collocated_ref_idx: %d.\n",
654                                sh->collocated_ref_idx);
655                         return AVERROR_INVALIDDATA;
660             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
661                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
662                 pred_weight_table(s, gb);
/* five_minus_max_num_merge_cand → valid range is 1..5 */
665             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
666             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
667                 av_log(s->avctx, AV_LOG_ERROR,
668                        "Invalid number of merging MVP candidates: %d.\n",
669                        sh->max_num_merge_cand);
670                 return AVERROR_INVALIDDATA;
674         sh->slice_qp_delta = get_se_golomb(gb);
676         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
677             sh->slice_cb_qp_offset = get_se_golomb(gb);
678             sh->slice_cr_qp_offset = get_se_golomb(gb);
680             sh->slice_cb_qp_offset = 0;
681             sh->slice_cr_qp_offset = 0;
684         if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
685             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
687             sh->cu_chroma_qp_offset_enabled_flag = 0;
/* deblocking parameters: slice override, else PPS values, else defaults */
689         if (s->ps.pps->deblocking_filter_control_present_flag) {
690             int deblocking_filter_override_flag = 0;
692             if (s->ps.pps->deblocking_filter_override_enabled_flag)
693                 deblocking_filter_override_flag = get_bits1(gb);
695             if (deblocking_filter_override_flag) {
696                 sh->disable_deblocking_filter_flag = get_bits1(gb);
697                 if (!sh->disable_deblocking_filter_flag) {
/* beta/tc offsets are coded divided by two */
698                     sh->beta_offset = get_se_golomb(gb) * 2;
699                     sh->tc_offset   = get_se_golomb(gb) * 2;
702                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
703                 sh->beta_offset = s->ps.pps->beta_offset;
704                 sh->tc_offset = s->ps.pps->tc_offset;
707             sh->disable_deblocking_filter_flag = 0;
712         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
713             (sh->slice_sample_adaptive_offset_flag[0] ||
714              sh->slice_sample_adaptive_offset_flag[1] ||
715              !sh->disable_deblocking_filter_flag)) {
716             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
718             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
/* dependent slice segment without a preceding independent one is invalid */
720     } else if (!s->slice_initialized) {
721         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
722         return AVERROR_INVALIDDATA;
/* entry points for tiles / wavefront parallel processing */
725     sh->num_entry_point_offsets = 0;
726     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
727         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
728         // It would be possible to bound this tighter but this here is simpler
729         if (num_entry_point_offsets > get_bits_left(gb)) {
730             av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
731             return AVERROR_INVALIDDATA;
734         sh->num_entry_point_offsets = num_entry_point_offsets;
735         if (sh->num_entry_point_offsets > 0) {
736             int offset_len = get_ue_golomb_long(gb) + 1;
738             if (offset_len < 1 || offset_len > 32) {
739                 sh->num_entry_point_offsets = 0;
740                 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
741                 return AVERROR_INVALIDDATA;
/* release arrays from the previous slice before reallocating */
744             av_freep(&sh->entry_point_offset);
745             av_freep(&sh->offset);
747             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
748             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
749             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
750             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
751                 sh->num_entry_point_offsets = 0;
752                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
753                 return AVERROR(ENOMEM);
755             for (i = 0; i < sh->num_entry_point_offsets; i++) {
756                 unsigned val = get_bits_long(gb, offset_len);
757                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
759             if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
760                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
761                 s->threads_number = 1;
763                 s->enable_parallel_tiles = 0;
765             s->enable_parallel_tiles = 0;
768     if (s->ps.pps->slice_header_extension_present_flag) {
769         unsigned int length = get_ue_golomb_long(gb);
/* 8LL avoids 32-bit overflow when multiplying the declared byte count */
770         if (length*8LL > get_bits_left(gb)) {
771             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
772             return AVERROR_INVALIDDATA;
774         for (i = 0; i < length; i++)
775             skip_bits(gb, 8);  // slice_header_extension_data_byte
778     // Inferred parameters
/* 26U forces unsigned arithmetic — presumably to avoid signed-overflow UB
 * on hostile slice_qp_delta values; TODO confirm intent */
779     sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
780     if (sh->slice_qp > 51 ||
781         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
782         av_log(s->avctx, AV_LOG_ERROR,
783                "The slice_qp %d is outside the valid range "
786                -s->ps.sps->qp_bd_offset);
787         return AVERROR_INVALIDDATA;
790     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
792     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
793         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
794         return AVERROR_INVALIDDATA;
797     if (get_bits_left(gb) < 0) {
798         av_log(s->avctx, AV_LOG_ERROR,
799                "Overread slice header by %d bits\n", -get_bits_left(gb));
800         return AVERROR_INVALIDDATA;
803     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
805     if (!s->ps.pps->cu_qp_delta_enabled_flag)
806         s->HEVClc->qp_y = s->sh.slice_qp;
808     s->slice_initialized = 1;
809     s->HEVClc->tu.cu_qp_offset_cb = 0;
810     s->HEVClc->tu.cu_qp_offset_cr = 0;
/* Index a per-CTB table at CTB raster coordinates (x, y).
 * Relies on `s` being in scope at the expansion site. */
815 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
/* Assign one SAO parameter field: decode `value` when neither merge flag
 * is set, otherwise inherit the field from the left (rx-1) or above (ry-1)
 * CTB. Relies on sao/rx/ry/sao_merge_*_flag in the expanding scope.
 * NOTE(review): the decode branch body is not visible in this view. */
817 #define SET_SAO(elem, value)                            \
819     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
821     else if (sao_merge_left_flag)                       \
822         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
823     else if (sao_merge_up_flag)                         \
824         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
/* Parse (or merge from neighbours) the SAO parameters for the CTB at
 * raster position (rx, ry) and derive the final offset values
 * (HEVC spec 7.3.8.3). */
829 static void hls_sao_param(HEVCContext *s, int rx, int ry)
831     HEVCLocalContext *lc = s->HEVClc;
832     int sao_merge_left_flag = 0;
833     int sao_merge_up_flag = 0;
834     SAOParams *sao = &CTB(s->sao, rx, ry);
/* merge flags only exist when SAO is on for at least one plane */
837     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
838         s->sh.slice_sample_adaptive_offset_flag[1]) {
840         if (lc->ctb_left_flag)
841             sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
/* up-merge is only coded when not already merging left */
843         if (ry > 0 && !sao_merge_left_flag) {
845             sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
/* one component for monochrome, three otherwise */
849     for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
850         int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
851                                                  s->ps.pps->log2_sao_offset_scale_chroma;
853         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
854             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
/* Cr (c_idx==2) reuses the type/eo_class decoded for Cb */
859             sao->type_idx[2] = sao->type_idx[1];
860             sao->eo_class[2] = sao->eo_class[1];
862             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
865         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
868         for (i = 0; i < 4; i++)
869             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
871         if (sao->type_idx[c_idx] == SAO_BAND) {
/* band offsets carry an explicit sign only when the magnitude is nonzero */
872             for (i = 0; i < 4; i++) {
873                 if (sao->offset_abs[c_idx][i]) {
874                     SET_SAO(offset_sign[c_idx][i],
875                             ff_hevc_sao_offset_sign_decode(s));
877                     sao->offset_sign[c_idx][i] = 0;
880             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
881         } else if (c_idx != 2) {
882             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
885         // Inferred parameters
886         sao->offset_val[c_idx][0] = 0;
887         for (i = 0; i < 4; i++) {
888             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
889             if (sao->type_idx[c_idx] == SAO_EDGE) {
/* edge offsets: sign is implied by the category, not coded */
891                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
892             } else if (sao->offset_sign[c_idx][i]) {
893                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
/* scale offsets for >10-bit content per PPS range-extension fields */
895             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
/* Parse the cross-component prediction syntax for chroma component idx
 * (0 = Cb, 1 = Cr; HEVC range extensions, spec 7.3.8.12) and derive
 * lc->tu.res_scale_val: 0 when disabled, otherwise a signed power of two
 * in {-8..-1, 1..8}. */
903 static int hls_cross_component_pred(HEVCContext *s, int idx) {
904     HEVCLocalContext *lc = s->HEVClc;
905     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
907     if (log2_res_scale_abs_plus1 !=  0) {
908         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
/* magnitude is 2^(abs_plus1 - 1); sign flag flips it negative */
909         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
910                                (1 - 2 * res_scale_sign_flag);
912         lc->tu.res_scale_val = 0;
919 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
920 int xBase, int yBase, int cb_xBase, int cb_yBase,
921 int log2_cb_size, int log2_trafo_size,
922 int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
924 HEVCLocalContext *lc = s->HEVClc;
925 const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
928 if (lc->cu.pred_mode == MODE_INTRA) {
929 int trafo_size = 1 << log2_trafo_size;
930 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
932 s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
935 if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
936 (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
937 int scan_idx = SCAN_DIAG;
938 int scan_idx_c = SCAN_DIAG;
939 int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
940 (s->ps.sps->chroma_format_idc == 2 &&
941 (cbf_cb[1] || cbf_cr[1]));
943 if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
944 lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
945 if (lc->tu.cu_qp_delta != 0)
946 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
947 lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
948 lc->tu.is_cu_qp_delta_coded = 1;
950 if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
951 lc->tu.cu_qp_delta > (25 + s->ps.sps->qp_bd_offset / 2)) {
952 av_log(s->avctx, AV_LOG_ERROR,
953 "The cu_qp_delta %d is outside the valid range "
956 -(26 + s->ps.sps->qp_bd_offset / 2),
957 (25 + s->ps.sps->qp_bd_offset / 2));
958 return AVERROR_INVALIDDATA;
961 ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
964 if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
965 !lc->cu.cu_transquant_bypass_flag && !lc->tu.is_cu_chroma_qp_offset_coded) {
966 int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
967 if (cu_chroma_qp_offset_flag) {
968 int cu_chroma_qp_offset_idx = 0;
969 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
970 cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
971 av_log(s->avctx, AV_LOG_ERROR,
972 "cu_chroma_qp_offset_idx not yet tested.\n");
974 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
975 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
977 lc->tu.cu_qp_offset_cb = 0;
978 lc->tu.cu_qp_offset_cr = 0;
980 lc->tu.is_cu_chroma_qp_offset_coded = 1;
983 if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
984 if (lc->tu.intra_pred_mode >= 6 &&
985 lc->tu.intra_pred_mode <= 14) {
986 scan_idx = SCAN_VERT;
987 } else if (lc->tu.intra_pred_mode >= 22 &&
988 lc->tu.intra_pred_mode <= 30) {
989 scan_idx = SCAN_HORIZ;
992 if (lc->tu.intra_pred_mode_c >= 6 &&
993 lc->tu.intra_pred_mode_c <= 14) {
994 scan_idx_c = SCAN_VERT;
995 } else if (lc->tu.intra_pred_mode_c >= 22 &&
996 lc->tu.intra_pred_mode_c <= 30) {
997 scan_idx_c = SCAN_HORIZ;
1001 lc->tu.cross_pf = 0;
1004 ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1005 if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1006 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1007 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1008 lc->tu.cross_pf = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1009 (lc->cu.pred_mode == MODE_INTER ||
1010 (lc->tu.chroma_mode_c == 4)));
1012 if (lc->tu.cross_pf) {
1013 hls_cross_component_pred(s, 0);
1015 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1016 if (lc->cu.pred_mode == MODE_INTRA) {
1017 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1018 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1021 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1022 log2_trafo_size_c, scan_idx_c, 1);
1024 if (lc->tu.cross_pf) {
1025 ptrdiff_t stride = s->frame->linesize[1];
1026 int hshift = s->ps.sps->hshift[1];
1027 int vshift = s->ps.sps->vshift[1];
1028 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1029 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1030 int size = 1 << log2_trafo_size_c;
1032 uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1033 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1034 for (i = 0; i < (size * size); i++) {
1035 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1037 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1041 if (lc->tu.cross_pf) {
1042 hls_cross_component_pred(s, 1);
1044 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1045 if (lc->cu.pred_mode == MODE_INTRA) {
1046 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1047 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1050 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1051 log2_trafo_size_c, scan_idx_c, 2);
1053 if (lc->tu.cross_pf) {
1054 ptrdiff_t stride = s->frame->linesize[2];
1055 int hshift = s->ps.sps->hshift[2];
1056 int vshift = s->ps.sps->vshift[2];
1057 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1058 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1059 int size = 1 << log2_trafo_size_c;
1061 uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1062 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1063 for (i = 0; i < (size * size); i++) {
1064 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1066 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1069 } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1070 int trafo_size_h = 1 << (log2_trafo_size + 1);
1071 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1072 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1073 if (lc->cu.pred_mode == MODE_INTRA) {
1074 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1075 trafo_size_h, trafo_size_v);
1076 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1079 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1080 log2_trafo_size, scan_idx_c, 1);
1082 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1083 if (lc->cu.pred_mode == MODE_INTRA) {
1084 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1085 trafo_size_h, trafo_size_v);
1086 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1089 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1090 log2_trafo_size, scan_idx_c, 2);
1093 } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1094 if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1095 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1096 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1097 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1098 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1099 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1100 if (s->ps.sps->chroma_format_idc == 2) {
1101 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1102 trafo_size_h, trafo_size_v);
1103 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1104 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1106 } else if (blk_idx == 3) {
1107 int trafo_size_h = 1 << (log2_trafo_size + 1);
1108 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1109 ff_hevc_set_neighbour_available(s, xBase, yBase,
1110 trafo_size_h, trafo_size_v);
1111 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1112 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1113 if (s->ps.sps->chroma_format_idc == 2) {
1114 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1115 trafo_size_h, trafo_size_v);
1116 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1117 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1125 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1127 int cb_size = 1 << log2_cb_size;
1128 int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1130 int min_pu_width = s->ps.sps->min_pu_width;
1131 int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1132 int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1135 for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1136 for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1137 s->is_pcm[i + j * min_pu_width] = 2;
/*
 * Parse one level of the residual quadtree (transform_tree(), spec 7.3.8.8).
 *
 * Either recurses into four child transform blocks or, at a leaf, decodes the
 * chroma/luma CBFs and hands off to hls_transform_unit().  base_cbf_cb/cr are
 * the parent's coded-block flags; per the spec a child only decodes its own
 * chroma CBF when the parent's was set.
 *
 * Returns 0 on success, a negative error code on failure.
 */
static int hls_transform_tree(HEVCContext *s, int x0, int y0,
                              int xBase, int yBase, int cb_xBase, int cb_yBase,
                              int log2_cb_size, int log2_trafo_size,
                              int trafo_depth, int blk_idx,
                              const int *base_cbf_cb, const int *base_cbf_cr)
{
    HEVCLocalContext *lc = s->HEVClc;
    uint8_t split_transform_flag;
    int cbf_cb[2];
    int cbf_cr[2];
    int ret;

    /* inherit the parent's chroma CBFs; [1] is the second chroma TB (4:2:2) */
    cbf_cb[0] = base_cbf_cb[0];
    cbf_cb[1] = base_cbf_cb[1];
    cbf_cr[0] = base_cbf_cr[0];
    cbf_cr[1] = base_cbf_cr[1];

    if (lc->cu.intra_split_flag) {
        /* NxN intra split: each of the four sub-blocks carries its own
         * luma prediction mode (and chroma mode only for 4:4:4) */
        if (trafo_depth == 1) {
            lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
            if (s->ps.sps->chroma_format_idc == 3) {
                lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
                lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
            } else {
                lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
                lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
            }
        }
    } else {
        lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
        lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
        lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
    }

    /* split_transform_flag is only coded when the size/depth allow a choice;
     * otherwise it is inferred (spec 7.4.9.8) */
    if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
        log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
        trafo_depth     <  lc->cu.max_trafo_depth         &&
        !(lc->cu.intra_split_flag && trafo_depth == 0)) {
        split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
    } else {
        int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
                          lc->cu.pred_mode == MODE_INTER &&
                          lc->cu.part_mode != PART_2Nx2N &&
                          trafo_depth == 0;

        split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
                               (lc->cu.intra_split_flag && trafo_depth == 0) ||
                               inter_split;
    }

    /* chroma CBFs: skipped entirely for 4x4 luma TBs unless 4:4:4, where
     * chroma has its own 4x4 TBs; the [1] flags are the extra 4:2:2 TB */
    if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
        if (trafo_depth == 0 || cbf_cb[0]) {
            cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
            if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
                cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
            }
        }

        if (trafo_depth == 0 || cbf_cr[0]) {
            cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
            if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
                cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
            }
        }
    }

    if (split_transform_flag) {
        const int trafo_size_split = 1 << (log2_trafo_size - 1);
        const int x1 = x0 + trafo_size_split;
        const int y1 = y0 + trafo_size_split;

#define SUBDIVIDE(x, y, idx)                                                    \
do {                                                                            \
    ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
                             log2_trafo_size - 1, trafo_depth + 1, idx,         \
                             cbf_cb, cbf_cr);                                   \
    if (ret < 0)                                                                \
        return ret;                                                             \
} while (0)

        SUBDIVIDE(x0, y0, 0);
        SUBDIVIDE(x1, y0, 1);
        SUBDIVIDE(x0, y1, 2);
        SUBDIVIDE(x1, y1, 3);

#undef SUBDIVIDE
    } else {
        int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
        int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
        int min_tu_width     = s->ps.sps->min_tb_width;
        int cbf_luma         = 1;

        /* cbf_luma is inferred to 1 unless something else in the TU could
         * carry residual (spec 7.4.9.8) */
        if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
            cbf_cb[0] || cbf_cr[0] ||
            (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
            cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
        }

        ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
                                 log2_cb_size, log2_trafo_size,
                                 blk_idx, cbf_luma, cbf_cb, cbf_cr);
        if (ret < 0)
            return ret;
        // TODO: store cbf_luma somewhere else
        if (cbf_luma) {
            int i, j;
            /* record cbf_luma per min-TB cell for the deblocking filter */
            for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
                for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
                    int x_tu = (x0 + j) >> log2_min_tu_size;
                    int y_tu = (y0 + i) >> log2_min_tu_size;
                    s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
                }
        }
        if (!s->sh.disable_deblocking_filter_flag) {
            ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
            if (s->ps.pps->transquant_bypass_enable_flag &&
                lc->cu.cu_transquant_bypass_flag)
                set_deblocking_bypass(s, x0, y0, log2_trafo_size);
        }
    }
    return 0;
}
/*
 * Decode a PCM coding block (pcm_sample(), spec 7.3.8.7): raw, uncompressed
 * samples stored in the bitstream are copied straight into the frame.
 *
 * Deblocking boundary strengths are still computed for the block edges
 * before the copy, since neighbouring non-PCM blocks may be filtered
 * against it.
 *
 * Returns 0 on success, a negative error code on failure.
 */
static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
{
    HEVCLocalContext *lc = s->HEVClc;
    GetBitContext gb;
    int cb_size   = 1 << log2_cb_size;
    int stride0   = s->frame->linesize[0];
    uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
    int   stride1 = s->frame->linesize[1];
    uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
    int   stride2 = s->frame->linesize[2];
    uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];

    /* total PCM payload in bits: luma samples plus both subsampled
     * chroma planes, at their respective PCM bit depths */
    int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
                         (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
                          ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
                         s->ps.sps->pcm.bit_depth_chroma;
    /* PCM data is byte-aligned in the CABAC stream; skip past it there
     * and read it through a separate bit reader */
    const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
    int ret;

    if (!s->sh.disable_deblocking_filter_flag)
        ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);

    ret = init_get_bits(&gb, pcm, length);
    if (ret < 0)
        return ret;

    s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size, &gb, s->ps.sps->pcm.bit_depth);
    if (s->ps.sps->chroma_format_idc) {
        s->hevcdsp.put_pcm(dst1, stride1,
                           cb_size >> s->ps.sps->hshift[1],
                           cb_size >> s->ps.sps->vshift[1],
                           &gb, s->ps.sps->pcm.bit_depth_chroma);
        s->hevcdsp.put_pcm(dst2, stride2,
                           cb_size >> s->ps.sps->hshift[2],
                           cb_size >> s->ps.sps->vshift[2],
                           &gb, s->ps.sps->pcm.bit_depth_chroma);
    }

    return 0;
}
/**
 * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
 *
 * @param s HEVC decoding context
 * @param dst target buffer for block data at block position
 * @param dststride stride of the dst buffer
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block
 * @param block_h height of block
 * @param luma_weight weighting factor applied to the luma prediction
 * @param luma_offset additive offset applied to the luma prediction value
 */
static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
                        AVFrame *ref, const Mv *mv, int x_off, int y_off,
                        int block_w, int block_h, int luma_weight, int luma_offset)
{
    HEVCLocalContext *lc = s->HEVClc;
    uint8_t *src         = ref->data[0];
    ptrdiff_t srcstride  = ref->linesize[0];
    int pic_width        = s->ps.sps->width;
    int pic_height       = s->ps.sps->height;
    /* luma MVs are in quarter-pel units: low 2 bits are the fractional part */
    int mx               = mv->x & 3;
    int my               = mv->y & 3;
    int weight_flag      = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
                           (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
    int idx              = ff_hevc_pel_weight[block_w];

    x_off += mv->x >> 2;
    y_off += mv->y >> 2;
    src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));

    /* if the 8-tap filter footprint would read outside the picture, build a
     * padded copy in the edge emulation buffer and interpolate from there.
     * NOTE(review): the y_off lower bound uses QPEL_EXTRA_AFTER rather than
     * QPEL_EXTRA_BEFORE; this only triggers emulation slightly more often
     * (conservative) — confirm against upstream intent. */
    if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
        x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
        y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
        int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);

        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
                                 edge_emu_stride, srcstride,
                                 block_w + QPEL_EXTRA,
                                 block_h + QPEL_EXTRA,
                                 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
                                 pic_width, pic_height);
        src = lc->edge_emu_buffer + buf_offset;
        srcstride = edge_emu_stride;
    }

    if (!weight_flag)
        s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
                                                      block_h, mx, my, block_w);
    else
        s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
                                                        block_h, s->sh.luma_log2_weight_denom,
                                                        luma_weight, luma_offset, mx, my, block_w);
}
/**
 * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
 *
 * @param s HEVC decoding context
 * @param dst target buffer for block data at block position
 * @param dststride stride of the dst buffer
 * @param ref0 reference picture0 buffer at origin (0, 0)
 * @param mv0 motion vector0 (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block
 * @param block_h height of block
 * @param ref1 reference picture1 buffer at origin (0, 0)
 * @param mv1 motion vector1 (relative to block position) to get pixel data from
 * @param current_mv current motion vector structure
 */
static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
                       AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
                       int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
{
    HEVCLocalContext *lc = s->HEVClc;
    ptrdiff_t src0stride  = ref0->linesize[0];
    ptrdiff_t src1stride  = ref1->linesize[0];
    int pic_width        = s->ps.sps->width;
    int pic_height       = s->ps.sps->height;
    /* quarter-pel fractional parts of both motion vectors */
    int mx0              = mv0->x & 3;
    int my0              = mv0->y & 3;
    int mx1              = mv1->x & 3;
    int my1              = mv1->y & 3;
    int weight_flag      = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
                           (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
    int x_off0           = x_off + (mv0->x >> 2);
    int y_off0           = y_off + (mv0->y >> 2);
    int x_off1           = x_off + (mv1->x >> 2);
    int y_off1           = y_off + (mv1->y >> 2);
    int idx              = ff_hevc_pel_weight[block_w];

    uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
    uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);

    /* edge emulation for the L0 reference if the filter footprint leaves
     * the picture (see note in luma_mc_uni about the y lower bound) */
    if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
        x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
        y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
        int offset     = QPEL_EXTRA_BEFORE * src0stride     + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);

        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
                                 edge_emu_stride, src0stride,
                                 block_w + QPEL_EXTRA,
                                 block_h + QPEL_EXTRA,
                                 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
                                 pic_width, pic_height);
        src0 = lc->edge_emu_buffer + buf_offset;
        src0stride = edge_emu_stride;
    }

    /* edge emulation for the L1 reference, using the second emu buffer so
     * the L0 padded copy stays valid */
    if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
        x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
        y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
        int offset     = QPEL_EXTRA_BEFORE * src1stride     + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);

        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
                                 edge_emu_stride, src1stride,
                                 block_w + QPEL_EXTRA,
                                 block_h + QPEL_EXTRA,
                                 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
                                 pic_width, pic_height);
        src1 = lc->edge_emu_buffer2 + buf_offset;
        src1stride = edge_emu_stride;
    }

    /* first pass: L0 prediction into the intermediate buffer; second pass
     * combines it with the L1 prediction (optionally weighted) */
    s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
                                                block_h, mx0, my0, block_w);
    if (!weight_flag)
        s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
                                                       block_h, mx1, my1, block_w);
    else
        s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
                                                         block_h, s->sh.luma_log2_weight_denom,
                                                         s->sh.luma_weight_l0[current_mv->ref_idx[0]],
                                                         s->sh.luma_weight_l1[current_mv->ref_idx[1]],
                                                         s->sh.luma_offset_l0[current_mv->ref_idx[0]],
                                                         s->sh.luma_offset_l1[current_mv->ref_idx[1]],
                                                         mx1, my1, block_w);
}
1456 * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1458 * @param s HEVC decoding context
1459 * @param dst1 target buffer for block data at block position (U plane)
1460 * @param dst2 target buffer for block data at block position (V plane)
1461 * @param dststride stride of the dst1 and dst2 buffers
1462 * @param ref reference picture buffer at origin (0, 0)
1463 * @param mv motion vector (relative to block position) to get pixel data from
1464 * @param x_off horizontal position of block from origin (0, 0)
1465 * @param y_off vertical position of block from origin (0, 0)
1466 * @param block_w width of block
1467 * @param block_h height of block
1468 * @param chroma_weight weighting factor applied to the chroma prediction
1469 * @param chroma_offset additive offset applied to the chroma prediction value
1472 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1473 ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1474 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1476 HEVCLocalContext *lc = s->HEVClc;
1477 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1478 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1479 const Mv *mv = ¤t_mv->mv[reflist];
1480 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1481 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1482 int idx = ff_hevc_pel_weight[block_w];
1483 int hshift = s->ps.sps->hshift[1];
1484 int vshift = s->ps.sps->vshift[1];
1485 intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift);
1486 intptr_t my = av_mod_uintp2(mv->y, 2 + vshift);
1487 intptr_t _mx = mx << (1 - hshift);
1488 intptr_t _my = my << (1 - vshift);
1490 x_off += mv->x >> (2 + hshift);
1491 y_off += mv->y >> (2 + vshift);
1492 src0 += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1494 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1495 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1496 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1497 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1498 int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1499 int buf_offset0 = EPEL_EXTRA_BEFORE *
1500 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1501 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1502 edge_emu_stride, srcstride,
1503 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1504 x_off - EPEL_EXTRA_BEFORE,
1505 y_off - EPEL_EXTRA_BEFORE,
1506 pic_width, pic_height);
1508 src0 = lc->edge_emu_buffer + buf_offset0;
1509 srcstride = edge_emu_stride;
1512 s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1513 block_h, _mx, _my, block_w);
1515 s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1516 block_h, s->sh.chroma_log2_weight_denom,
1517 chroma_weight, chroma_offset, _mx, _my, block_w);
1521 * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1523 * @param s HEVC decoding context
1524 * @param dst target buffer for block data at block position
1525 * @param dststride stride of the dst buffer
1526 * @param ref0 reference picture0 buffer at origin (0, 0)
1527 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1528 * @param x_off horizontal position of block from origin (0, 0)
1529 * @param y_off vertical position of block from origin (0, 0)
1530 * @param block_w width of block
1531 * @param block_h height of block
1532 * @param ref1 reference picture1 buffer at origin (0, 0)
1533 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1534 * @param current_mv current motion vector structure
1535 * @param cidx chroma component(cb, cr)
1537 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1538 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1540 HEVCLocalContext *lc = s->HEVClc;
1541 uint8_t *src1 = ref0->data[cidx+1];
1542 uint8_t *src2 = ref1->data[cidx+1];
1543 ptrdiff_t src1stride = ref0->linesize[cidx+1];
1544 ptrdiff_t src2stride = ref1->linesize[cidx+1];
1545 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1546 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1547 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1548 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1549 Mv *mv0 = ¤t_mv->mv[0];
1550 Mv *mv1 = ¤t_mv->mv[1];
1551 int hshift = s->ps.sps->hshift[1];
1552 int vshift = s->ps.sps->vshift[1];
1554 intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1555 intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1556 intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1557 intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1558 intptr_t _mx0 = mx0 << (1 - hshift);
1559 intptr_t _my0 = my0 << (1 - vshift);
1560 intptr_t _mx1 = mx1 << (1 - hshift);
1561 intptr_t _my1 = my1 << (1 - vshift);
1563 int x_off0 = x_off + (mv0->x >> (2 + hshift));
1564 int y_off0 = y_off + (mv0->y >> (2 + vshift));
1565 int x_off1 = x_off + (mv1->x >> (2 + hshift));
1566 int y_off1 = y_off + (mv1->y >> (2 + vshift));
1567 int idx = ff_hevc_pel_weight[block_w];
1568 src1 += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1569 src2 += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1571 if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1572 x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1573 y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1574 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1575 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1576 int buf_offset1 = EPEL_EXTRA_BEFORE *
1577 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1579 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1580 edge_emu_stride, src1stride,
1581 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1582 x_off0 - EPEL_EXTRA_BEFORE,
1583 y_off0 - EPEL_EXTRA_BEFORE,
1584 pic_width, pic_height);
1586 src1 = lc->edge_emu_buffer + buf_offset1;
1587 src1stride = edge_emu_stride;
1590 if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1591 x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1592 y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1593 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1594 int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1595 int buf_offset1 = EPEL_EXTRA_BEFORE *
1596 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1598 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1599 edge_emu_stride, src2stride,
1600 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1601 x_off1 - EPEL_EXTRA_BEFORE,
1602 y_off1 - EPEL_EXTRA_BEFORE,
1603 pic_width, pic_height);
1605 src2 = lc->edge_emu_buffer2 + buf_offset1;
1606 src2stride = edge_emu_stride;
1609 s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1610 block_h, _mx0, _my0, block_w);
1612 s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1613 src2, src2stride, lc->tmp,
1614 block_h, _mx1, _my1, block_w);
1616 s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1617 src2, src2stride, lc->tmp,
1619 s->sh.chroma_log2_weight_denom,
1620 s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1621 s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1622 s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1623 s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1624 _mx1, _my1, block_w);
1627 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1628 const Mv *mv, int y0, int height)
1630 int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1632 if (s->threads_type == FF_THREAD_FRAME )
1633 ff_thread_await_progress(&ref->tf, y, 0);
/*
 * Decode an explicitly coded (non-merge) motion vector for a prediction
 * unit: inter_pred_idc, reference indices, MVDs and the AMVP predictor
 * flags (spec 7.3.8.6), then reconstruct the final MVs as predictor + MVD.
 * The result (MVs, ref indices, pred_flag) is written into *mv.
 */
static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
                                  int nPbH, int log2_cb_size, int part_idx,
                                  int merge_idx, MvField *mv)
{
    HEVCLocalContext *lc = s->HEVClc;
    enum InterPredIdc inter_pred_idc = PRED_L0;
    int mvp_flag;

    ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
    mv->pred_flag = 0;
    /* only B slices can signal a prediction direction; P is always L0 */
    if (s->sh.slice_type == B_SLICE)
        inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);

    if (inter_pred_idc != PRED_L1) {
        if (s->sh.nb_refs[L0])
            mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);

        mv->pred_flag = PF_L0;
        ff_hevc_hls_mvd_coding(s, x0, y0, 0);
        mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
        /* derive the AMVP predictor, then add the decoded MVD */
        ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
                                 part_idx, merge_idx, mv, mvp_flag, 0);
        mv->mv[0].x += lc->pu.mvd.x;
        mv->mv[0].y += lc->pu.mvd.y;
    }

    if (inter_pred_idc != PRED_L0) {
        if (s->sh.nb_refs[L1])
            mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);

        /* mvd_l1_zero_flag: the L1 MVD of bi-predicted PUs is inferred to 0 */
        if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
            AV_ZERO32(&lc->pu.mvd);
        } else {
            ff_hevc_hls_mvd_coding(s, x0, y0, 1);
        }

        mv->pred_flag += PF_L1;
        mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
        ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
                                 part_idx, merge_idx, mv, mvp_flag, 1);
        mv->mv[1].x += lc->pu.mvd.x;
        mv->mv[1].y += lc->pu.mvd.y;
    }
}
1681 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1683 int log2_cb_size, int partIdx, int idx)
1685 #define POS(c_idx, x, y) \
1686 &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1687 (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1688 HEVCLocalContext *lc = s->HEVClc;
1690 struct MvField current_mv = {{{ 0 }}};
1692 int min_pu_width = s->ps.sps->min_pu_width;
1694 MvField *tab_mvf = s->ref->tab_mvf;
1695 RefPicList *refPicList = s->ref->refPicList;
1696 HEVCFrame *ref0 = NULL, *ref1 = NULL;
1697 uint8_t *dst0 = POS(0, x0, y0);
1698 uint8_t *dst1 = POS(1, x0, y0);
1699 uint8_t *dst2 = POS(2, x0, y0);
1700 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1701 int min_cb_width = s->ps.sps->min_cb_width;
1702 int x_cb = x0 >> log2_min_cb_size;
1703 int y_cb = y0 >> log2_min_cb_size;
1707 int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1710 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1712 if (skip_flag || lc->pu.merge_flag) {
1713 if (s->sh.max_num_merge_cand > 1)
1714 merge_idx = ff_hevc_merge_idx_decode(s);
1718 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1719 partIdx, merge_idx, ¤t_mv);
1721 hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1722 partIdx, merge_idx, ¤t_mv);
1725 x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1726 y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1728 for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1729 for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1730 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1732 if (current_mv.pred_flag & PF_L0) {
1733 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1736 hevc_await_progress(s, ref0, ¤t_mv.mv[0], y0, nPbH);
1738 if (current_mv.pred_flag & PF_L1) {
1739 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1742 hevc_await_progress(s, ref1, ¤t_mv.mv[1], y0, nPbH);
1745 if (current_mv.pred_flag == PF_L0) {
1746 int x0_c = x0 >> s->ps.sps->hshift[1];
1747 int y0_c = y0 >> s->ps.sps->vshift[1];
1748 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1749 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1751 luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1752 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1753 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1754 s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1756 if (s->ps.sps->chroma_format_idc) {
1757 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1758 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1759 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1760 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1761 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1762 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1764 } else if (current_mv.pred_flag == PF_L1) {
1765 int x0_c = x0 >> s->ps.sps->hshift[1];
1766 int y0_c = y0 >> s->ps.sps->vshift[1];
1767 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1768 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1770 luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1771 ¤t_mv.mv[1], x0, y0, nPbW, nPbH,
1772 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1773 s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1775 if (s->ps.sps->chroma_format_idc) {
1776 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1777 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1778 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1780 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1781 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1782 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1784 } else if (current_mv.pred_flag == PF_BI) {
1785 int x0_c = x0 >> s->ps.sps->hshift[1];
1786 int y0_c = y0 >> s->ps.sps->vshift[1];
1787 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1788 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1790 luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1791 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1792 ref1->frame, ¤t_mv.mv[1], ¤t_mv);
1794 if (s->ps.sps->chroma_format_idc) {
1795 chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1796 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 0);
1798 chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1799 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 1);
/*
 * Derive the luma intra prediction mode of a PU from the three most
 * probable modes (MPMs, spec 8.4.2) built from the left and above
 * neighbours, then record the mode and PF_INTRA into the per-PU maps.
 *
 * Returns the derived intra prediction mode.
 */
static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
                                int prev_intra_luma_pred_flag)
{
    HEVCLocalContext *lc = s->HEVClc;
    int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
    int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
    int min_pu_width     = s->ps.sps->min_pu_width;
    int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
    int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
    int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);

    /* neighbour modes default to DC when the neighbour is unavailable */
    int cand_up   = (lc->ctb_up_flag || y0b) ?
                    s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
    int cand_left = (lc->ctb_left_flag || x0b) ?
                    s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;

    int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);

    MvField *tab_mvf = s->ref->tab_mvf;
    int intra_pred_mode;
    int candidate[3];
    int i, j;

    // intra_pred_mode prediction does not cross vertical CTB boundaries
    if ((y0 - 1) < y_ctb)
        cand_up = INTRA_DC;

    if (cand_left == cand_up) {
        if (cand_left < 2) {
            /* both neighbours planar/DC: fixed candidate set */
            candidate[0] = INTRA_PLANAR;
            candidate[1] = INTRA_DC;
            candidate[2] = INTRA_ANGULAR_26;
        } else {
            /* angular neighbour: itself plus the two adjacent angles
             * (wrapping within the 32 angular modes) */
            candidate[0] = cand_left;
            candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
            candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
        }
    } else {
        candidate[0] = cand_left;
        candidate[1] = cand_up;
        if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
            candidate[2] = INTRA_PLANAR;
        } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
            candidate[2] = INTRA_DC;
        } else {
            candidate[2] = INTRA_ANGULAR_26;
        }
    }

    if (prev_intra_luma_pred_flag) {
        intra_pred_mode = candidate[lc->pu.mpm_idx];
    } else {
        /* rem_intra_luma_pred_mode indexes the non-MPM modes; sort the
         * candidates and skip over each one that precedes the result */
        if (candidate[0] > candidate[1])
            FFSWAP(uint8_t, candidate[0], candidate[1]);
        if (candidate[0] > candidate[2])
            FFSWAP(uint8_t, candidate[0], candidate[2]);
        if (candidate[1] > candidate[2])
            FFSWAP(uint8_t, candidate[1], candidate[2]);

        intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
        for (i = 0; i < 3; i++)
            if (intra_pred_mode >= candidate[i])
                intra_pred_mode++;
    }

    /* write the intra prediction units into the mv array */
    for (i = 0; i < size_in_pus; i++) {
        memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
               intra_pred_mode, size_in_pus);

        for (j = 0; j < size_in_pus; j++) {
            tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
        }
    }

    return intra_pred_mode;
}
// Record the coding-tree depth of a coding block into s->tab_ct_depth,
// one byte per min-CB unit, so neighbour CUs can read it during CABAC
// context derivation for split_cu_flag.
// NOTE(review): this listing drops source lines (embedded line numbers skip
// values); the memset value/size arguments and the loop-variable declaration
// are on lines not visible here.
1887 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1888 int log2_cb_size, int ct_depth)
// Width of the CB in min-CB units.
1890 int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
// (x_cb, y_cb): top-left of the CB in min-CB coordinates.
1891 int x_cb = x0 >> s->ps.sps->log2_min_cb_size;
1892 int y_cb = y0 >> s->ps.sps->log2_min_cb_size;
// Fill a length x length square of tab_ct_depth with ct_depth.
1895 for (y = 0; y < length; y++)
1896 memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
// Remapping table for chroma intra prediction modes; used below in
// intra_prediction_unit() only in the chroma_format_idc == 2 (4:2:2) branch,
// indexed by the derived luma/chroma mode index.
1900 static const uint8_t tab_mode_idx[] = {
1901 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20,
1902 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
// Decode the intra prediction modes for one intra CU: first the luma mode of
// each prediction block (1 PB for non-NxN, 4 PBs for PART_NxN), then the
// chroma mode(s) according to chroma_format_idc.
// NOTE(review): the listing drops source lines (embedded numbering skips);
// closing braces, `} else {` lines and some declarations are not visible.
1904 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1907 HEVCLocalContext *lc = s->HEVClc;
// Maps the decoded chroma mode index (0..3) to a luma mode; index 4 means
// "derived mode" (copy the collocated luma mode), handled separately below.
1908 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1909 uint8_t prev_intra_luma_pred_flag[4];
// PART_NxN splits the CU into a 2x2 grid of PBs of half size.
1910 int split = lc->cu.part_mode == PART_NxN;
1911 int pb_size = (1 << log2_cb_size) >> split;
1912 int side = split + 1;
// First pass: read all prev_intra_luma_pred_flag bits for the PBs.
1916 for (i = 0; i < side; i++)
1917 for (j = 0; j < side; j++)
1918 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
// Second pass: decode MPM index or remainder mode, then derive the final
// luma intra mode per PB via luma_intra_pred_mode().
1920 for (i = 0; i < side; i++) {
1921 for (j = 0; j < side; j++) {
1922 if (prev_intra_luma_pred_flag[2 * i + j])
1923 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s)
1925 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1927 lc->pu.intra_pred_mode[2 * i + j] =
1928 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1929 prev_intra_luma_pred_flag[2 * i + j]);
// 4:4:4 — one chroma mode per luma PB.
1933 if (s->ps.sps->chroma_format_idc == 3) {
1934 for (i = 0; i < side; i++) {
1935 for (j = 0; j < side; j++) {
1936 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1937 if (chroma_mode != 4) {
// Collision with the table entry selects the fallback angular mode 34.
1938 if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1939 lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1941 lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
// chroma_mode == 4: derived mode, copy the luma mode.
1943 lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
// 4:2:2 — single chroma mode, remapped through tab_mode_idx.
1947 } else if (s->ps.sps->chroma_format_idc == 2) {
1949 lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1950 if (chroma_mode != 4) {
1951 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1954 mode_idx = intra_chroma_table[chroma_mode];
1956 mode_idx = lc->pu.intra_pred_mode[0];
1958 lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
// 4:2:0 (any non-monochrome format) — single chroma mode, no remap table.
1959 } else if (s->ps.sps->chroma_format_idc != 0) {
1960 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1961 if (chroma_mode != 4) {
1962 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1963 lc->pu.intra_pred_mode_c[0] = 34;
1965 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1967 lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
// Fill the per-PU tables with default values for a CU whose intra modes are
// not explicitly decoded (skip / inter / PCM): tab_ipm gets INTRA_DC, and for
// intra CUs the MV field is flagged PF_INTRA.
1972 static void intra_prediction_unit_default_value(HEVCContext *s,
1976 HEVCLocalContext *lc = s->HEVClc;
1977 int pb_size = 1 << log2_cb_size;
// Size of the CU in min-PU units; may be 0 for CUs smaller than a min PU.
1978 int size_in_pus = pb_size >> s->ps.sps->log2_min_pu_size;
1979 int min_pu_width = s->ps.sps->min_pu_width;
1980 MvField *tab_mvf = s->ref->tab_mvf;
1981 int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1982 int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
// Guard: nothing to write when the CU covers no whole min-PU.
1985 if (size_in_pus == 0)
// Default every covered PU's intra mode to DC.
1987 for (j = 0; j < size_in_pus; j++)
1988 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
// Intra CUs additionally mark the motion field as intra-predicted.
1989 if (lc->cu.pred_mode == MODE_INTRA)
1990 for (j = 0; j < size_in_pus; j++)
1991 for (k = 0; k < size_in_pus; k++)
1992 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
// Decode one coding unit (spec 7.3.8.5 coding_unit): skip flag, prediction
// mode, partitioning, intra/inter prediction data, optional PCM, transform
// tree, then QP bookkeeping and deblocking strengths.
// NOTE(review): this listing drops source lines (embedded numbering skips);
// case labels, braces, `break`s and error-return paths are not all visible.
1995 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1997 int cb_size = 1 << log2_cb_size;
1998 HEVCLocalContext *lc = s->HEVClc;
1999 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2000 int length = cb_size >> log2_min_cb_size;
2001 int min_cb_width = s->ps.sps->min_cb_width;
2002 int x_cb = x0 >> log2_min_cb_size;
2003 int y_cb = y0 >> log2_min_cb_size;
2004 int idx = log2_cb_size - 2;
// Mask for detecting the end of a QP group (diff_cu_qp_delta_depth granularity).
2005 int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
// Defaults before parsing: intra, unsplit, 2Nx2N.
2010 lc->cu.pred_mode = MODE_INTRA;
2011 lc->cu.part_mode = PART_2Nx2N;
2012 lc->cu.intra_split_flag = 0;
2014 SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2015 for (x = 0; x < 4; x++)
2016 lc->pu.intra_pred_mode[x] = 1;
// Optional transquant bypass (lossless coding at CU level).
2017 if (s->ps.pps->transquant_bypass_enable_flag) {
2018 lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2019 if (lc->cu.cu_transquant_bypass_flag)
2020 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2022 lc->cu.cu_transquant_bypass_flag = 0;
// cu_skip_flag is only present outside I slices; propagate it over the
// CU's min-CB footprint in s->skip_flag.
2024 if (s->sh.slice_type != I_SLICE) {
2025 uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2027 x = y_cb * min_cb_width + x_cb;
2028 for (y = 0; y < length; y++) {
2029 memset(&s->skip_flag[x], skip_flag, length);
2032 lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2034 x = y_cb * min_cb_width + x_cb;
2035 for (y = 0; y < length; y++) {
2036 memset(&s->skip_flag[x], 0, length);
// Skipped CU: single merge-mode PU, default intra tables, deblocking only.
2041 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2042 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2043 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2045 if (!s->sh.disable_deblocking_filter_flag)
2046 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2050 if (s->sh.slice_type != I_SLICE)
2051 lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
// part_mode is signalled for inter CUs and for min-size intra CUs.
2052 if (lc->cu.pred_mode != MODE_INTRA ||
2053 log2_cb_size == s->ps.sps->log2_min_cb_size) {
2054 lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
2055 lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2056 lc->cu.pred_mode == MODE_INTRA;
2059 if (lc->cu.pred_mode == MODE_INTRA) {
// PCM is only allowed for unsplit intra CUs within the SPS size window.
2060 if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2061 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2062 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2063 pcm_flag = ff_hevc_pcm_flag_decode(s);
2066 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2067 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2068 if (s->ps.sps->pcm.loop_filter_disable_flag)
2069 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2074 intra_prediction_unit(s, x0, y0, log2_cb_size);
2077 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
// Inter CU: one hls_prediction_unit() call per PU according to part_mode
// (case labels for this switch are on dropped lines).
2078 switch (lc->cu.part_mode) {
2080 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2083 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0, idx);
2084 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2087 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2088 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2091 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0, idx);
2092 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2095 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2096 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1, idx);
2099 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0, idx - 2);
2100 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2103 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2104 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1, idx - 2);
2107 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2108 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2109 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2110 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
// rqt_root_cbf: whether any residual is coded; absent (inferred 1) for
// merged 2Nx2N inter PUs.
2116 int rqt_root_cbf = 1;
2118 if (lc->cu.pred_mode != MODE_INTRA &&
2119 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2120 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2123 const static int cbf[2] = { 0 };
2124 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2125 s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2126 s->ps.sps->max_transform_hierarchy_depth_inter;
2127 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2129 log2_cb_size, 0, 0, cbf, cbf);
2133 if (!s->sh.disable_deblocking_filter_flag)
2134 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
// If a QP delta was signalled as possible but never coded, derive qPy now.
2139 if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2140 ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
// Propagate the final QP over the CU's min-CB footprint.
2142 x = y_cb * min_cb_width + x_cb;
2143 for (y = 0; y < length; y++) {
2144 memset(&s->qp_y_tab[x], lc->qp_y, length);
// At the end of a QP group, the current QP becomes the predictor.
2148 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2149 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2150 lc->qPy_pred = lc->qp_y;
2153 set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
// Recursively decode the coding quadtree (spec 7.3.8.4): read or infer
// split_cu_flag, recurse into the four sub-CBs that lie inside the picture,
// otherwise decode a coding unit. Returns >0 if more CTU data follows,
// 0 at end of slice (end_of_slice_flag), <0 on error.
// NOTE(review): dropped lines hide some braces/returns in this listing.
2158 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2159 int log2_cb_size, int cb_depth)
2161 HEVCLocalContext *lc = s->HEVClc;
2162 const int cb_size = 1 << log2_cb_size;
2166 lc->ct_depth = cb_depth;
// split_cu_flag is coded only when the CB is fully inside the picture and
// larger than the minimum CB size; otherwise it is inferred.
2167 if (x0 + cb_size <= s->ps.sps->width &&
2168 y0 + cb_size <= s->ps.sps->height &&
2169 log2_cb_size > s->ps.sps->log2_min_cb_size) {
2170 split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
// Inferred: CBs crossing the picture border must split until min size.
2172 split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
// Entering a new QP group resets the cu_qp_delta state.
2174 if (s->ps.pps->cu_qp_delta_enabled_flag &&
2175 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2176 lc->tu.is_cu_qp_delta_coded = 0;
2177 lc->tu.cu_qp_delta = 0;
// Same for the chroma QP offset state.
2180 if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2181 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2182 lc->tu.is_cu_chroma_qp_offset_coded = 0;
2186 int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2187 const int cb_size_split = cb_size >> 1;
2188 const int x1 = x0 + cb_size_split;
2189 const int y1 = y0 + cb_size_split;
// Recurse into the four quadrants; quadrants outside the picture are skipped.
2193 more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2197 if (more_data && x1 < s->ps.sps->width) {
2198 more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2202 if (more_data && y1 < s->ps.sps->height) {
2203 more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2207 if (more_data && x1 < s->ps.sps->width &&
2208 y1 < s->ps.sps->height) {
2209 more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
// Closing a QP group updates the QP predictor.
2214 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2215 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2216 lc->qPy_pred = lc->qp_y;
// After a split: more data follows unless this was the bottom-right
// corner of the picture.
2219 return ((x1 + cb_size_split) < s->ps.sps->width ||
2220 (y1 + cb_size_split) < s->ps.sps->height);
// Leaf: decode the coding unit itself.
2224 ret = hls_coding_unit(s, x0, y0, log2_cb_size);
// At the end of a CTU row/column (or picture edge), end_of_slice_flag
// is coded; returning 0 terminates the slice-data loop.
2227 if ((!((x0 + cb_size) %
2228 (1 << (s->ps.sps->log2_ctb_size))) ||
2229 (x0 + cb_size >= s->ps.sps->width)) &&
2231 (1 << (s->ps.sps->log2_ctb_size))) ||
2232 (y0 + cb_size >= s->ps.sps->height))) {
2233 int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2234 return !end_of_slice_flag;
// Set up per-CTB neighbour availability for the CTB at (x_ctb, y_ctb):
// records the slice address, computes tile/slice boundary flags and the
// ctb_left/up/up-left/up-right availability used by intra prediction and
// CABAC context derivation.
2243 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2246 HEVCLocalContext *lc = s->HEVClc;
2247 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2248 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2249 int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2251 s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
// WPP: each CTB row starts a new QP group; rows span the full picture width.
2253 if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2254 if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2255 lc->first_qp_group = 1;
2256 lc->end_of_tiles_x = s->ps.sps->width;
// Tiles: on entering a new tile, recompute its right edge and reset QP group.
2257 } else if (s->ps.pps->tiles_enabled_flag) {
2258 if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2259 int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2260 lc->end_of_tiles_x = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2261 lc->first_qp_group = 1;
2264 lc->end_of_tiles_x = s->ps.sps->width;
2267 lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
// Boundary flags: whether the left/upper neighbour lies in another
// tile or slice.
2269 lc->boundary_flags = 0;
2270 if (s->ps.pps->tiles_enabled_flag) {
2271 if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2272 lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2273 if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2274 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2275 if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2276 lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2277 if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2278 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
// No tiles: derive slice boundaries from the CTB's offset inside the slice.
2280 if (ctb_addr_in_slice <= 0)
2281 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2282 if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2283 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
// Final availability: a neighbour exists, lies in this slice, and is not
// across a tile boundary.
2286 lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2287 lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2288 lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2289 lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
// Single-threaded slice decoding entry point (run via avctx->execute):
// iterates CTBs in tile-scan order from the slice start, decoding each
// coding quadtree and running the in-loop filters behind the decode point.
// NOTE(review): the loop tail (ctb_addr_ts increment, return value) is on
// dropped lines in this listing.
2292 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2294 HEVCContext *s = avctxt->priv_data;
2295 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2299 int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
// A dependent slice segment cannot be the first segment of the picture.
2301 if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2302 av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2303 return AVERROR_INVALIDDATA;
// Dependent segments require the previous segment to have been decoded.
2306 if (s->sh.dependent_slice_segment_flag) {
2307 int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2308 if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2309 av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2310 return AVERROR_INVALIDDATA;
2314 while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2315 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
// Raster address -> pixel coordinates of the CTB.
2317 x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2318 y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2319 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2321 ff_hevc_cabac_init(s, ctb_addr_ts);
2323 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2325 s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2326 s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2327 s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2329 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
// On error, invalidate the CTB's slice address so later passes skip it.
2330 if (more_data < 0) {
2331 s->tab_slice_address[ctb_addr_rs] = -1;
2337 ff_hevc_save_states(s, ctb_addr_ts);
2338 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
// Final CTB of the picture: run the remaining filter work.
2341 if (x_ctb + ctb_size >= s->ps.sps->width &&
2342 y_ctb + ctb_size >= s->ps.sps->height)
2343 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
// Run hls_decode_entry() through the avctx->execute() API with a single job.
// NOTE(review): the declarations of `arg`/`ret` and the return statement are
// on lines dropped from this listing.
2348 static int hls_slice_data(HEVCContext *s)
2356 s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
// Wavefront (WPP) per-row decoding job: each thread decodes one CTB row,
// waiting on the progress of the row above (ff_thread_await_progress2) and
// reporting its own progress so the next row can proceed. Aborts all rows
// via the shared wpp_err flag on error.
// NOTE(review): dropped lines hide several returns/braces in this listing.
2359 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2361 HEVCContext *s1 = avctxt->priv_data, *s;
2362 HEVCLocalContext *lc;
2363 int ctb_size = 1<< s1->ps.sps->log2_ctb_size;
2365 int *ctb_row_p = input_ctb_row;
2366 int ctb_row = ctb_row_p[job];
2367 int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2368 int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2369 int thread = ctb_row % s1->threads_number;
// Each job works on its own per-thread context copy.
2372 s = s1->sList[self_id];
// Rows after the first start at their entry-point offset in the bitstream.
2376 ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2380 ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2383 while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2384 int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2385 int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2387 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
// Wait until the row above is far enough ahead (2-CTB lag for WPP).
2389 ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
// Another row hit an error: bail out, still reporting progress so no
// sibling job deadlocks waiting on this row.
2391 if (avpriv_atomic_int_get(&s1->wpp_err)){
2392 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2396 ff_hevc_cabac_init(s, ctb_addr_ts);
2397 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2398 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2400 if (more_data < 0) {
2401 s->tab_slice_address[ctb_addr_rs] = -1;
2407 ff_hevc_save_states(s, ctb_addr_ts);
2408 ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2409 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
// Row ended before its right edge on a non-final row: bitstream error,
// flag it globally so the other rows stop.
2411 if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2412 avpriv_atomic_int_set(&s1->wpp_err, 1);
2413 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
// Bottom-right CTB of the picture: finish the in-loop filters.
2417 if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2418 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2419 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2422 ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
// Row complete: report full progress and stop this job.
2425 if(x_ctb >= s->ps.sps->width) {
2429 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
// Decode slice data with wavefront parallel processing: compute the byte
// offset/size of every entry point (correcting for emulation-prevention
// bytes skipped by the NAL parser), clone per-thread contexts, then launch
// one hls_decode_entry_wpp job per CTB row via execute2().
// NOTE(review): dropped lines hide some loop bodies (e.g. the cmpt++
// counting statements) and the final return in this listing.
2434 static int hls_slice_data_wpp(HEVCContext *s, const HEVCNAL *nal)
2436 const uint8_t *data = nal->data;
2437 int length = nal->size;
2438 HEVCLocalContext *lc = s->HEVClc;
2439 int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2440 int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2442 int startheader, cmpt = 0;
2448 return AVERROR(ENOMEM);
2453 ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
// Lazily create one HEVCContext/HEVCLocalContext clone per extra thread.
// NOTE(review): allocation results are not visibly checked here — possibly
// on dropped lines; verify against the full source.
2456 for (i = 1; i < s->threads_number; i++) {
2457 s->sList[i] = av_malloc(sizeof(HEVCContext));
2458 memcpy(s->sList[i], s, sizeof(HEVCContext));
2459 s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2460 s->sList[i]->HEVClc = s->HEVClcList[i];
// Current byte position of the CABAC reader inside the NAL payload.
2464 offset = (lc->gb.index >> 3);
// Count emulation-prevention bytes (skipped_bytes) that fall before each
// entry point so the signalled offsets can be rebased onto raw NAL data.
2466 for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2467 if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2473 for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2474 offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2475 for (j = 0, cmpt = 0, startheader = offset
2476 + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2477 if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2482 s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2483 s->sh.offset[i - 1] = offset;
// Last segment runs to the end of the NAL payload.
2486 if (s->sh.num_entry_point_offsets != 0) {
2487 offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2488 s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2489 s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
// Re-sync the clone contexts with the current state before launching jobs.
2494 for (i = 1; i < s->threads_number; i++) {
2495 s->sList[i]->HEVClc->first_qp_group = 1;
2496 s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2497 memcpy(s->sList[i], s, sizeof(HEVCContext));
2498 s->sList[i]->HEVClc = s->HEVClcList[i];
2501 avpriv_atomic_int_set(&s->wpp_err, 0);
2502 ff_reset_entries(s->avctx);
2504 for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
// One job per CTB row (num_entry_point_offsets + 1 rows).
2509 if (s->ps.pps->entropy_coding_sync_enabled_flag)
2510 s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2512 for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
// Attach SEI-derived side data to the output frame: stereo 3D packing info
// (frame-packing arrangement SEI) and a display rotation/flip matrix
// (display orientation SEI). Returns 0 or AVERROR(ENOMEM).
2519 static int set_side_data(HEVCContext *s)
2521 AVFrame *out = s->ref->frame;
// Only arrangement types 3..5 (side-by-side, top-bottom, frame sequence)
// with a valid content_interpretation_type map to AVStereo3D.
2523 if (s->sei_frame_packing_present &&
2524 s->frame_packing_arrangement_type >= 3 &&
2525 s->frame_packing_arrangement_type <= 5 &&
2526 s->content_interpretation_type > 0 &&
2527 s->content_interpretation_type < 3) {
2528 AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2530 return AVERROR(ENOMEM);
// (case labels for this switch are on dropped lines: 3=side-by-side,
// 4=top-bottom, 5=frame sequence.)
2532 switch (s->frame_packing_arrangement_type) {
2534 if (s->quincunx_subsampling)
2535 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2537 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2540 stereo->type = AV_STEREO3D_TOPBOTTOM;
2543 stereo->type = AV_STEREO3D_FRAMESEQUENCE;
// content_interpretation_type == 2 means right view first.
2547 if (s->content_interpretation_type == 2)
2548 stereo->flags = AV_STEREO3D_FLAG_INVERT;
// Display orientation SEI -> 3x3 display matrix side data.
2551 if (s->sei_display_orientation_present &&
2552 (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
// anticlockwise_rotation is in units of 2^-16 turns; convert to degrees.
2553 double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2554 AVFrameSideData *rotation = av_frame_new_side_data(out,
2555 AV_FRAME_DATA_DISPLAYMATRIX,
2556 sizeof(int32_t) * 9);
2558 return AVERROR(ENOMEM);
2560 av_display_rotation_set((int32_t *)rotation->data, angle);
2561 av_display_matrix_flip((int32_t *)rotation->data,
2562 s->sei_hflip, s->sei_vflip);
// Per-frame initialization: clear the per-picture decoding tables, allocate
// a new reference frame for the current POC, build the reference picture
// sets, attach side data and try to output a completed frame from the DPB.
// On failure the new frame is unreferenced (fail path at the bottom).
2568 static int hevc_frame_start(HEVCContext *s)
2570 HEVCLocalContext *lc = s->HEVClc;
2571 int pic_size_in_ctb = ((s->ps.sps->width >> s->ps.sps->log2_min_cb_size) + 1) *
2572 ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
// Reset deblocking strengths, cbf and PCM maps; -1 marks CTBs as
// not-yet-decoded in tab_slice_address.
2575 memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2576 memset(s->vertical_bs, 0, s->bs_width * s->bs_height);
2577 memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2578 memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2579 memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
// Remember the first VCL NAL type so later slices can be cross-checked.
2582 s->first_nal_type = s->nal_unit_type;
2584 if (s->ps.pps->tiles_enabled_flag)
2585 lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2587 ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2591 ret = ff_hevc_frame_rps(s);
2593 av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2597 s->ref->frame->key_frame = IS_IRAP(s);
2599 ret = set_side_data(s);
// slice_type: 2=I,1=P,0=B -> AV_PICTURE_TYPE_{I,P,B} (1,2,3).
2603 s->frame->pict_type = 3 - s->sh.slice_type;
2606 ff_hevc_bump_frame(s);
2608 av_frame_unref(s->output_frame);
2609 ret = ff_hevc_output_frame(s, s->output_frame, 0);
// With hwaccel, setup finishes in the hwaccel-specific code instead.
2613 if (!s->avctx->hwaccel)
2614 ff_thread_finish_setup(s->avctx);
// fail path: drop the newly created reference on any error above.
2620 ff_hevc_unref_frame(s, s->ref, ~0);
// Dispatch a single NAL unit: parameter sets (VPS/SPS/PPS) and SEI are
// parsed into s->ps / SEI state; VCL NALs get slice-header parsing, RASL
// handling after a random access point, frame start on the first slice,
// and finally slice-data decoding (WPP or single-threaded).
// NOTE(review): dropped lines hide several case labels, braces and returns.
2625 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2627 HEVCLocalContext *lc = s->HEVClc;
2628 GetBitContext *gb = &lc->gb;
2629 int ctb_addr_ts, ret;
2632 s->nal_unit_type = nal->type;
2633 s->temporal_id = nal->temporal_id;
2635 switch (s->nal_unit_type) {
2637 ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2642 ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2643 s->apply_defdispwin);
2648 ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2652 case NAL_SEI_PREFIX:
2653 case NAL_SEI_SUFFIX:
2654 ret = ff_hevc_decode_nal_sei(s);
// VCL NAL types (several case labels on dropped lines).
2665 case NAL_BLA_W_RADL:
2667 case NAL_IDR_W_RADL:
2674 ret = hls_slice_header(s);
// After a seek: set max_ra from the first CRA/BLA so that leading
// RASL pictures (which reference unavailable frames) can be skipped.
2678 if (s->max_ra == INT_MAX) {
2679 if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2683 s->max_ra = INT_MIN;
// Discard RASL pictures whose POC precedes the recovery point.
2687 if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2688 s->poc <= s->max_ra) {
2692 if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2693 s->max_ra = INT_MIN;
2696 if (s->sh.first_slice_in_pic_flag) {
2697 ret = hevc_frame_start(s);
2700 } else if (!s->ref) {
2701 av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
// All VCL NALs of one picture must share the same NAL type.
2705 if (s->nal_unit_type != s->first_nal_type) {
2706 av_log(s->avctx, AV_LOG_ERROR,
2707 "Non-matching NAL types of the VCL NALUs: %d %d\n",
2708 s->first_nal_type, s->nal_unit_type);
2709 return AVERROR_INVALIDDATA;
2712 if (!s->sh.dependent_slice_segment_flag &&
2713 s->sh.slice_type != I_SLICE) {
2714 ret = ff_hevc_slice_rpl(s);
2716 av_log(s->avctx, AV_LOG_WARNING,
2717 "Error constructing the reference lists for the current slice.\n");
2722 if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2723 ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2728 if (s->avctx->hwaccel) {
2729 ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
// Software path: WPP when entry points exist and threading is on.
2733 if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2734 ctb_addr_ts = hls_slice_data_wpp(s, nal);
2736 ctb_addr_ts = hls_slice_data(s);
// All CTBs decoded -> picture complete.
2737 if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2741 if (ctb_addr_ts < 0) {
// EOS/EOB: restart POC sequencing and reset random-access tracking.
2749 s->seq_decode = (s->seq_decode + 1) & 0xff;
2750 s->max_ra = INT_MAX;
2756 av_log(s->avctx, AV_LOG_INFO,
2757 "Skipping NAL unit %d\n", s->nal_unit_type);
// fail path: errors are fatal only with AV_EF_EXPLODE.
2762 if (s->avctx->err_recognition & AV_EF_EXPLODE)
// Split an input packet into NAL units and decode them in order.
// Errors in individual NALs are warnings unless AV_EF_EXPLODE is set
// (the goto target for that is on a dropped line).
2767 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2772 s->last_eos = s->eos;
2775 /* split the input packet into NAL units, so we know the upper bound on the
2776 * number of slices in the frame */
2777 ret = ff_hevc_split_packet(s, &s->pkt, buf, length, s->avctx, s->is_nalff,
2778 s->nal_length_size);
2780 av_log(s->avctx, AV_LOG_ERROR,
2781 "Error splitting the input into NAL units.\n");
// Pre-scan for end-of-sequence/bitstream markers (effect on dropped line,
// presumably setting s->eos — confirm against full source).
2785 for (i = 0; i < s->pkt.nb_nals; i++) {
2786 if (s->pkt.nals[i].type == NAL_EOB_NUT ||
2787 s->pkt.nals[i].type == NAL_EOS_NUT)
2791 /* decode the NAL units */
2792 for (i = 0; i < s->pkt.nb_nals; i++) {
2793 ret = decode_nal_unit(s, &s->pkt.nals[i]);
2795 av_log(s->avctx, AV_LOG_WARNING,
2796 "Error parsing NAL unit #%d.\n", i);
// With frame threading, unblock any consumer waiting on this picture.
2802 if (s->ref && s->threads_type == FF_THREAD_FRAME)
2803 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
// Log a 16-byte MD5 digest as 32 lowercase hex characters at the given level.
2808 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
2811 for (i = 0; i < 16; i++)
2812 av_log(log_ctx, level, "%02"PRIx8, md5[i]);
// Verify the decoded frame against the per-plane MD5 checksums carried in
// the picture-hash SEI (s->md5). Checksums are stored little-endian, so
// >8bpp planes are byteswapped into a scratch buffer before hashing.
// Returns 0 on match, AVERROR_INVALIDDATA on mismatch.
2815 static int verify_md5(HEVCContext *s, AVFrame *frame)
2817 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2822 return AVERROR(EINVAL);
// 1 when samples are 2 bytes wide (bit depth > 8).
2824 pixel_shift = desc->comp[0].depth_minus1 > 7;
2826 av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2829 /* the checksums are LE, so we have to byteswap for >8bpp formats
2832 if (pixel_shift && !s->checksum_buf) {
2833 av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2834 FFMAX3(frame->linesize[0], frame->linesize[1],
2835 frame->linesize[2]));
2836 if (!s->checksum_buf)
2837 return AVERROR(ENOMEM);
// Hash each plane row by row over the coded (not cropped) dimensions.
2841 for (i = 0; frame->data[i]; i++) {
2842 int width = s->avctx->coded_width;
2843 int height = s->avctx->coded_height;
// Chroma planes (1 and 2) use the subsampled dimensions.
2844 int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
2845 int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2848 av_md5_init(s->md5_ctx);
2849 for (j = 0; j < h; j++) {
2850 const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2853 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2854 (const uint16_t *) src, w);
2855 src = s->checksum_buf;
2858 av_md5_update(s->md5_ctx, src, w << pixel_shift);
2860 av_md5_final(s->md5_ctx, md5);
2862 if (!memcmp(md5, s->md5[i], 16)) {
2863 av_log (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2864 print_md5(s->avctx, AV_LOG_DEBUG, md5);
2865 av_log (s->avctx, AV_LOG_DEBUG, "; ");
2867 av_log (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2868 print_md5(s->avctx, AV_LOG_ERROR, md5);
2869 av_log (s->avctx, AV_LOG_ERROR, " != ");
2870 print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2871 av_log (s->avctx, AV_LOG_ERROR, "\n");
2872 return AVERROR_INVALIDDATA;
2876 av_log(s->avctx, AV_LOG_DEBUG, "\n");
// AVCodec.decode callback: on a flush packet, drain a frame from the DPB;
// otherwise decode all NAL units in the packet, finish any hwaccel frame,
// optionally verify the SEI MD5 checksum, and hand out the output frame.
// NOTE(review): dropped lines hide the flush branch condition and the
// *got_output assignments in this listing.
2881 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2885 HEVCContext *s = avctx->priv_data;
// Drain path: output a buffered frame with flush=1.
2888 ret = ff_hevc_output_frame(s, data, 1);
2897 ret = decode_nal_units(s, avpkt->data, avpkt->size);
2901 if (avctx->hwaccel) {
2902 if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
2903 av_log(avctx, AV_LOG_ERROR,
2904 "hardware accelerator failed to decode picture\n");
2905 ff_hevc_unref_frame(s, s->ref, ~0);
2909 /* verify the SEI checksum */
2910 if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2912 ret = verify_md5(s, s->ref->frame);
// Checksum mismatch is fatal only with AV_EF_EXPLODE.
2913 if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2914 ff_hevc_unref_frame(s, s->ref, ~0);
2921 if (s->is_decoded) {
2922 av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
// A frame became ready for output during decoding: move it to the caller.
2926 if (s->output_frame->buf[0]) {
2927 av_frame_move_ref(data, s->output_frame);
// Create a new reference to src in dst: ref-counts the frame buffer and the
// per-frame tables (tab_mvf, rpl_tab, rpl, hwaccel private data) and copies
// the plain metadata fields. On any allocation failure everything acquired
// so far is released via the fail path and AVERROR(ENOMEM) is returned.
2934 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2938 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2942 dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2943 if (!dst->tab_mvf_buf)
2945 dst->tab_mvf = src->tab_mvf;
2947 dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2948 if (!dst->rpl_tab_buf)
2950 dst->rpl_tab = src->rpl_tab;
2952 dst->rpl_buf = av_buffer_ref(src->rpl_buf);
// Shallow-copy plain metadata (no ownership involved).
2956 dst->poc = src->poc;
2957 dst->ctb_count = src->ctb_count;
2958 dst->window = src->window;
2959 dst->flags = src->flags;
2960 dst->sequence = src->sequence;
2962 if (src->hwaccel_picture_private) {
2963 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2964 if (!dst->hwaccel_priv_buf)
2966 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
// fail path: release all references taken above.
2971 ff_hevc_unref_frame(s, dst, ~0);
2972 return AVERROR(ENOMEM);
// AVCodec.close callback: free everything hevc_init_context() and decoding
// allocated — scratch buffers, the DPB, parameter-set lists, slice-header
// arrays, per-thread contexts and the parsed-NAL pool. Safe to call on a
// partially initialized context (av_freep/av_buffer_unref tolerate NULL).
2975 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2977 HEVCContext *s = avctx->priv_data;
2982 av_freep(&s->md5_ctx);
2984 av_freep(&s->cabac_state);
2986 for (i = 0; i < 3; i++) {
2987 av_freep(&s->sao_pixel_buffer_h[i]);
2988 av_freep(&s->sao_pixel_buffer_v[i]);
2990 av_frame_free(&s->output_frame);
// Release every DPB slot: drop references first, then the frame itself.
2992 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2993 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2994 av_frame_free(&s->DPB[i].frame);
2997 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
2998 av_buffer_unref(&s->ps.vps_list[i]);
2999 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
3000 av_buffer_unref(&s->ps.sps_list[i]);
3001 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
3002 av_buffer_unref(&s->ps.pps_list[i]);
3007 av_freep(&s->sh.entry_point_offset);
3008 av_freep(&s->sh.offset);
3009 av_freep(&s->sh.size);
// Per-thread WPP clones (index 0 is the main context, handled below).
3011 for (i = 1; i < s->threads_number; i++) {
3012 HEVCLocalContext *lc = s->HEVClcList[i];
3014 av_freep(&s->HEVClcList[i]);
3015 av_freep(&s->sList[i]);
// Avoid a dangling s->HEVClc after freeing HEVClcList[0]
// (the reset of s->HEVClc is on a dropped line).
3018 if (s->HEVClc == s->HEVClcList[0])
3020 av_freep(&s->HEVClcList[0]);
3022 for (i = 0; i < s->pkt.nals_allocated; i++) {
3023 av_freep(&s->pkt.nals[i].rbsp_buffer);
3024 av_freep(&s->pkt.nals[i].skipped_bytes_pos);
3026 av_freep(&s->pkt.nals);
3027 s->pkt.nals_allocated = 0;
// Allocate the long-lived pieces of the decoder context: the main local
// context, CABAC state, output frame, DPB frames and MD5 context. On any
// allocation failure the fail path tears everything down via
// hevc_decode_free() and returns AVERROR(ENOMEM).
3032 static av_cold int hevc_init_context(AVCodecContext *avctx)
3034 HEVCContext *s = avctx->priv_data;
3039 s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3042 s->HEVClcList[0] = s->HEVClc;
3045 s->cabac_state = av_malloc(HEVC_CONTEXTS);
3046 if (!s->cabac_state)
3049 s->output_frame = av_frame_alloc();
3050 if (!s->output_frame)
// Pre-allocate one AVFrame per DPB slot; tf.f aliases it for frame threading.
3053 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3054 s->DPB[i].frame = av_frame_alloc();
3055 if (!s->DPB[i].frame)
3057 s->DPB[i].tf.f = s->DPB[i].frame;
// INT_MAX means "recovery point not yet known" (see decode_nal_unit).
3060 s->max_ra = INT_MAX;
3062 s->md5_ctx = av_md5_alloc();
3066 ff_bswapdsp_init(&s->bdsp);
3068 s->context_initialized = 1;
// fail path: full cleanup of whatever was allocated above.
3074 hevc_decode_free(avctx);
3075 return AVERROR(ENOMEM);
// Frame-threading update callback: synchronize this thread's context with
// the source thread's — re-reference the DPB, re-reference the VPS/SPS/PPS
// lists, re-apply the active SPS if it changed, and copy the scalar state
// needed to continue decoding.
3078 static int hevc_update_thread_context(AVCodecContext *dst,
3079 const AVCodecContext *src)
3081 HEVCContext *s = dst->priv_data;
3082 HEVCContext *s0 = src->priv_data;
3085 if (!s->context_initialized) {
3086 ret = hevc_init_context(dst);
// Mirror the source DPB: drop our references, re-take theirs.
3091 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3092 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3093 if (s0->DPB[i].frame->buf[0]) {
3094 ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
// (Effect of this SPS-changed check is on a dropped line — likely
// clearing the active SPS before the list refresh; confirm in full source.)
3100 if (s->ps.sps != s0->ps.sps)
3102 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
3103 av_buffer_unref(&s->ps.vps_list[i]);
3104 if (s0->ps.vps_list[i]) {
3105 s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
3106 if (!s->ps.vps_list[i])
3107 return AVERROR(ENOMEM);
3111 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3112 av_buffer_unref(&s->ps.sps_list[i]);
3113 if (s0->ps.sps_list[i]) {
3114 s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
3115 if (!s->ps.sps_list[i])
3116 return AVERROR(ENOMEM);
3120 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
3121 av_buffer_unref(&s->ps.pps_list[i]);
3122 if (s0->ps.pps_list[i]) {
3123 s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
3124 if (!s->ps.pps_list[i])
3125 return AVERROR(ENOMEM);
// Activate the source's SPS on this context if it differs.
3129 if (s->ps.sps != s0->ps.sps)
3130 if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
// Scalar decoding state carried across threads.
3133 s->seq_decode = s0->seq_decode;
3134 s->seq_output = s0->seq_output;
3135 s->pocTid0 = s0->pocTid0;
3136 s->max_ra = s0->max_ra;
3139 s->is_nalff = s0->is_nalff;
3140 s->nal_length_size = s0->nal_length_size;
3142 s->threads_number = s0->threads_number;
3143 s->threads_type = s0->threads_type;
// (Condition guarding this sequence bump is on a dropped line —
// presumably the EOS case; confirm in full source.)
3146 s->seq_decode = (s->seq_decode + 1) & 0xff;
3147 s->max_ra = INT_MAX;
/* Parse codec extradata: either hvcC (length-prefixed NAL unit arrays, as in
 * MP4/Matroska) or raw Annex B NAL units.  Sets s->is_nalff and
 * s->nal_length_size accordingly, decodes the contained parameter sets, and
 * exports stream parameters from the first SPS found.
 * Returns 0 on success or a negative AVERROR code. */
static int hevc_decode_extradata(HEVCContext *s)
{
    AVCodecContext *avctx = s->avctx;
    GetByteContext gb;
    int ret, i;

    bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);

    /* hvcC is detected by the absence of an Annex B start code prefix. */
    if (avctx->extradata_size > 3 &&
        (avctx->extradata[0] || avctx->extradata[1] ||
         avctx->extradata[2] > 1)) {
        /* It seems the extradata is encoded as hvcC format.
         * Temporarily, we support configurationVersion==0 until 14496-15 3rd
         * is finalized. When finalized, configurationVersion will be 1 and we
         * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
        int i, j, num_arrays, nal_len_size;

        s->is_nalff = 1;

        /* Skip the fixed 21-byte HEVCDecoderConfigurationRecord header. */
        bytestream2_skip(&gb, 21);
        nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
        num_arrays   = bytestream2_get_byte(&gb);

        /* nal units in the hvcC always have length coded with 2 bytes,
         * so put a fake nal_length_size = 2 while parsing them */
        s->nal_length_size = 2;

        /* Decode nal units from hvcC. */
        for (i = 0; i < num_arrays; i++) {
            int type = bytestream2_get_byte(&gb) & 0x3f;
            int cnt  = bytestream2_get_be16(&gb);

            for (j = 0; j < cnt; j++) {
                // +2 for the nal size field
                int nalsize = bytestream2_peek_be16(&gb) + 2;
                if (bytestream2_get_bytes_left(&gb) < nalsize) {
                    av_log(s->avctx, AV_LOG_ERROR,
                           "Invalid NAL unit size in extradata.\n");
                    return AVERROR_INVALIDDATA;
                }

                ret = decode_nal_units(s, gb.buffer, nalsize);
                if (ret < 0) {
                    av_log(avctx, AV_LOG_ERROR,
                           "Decoding nal unit %d %d from hvcC failed\n",
                           type, i);
                    return ret;
                }
                bytestream2_skip(&gb, nalsize);
            }
        }

        /* Now store right nal length size, that will be used to parse
         * all other nals */
        s->nal_length_size = nal_len_size;
    } else {
        s->is_nalff = 0;
        ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
        if (ret < 0)
            return ret;
    }

    /* export stream parameters from the first SPS */
    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
        if (s->ps.sps_list[i]) {
            const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
            export_stream_params(s->avctx, &s->ps, sps);
            break;
        }
    }

    return 0;
}
/* Codec init callback: set up global CABAC tables, allocate the context,
 * pick the threading configuration and parse any extradata (parameter sets).
 * Returns 0 on success or a negative AVERROR code. */
static av_cold int hevc_decode_init(AVCodecContext *avctx)
{
    HEVCContext *s = avctx->priv_data;
    int ret;

    /* Global, idempotent initialization of the shared CABAC LPS tables. */
    ff_init_cabac_states();

    /* Needed for ff_thread_report/await_progress() with frame threading. */
    avctx->internal->allocate_progress = 1;

    ret = hevc_init_context(avctx);
    if (ret < 0)
        return ret;

    s->enable_parallel_tiles = 0;
    s->picture_struct = 0;

    if (avctx->active_thread_type & FF_THREAD_SLICE)
        s->threads_number = avctx->thread_count;
    else
        s->threads_number = 1;

    if (avctx->extradata_size > 0 && avctx->extradata) {
        ret = hevc_decode_extradata(s);
        if (ret < 0) {
            hevc_decode_free(avctx);
            return ret;
        }
    }

    /* Frame threading takes precedence over slice threading when both are
     * permitted and more than one thread is available. */
    if ((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
        s->threads_type = FF_THREAD_FRAME;
    else
        s->threads_type = FF_THREAD_SLICE;

    return 0;
}
3264 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3266 HEVCContext *s = avctx->priv_data;
3269 memset(s, 0, sizeof(*s));
3271 ret = hevc_init_context(avctx);
3278 static void hevc_decode_flush(AVCodecContext *avctx)
3280 HEVCContext *s = avctx->priv_data;
3281 ff_hevc_flush_dpb(s);
3282 s->max_ra = INT_MAX;
/* Helpers for the AVOption table below: member offset into HEVCContext and
 * the common decoding/video option flags. */
#define OFFSET(x) offsetof(HEVCContext, x)
#define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* Profiles advertised by this decoder; terminated by FF_PROFILE_UNKNOWN. */
static const AVProfile profiles[] = {
    { FF_PROFILE_HEVC_MAIN,                 "Main"                },
    { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
    { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
    { FF_PROFILE_HEVC_REXT,                 "Rext"                },
    { FF_PROFILE_UNKNOWN },
};
/* User-visible decoder options.  Both entries toggle the same field
 * (apply_defdispwin): whether the VUI default display window (cropping)
 * is applied to output frames. */
static const AVOption options[] = {
    { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
        AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
    /* NOTE(review): "stricly" is a typo in the help string, but it is
     * user-visible runtime text, so it is left unchanged here. */
    { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
        AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
    { NULL },
};
/* AVClass binding the option table above to the decoder's private context,
 * enabling av_opt_*() access and option help output. */
static const AVClass hevc_decoder_class = {
    .class_name = "HEVC decoder",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};
/* Public codec descriptor registering the native HEVC decoder with
 * libavcodec, including its threading capabilities (both slice and frame
 * threading are supported). */
AVCodec ff_hevc_decoder = {
    .name                  = "hevc",
    .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_HEVC,
    .priv_data_size        = sizeof(HEVCContext),
    .priv_class            = &hevc_decoder_class,
    .init                  = hevc_decode_init,
    .close                 = hevc_decode_free,
    .decode                = hevc_decode_frame,
    .flush                 = hevc_decode_flush,
    .update_thread_context = hevc_update_thread_context,
    .init_thread_copy      = hevc_init_thread_copy,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
                             AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
    .profiles              = NULL_IF_CONFIG_SMALL(profiles),
};