4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 * Copyright (C) 2012 - 2013 Mickael Raulet
6 * Copyright (C) 2012 - 2013 Gildas Cocherel
7 * Copyright (C) 2012 - 2013 Wassim Hamidouche
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/atomic.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/common.h"
29 #include "libavutil/display.h"
30 #include "libavutil/internal.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
/* Sparse lookup table: the values 2, 4, 6, 8, 12, 16, 24, 32, 48 and 64 map to
 * the consecutive indices 0..9; every entry not listed is implicitly zero. */
42 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
45 * NOTE: Each function hls_foo corresponds to the function foo in the
46 * specification (HLS stands for High Level Syntax).
53 /* free everything allocated by pic_arrays_init() */
/* Also releases the slice-header entry point arrays (s->sh.*), which are
 * allocated while parsing the slice header rather than in pic_arrays_init().
 * Every pointer is handed to av_freep() by address. */
54 static void pic_arrays_free(HEVCContext *s)
/* table sized by CTB count */
57 av_freep(&s->deblock);
/* tables sized in minimum-coding-block units */
59 av_freep(&s->skip_flag);
60 av_freep(&s->tab_ct_depth);
/* tables sized in minimum-PU / minimum-TB units */
62 av_freep(&s->tab_ipm);
63 av_freep(&s->cbf_luma);
/* QP, slice address and slice-edge filtering tables */
66 av_freep(&s->qp_y_tab);
67 av_freep(&s->tab_slice_address);
68 av_freep(&s->filter_slice_edges);
/* tables of bs_width * bs_height entries */
70 av_freep(&s->horizontal_bs);
71 av_freep(&s->vertical_bs);
/* entry point arrays owned by the slice header */
73 av_freep(&s->sh.entry_point_offset);
74 av_freep(&s->sh.size);
75 av_freep(&s->sh.offset);
/* buffer pools created by pic_arrays_init() */
77 av_buffer_pool_uninit(&s->tab_mvf_pool);
78 av_buffer_pool_uninit(&s->rpl_tab_pool);
81 /* allocate arrays that depend on frame dimensions */
/* All sizes are derived from the given SPS. Allocations are made in small
 * groups and each group is NULL-checked right after it; the failure return
 * visible at the end of the function is AVERROR(ENOMEM). */
82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
84 int log2_min_cb_size = sps->log2_min_cb_size;
85 int width = sps->width;
86 int height = sps->height;
/* Despite its name this count is in minimum-coding-block units, with one
 * extra column and one extra row. */
87 int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
88 ((height >> log2_min_cb_size) + 1);
89 int ctb_count = sps->ctb_width * sps->ctb_height;
90 int min_pu_size = sps->min_pu_width * sps->min_pu_height;
/* one entry per 4 samples in each direction, plus one */
92 s->bs_width = (width >> 2) + 1;
93 s->bs_height = (height >> 2) + 1;
/* zero-initialised arrays with one element per CTB */
95 s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
96 s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
97 if (!s->sao || !s->deblock)
/* one byte per minimum coding block (not zero-initialised) */
100 s->skip_flag = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
101 s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
102 if (!s->skip_flag || !s->tab_ct_depth)
/* cbf_luma: one byte per minimum TB; tab_ipm: zeroed, one byte per minimum PU;
 * is_pcm: one byte per minimum PU with one extra row and column */
105 s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
106 s->tab_ipm = av_mallocz(min_pu_size);
107 s->is_pcm = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
108 if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
/* filter_slice_edges is zeroed and has one byte per CTB; the other two tables
 * have pic_size_in_ctb elements */
111 s->filter_slice_edges = av_mallocz(ctb_count);
112 s->tab_slice_address = av_malloc_array(pic_size_in_ctb,
113 sizeof(*s->tab_slice_address));
114 s->qp_y_tab = av_malloc_array(pic_size_in_ctb,
115 sizeof(*s->qp_y_tab));
116 if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
/* zeroed tables of bs_width * bs_height bytes */
119 s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
120 s->vertical_bs = av_mallocz_array(s->bs_width, s->bs_height);
121 if (!s->horizontal_bs || !s->vertical_bs)
/* Buffer pools: each buffer holds min_pu_size MvField entries, respectively
 * ctb_count RefPicListTab entries. */
124 s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
126 s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
128 if (!s->tab_mvf_pool || !s->rpl_tab_pool)
/* allocation failure */
135 return AVERROR(ENOMEM);
/* Parse the explicit weighted prediction table of a slice header from gb into
 * s->sh (log2 denominators, per-reference luma/chroma weights and offsets).
 * List L0 is always parsed; list L1 only for B slices. The function returns
 * nothing: an out-of-range luma_log2_weight_denom is logged and the value is
 * then clipped, not rejected. */
138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
/* per-reference "explicit weight present" flags, filled in before the weights
 * themselves are read */
142 uint8_t luma_weight_l0_flag[16];
143 uint8_t chroma_weight_l0_flag[16];
144 uint8_t luma_weight_l1_flag[16];
145 uint8_t chroma_weight_l1_flag[16];
146 int luma_log2_weight_denom;
/* valid range is 0..7; anything else is reported and clipped with
 * av_clip_uintp2(.., 3) */
148 luma_log2_weight_denom = get_ue_golomb_long(gb);
149 if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
150 av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
151 s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
/* the chroma denominator is coded as a signed delta against the luma one */
152 if (s->ps.sps->chroma_format_idc != 0) {
153 int delta = get_se_golomb(gb);
154 s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
/* L0 luma flags; a reference without explicit weight gets the identity
 * weight (1 << denom) and a zero offset */
157 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
158 luma_weight_l0_flag[i] = get_bits1(gb);
159 if (!luma_weight_l0_flag[i]) {
160 s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
161 s->sh.luma_offset_l0[i] = 0;
/* L0 chroma flags are read from the bitstream when chroma_format_idc != 0 */
164 if (s->ps.sps->chroma_format_idc != 0) {
165 for (i = 0; i < s->sh.nb_refs[L0]; i++)
166 chroma_weight_l0_flag[i] = get_bits1(gb);
/* fallback: mark every L0 chroma weight as absent */
168 for (i = 0; i < s->sh.nb_refs[L0]; i++)
169 chroma_weight_l0_flag[i] = 0;
/* L0 explicit values */
171 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
172 if (luma_weight_l0_flag[i]) {
/* the weight is coded as a delta against the identity weight */
173 int delta_luma_weight_l0 = get_se_golomb(gb);
174 s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
175 s->sh.luma_offset_l0[i] = get_se_golomb(gb);
177 if (chroma_weight_l0_flag[i]) {
/* j iterates over the two chroma components */
178 for (j = 0; j < 2; j++) {
179 int delta_chroma_weight_l0 = get_se_golomb(gb);
180 int delta_chroma_offset_l0 = get_se_golomb(gb);
181 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
/* offset = delta - ((128 * weight) >> denom) + 128, clipped to [-128, 127] */
182 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
183 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
/* chroma defaults: identity weight and zero offset for both components */
186 s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
187 s->sh.chroma_offset_l0[i][0] = 0;
188 s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
189 s->sh.chroma_offset_l0[i][1] = 0;
/* List L1: same procedure as for L0, only for B slices */
192 if (s->sh.slice_type == B_SLICE) {
193 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
194 luma_weight_l1_flag[i] = get_bits1(gb);
195 if (!luma_weight_l1_flag[i]) {
196 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
197 s->sh.luma_offset_l1[i] = 0;
200 if (s->ps.sps->chroma_format_idc != 0) {
201 for (i = 0; i < s->sh.nb_refs[L1]; i++)
202 chroma_weight_l1_flag[i] = get_bits1(gb);
/* fallback: mark every L1 chroma weight as absent */
204 for (i = 0; i < s->sh.nb_refs[L1]; i++)
205 chroma_weight_l1_flag[i] = 0;
/* L1 explicit values */
207 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
208 if (luma_weight_l1_flag[i]) {
209 int delta_luma_weight_l1 = get_se_golomb(gb);
210 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
211 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
213 if (chroma_weight_l1_flag[i]) {
214 for (j = 0; j < 2; j++) {
215 int delta_chroma_weight_l1 = get_se_golomb(gb);
216 int delta_chroma_offset_l1 = get_se_golomb(gb);
217 s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
218 s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
219 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
/* chroma defaults: identity weight and zero offset for both components */
222 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
223 s->sh.chroma_offset_l1[i][0] = 0;
224 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
225 s->sh.chroma_offset_l1[i][1] = 0;
/* Parse the long-term reference picture set of a slice header from gb into
 * rps (rps->nb_refs, rps->poc[], rps->used[]).
 * Returns AVERROR_INVALIDDATA when the signalled number of entries does not
 * fit into rps->poc[]. */
231 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
233 const HEVCSPS *sps = s->ps.sps;
234 int max_poc_lsb = 1 << sps->log2_max_poc_lsb;
235 int prev_delta_msb = 0;
236 unsigned int nb_sps = 0, nb_sh;
/* nothing is coded when the SPS does not enable long-term references */
240 if (!sps->long_term_ref_pics_present_flag)
/* nb_sps is only coded when the SPS carries candidate long-term pictures;
 * nb_sh is always coded */
243 if (sps->num_long_term_ref_pics_sps > 0)
244 nb_sps = get_ue_golomb_long(gb);
245 nb_sh = get_ue_golomb_long(gb);
/* the sum is formed in 64 bits so that two large unsigned counts cannot wrap
 * around and slip past the capacity check */
247 if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
248 return AVERROR_INVALIDDATA;
250 rps->nb_refs = nb_sh + nb_sps;
252 for (i = 0; i < rps->nb_refs; i++) {
253 uint8_t delta_poc_msb_present;
/* entry taken from the SPS tables: the index is only coded when the SPS holds
 * more than one candidate, otherwise candidate 0 is used */
256 uint8_t lt_idx_sps = 0;
258 if (sps->num_long_term_ref_pics_sps > 1)
259 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
261 rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
262 rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
/* entry coded directly in the slice header: POC LSBs plus a "used" bit */
264 rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb);
265 rps->used[i] = get_bits1(gb);
268 delta_poc_msb_present = get_bits1(gb);
269 if (delta_poc_msb_present) {
270 int delta = get_ue_golomb_long(gb);
/* the MSB delta accumulates over consecutive entries, except for the very
 * first entry and for the entry at index nb_sps */
272 if (i && i != nb_sps)
273 delta += prev_delta_msb;
/* extend the LSB value stored above using the current picture's POC
 * (s->poc), its LSBs and the accumulated MSB delta */
275 rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
276 prev_delta_msb = delta;
/* Copy stream-level properties from the given SPS (and the VPS it refers to
 * through sps->vps_id) into the codec context: pixel format, dimensions,
 * reorder depth, profile/level, sample aspect ratio, colour description and
 * frame rate. */
283 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
286 const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
287 unsigned int num = 0, den = 0;
/* coded_* carry the SPS dimensions, width/height the output dimensions */
289 avctx->pix_fmt = sps->pix_fmt;
290 avctx->coded_width = sps->width;
291 avctx->coded_height = sps->height;
292 avctx->width = sps->output_width;
293 avctx->height = sps->output_height;
/* reorder depth is taken from the highest sub-layer */
294 avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
295 avctx->profile = sps->ptl.general_ptl.profile_idc;
296 avctx->level = sps->ptl.general_ptl.level_idc;
298 ff_set_sar(avctx, sps->vui.sar);
/* colour range: taken from the VUI full-range flag when the video signal type
 * is present; AVCOL_RANGE_MPEG is the other value assigned here */
300 if (sps->vui.video_signal_type_present_flag)
301 avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
304 avctx->color_range = AVCOL_RANGE_MPEG;
/* colour description: VUI values when present, UNSPECIFIED constants otherwise */
306 if (sps->vui.colour_description_present_flag) {
307 avctx->color_primaries = sps->vui.colour_primaries;
308 avctx->color_trc = sps->vui.transfer_characteristic;
309 avctx->colorspace = sps->vui.matrix_coeffs;
311 avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
312 avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
313 avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
/* timing information from the VPS takes precedence over the SPS VUI */
316 if (vps->vps_timing_info_present_flag) {
317 num = vps->vps_num_units_in_tick;
318 den = vps->vps_time_scale;
319 } else if (sps->vui.vui_timing_info_present_flag) {
320 num = sps->vui.vui_num_units_in_tick;
321 den = sps->vui.vui_time_scale;
/* Only export a frame rate when both values are non-zero. Note the argument
 * order: framerate.den is passed as the first destination and framerate.num
 * as the second. */
324 if (num != 0 && den != 0)
325 av_reduce(&avctx->framerate.den, &avctx->framerate.num,
/* (Re)configure the decoder for an SPS: allocate the picture-size dependent
 * arrays, export the stream parameters, pick the output pixel format and
 * initialise the bit-depth dependent DSP contexts and the SAO buffers.
 * pix_fmt: AV_PIX_FMT_NONE requests format negotiation through
 * ff_thread_get_format(); any other value is stored in avctx->pix_fmt as is. */
329 static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fmt)
/* pix_fmts[] has room for one entry per compiled-in hwaccel plus the SPS
 * software format and the AV_PIX_FMT_NONE terminator */
331 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VAAPI_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
332 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
342 ret = pic_arrays_init(s, sps);
346 export_stream_params(s->avctx, &s->ps, sps);
/* hardware formats are only offered for YUV420P / YUVJ420P streams */
348 if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
349 #if CONFIG_HEVC_DXVA2_HWACCEL
350 *fmt++ = AV_PIX_FMT_DXVA2_VLD;
352 #if CONFIG_HEVC_D3D11VA_HWACCEL
353 *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
355 #if CONFIG_HEVC_VAAPI_HWACCEL
356 *fmt++ = AV_PIX_FMT_VAAPI;
358 #if CONFIG_HEVC_VDPAU_HWACCEL
359 *fmt++ = AV_PIX_FMT_VDPAU;
/* no format forced by the caller: append the software format, terminate the
 * list and let ff_thread_get_format() choose */
363 if (pix_fmt == AV_PIX_FMT_NONE) {
364 *fmt++ = sps->pix_fmt;
365 *fmt = AV_PIX_FMT_NONE;
367 ret = ff_thread_get_format(s->avctx, pix_fmts);
370 s->avctx->pix_fmt = ret;
/* format given by the caller */
373 s->avctx->pix_fmt = pix_fmt;
/* prediction, DSP and video DSP function tables depend on the bit depth */
376 ff_hevc_pred_init(&s->hpc, sps->bit_depth);
377 ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
378 ff_videodsp_init (&s->vdsp, sps->bit_depth);
/* drop the SAO buffers of the previous configuration for all three planes */
380 for (i = 0; i < 3; i++) {
381 av_freep(&s->sao_pixel_buffer_h[i]);
382 av_freep(&s->sao_pixel_buffer_v[i]);
/* SAO buffers are only needed for software decoding with SAO enabled;
 * one pair per plane (3 planes with chroma, 1 without) */
385 if (sps->sao_enabled && !s->avctx->hwaccel) {
386 int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
389 for(c_idx = 0; c_idx < c_count; c_idx++) {
/* plane dimensions after chroma subsampling */
390 int w = sps->width >> sps->hshift[c_idx];
391 int h = sps->height >> sps->vshift[c_idx];
/* horizontal buffer: 2 * plane width entries per CTB row */
392 s->sao_pixel_buffer_h[c_idx] =
393 av_malloc((w * 2 * sps->ctb_height) <<
/* vertical buffer: 2 * plane height entries per CTB column */
395 s->sao_pixel_buffer_v[c_idx] =
396 av_malloc((h * 2 * sps->ctb_width) <<
/* resolve the VPS referenced by s->ps.sps->vps_id */
402 s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
/* Parse a slice segment header from s->HEVClc->gb into s->sh.
 * Along the way the referenced PPS is activated and, when the SPS it points
 * to differs from the current one, the decoder is reconfigured via set_sps().
 * Syntax elements are read strictly in bitstream order, so the statement
 * order must not be changed.
 * Failure returns visible below: AVERROR_INVALIDDATA for the bitstream checks
 * and AVERROR(ENOMEM) when the entry point arrays cannot be allocated. */
412 static int hls_slice_header(HEVCContext *s)
414 GetBitContext *gb = &s->HEVClc->gb;
415 SliceHeader *sh = &s->sh;
419 sh->first_slice_in_pic_flag = get_bits1(gb);
/* first slice of an IDR/BLA picture: advance the sequence counter, which
 * wraps at 8 bits */
420 if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
421 s->seq_decode = (s->seq_decode + 1) & 0xff;
424 ff_hevc_clear_refs(s);
/* no_output_of_prior_pics_flag is 0 unless it is read from the bitstream */
426 sh->no_output_of_prior_pics_flag = 0;
428 sh->no_output_of_prior_pics_flag = get_bits1(gb);
/* the PPS id must be in range and refer to a PPS that is present */
430 sh->pps_id = get_ue_golomb_long(gb);
431 if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
432 av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
433 return AVERROR_INVALIDDATA;
/* all slices of one picture have to use the same PPS */
435 if (!sh->first_slice_in_pic_flag &&
436 s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
437 av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
438 return AVERROR_INVALIDDATA;
440 s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
/* a CRA picture that follows an end-of-sequence forces the flag to 1 */
441 if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
442 sh->no_output_of_prior_pics_flag = 1;
/* the PPS refers to a different SPS than the active one: switch to it */
444 if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
445 const HEVCSPS* last_sps = s->ps.sps;
446 s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
/* For an IRAP picture other than CRA, a change of picture size or of
 * max_dec_pic_buffering (highest sub-layer) relative to the previous SPS
 * resets no_output_of_prior_pics_flag to 0. */
447 if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
448 if (s->ps.sps->width != last_sps->width || s->ps.sps->height != last_sps->height ||
449 s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].max_dec_pic_buffering !=
450 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
451 sh->no_output_of_prior_pics_flag = 0;
/* drop all references and reconfigure; AV_PIX_FMT_NONE makes set_sps()
 * negotiate the pixel format */
453 ff_hevc_clear_refs(s);
454 ret = set_sps(s, s->ps.sps, AV_PIX_FMT_NONE);
458 s->seq_decode = (s->seq_decode + 1) & 0xff;
/* slice segment address; only coded for slices after the first one */
462 sh->dependent_slice_segment_flag = 0;
463 if (!sh->first_slice_in_pic_flag) {
464 int slice_address_length;
466 if (s->ps.pps->dependent_slice_segments_enabled_flag)
467 sh->dependent_slice_segment_flag = get_bits1(gb);
/* the address uses ceil(log2(number of CTBs)) bits; nothing is read when
 * that length is 0 */
469 slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
470 s->ps.sps->ctb_height);
471 sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
472 if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
473 av_log(s->avctx, AV_LOG_ERROR,
474 "Invalid slice segment address: %u.\n",
475 sh->slice_segment_addr);
476 return AVERROR_INVALIDDATA;
/* an independent segment starts a new slice at its own address */
479 if (!sh->dependent_slice_segment_flag) {
480 sh->slice_addr = sh->slice_segment_addr;
/* both addresses are 0 in this case */
484 sh->slice_segment_addr = sh->slice_addr = 0;
486 s->slice_initialized = 0;
/* Independent slice segment: the full set of slice parameters follows.
 * slice_initialized stays 0 until the header has been parsed completely. */
489 if (!sh->dependent_slice_segment_flag) {
490 s->slice_initialized = 0;
492 for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
493 skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
/* only I, P and B slice types are accepted */
495 sh->slice_type = get_ue_golomb_long(gb);
496 if (!(sh->slice_type == I_SLICE ||
497 sh->slice_type == P_SLICE ||
498 sh->slice_type == B_SLICE)) {
499 av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
501 return AVERROR_INVALIDDATA;
/* IRAP pictures may only contain I slices */
503 if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
504 av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
505 return AVERROR_INVALIDDATA;
508 // when flag is not present, picture is inferred to be output
509 sh->pic_output_flag = 1;
510 if (s->ps.pps->output_flag_present_flag)
511 sh->pic_output_flag = get_bits1(gb);
513 if (s->ps.sps->separate_colour_plane_flag)
514 sh->colour_plane_id = get_bits(gb, 2);
/* Picture order count: the LSBs are coded and the full value is derived by
 * ff_hevc_compute_poc(). A POC that differs from s->poc on a slice that is
 * not the first of its picture is reported as a warning and is only fatal
 * with AV_EF_EXPLODE. */
519 sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
520 poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
521 if (!sh->first_slice_in_pic_flag && poc != s->poc) {
522 av_log(s->avctx, AV_LOG_WARNING,
523 "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
524 if (s->avctx->err_recognition & AV_EF_EXPLODE)
525 return AVERROR_INVALIDDATA;
/* Short-term RPS: either coded explicitly in the slice header or selected
 * by index from the sets stored in the SPS. 'pos' brackets the parse so
 * that the number of bits consumed can be recorded. */
530 sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
531 pos = get_bits_left(gb);
532 if (!sh->short_term_ref_pic_set_sps_flag) {
533 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
537 sh->short_term_rps = &sh->slice_rps;
539 int numbits, rps_idx;
541 if (!s->ps.sps->nb_st_rps) {
542 av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
543 return AVERROR_INVALIDDATA;
/* the index uses ceil(log2(nb_st_rps)) bits; index 0 when that is 0 */
546 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
547 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
548 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
550 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
/* Long-term RPS, again recording the number of bits consumed. An invalid
 * set is reported as a warning and is only fatal with AV_EF_EXPLODE. */
552 pos = get_bits_left(gb);
553 ret = decode_lt_rps(s, &sh->long_term_rps, gb);
555 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
556 if (s->avctx->err_recognition & AV_EF_EXPLODE)
557 return AVERROR_INVALIDDATA;
559 sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
/* temporal MVP: coded only when enabled in the SPS, 0 otherwise */
561 if (s->ps.sps->sps_temporal_mvp_enabled_flag)
562 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
564 sh->slice_temporal_mvp_enabled_flag = 0;
566 s->sh.short_term_rps = NULL;
/* condition: temporal layer 0 and none of the NAL unit types listed below */
571 if (s->temporal_id == 0 &&
572 s->nal_unit_type != NAL_TRAIL_N &&
573 s->nal_unit_type != NAL_TSA_N &&
574 s->nal_unit_type != NAL_STSA_N &&
575 s->nal_unit_type != NAL_RADL_N &&
576 s->nal_unit_type != NAL_RADL_R &&
577 s->nal_unit_type != NAL_RASL_N &&
578 s->nal_unit_type != NAL_RASL_R)
/* SAO enable flags: index 0 has its own bit, indices 1 and 2 share a single
 * bit that is only coded when chroma_format_idc is non-zero; all three are
 * cleared when SAO is disabled in the SPS */
581 if (s->ps.sps->sao_enabled) {
582 sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
583 if (s->ps.sps->chroma_format_idc) {
584 sh->slice_sample_adaptive_offset_flag[1] =
585 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
588 sh->slice_sample_adaptive_offset_flag[0] = 0;
589 sh->slice_sample_adaptive_offset_flag[1] = 0;
590 sh->slice_sample_adaptive_offset_flag[2] = 0;
/* Reference list sizes: PPS defaults, optionally overridden in the slice
 * header (coded minus 1); L1 is only used by B slices. */
593 sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
594 if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
597 sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
598 if (sh->slice_type == B_SLICE)
599 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
601 if (get_bits1(gb)) { // num_ref_idx_active_override_flag
602 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
603 if (sh->slice_type == B_SLICE)
604 sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
606 if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
607 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
608 sh->nb_refs[L0], sh->nb_refs[L1]);
609 return AVERROR_INVALIDDATA;
/* Reference list modification: nb_refs is the number of reference pictures
 * reported by ff_hevc_frame_nb_refs(); each list entry is coded with
 * ceil(log2(nb_refs)) bits. */
612 sh->rpl_modification_flag[0] = 0;
613 sh->rpl_modification_flag[1] = 0;
614 nb_refs = ff_hevc_frame_nb_refs(s);
616 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
617 return AVERROR_INVALIDDATA;
620 if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
621 sh->rpl_modification_flag[0] = get_bits1(gb);
622 if (sh->rpl_modification_flag[0]) {
623 for (i = 0; i < sh->nb_refs[L0]; i++)
624 sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
627 if (sh->slice_type == B_SLICE) {
628 sh->rpl_modification_flag[1] = get_bits1(gb);
629 if (sh->rpl_modification_flag[1] == 1)
630 for (i = 0; i < sh->nb_refs[L1]; i++)
631 sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
635 if (sh->slice_type == B_SLICE)
636 sh->mvd_l1_zero_flag = get_bits1(gb);
/* cabac_init_flag: coded only when present in the PPS, 0 otherwise */
638 if (s->ps.pps->cabac_init_present_flag)
639 sh->cabac_init_flag = get_bits1(gb);
641 sh->cabac_init_flag = 0;
/* Collocated picture for temporal MVP: list L0 unless a B slice codes the
 * list (the coded bit is inverted); the index is only coded when the
 * chosen list has more than one entry and must lie inside that list. */
643 sh->collocated_ref_idx = 0;
644 if (sh->slice_temporal_mvp_enabled_flag) {
645 sh->collocated_list = L0;
646 if (sh->slice_type == B_SLICE)
647 sh->collocated_list = !get_bits1(gb);
649 if (sh->nb_refs[sh->collocated_list] > 1) {
650 sh->collocated_ref_idx = get_ue_golomb_long(gb);
651 if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
652 av_log(s->avctx, AV_LOG_ERROR,
653 "Invalid collocated_ref_idx: %d.\n",
654 sh->collocated_ref_idx);
655 return AVERROR_INVALIDDATA;
/* explicit weighted prediction, when the PPS enables it for this slice type */
660 if ((s->ps.pps->weighted_pred_flag && sh->slice_type == P_SLICE) ||
661 (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
662 pred_weight_table(s, gb);
/* the merge candidate count is coded as (5 - value) and must be in 1..5 */
665 sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
666 if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
667 av_log(s->avctx, AV_LOG_ERROR,
668 "Invalid number of merging MVP candidates: %d.\n",
669 sh->max_num_merge_cand);
670 return AVERROR_INVALIDDATA;
/* QP delta and chroma QP offsets (offsets are 0 when not coded) */
674 sh->slice_qp_delta = get_se_golomb(gb);
676 if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
677 sh->slice_cb_qp_offset = get_se_golomb(gb);
678 sh->slice_cr_qp_offset = get_se_golomb(gb);
680 sh->slice_cb_qp_offset = 0;
681 sh->slice_cr_qp_offset = 0;
684 if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
685 sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
687 sh->cu_chroma_qp_offset_enabled_flag = 0;
/* Deblocking parameters: overridden in the slice header when the PPS allows
 * it and the override flag is set (offsets are coded divided by 2);
 * the PPS values are the other source assigned below. */
689 if (s->ps.pps->deblocking_filter_control_present_flag) {
690 int deblocking_filter_override_flag = 0;
692 if (s->ps.pps->deblocking_filter_override_enabled_flag)
693 deblocking_filter_override_flag = get_bits1(gb);
695 if (deblocking_filter_override_flag) {
696 sh->disable_deblocking_filter_flag = get_bits1(gb);
697 if (!sh->disable_deblocking_filter_flag) {
698 sh->beta_offset = get_se_golomb(gb) * 2;
699 sh->tc_offset = get_se_golomb(gb) * 2;
702 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
703 sh->beta_offset = s->ps.pps->beta_offset;
704 sh->tc_offset = s->ps.pps->tc_offset;
707 sh->disable_deblocking_filter_flag = 0;
/* The per-slice "filter across slices" flag is only coded when the PPS
 * enables it and at least one in-loop filter is active for this slice;
 * the PPS value is the other source assigned below. */
712 if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
713 (sh->slice_sample_adaptive_offset_flag[0] ||
714 sh->slice_sample_adaptive_offset_flag[1] ||
715 !sh->disable_deblocking_filter_flag)) {
716 sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
718 sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
/* a dependent slice segment needs a successfully parsed independent one */
720 } else if (!s->slice_initialized) {
721 av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
722 return AVERROR_INVALIDDATA;
/* entry points, only coded with tiles or entropy coding sync (WPP) */
725 sh->num_entry_point_offsets = 0;
726 if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
727 unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
728 // It would be possible to bound this tighter but this here is simpler
729 if (num_entry_point_offsets > get_bits_left(gb)) {
730 av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
731 return AVERROR_INVALIDDATA;
734 sh->num_entry_point_offsets = num_entry_point_offsets;
735 if (sh->num_entry_point_offsets > 0) {
/* bit length of each offset, coded minus 1; must be in 1..32 */
736 int offset_len = get_ue_golomb_long(gb) + 1;
738 if (offset_len < 1 || offset_len > 32) {
739 sh->num_entry_point_offsets = 0;
740 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
741 return AVERROR_INVALIDDATA;
/* replace the arrays of the previous slice; on allocation failure the
 * count is reset so that nobody walks the arrays */
744 av_freep(&sh->entry_point_offset);
745 av_freep(&sh->offset);
747 sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
748 sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
749 sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
750 if (!sh->entry_point_offset || !sh->offset || !sh->size) {
751 sh->num_entry_point_offsets = 0;
752 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
753 return AVERROR(ENOMEM);
755 for (i = 0; i < sh->num_entry_point_offsets; i++) {
756 unsigned val = get_bits_long(gb, offset_len);
757 sh->entry_point_offset[i] = val + 1; // +1 to get the size
/* more than one tile row/column with several threads: fall back to a
 * single thread; parallel tile decoding stays disabled */
759 if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
760 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
761 s->threads_number = 1;
763 s->enable_parallel_tiles = 0;
765 s->enable_parallel_tiles = 0;
/* slice header extension: the payload is skipped byte by byte after checking
 * (in 64-bit arithmetic) that it fits into the remaining bits */
768 if (s->ps.pps->slice_header_extension_present_flag) {
769 unsigned int length = get_ue_golomb_long(gb);
770 if (length*8LL > get_bits_left(gb)) {
771 av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
772 return AVERROR_INVALIDDATA;
774 for (i = 0; i < length; i++)
775 skip_bits(gb, 8); // slice_header_extension_data_byte
778 // Inferred parameters
/* slice QP = 26 + PPS initial QP + slice delta, summed with an unsigned
 * constant (26U); accepted range is [-qp_bd_offset, 51] */
779 sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
780 if (sh->slice_qp > 51 ||
781 sh->slice_qp < -s->ps.sps->qp_bd_offset) {
782 av_log(s->avctx, AV_LOG_ERROR,
783 "The slice_qp %d is outside the valid range "
786 -s->ps.sps->qp_bd_offset);
787 return AVERROR_INVALIDDATA;
790 sh->slice_ctb_addr_rs = sh->slice_segment_addr;
/* a dependent slice segment cannot start at CTB address 0 */
792 if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
793 av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
794 return AVERROR_INVALIDDATA;
/* reject headers that consumed more bits than were available */
797 if (get_bits_left(gb) < 0) {
798 av_log(s->avctx, AV_LOG_ERROR,
799 "Overread slice header by %d bits\n", -get_bits_left(gb));
800 return AVERROR_INVALIDDATA;
/* per-slice decoding state derived from the header */
803 s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
805 if (!s->ps.pps->cu_qp_delta_enabled_flag)
806 s->HEVClc->qp_y = s->sh.slice_qp;
808 s->slice_initialized = 1;
809 s->HEVClc->tu.cu_qp_offset_cb = 0;
810 s->HEVClc->tu.cu_qp_offset_cr = 0;
812 s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == NAL_CRA_NUT && s->last_eos);
/* Access the element of a per-CTB table at CTB coordinates (x, y); rows are
 * s->ps.sps->ctb_width elements long. Expects a variable 's' in scope. */
817 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
/* Assign sao->elem for the current CTB depending on the merge flags:
 * with merge-left the field is copied from the CTB at (rx - 1, ry), with
 * merge-up from the CTB at (rx, ry - 1); the remaining case is the one where
 * neither flag is set. Expects 'sao', 'rx', 'ry', 'sao_merge_left_flag' and
 * 'sao_merge_up_flag' (plus 's' for CTB()) in scope. */
819 #define SET_SAO(elem, value) \
821 if (!sao_merge_up_flag && !sao_merge_left_flag) \
823 else if (sao_merge_left_flag) \
824 sao->elem = CTB(s->sao, rx-1, ry).elem; \
825 else if (sao_merge_up_flag) \
826 sao->elem = CTB(s->sao, rx, ry-1).elem; \
/* Decode the SAO parameters of the CTB at CTB coordinates (rx, ry) into its
 * entry of s->sao and derive the final offset values (offset_val) from the
 * decoded magnitudes, signs and the PPS offset scale. */
831 static void hls_sao_param(HEVCContext *s, int rx, int ry)
833 HEVCLocalContext *lc = s->HEVClc;
834 int sao_merge_left_flag = 0;
835 int sao_merge_up_flag = 0;
836 SAOParams *sao = &CTB(s->sao, rx, ry);
/* Merge flags are only decoded when SAO is enabled for this slice (flag
 * index 0 or 1). Merge-left requires lc->ctb_left_flag; merge-up is only
 * considered for ry > 0 when merge-left was not chosen. */
839 if (s->sh.slice_sample_adaptive_offset_flag[0] ||
840 s->sh.slice_sample_adaptive_offset_flag[1]) {
842 if (lc->ctb_left_flag)
843 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
845 if (ry > 0 && !sao_merge_left_flag) {
847 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
/* one pass per colour component: 3 when chroma_format_idc is non-zero, else 1 */
851 for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
/* offset scale: luma value for component 0, chroma value otherwise */
852 int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
853 s->ps.pps->log2_sao_offset_scale_chroma;
/* SAO switched off for this component in the slice header */
855 if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
856 sao->type_idx[c_idx] = SAO_NOT_APPLIED;
/* component 2 takes type and edge class from component 1 */
861 sao->type_idx[2] = sao->type_idx[1];
862 sao->eo_class[2] = sao->eo_class[1];
864 SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
867 if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
/* four offset magnitudes per component */
870 for (i = 0; i < 4; i++)
871 SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
/* band offset: a sign is only coded for non-zero magnitudes, followed by the
 * band position */
873 if (sao->type_idx[c_idx] == SAO_BAND) {
874 for (i = 0; i < 4; i++) {
875 if (sao->offset_abs[c_idx][i]) {
876 SET_SAO(offset_sign[c_idx][i],
877 ff_hevc_sao_offset_sign_decode(s));
879 sao->offset_sign[c_idx][i] = 0;
882 SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
/* otherwise the edge class is decoded, except for component 2 */
883 } else if (c_idx != 2) {
884 SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
887 // Inferred parameters
/* offset_val[0] is always 0; entries 1..4 carry the signed, scaled offsets */
888 sao->offset_val[c_idx][0] = 0;
889 for (i = 0; i < 4; i++) {
890 sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
/* edge offset: the negation below does not use a decoded sign;
 * band offset: negated when the decoded sign flag is set */
891 if (sao->type_idx[c_idx] == SAO_EDGE) {
893 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
894 } else if (sao->offset_sign[c_idx][i]) {
895 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
/* apply the PPS offset scale (multiplication by a power of two) */
897 sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
/* Decode the cross-component prediction scale for chroma component 'idx'
 * and store it in lc->tu.res_scale_val.
 * A decoded log2 magnitude of 0 means "no scaling" (value 0); otherwise the
 * value is +/- 2^(log2_res_scale_abs_plus1 - 1), negative when the sign flag
 * is 1. */
905 static int hls_cross_component_pred(HEVCContext *s, int idx) {
906 HEVCLocalContext *lc = s->HEVClc;
907 int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
909 if (log2_res_scale_abs_plus1 != 0) {
910 int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
/* (1 - 2 * flag) is +1 for flag 0 and -1 for flag 1 */
911 lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
912 (1 - 2 * res_scale_sign_flag);
914 lc->tu.res_scale_val = 0;
921 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
922 int xBase, int yBase, int cb_xBase, int cb_yBase,
923 int log2_cb_size, int log2_trafo_size,
924 int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
926 HEVCLocalContext *lc = s->HEVClc;
927 const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
930 if (lc->cu.pred_mode == MODE_INTRA) {
931 int trafo_size = 1 << log2_trafo_size;
932 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
934 s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
937 if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
938 (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
939 int scan_idx = SCAN_DIAG;
940 int scan_idx_c = SCAN_DIAG;
941 int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
942 (s->ps.sps->chroma_format_idc == 2 &&
943 (cbf_cb[1] || cbf_cr[1]));
945 if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
946 lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
947 if (lc->tu.cu_qp_delta != 0)
948 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
949 lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
950 lc->tu.is_cu_qp_delta_coded = 1;
952 if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
953 lc->tu.cu_qp_delta > (25 + s->ps.sps->qp_bd_offset / 2)) {
954 av_log(s->avctx, AV_LOG_ERROR,
955 "The cu_qp_delta %d is outside the valid range "
958 -(26 + s->ps.sps->qp_bd_offset / 2),
959 (25 + s->ps.sps->qp_bd_offset / 2));
960 return AVERROR_INVALIDDATA;
963 ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
966 if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
967 !lc->cu.cu_transquant_bypass_flag && !lc->tu.is_cu_chroma_qp_offset_coded) {
968 int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
969 if (cu_chroma_qp_offset_flag) {
970 int cu_chroma_qp_offset_idx = 0;
971 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
972 cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
973 av_log(s->avctx, AV_LOG_ERROR,
974 "cu_chroma_qp_offset_idx not yet tested.\n");
976 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
977 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
979 lc->tu.cu_qp_offset_cb = 0;
980 lc->tu.cu_qp_offset_cr = 0;
982 lc->tu.is_cu_chroma_qp_offset_coded = 1;
985 if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
986 if (lc->tu.intra_pred_mode >= 6 &&
987 lc->tu.intra_pred_mode <= 14) {
988 scan_idx = SCAN_VERT;
989 } else if (lc->tu.intra_pred_mode >= 22 &&
990 lc->tu.intra_pred_mode <= 30) {
991 scan_idx = SCAN_HORIZ;
994 if (lc->tu.intra_pred_mode_c >= 6 &&
995 lc->tu.intra_pred_mode_c <= 14) {
996 scan_idx_c = SCAN_VERT;
997 } else if (lc->tu.intra_pred_mode_c >= 22 &&
998 lc->tu.intra_pred_mode_c <= 30) {
999 scan_idx_c = SCAN_HORIZ;
1003 lc->tu.cross_pf = 0;
1006 ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1007 if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1008 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1009 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1010 lc->tu.cross_pf = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1011 (lc->cu.pred_mode == MODE_INTER ||
1012 (lc->tu.chroma_mode_c == 4)));
1014 if (lc->tu.cross_pf) {
1015 hls_cross_component_pred(s, 0);
1017 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1018 if (lc->cu.pred_mode == MODE_INTRA) {
1019 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1020 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1023 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1024 log2_trafo_size_c, scan_idx_c, 1);
1026 if (lc->tu.cross_pf) {
1027 ptrdiff_t stride = s->frame->linesize[1];
1028 int hshift = s->ps.sps->hshift[1];
1029 int vshift = s->ps.sps->vshift[1];
1030 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1031 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1032 int size = 1 << log2_trafo_size_c;
1034 uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1035 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1036 for (i = 0; i < (size * size); i++) {
1037 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1039 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1043 if (lc->tu.cross_pf) {
1044 hls_cross_component_pred(s, 1);
1046 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1047 if (lc->cu.pred_mode == MODE_INTRA) {
1048 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1049 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1052 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1053 log2_trafo_size_c, scan_idx_c, 2);
1055 if (lc->tu.cross_pf) {
1056 ptrdiff_t stride = s->frame->linesize[2];
1057 int hshift = s->ps.sps->hshift[2];
1058 int vshift = s->ps.sps->vshift[2];
1059 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1060 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1061 int size = 1 << log2_trafo_size_c;
1063 uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1064 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1065 for (i = 0; i < (size * size); i++) {
1066 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1068 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1071 } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1072 int trafo_size_h = 1 << (log2_trafo_size + 1);
1073 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1074 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1075 if (lc->cu.pred_mode == MODE_INTRA) {
1076 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1077 trafo_size_h, trafo_size_v);
1078 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1081 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1082 log2_trafo_size, scan_idx_c, 1);
1084 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1085 if (lc->cu.pred_mode == MODE_INTRA) {
1086 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1087 trafo_size_h, trafo_size_v);
1088 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1091 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1092 log2_trafo_size, scan_idx_c, 2);
1095 } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1096 if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1097 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1098 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1099 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1100 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1101 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1102 if (s->ps.sps->chroma_format_idc == 2) {
1103 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1104 trafo_size_h, trafo_size_v);
1105 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1106 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1108 } else if (blk_idx == 3) {
1109 int trafo_size_h = 1 << (log2_trafo_size + 1);
1110 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1111 ff_hevc_set_neighbour_available(s, xBase, yBase,
1112 trafo_size_h, trafo_size_v);
1113 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1114 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1115 if (s->ps.sps->chroma_format_idc == 2) {
1116 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1117 trafo_size_h, trafo_size_v);
1118 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1119 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1127 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1129 int cb_size = 1 << log2_cb_size;
1130 int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1132 int min_pu_width = s->ps.sps->min_pu_width;
1133 int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1134 int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1137 for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1138 for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1139 s->is_pcm[i + j * min_pu_width] = 2;
1142 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1143 int xBase, int yBase, int cb_xBase, int cb_yBase,
1144 int log2_cb_size, int log2_trafo_size,
1145 int trafo_depth, int blk_idx,
1146 const int *base_cbf_cb, const int *base_cbf_cr)
1148 HEVCLocalContext *lc = s->HEVClc;
1149 uint8_t split_transform_flag;
1154 cbf_cb[0] = base_cbf_cb[0];
1155 cbf_cb[1] = base_cbf_cb[1];
1156 cbf_cr[0] = base_cbf_cr[0];
1157 cbf_cr[1] = base_cbf_cr[1];
1159 if (lc->cu.intra_split_flag) {
1160 if (trafo_depth == 1) {
1161 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1162 if (s->ps.sps->chroma_format_idc == 3) {
1163 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1164 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[blk_idx];
1166 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1167 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
1171 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[0];
1172 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1173 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
1176 if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1177 log2_trafo_size > s->ps.sps->log2_min_tb_size &&
1178 trafo_depth < lc->cu.max_trafo_depth &&
1179 !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1180 split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1182 int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1183 lc->cu.pred_mode == MODE_INTER &&
1184 lc->cu.part_mode != PART_2Nx2N &&
1187 split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1188 (lc->cu.intra_split_flag && trafo_depth == 0) ||
1192 if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1193 if (trafo_depth == 0 || cbf_cb[0]) {
1194 cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1195 if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1196 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1200 if (trafo_depth == 0 || cbf_cr[0]) {
1201 cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1202 if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1203 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1208 if (split_transform_flag) {
1209 const int trafo_size_split = 1 << (log2_trafo_size - 1);
1210 const int x1 = x0 + trafo_size_split;
1211 const int y1 = y0 + trafo_size_split;
1213 #define SUBDIVIDE(x, y, idx) \
1215 ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1216 log2_trafo_size - 1, trafo_depth + 1, idx, \
1222 SUBDIVIDE(x0, y0, 0);
1223 SUBDIVIDE(x1, y0, 1);
1224 SUBDIVIDE(x0, y1, 2);
1225 SUBDIVIDE(x1, y1, 3);
1229 int min_tu_size = 1 << s->ps.sps->log2_min_tb_size;
1230 int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1231 int min_tu_width = s->ps.sps->min_tb_width;
1234 if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1235 cbf_cb[0] || cbf_cr[0] ||
1236 (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1237 cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1240 ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1241 log2_cb_size, log2_trafo_size,
1242 blk_idx, cbf_luma, cbf_cb, cbf_cr);
1245 // TODO: store cbf_luma somewhere else
1248 for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1249 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1250 int x_tu = (x0 + j) >> log2_min_tu_size;
1251 int y_tu = (y0 + i) >> log2_min_tu_size;
1252 s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1255 if (!s->sh.disable_deblocking_filter_flag) {
1256 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1257 if (s->ps.pps->transquant_bypass_enable_flag &&
1258 lc->cu.cu_transquant_bypass_flag)
1259 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1265 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1267 HEVCLocalContext *lc = s->HEVClc;
1269 int cb_size = 1 << log2_cb_size;
1270 int stride0 = s->frame->linesize[0];
1271 uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1272 int stride1 = s->frame->linesize[1];
1273 uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1274 int stride2 = s->frame->linesize[2];
1275 uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1277 int length = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1278 (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1279 ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1280 s->ps.sps->pcm.bit_depth_chroma;
1281 const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1284 if (!s->sh.disable_deblocking_filter_flag)
1285 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1287 ret = init_get_bits(&gb, pcm, length);
1291 s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size, &gb, s->ps.sps->pcm.bit_depth);
1292 if (s->ps.sps->chroma_format_idc) {
1293 s->hevcdsp.put_pcm(dst1, stride1,
1294 cb_size >> s->ps.sps->hshift[1],
1295 cb_size >> s->ps.sps->vshift[1],
1296 &gb, s->ps.sps->pcm.bit_depth_chroma);
1297 s->hevcdsp.put_pcm(dst2, stride2,
1298 cb_size >> s->ps.sps->hshift[2],
1299 cb_size >> s->ps.sps->vshift[2],
1300 &gb, s->ps.sps->pcm.bit_depth_chroma);
1307 * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1309 * @param s HEVC decoding context
1310 * @param dst target buffer for block data at block position
1311 * @param dststride stride of the dst buffer
1312 * @param ref reference picture buffer at origin (0, 0)
1313 * @param mv motion vector (relative to block position) to get pixel data from
1314 * @param x_off horizontal position of block from origin (0, 0)
1315 * @param y_off vertical position of block from origin (0, 0)
1316 * @param block_w width of block
1317 * @param block_h height of block
1318 * @param luma_weight weighting factor applied to the luma prediction
1319 * @param luma_offset additive offset applied to the luma prediction value
1322 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1323 AVFrame *ref, const Mv *mv, int x_off, int y_off,
1324 int block_w, int block_h, int luma_weight, int luma_offset)
1326 HEVCLocalContext *lc = s->HEVClc;
1327 uint8_t *src = ref->data[0];
1328 ptrdiff_t srcstride = ref->linesize[0];
1329 int pic_width = s->ps.sps->width;
1330 int pic_height = s->ps.sps->height;
1333 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1334 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1335 int idx = ff_hevc_pel_weight[block_w];
1337 x_off += mv->x >> 2;
1338 y_off += mv->y >> 2;
1339 src += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1341 if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1342 x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1343 y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1344 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1345 int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1346 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1348 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1349 edge_emu_stride, srcstride,
1350 block_w + QPEL_EXTRA,
1351 block_h + QPEL_EXTRA,
1352 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1353 pic_width, pic_height);
1354 src = lc->edge_emu_buffer + buf_offset;
1355 srcstride = edge_emu_stride;
1359 s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1360 block_h, mx, my, block_w);
1362 s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1363 block_h, s->sh.luma_log2_weight_denom,
1364 luma_weight, luma_offset, mx, my, block_w);
1368 * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1370 * @param s HEVC decoding context
1371 * @param dst target buffer for block data at block position
1372 * @param dststride stride of the dst buffer
1373 * @param ref0 reference picture0 buffer at origin (0, 0)
1374 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1375 * @param x_off horizontal position of block from origin (0, 0)
1376 * @param y_off vertical position of block from origin (0, 0)
1377 * @param block_w width of block
1378 * @param block_h height of block
1379 * @param ref1 reference picture1 buffer at origin (0, 0)
1380 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1381 * @param current_mv current motion vector structure
1383 static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1384 AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1385 int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1387 HEVCLocalContext *lc = s->HEVClc;
1388 ptrdiff_t src0stride = ref0->linesize[0];
1389 ptrdiff_t src1stride = ref1->linesize[0];
1390 int pic_width = s->ps.sps->width;
1391 int pic_height = s->ps.sps->height;
1392 int mx0 = mv0->x & 3;
1393 int my0 = mv0->y & 3;
1394 int mx1 = mv1->x & 3;
1395 int my1 = mv1->y & 3;
1396 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1397 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1398 int x_off0 = x_off + (mv0->x >> 2);
1399 int y_off0 = y_off + (mv0->y >> 2);
1400 int x_off1 = x_off + (mv1->x >> 2);
1401 int y_off1 = y_off + (mv1->y >> 2);
1402 int idx = ff_hevc_pel_weight[block_w];
1404 uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1405 uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1407 if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1408 x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1409 y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1410 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1411 int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1412 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1414 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1415 edge_emu_stride, src0stride,
1416 block_w + QPEL_EXTRA,
1417 block_h + QPEL_EXTRA,
1418 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1419 pic_width, pic_height);
1420 src0 = lc->edge_emu_buffer + buf_offset;
1421 src0stride = edge_emu_stride;
1424 if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1425 x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1426 y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1427 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1428 int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1429 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1431 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1432 edge_emu_stride, src1stride,
1433 block_w + QPEL_EXTRA,
1434 block_h + QPEL_EXTRA,
1435 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1436 pic_width, pic_height);
1437 src1 = lc->edge_emu_buffer2 + buf_offset;
1438 src1stride = edge_emu_stride;
1441 s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1442 block_h, mx0, my0, block_w);
1444 s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1445 block_h, mx1, my1, block_w);
1447 s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1448 block_h, s->sh.luma_log2_weight_denom,
1449 s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1450 s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1451 s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1452 s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1458 * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1460 * @param s HEVC decoding context
1461 * @param dst1 target buffer for block data at block position (U plane)
1462 * @param dst2 target buffer for block data at block position (V plane)
1463 * @param dststride stride of the dst1 and dst2 buffers
1464 * @param ref reference picture buffer at origin (0, 0)
1465 * @param mv motion vector (relative to block position) to get pixel data from
1466 * @param x_off horizontal position of block from origin (0, 0)
1467 * @param y_off vertical position of block from origin (0, 0)
1468 * @param block_w width of block
1469 * @param block_h height of block
1470 * @param chroma_weight weighting factor applied to the chroma prediction
1471 * @param chroma_offset additive offset applied to the chroma prediction value
1474 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1475 ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1476 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1478 HEVCLocalContext *lc = s->HEVClc;
1479 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1480 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1481 const Mv *mv = ¤t_mv->mv[reflist];
1482 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1483 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1484 int idx = ff_hevc_pel_weight[block_w];
1485 int hshift = s->ps.sps->hshift[1];
1486 int vshift = s->ps.sps->vshift[1];
1487 intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift);
1488 intptr_t my = av_mod_uintp2(mv->y, 2 + vshift);
1489 intptr_t _mx = mx << (1 - hshift);
1490 intptr_t _my = my << (1 - vshift);
1492 x_off += mv->x >> (2 + hshift);
1493 y_off += mv->y >> (2 + vshift);
1494 src0 += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1496 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1497 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1498 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1499 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1500 int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1501 int buf_offset0 = EPEL_EXTRA_BEFORE *
1502 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1503 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1504 edge_emu_stride, srcstride,
1505 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1506 x_off - EPEL_EXTRA_BEFORE,
1507 y_off - EPEL_EXTRA_BEFORE,
1508 pic_width, pic_height);
1510 src0 = lc->edge_emu_buffer + buf_offset0;
1511 srcstride = edge_emu_stride;
1514 s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1515 block_h, _mx, _my, block_w);
1517 s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1518 block_h, s->sh.chroma_log2_weight_denom,
1519 chroma_weight, chroma_offset, _mx, _my, block_w);
1523 * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1525 * @param s HEVC decoding context
1526 * @param dst target buffer for block data at block position
1527 * @param dststride stride of the dst buffer
1528 * @param ref0 reference picture0 buffer at origin (0, 0)
1529 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1530 * @param x_off horizontal position of block from origin (0, 0)
1531 * @param y_off vertical position of block from origin (0, 0)
1532 * @param block_w width of block
1533 * @param block_h height of block
1534 * @param ref1 reference picture1 buffer at origin (0, 0)
1535 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1536 * @param current_mv current motion vector structure
1537 * @param cidx chroma component(cb, cr)
1539 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1540 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1542 HEVCLocalContext *lc = s->HEVClc;
1543 uint8_t *src1 = ref0->data[cidx+1];
1544 uint8_t *src2 = ref1->data[cidx+1];
1545 ptrdiff_t src1stride = ref0->linesize[cidx+1];
1546 ptrdiff_t src2stride = ref1->linesize[cidx+1];
1547 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1548 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1549 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1550 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1551 Mv *mv0 = ¤t_mv->mv[0];
1552 Mv *mv1 = ¤t_mv->mv[1];
1553 int hshift = s->ps.sps->hshift[1];
1554 int vshift = s->ps.sps->vshift[1];
1556 intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1557 intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1558 intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1559 intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1560 intptr_t _mx0 = mx0 << (1 - hshift);
1561 intptr_t _my0 = my0 << (1 - vshift);
1562 intptr_t _mx1 = mx1 << (1 - hshift);
1563 intptr_t _my1 = my1 << (1 - vshift);
1565 int x_off0 = x_off + (mv0->x >> (2 + hshift));
1566 int y_off0 = y_off + (mv0->y >> (2 + vshift));
1567 int x_off1 = x_off + (mv1->x >> (2 + hshift));
1568 int y_off1 = y_off + (mv1->y >> (2 + vshift));
1569 int idx = ff_hevc_pel_weight[block_w];
1570 src1 += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1571 src2 += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1573 if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1574 x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1575 y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1576 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1577 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1578 int buf_offset1 = EPEL_EXTRA_BEFORE *
1579 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1581 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1582 edge_emu_stride, src1stride,
1583 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1584 x_off0 - EPEL_EXTRA_BEFORE,
1585 y_off0 - EPEL_EXTRA_BEFORE,
1586 pic_width, pic_height);
1588 src1 = lc->edge_emu_buffer + buf_offset1;
1589 src1stride = edge_emu_stride;
1592 if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1593 x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1594 y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1595 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1596 int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1597 int buf_offset1 = EPEL_EXTRA_BEFORE *
1598 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1600 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1601 edge_emu_stride, src2stride,
1602 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1603 x_off1 - EPEL_EXTRA_BEFORE,
1604 y_off1 - EPEL_EXTRA_BEFORE,
1605 pic_width, pic_height);
1607 src2 = lc->edge_emu_buffer2 + buf_offset1;
1608 src2stride = edge_emu_stride;
1611 s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1612 block_h, _mx0, _my0, block_w);
1614 s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1615 src2, src2stride, lc->tmp,
1616 block_h, _mx1, _my1, block_w);
1618 s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1619 src2, src2stride, lc->tmp,
1621 s->sh.chroma_log2_weight_denom,
1622 s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1623 s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1624 s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1625 s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1626 _mx1, _my1, block_w);
1629 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1630 const Mv *mv, int y0, int height)
1632 int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1634 if (s->threads_type == FF_THREAD_FRAME )
1635 ff_thread_await_progress(&ref->tf, y, 0);
1638 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1639 int nPbH, int log2_cb_size, int part_idx,
1640 int merge_idx, MvField *mv)
1642 HEVCLocalContext *lc = s->HEVClc;
1643 enum InterPredIdc inter_pred_idc = PRED_L0;
1646 ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1648 if (s->sh.slice_type == B_SLICE)
1649 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1651 if (inter_pred_idc != PRED_L1) {
1652 if (s->sh.nb_refs[L0])
1653 mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1655 mv->pred_flag = PF_L0;
1656 ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1657 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1658 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1659 part_idx, merge_idx, mv, mvp_flag, 0);
1660 mv->mv[0].x += lc->pu.mvd.x;
1661 mv->mv[0].y += lc->pu.mvd.y;
1664 if (inter_pred_idc != PRED_L0) {
1665 if (s->sh.nb_refs[L1])
1666 mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1668 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1669 AV_ZERO32(&lc->pu.mvd);
1671 ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1674 mv->pred_flag += PF_L1;
1675 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1676 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1677 part_idx, merge_idx, mv, mvp_flag, 1);
1678 mv->mv[1].x += lc->pu.mvd.x;
1679 mv->mv[1].y += lc->pu.mvd.y;
1683 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1685 int log2_cb_size, int partIdx, int idx)
1687 #define POS(c_idx, x, y) \
1688 &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1689 (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1690 HEVCLocalContext *lc = s->HEVClc;
1692 struct MvField current_mv = {{{ 0 }}};
1694 int min_pu_width = s->ps.sps->min_pu_width;
1696 MvField *tab_mvf = s->ref->tab_mvf;
1697 RefPicList *refPicList = s->ref->refPicList;
1698 HEVCFrame *ref0 = NULL, *ref1 = NULL;
1699 uint8_t *dst0 = POS(0, x0, y0);
1700 uint8_t *dst1 = POS(1, x0, y0);
1701 uint8_t *dst2 = POS(2, x0, y0);
1702 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1703 int min_cb_width = s->ps.sps->min_cb_width;
1704 int x_cb = x0 >> log2_min_cb_size;
1705 int y_cb = y0 >> log2_min_cb_size;
1709 int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1712 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1714 if (skip_flag || lc->pu.merge_flag) {
1715 if (s->sh.max_num_merge_cand > 1)
1716 merge_idx = ff_hevc_merge_idx_decode(s);
1720 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1721 partIdx, merge_idx, ¤t_mv);
1723 hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1724 partIdx, merge_idx, ¤t_mv);
1727 x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1728 y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1730 for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1731 for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1732 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1734 if (current_mv.pred_flag & PF_L0) {
1735 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1738 hevc_await_progress(s, ref0, ¤t_mv.mv[0], y0, nPbH);
1740 if (current_mv.pred_flag & PF_L1) {
1741 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1744 hevc_await_progress(s, ref1, ¤t_mv.mv[1], y0, nPbH);
1747 if (current_mv.pred_flag == PF_L0) {
1748 int x0_c = x0 >> s->ps.sps->hshift[1];
1749 int y0_c = y0 >> s->ps.sps->vshift[1];
1750 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1751 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1753 luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1754 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1755 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1756 s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1758 if (s->ps.sps->chroma_format_idc) {
1759 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1760 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1761 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1762 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1763 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1764 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1766 } else if (current_mv.pred_flag == PF_L1) {
1767 int x0_c = x0 >> s->ps.sps->hshift[1];
1768 int y0_c = y0 >> s->ps.sps->vshift[1];
1769 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1770 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1772 luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1773 ¤t_mv.mv[1], x0, y0, nPbW, nPbH,
1774 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1775 s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1777 if (s->ps.sps->chroma_format_idc) {
1778 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1779 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1780 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1782 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1783 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1784 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1786 } else if (current_mv.pred_flag == PF_BI) {
1787 int x0_c = x0 >> s->ps.sps->hshift[1];
1788 int y0_c = y0 >> s->ps.sps->vshift[1];
1789 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1790 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1792 luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1793 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1794 ref1->frame, ¤t_mv.mv[1], ¤t_mv);
1796 if (s->ps.sps->chroma_format_idc) {
1797 chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1798 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 0);
1800 chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1801 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 1);
1809 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1810 int prev_intra_luma_pred_flag)
1812 HEVCLocalContext *lc = s->HEVClc;
1813 int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1814 int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1815 int min_pu_width = s->ps.sps->min_pu_width;
1816 int size_in_pus = pu_size >> s->ps.sps->log2_min_pu_size;
1817 int x0b = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1818 int y0b = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
1820 int cand_up = (lc->ctb_up_flag || y0b) ?
1821 s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1822 int cand_left = (lc->ctb_left_flag || x0b) ?
1823 s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;
1825 int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1827 MvField *tab_mvf = s->ref->tab_mvf;
1828 int intra_pred_mode;
1832 // intra_pred_mode prediction does not cross vertical CTB boundaries
1833 if ((y0 - 1) < y_ctb)
1836 if (cand_left == cand_up) {
1837 if (cand_left < 2) {
1838 candidate[0] = INTRA_PLANAR;
1839 candidate[1] = INTRA_DC;
1840 candidate[2] = INTRA_ANGULAR_26;
1842 candidate[0] = cand_left;
1843 candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1844 candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1847 candidate[0] = cand_left;
1848 candidate[1] = cand_up;
1849 if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1850 candidate[2] = INTRA_PLANAR;
1851 } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1852 candidate[2] = INTRA_DC;
1854 candidate[2] = INTRA_ANGULAR_26;
1858 if (prev_intra_luma_pred_flag) {
1859 intra_pred_mode = candidate[lc->pu.mpm_idx];
1861 if (candidate[0] > candidate[1])
1862 FFSWAP(uint8_t, candidate[0], candidate[1]);
1863 if (candidate[0] > candidate[2])
1864 FFSWAP(uint8_t, candidate[0], candidate[2]);
1865 if (candidate[1] > candidate[2])
1866 FFSWAP(uint8_t, candidate[1], candidate[2]);
1868 intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1869 for (i = 0; i < 3; i++)
1870 if (intra_pred_mode >= candidate[i])
1874 /* write the intra prediction units into the mv array */
1877 for (i = 0; i < size_in_pus; i++) {
1878 memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1879 intra_pred_mode, size_in_pus);
1881 for (j = 0; j < size_in_pus; j++) {
1882 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1886 return intra_pred_mode;
/*
 * Write into s->tab_ct_depth for the coding block at luma position (x0, y0)
 * whose edge is 1 << log2_cb_size samples.
 *
 * Position and size are converted to units of the minimum coding block
 * (shift by log2_min_cb_size). One table row, min_cb_width entries wide, is
 * touched per loop iteration.
 *
 * NOTE(review): the value and byte count handed to memset() sit on a line
 * that is missing from this listing - presumably ct_depth and length; confirm.
 */
1889 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1890 int log2_cb_size, int ct_depth)
/* block edge length, in minimum coding blocks */
1892 int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
/* top-left corner, in minimum coding blocks */
1893 int x_cb = x0 >> s->ps.sps->log2_min_cb_size;
1894 int y_cb = y0 >> s->ps.sps->log2_min_cb_size;
1897 for (y = 0; y < length; y++)
1898 memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
/*
 * Remapping table with 35 entries (indices 0..34).
 * intra_prediction_unit() indexes it with mode_idx in its
 * chroma_format_idc == 2 branch and stores the result in
 * lc->pu.intra_pred_mode_c[0].
 */
1902 static const uint8_t tab_mode_idx[] = {
1903 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20,
1904 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
/*
 * Decode the intra prediction modes of the coding unit at (x0, y0).
 *
 * Luma: with PART_NxN the unit holds side x side = 2x2 prediction blocks of
 * half the coding-block size, otherwise a single one. Every
 * prev_intra_luma_pred_flag is decoded first; then, per block, mpm_idx or
 * rem_intra_luma_pred_mode is decoded and luma_intra_pred_mode() derives the
 * mode stored in lc->pu.intra_pred_mode[].
 *
 * Chroma: handled separately for chroma_format_idc 3, 2 and any other
 * non-zero value; the result goes to lc->pu.intra_pred_mode_c[].
 *
 * NOTE(review): short lines (braces, `else`, short assignments) are missing
 * from this listing, so the branch structure described here is inferred from
 * the order of the remaining lines - confirm against the complete file.
 */
1906 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1909 HEVCLocalContext *lc = s->HEVClc;
/* chroma mode candidates selected by a decoded chroma_mode other than 4 */
1910 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
/* indexed 2 * row + column, like the lc->pu mode arrays below */
1911 uint8_t prev_intra_luma_pred_flag[4];
1912 int split = lc->cu.part_mode == PART_NxN;
/* prediction block edge: halved when the unit is split */
1913 int pb_size = (1 << log2_cb_size) >> split;
/* prediction blocks per row and per column: 1 or 2 */
1914 int side = split + 1;
/* pass 1: all prev_intra_luma_pred_flag values */
1918 for (i = 0; i < side; i++)
1919 for (j = 0; j < side; j++)
1920 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
/* pass 2: remaining luma syntax and mode derivation, block by block */
1922 for (i = 0; i < side; i++) {
1923 for (j = 0; j < side; j++) {
1924 if (prev_intra_luma_pred_flag[2 * i + j])
1925 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
/* presumably the else-branch of the flag test above */
1927 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1929 lc->pu.intra_pred_mode[2 * i + j] =
1930 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1931 prev_intra_luma_pred_flag[2 * i + j]);
/* chroma_format_idc == 3: one chroma mode per prediction block */
1935 if (s->ps.sps->chroma_format_idc == 3) {
1936 for (i = 0; i < side; i++) {
1937 for (j = 0; j < side; j++) {
1938 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1939 if (chroma_mode != 4) {
/* mode 34 replaces a table candidate that equals the luma mode */
1940 if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1941 lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1943 lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
/* presumably the chroma_mode == 4 case: reuse the luma mode */
1945 lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
/* chroma_format_idc == 2: a single chroma mode, remapped via tab_mode_idx */
1949 } else if (s->ps.sps->chroma_format_idc == 2) {
1951 lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1952 if (chroma_mode != 4) {
1953 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1956 mode_idx = intra_chroma_table[chroma_mode];
1958 mode_idx = lc->pu.intra_pred_mode[0];
1960 lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
/* any other non-zero format: a single chroma mode, stored without remapping */
1961 } else if (s->ps.sps->chroma_format_idc != 0) {
1962 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1963 if (chroma_mode != 4) {
1964 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1965 lc->pu.intra_pred_mode_c[0] = 34;
1967 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1969 lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
/*
 * Store default intra information for a coding block: every minimum
 * prediction unit it covers gets INTRA_DC in s->tab_ipm, and, when the CU is
 * intra coded, PF_INTRA in the pred_flag of the current frame's motion field.
 *
 * The body uses x0, y0 and log2_cb_size as block position and log2 size;
 * NOTE(review): their declarations are on lines missing from this listing.
 */
1974 static void intra_prediction_unit_default_value(HEVCContext *s,
1978 HEVCLocalContext *lc = s->HEVClc;
1979 int pb_size = 1 << log2_cb_size;
/* block edge and picture row stride, both in minimum prediction units */
1980 int size_in_pus = pb_size >> s->ps.sps->log2_min_pu_size;
1981 int min_pu_width = s->ps.sps->min_pu_width;
1982 MvField *tab_mvf = s->ref->tab_mvf;
1983 int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1984 int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
/* block smaller than one minimum PU; the statement guarded by this test is
 * not part of this listing (presumably an early return - confirm) */
1987 if (size_in_pus == 0)
/* one memset per row of minimum PUs */
1989 for (j = 0; j < size_in_pus; j++)
1990 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1991 if (lc->cu.pred_mode == MODE_INTRA)
1992 for (j = 0; j < size_in_pus; j++)
1993 for (k = 0; k < size_in_pus; k++)
1994 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
/*
 * Parse one coding unit of size 1 << log2_cb_size at luma position (x0, y0).
 *
 * Visible steps: reset the CU state to intra / 2Nx2N, decode the optional
 * transquant-bypass and skip flags, then either handle a skipped CU (one
 * prediction unit covering the whole block) or decode pred_mode / part_mode
 * and the matching PCM, intra or inter prediction units followed by the
 * transform tree. Afterwards the luma QP is written to s->qp_y_tab and the
 * coding-tree depth is recorded through set_ct_depth().
 *
 * Results of hls_pcm_sample() and hls_transform_tree() are collected in ret.
 *
 * NOTE(review): short lines (braces, `else`, `case` labels, `return`) are
 * missing from this listing, so the nesting is inferred from line order.
 */
1997 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1999 int cb_size = 1 << log2_cb_size;
2000 HEVCLocalContext *lc = s->HEVClc;
2001 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
/* block edge, row stride and position in units of minimum coding blocks */
2002 int length = cb_size >> log2_min_cb_size;
2003 int min_cb_width = s->ps.sps->min_cb_width;
2004 int x_cb = x0 >> log2_min_cb_size;
2005 int y_cb = y0 >> log2_min_cb_size;
2006 int idx = log2_cb_size - 2;
/* low bits of a sample position inside a block of
 * 1 << (log2_ctb_size - diff_cu_qp_delta_depth) samples */
2007 int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
/* defaults, replaced below when the corresponding syntax is decoded */
2012 lc->cu.pred_mode = MODE_INTRA;
2013 lc->cu.part_mode = PART_2Nx2N;
2014 lc->cu.intra_split_flag = 0;
2016 SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2017 for (x = 0; x < 4; x++)
2018 lc->pu.intra_pred_mode[x] = 1;
2019 if (s->ps.pps->transquant_bypass_enable_flag) {
2020 lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2021 if (lc->cu.cu_transquant_bypass_flag)
2022 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2024 lc->cu.cu_transquant_bypass_flag = 0;
/* the skip flag is decoded only outside I slices and copied into every
 * minimum-coding-block cell of this block */
2026 if (s->sh.slice_type != I_SLICE) {
2027 uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2029 x = y_cb * min_cb_width + x_cb;
2030 for (y = 0; y < length; y++) {
2031 memset(&s->skip_flag[x], skip_flag, length);
2034 lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
/* presumably the I-slice branch: clear the skip flags of the block */
2036 x = y_cb * min_cb_width + x_cb;
2037 for (y = 0; y < length; y++) {
2038 memset(&s->skip_flag[x], 0, length);
/* skipped CU: a single prediction unit of the full block size */
2043 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2044 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2045 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2047 if (!s->sh.disable_deblocking_filter_flag)
2048 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
/* non-skipped CU: prediction mode, then partition mode */
2052 if (s->sh.slice_type != I_SLICE)
2053 lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
/* part_mode is decoded for non-intra CUs and for minimum-size blocks */
2054 if (lc->cu.pred_mode != MODE_INTRA ||
2055 log2_cb_size == s->ps.sps->log2_min_cb_size) {
2056 lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
2057 lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2058 lc->cu.pred_mode == MODE_INTRA;
2061 if (lc->cu.pred_mode == MODE_INTRA) {
/* pcm_flag is decoded only for 2Nx2N blocks with PCM enabled and a size
 * inside the SPS PCM limits */
2062 if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2063 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2064 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2065 pcm_flag = ff_hevc_pcm_flag_decode(s);
/* presumably the pcm_flag branch: default intra info, then raw samples */
2068 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2069 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2070 if (s->ps.sps->pcm.loop_filter_disable_flag)
2071 set_deblocking_bypass(s, x0, y0, log2_cb_size);
/* presumably the non-PCM intra branch */
2076 intra_prediction_unit(s, x0, y0, log2_cb_size);
/* presumably the inter branch: one hls_prediction_unit() call per partition;
 * the argument before idx is the partition index (case labels not in view) */
2079 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2080 switch (lc->cu.part_mode) {
2082 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2085 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0, idx);
2086 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2089 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2090 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2093 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0, idx);
2094 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2097 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2098 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1, idx);
2101 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0, idx - 2);
2102 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2105 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2106 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1, idx - 2);
2109 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2110 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2111 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2112 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
/* residual: rqt_root_cbf stays 1 unless it is decoded below, which happens
 * for non-intra CUs other than a 2Nx2N one with merge_flag set */
2118 int rqt_root_cbf = 1;
2120 if (lc->cu.pred_mode != MODE_INTRA &&
2121 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2122 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
/* all-zero array, handed to hls_transform_tree() twice */
2125 const static int cbf[2] = { 0 };
/* intra CUs get one extra depth level when they are split (NxN) */
2126 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2127 s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2128 s->ps.sps->max_transform_hierarchy_depth_inter;
2129 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2131 log2_cb_size, 0, 0, cbf, cbf);
2135 if (!s->sh.disable_deblocking_filter_flag)
2136 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
/* QP deltas are enabled but none was coded so far: derive the QP here */
2141 if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2142 ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
/* store the luma QP for every minimum coding block of this CU */
2144 x = y_cb * min_cb_width + x_cb;
2145 for (y = 0; y < length; y++) {
2146 memset(&s->qp_y_tab[x], lc->qp_y, length);
/* the CU ends on a qp_block_mask boundary in both directions: its QP becomes
 * the predictor lc->qPy_pred */
2150 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2151 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2152 lc->qPy_pred = lc->qp_y;
2155 set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
/*
 * Recursively parse the coding quadtree node at (x0, y0) of size
 * 1 << log2_cb_size and depth cb_depth.
 *
 * A split node recurses into up to four half-size children (children
 * starting outside the picture are not visited); a leaf is parsed with
 * hls_coding_unit(). The visible return statements yield a "more data"
 * indicator: non-zero while the slice continues. hls_decode_entry() treats a
 * negative result as an error.
 *
 * NOTE(review): short lines (braces, `else`, `if (split_cu)`, error returns)
 * are missing from this listing; nesting is inferred from line order.
 */
2160 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2161 int log2_cb_size, int cb_depth)
2163 HEVCLocalContext *lc = s->HEVClc;
2164 const int cb_size = 1 << log2_cb_size;
2168 lc->ct_depth = cb_depth;
/* the split flag is coded only when the block lies fully inside the picture
 * and is larger than the minimum coding block */
2169 if (x0 + cb_size <= s->ps.sps->width &&
2170 y0 + cb_size <= s->ps.sps->height &&
2171 log2_cb_size > s->ps.sps->log2_min_cb_size) {
2172 split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
/* otherwise the split is implied whenever the block can still be divided */
2174 split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
/* reset the QP-delta state for blocks at least as large as
 * 1 << (log2_ctb_size - diff_cu_qp_delta_depth) */
2176 if (s->ps.pps->cu_qp_delta_enabled_flag &&
2177 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2178 lc->tu.is_cu_qp_delta_coded = 0;
2179 lc->tu.cu_qp_delta = 0;
/* same kind of reset for the chroma QP offset state */
2182 if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2183 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2184 lc->tu.is_cu_chroma_qp_offset_coded = 0;
/* presumably the split branch: visit the children in the order
 * top-left, top-right, bottom-left, bottom-right */
2188 int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2189 const int cb_size_split = cb_size >> 1;
2190 const int x1 = x0 + cb_size_split;
2191 const int y1 = y0 + cb_size_split;
2195 more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2199 if (more_data && x1 < s->ps.sps->width) {
2200 more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2204 if (more_data && y1 < s->ps.sps->height) {
2205 more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2209 if (more_data && x1 < s->ps.sps->width &&
2210 y1 < s->ps.sps->height) {
2211 more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
/* the node ends on a qp_block_mask boundary in both directions */
2216 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2217 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2218 lc->qPy_pred = lc->qp_y;
/* non-zero while this node ends before the right or the bottom picture edge */
2221 return ((x1 + cb_size_split) < s->ps.sps->width ||
2222 (y1 + cb_size_split) < s->ps.sps->height);
/* presumably the leaf branch */
2226 ret = hls_coding_unit(s, x0, y0, log2_cb_size);
/* end_of_slice_flag is read only when this CU ends on a CTB boundary or
 * reaches the picture edge; the visible conditions test x, and the line that
 * opens the matching y test is missing from this listing */
2229 if ((!((x0 + cb_size) %
2230 (1 << (s->ps.sps->log2_ctb_size))) ||
2231 (x0 + cb_size >= s->ps.sps->width)) &&
2233 (1 << (s->ps.sps->log2_ctb_size))) ||
2234 (y0 + cb_size >= s->ps.sps->height))) {
2235 int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2236 return !end_of_slice_flag;
/*
 * Prepare the local context for the CTB at luma position (x_ctb, y_ctb):
 * record the slice address of the CTB, compute where the current tile ends
 * (end_of_tiles_x / end_of_tiles_y), build lc->boundary_flags and derive the
 * ctb_*_flag availability flags of the neighbouring CTBs.
 *
 * The body also uses ctb_addr_ts, the CTB address in tile-scan order;
 * NOTE(review): its declaration is on a line missing from this listing, as
 * are the braces and `else` lines.
 */
2245 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2248 HEVCLocalContext *lc = s->HEVClc;
2249 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2250 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* raster-scan distance from the slice address */
2251 int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2253 s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
/* entropy coding sync: the tile end is the picture width; first_qp_group is
 * set when x_ctb is 0 */
2255 if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2256 if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2257 lc->first_qp_group = 1;
2258 lc->end_of_tiles_x = s->ps.sps->width;
/* tiles: when tile_id changes between consecutive tile-scan addresses,
 * recompute the tile's right edge from its column width */
2259 } else if (s->ps.pps->tiles_enabled_flag) {
2260 if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2261 int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2262 lc->end_of_tiles_x = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2263 lc->first_qp_group = 1;
/* presumably the branch taken when neither tool is enabled */
2266 lc->end_of_tiles_x = s->ps.sps->width;
2269 lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2271 lc->boundary_flags = 0;
/* with tiles, compare tile ids and slice addresses of the left / upper CTB */
2272 if (s->ps.pps->tiles_enabled_flag) {
2273 if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2274 lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2275 if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2276 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2277 if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2278 lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2279 if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2280 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
/* presumably the no-tiles branch: slice boundaries follow from the distance
 * to the slice address alone */
2282 if (ctb_addr_in_slice <= 0)
2283 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2284 if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2285 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
/* neighbour availability flags */
2288 lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2289 lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2290 lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2291 lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
/*
 * Decode the CTBs of the current slice segment one after another, starting
 * at s->sh.slice_ctb_addr_rs, for as long as hls_coding_quadtree() reports
 * more data and the tile-scan address stays below s->ps.sps->ctb_size.
 *
 * Returns AVERROR_INVALIDDATA for an inconsistent dependent slice segment.
 * NOTE(review): the remaining return statements, the address increment and
 * the braces are on lines missing from this listing.
 */
2294 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2296 HEVCContext *s = avctxt->priv_data;
2297 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2301 int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
/* a dependent slice segment cannot start at tile-scan address 0 */
2303 if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2304 av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2305 return AVERROR_INVALIDDATA;
/* a dependent segment requires that the preceding CTB (tile-scan order) was
 * recorded with the same slice address */
2308 if (s->sh.dependent_slice_segment_flag) {
2309 int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2310 if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2311 av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2312 return AVERROR_INVALIDDATA;
2316 while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2317 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* raster address -> luma position; the divisor is the picture width in CTBs,
 * rounded up */
2319 x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2320 y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2321 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2323 ff_hevc_cabac_init(s, ctb_addr_ts);
2325 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
/* per-CTB copies of the slice's deblocking parameters */
2327 s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2328 s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2329 s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2331 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
/* parse error: un-record the slice address of this CTB */
2332 if (more_data < 0) {
2333 s->tab_slice_address[ctb_addr_rs] = -1;
2339 ff_hevc_save_states(s, ctb_addr_ts);
2340 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* the last CTB handled reaches the bottom-right picture corner */
2343 if (x_ctb + ctb_size >= s->ps.sps->width &&
2344 y_ctb + ctb_size >= s->ps.sps->height)
2345 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
/*
 * Decode the slice data through hls_decode_entry(), submitted to
 * avctx->execute() with a job count of 1 and an argument stride of
 * sizeof(int).
 *
 * NOTE(review): the declarations of arg and ret and the return statement are
 * on lines missing from this listing.
 */
2350 static int hls_slice_data(HEVCContext *s)
2358 s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
/*
 * execute2() job that decodes one CTB row of a slice segment.
 *
 * input_ctb_row is an int array; entry [job] is the row index relative to
 * the start of the slice segment. The job works on s1->sList[self_id] and
 * signals row progress with ff_thread_report_progress2(); a failure in any
 * row is published through s1->wpp_err so the other rows stop as well.
 *
 * NOTE(review): braces, short conditions (such as the test guarding the bit
 * reader setup), address increments and return statements are on lines
 * missing from this listing.
 */
2361 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2363 HEVCContext *s1 = avctxt->priv_data, *s;
2364 HEVCLocalContext *lc;
2365 int ctb_size = 1<< s1->ps.sps->log2_ctb_size;
2367 int *ctb_row_p = input_ctb_row;
2368 int ctb_row = ctb_row_p[job];
/* first CTB of this row: slice start plus ctb_row picture rows (the width in
 * CTBs is rounded up) */
2369 int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2370 int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
/* index used with the progress functions below */
2371 int thread = ctb_row % s1->threads_number;
2374 s = s1->sList[self_id];
/* bit reader and CABAC decoder are pointed at the substream described by
 * sh.offset / sh.size [ctb_row - 1]; presumably only for ctb_row > 0 - confirm */
2378 ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2382 ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2385 while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2386 int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2387 int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2389 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
/* presumably blocks until the row above is SHIFT_CTB_WPP CTBs ahead - confirm */
2391 ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
/* another row failed: report progress so that waiters are released */
2393 if (avpriv_atomic_int_get(&s1->wpp_err)){
2394 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2398 ff_hevc_cabac_init(s, ctb_addr_ts);
2399 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2400 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
/* parse error in this row: flag it for all rows */
2402 if (more_data < 0) {
2403 s->tab_slice_address[ctb_addr_rs] = -1;
2404 avpriv_atomic_int_set(&s1->wpp_err, 1);
2405 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2411 ff_hevc_save_states(s, ctb_addr_ts);
/* one more CTB of this row is done */
2412 ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2413 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* data ended before the end of a row that is not the last entry point */
2415 if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2416 avpriv_atomic_int_set(&s1->wpp_err, 1);
2417 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/* this CTB reaches the bottom-right picture corner */
2421 if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2422 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2423 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2426 ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* past the right picture edge: the row is finished */
2429 if(x_ctb >= s->ps.sps->width) {
2433 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2438 static int hls_slice_data_wpp(HEVCContext *s, const HEVCNAL *nal)
2440 const uint8_t *data = nal->data;
2441 int length = nal->size;
2442 HEVCLocalContext *lc = s->HEVClc;
2443 int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2444 int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2446 int startheader, cmpt = 0;
2452 return AVERROR(ENOMEM);
2457 ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2460 for (i = 1; i < s->threads_number; i++) {
2461 s->sList[i] = av_malloc(sizeof(HEVCContext));
2462 memcpy(s->sList[i], s, sizeof(HEVCContext));
2463 s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2464 s->sList[i]->HEVClc = s->HEVClcList[i];
2468 offset = (lc->gb.index >> 3);
2470 for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2471 if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2477 for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2478 offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2479 for (j = 0, cmpt = 0, startheader = offset
2480 + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2481 if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2486 s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2487 s->sh.offset[i - 1] = offset;
2490 if (s->sh.num_entry_point_offsets != 0) {
2491 offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2492 s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2493 s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2498 for (i = 1; i < s->threads_number; i++) {
2499 s->sList[i]->HEVClc->first_qp_group = 1;
2500 s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2501 memcpy(s->sList[i], s, sizeof(HEVCContext));
2502 s->sList[i]->HEVClc = s->HEVClcList[i];
2505 avpriv_atomic_int_set(&s->wpp_err, 0);
2506 ff_reset_entries(s->avctx);
2508 for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2513 if (s->ps.pps->entropy_coding_sync_enabled_flag)
2514 s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2516 for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
/*
 * Attach side data derived from SEI fields to the current output frame
 * (s->ref->frame): stereo 3D information for frame packing arrangement types
 * 3..5, and a display matrix when a rotation or flip is signalled.
 *
 * Returns AVERROR(ENOMEM) when side data cannot be allocated.
 * NOTE(review): the NULL tests, `case` labels and the final return are on
 * lines missing from this listing.
 */
2523 static int set_side_data(HEVCContext *s)
2525 AVFrame *out = s->ref->frame;
/* stereo side data requires frame packing types 3..5 and a content
 * interpretation type of 1 or 2 */
2527 if (s->sei_frame_packing_present &&
2528 s->frame_packing_arrangement_type >= 3 &&
2529 s->frame_packing_arrangement_type <= 5 &&
2530 s->content_interpretation_type > 0 &&
2531 s->content_interpretation_type < 3) {
2532 AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2534 return AVERROR(ENOMEM);
2536 switch (s->frame_packing_arrangement_type) {
2538 if (s->quincunx_subsampling)
2539 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2541 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2544 stereo->type = AV_STEREO3D_TOPBOTTOM;
2547 stereo->type = AV_STEREO3D_FRAMESEQUENCE;
/* interpretation type 2 sets the INVERT flag */
2551 if (s->content_interpretation_type == 2)
2552 stereo->flags = AV_STEREO3D_FLAG_INVERT;
2555 if (s->sei_display_orientation_present &&
2556 (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
/* the SEI value is scaled by 360 / 2^16 to obtain the angle */
2557 double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
/* room for a 3x3 matrix of int32_t */
2558 AVFrameSideData *rotation = av_frame_new_side_data(out,
2559 AV_FRAME_DATA_DISPLAYMATRIX,
2560 sizeof(int32_t) * 9);
2562 return AVERROR(ENOMEM);
2564 av_display_rotation_set((int32_t *)rotation->data, angle);
2565 av_display_matrix_flip((int32_t *)rotation->data,
2566 s->sei_hflip, s->sei_vflip);
/*
 * Prepare the decoder for a new picture: clear the per-picture tables, get a
 * new reference frame for the current POC, build the reference picture set,
 * attach side data, and fetch a frame for output.
 *
 * NOTE(review): the error tests, the `fail:` label and the return statements
 * are on lines missing from this listing; the ff_hevc_unref_frame() call at
 * the end presumably belongs to the failure path.
 */
2572 static int hevc_frame_start(HEVCContext *s)
2574 HEVCLocalContext *lc = s->HEVClc;
/* despite its name this is computed in units of minimum coding blocks, with
 * one extra column and one extra row */
2575 int pic_size_in_ctb = ((s->ps.sps->width >> s->ps.sps->log2_min_cb_size) + 1) *
2576 ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
/* reset the per-picture tables */
2579 memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2580 memset(s->vertical_bs, 0, s->bs_width * s->bs_height);
2581 memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2582 memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
/* every byte becomes 0xff, so each entry reads as -1 */
2583 memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
/* remembered so decode_nal_unit() can compare later slices against it */
2586 s->first_nal_type = s->nal_unit_type;
2588 if (s->ps.pps->tiles_enabled_flag)
2589 lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2591 ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2595 ret = ff_hevc_frame_rps(s);
2597 av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2601 s->ref->frame->key_frame = IS_IRAP(s);
2603 ret = set_side_data(s);
/* derived directly from the slice type value */
2607 s->frame->pict_type = 3 - s->sh.slice_type;
2610 ff_hevc_bump_frame(s);
2612 av_frame_unref(s->output_frame);
2613 ret = ff_hevc_output_frame(s, s->output_frame, 0);
2617 if (!s->avctx->hwaccel)
2618 ff_thread_finish_setup(s->avctx);
2624 ff_hevc_unref_frame(s, s->ref, ~0);
/*
 * Decode a single NAL unit and dispatch on its type: parameter sets (VPS,
 * SPS, PPS), SEI messages, or slice data. For slices the header is parsed,
 * random-access skipping is applied, the frame is started on the first slice
 * of a picture, and the slice data is decoded by the hardware accelerator or
 * by hls_slice_data() / hls_slice_data_wpp().
 *
 * NOTE(review): most `case` labels, `break`, `goto fail`, the bit reader
 * setup and the return statements are on lines missing from this listing.
 */
2629 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2631 HEVCLocalContext *lc = s->HEVClc;
2632 GetBitContext *gb = &lc->gb;
2633 int ctb_addr_ts, ret;
2636 s->nal_unit_type = nal->type;
2637 s->temporal_id = nal->temporal_id;
2639 switch (s->nal_unit_type) {
/* parameter sets */
2641 ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2646 ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2647 s->apply_defdispwin);
2652 ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2656 case NAL_SEI_PREFIX:
2657 case NAL_SEI_SUFFIX:
2658 ret = ff_hevc_decode_nal_sei(s);
/* slice NAL types (the other labels of this group are not in view) */
2669 case NAL_BLA_W_RADL:
2671 case NAL_IDR_W_RADL:
2678 ret = hls_slice_header(s);
/* max_ra == INT_MAX marks a state in which the threshold still has to be
 * chosen; the CRA / BLA assignment is on a line missing from this listing */
2682 if (s->max_ra == INT_MAX) {
2683 if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2687 s->max_ra = INT_MIN;
/* RASL pictures with a POC not above max_ra are tested here; the action
 * taken is on a line missing from this listing */
2691 if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2692 s->poc <= s->max_ra) {
2696 if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2697 s->max_ra = INT_MIN;
2700 if (s->sh.first_slice_in_pic_flag) {
2701 ret = hevc_frame_start(s);
2704 } else if (!s->ref) {
2705 av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
/* all VCL NAL units of one picture must share the type of the first one */
2709 if (s->nal_unit_type != s->first_nal_type) {
2710 av_log(s->avctx, AV_LOG_ERROR,
2711 "Non-matching NAL types of the VCL NALUs: %d %d\n",
2712 s->first_nal_type, s->nal_unit_type);
2713 return AVERROR_INVALIDDATA;
/* reference lists are built for independent non-I slice segments */
2716 if (!s->sh.dependent_slice_segment_flag &&
2717 s->sh.slice_type != I_SLICE) {
2718 ret = ff_hevc_slice_rpl(s);
2720 av_log(s->avctx, AV_LOG_WARNING,
2721 "Error constructing the reference lists for the current slice.\n");
2726 if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2727 ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2732 if (s->avctx->hwaccel) {
2733 ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
/* software path: the WPP decoder is chosen when several threads and at least
 * one entry point are available */
2737 if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2738 ctb_addr_ts = hls_slice_data_wpp(s, nal);
2740 ctb_addr_ts = hls_slice_data(s);
/* the returned address covers the whole picture */
2741 if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2745 if (ctb_addr_ts < 0) {
/* presumably the end-of-sequence / end-of-bitstream handling: start a new
 * sequence counter value and reset max_ra - confirm */
2753 s->seq_decode = (s->seq_decode + 1) & 0xff;
2754 s->max_ra = INT_MAX;
2760 av_log(s->avctx, AV_LOG_INFO,
2761 "Skipping NAL unit %d\n", s->nal_unit_type);
/* presumably the failure path */
2766 if (s->avctx->err_recognition & AV_EF_EXPLODE)
/*
 * Split an input packet into NAL units and decode them in order.
 *
 * NOTE(review): the error tests, the statements executed when an EOS/EOB NAL
 * is found, the `fail:` label and the return statements are on lines missing
 * from this listing.
 */
2771 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2776 s->last_eos = s->eos;
2779 /* split the input packet into NAL units, so we know the upper bound on the
2780 * number of slices in the frame */
2781 ret = ff_hevc_split_packet(s, &s->pkt, buf, length, s->avctx, s->is_nalff,
2782 s->nal_length_size);
2784 av_log(s->avctx, AV_LOG_ERROR,
2785 "Error splitting the input into NAL units.\n");
/* look for end-of-bitstream / end-of-sequence NAL units before decoding */
2789 for (i = 0; i < s->pkt.nb_nals; i++) {
2790 if (s->pkt.nals[i].type == NAL_EOB_NUT ||
2791 s->pkt.nals[i].type == NAL_EOS_NUT)
2795 /* decode the NAL units */
2796 for (i = 0; i < s->pkt.nb_nals; i++) {
2797 ret = decode_nal_unit(s, &s->pkt.nals[i]);
2799 av_log(s->avctx, AV_LOG_WARNING,
2800 "Error parsing NAL unit #%d.\n", i);
/* with frame threading, report the current frame with progress INT_MAX */
2806 if (s->ref && s->threads_type == FF_THREAD_FRAME)
2807 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/**
 * Log a 16-byte MD5 digest as 32 hexadecimal digits.
 *
 * @param log_ctx context handed to av_log()
 * @param level   av_log() level used for every digit pair
 * @param md5     digest bytes, logged first to last without separators
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte      = md5;
    const uint8_t *const end = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
/*
 * Compute an MD5 digest for every plane of frame and compare it with the
 * expected digests in s->md5[].
 *
 * Returns AVERROR(EINVAL) / AVERROR(ENOMEM) on setup failures and
 * AVERROR_INVALIDDATA on the first mismatching plane.
 *
 * NOTE(review): the NULL test of desc, the preprocessor conditionals around
 * the byte-swap code, braces and the final return are on lines missing from
 * this listing.
 */
2819 static int verify_md5(HEVCContext *s, AVFrame *frame)
2821 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2826 return AVERROR(EINVAL);
/* 1 when samples are wider than 8 bits, i.e. two bytes per sample */
2828 pixel_shift = desc->comp[0].depth > 8;
2830 av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2833 /* the checksums are LE, so we have to byteswap for >8bpp formats
/* scratch line for the byte-swapped samples, sized for the largest of the
 * first three linesizes; allocated only while no buffer exists yet */
2836 if (pixel_shift && !s->checksum_buf) {
2837 av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2838 FFMAX3(frame->linesize[0], frame->linesize[1],
2839 frame->linesize[2]));
2840 if (!s->checksum_buf)
2841 return AVERROR(ENOMEM);
/* one digest per plane, until the first NULL data pointer */
2845 for (i = 0; frame->data[i]; i++) {
2846 int width = s->avctx->coded_width;
2847 int height = s->avctx->coded_height;
/* planes 1 and 2 use the chroma-subsampled dimensions */
2848 int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
2849 int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2852 av_md5_init(s->md5_ctx);
2853 for (j = 0; j < h; j++) {
2854 const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2857 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2858 (const uint16_t *) src, w);
2859 src = s->checksum_buf;
/* w samples of 1 << pixel_shift bytes each */
2862 av_md5_update(s->md5_ctx, src, w << pixel_shift);
2864 av_md5_final(s->md5_ctx, md5);
2866 if (!memcmp(md5, s->md5[i], 16)) {
2867 av_log (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2868 print_md5(s->avctx, AV_LOG_DEBUG, md5);
2869 av_log (s->avctx, AV_LOG_DEBUG, "; ");
2871 av_log (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2872 print_md5(s->avctx, AV_LOG_ERROR, md5);
2873 av_log (s->avctx, AV_LOG_ERROR, " != ");
2874 print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2875 av_log (s->avctx, AV_LOG_ERROR, "\n");
2876 return AVERROR_INVALIDDATA;
2880 av_log(s->avctx, AV_LOG_DEBUG, "\n");
/*
 * Decoder entry point for one packet: decode its NAL units, finish the
 * hardware-accelerated frame if one is in use, optionally verify the SEI MD5
 * checksum, and hand a pending output frame to the caller through data.
 *
 * NOTE(review): the remaining parameter (the packet, used as avpkt), the
 * condition guarding the first ff_hevc_output_frame() call, the updates of
 * *got_output and the return statements are on lines missing from this
 * listing.
 */
2885 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2889 HEVCContext *s = avctx->priv_data;
/* presumably the flush path taken for an empty packet - confirm */
2892 ret = ff_hevc_output_frame(s, data, 1);
2901 ret = decode_nal_units(s, avpkt->data, avpkt->size);
2905 if (avctx->hwaccel) {
/* a failing end_frame() drops the current reference frame */
2906 if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
2907 av_log(avctx, AV_LOG_ERROR,
2908 "hardware accelerator failed to decode picture\n");
2909 ff_hevc_unref_frame(s, s->ref, ~0);
2913 /* verify the SEI checksum */
2914 if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2916 ret = verify_md5(s, s->ref->frame);
/* a mismatch drops the frame only when AV_EF_EXPLODE is requested */
2917 if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2918 ff_hevc_unref_frame(s, s->ref, ~0);
2925 if (s->is_decoded) {
2926 av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
/* a frame is waiting in s->output_frame: move it into data */
2930 if (s->output_frame->buf[0]) {
2931 av_frame_move_ref(data, s->output_frame);
/*
 * Make dst a new reference to the frame held by src: the thread frame and
 * the tab_mvf / rpl_tab / rpl buffers get new references, the plain fields
 * are copied.
 *
 * On an allocation failure dst is unreferenced again and AVERROR(ENOMEM) is
 * returned.
 * NOTE(review): the `goto fail` statements, the `fail:` label and the
 * success return are on lines missing from this listing.
 */
2938 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2942 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
/* motion-vector table: new buffer reference, same data pointer */
2946 dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2947 if (!dst->tab_mvf_buf)
2949 dst->tab_mvf = src->tab_mvf;
2951 dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2952 if (!dst->rpl_tab_buf)
2954 dst->rpl_tab = src->rpl_tab;
2956 dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2960 dst->poc = src->poc;
2961 dst->ctb_count = src->ctb_count;
2962 dst->window = src->window;
2963 dst->flags = src->flags;
2964 dst->sequence = src->sequence;
/* hwaccel private data is shared through its buffer as well */
2966 if (src->hwaccel_picture_private) {
2967 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2968 if (!dst->hwaccel_priv_buf)
2970 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
/* failure path */
2975 ff_hevc_unref_frame(s, dst, ~0);
2976 return AVERROR(ENOMEM);
/*
 * Release everything owned by the decoder context: MD5 and CABAC state, SAO
 * line buffers, the output frame, all DPB frames, the parameter set lists,
 * slice header tables, the per-thread contexts and the NAL unit buffers.
 *
 * NOTE(review): several statements (for example the pic_arrays_free() call,
 * the NULL test inside the thread loop and the return) are on lines missing
 * from this listing.
 */
2979 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2981 HEVCContext *s = avctx->priv_data;
2986 av_freep(&s->md5_ctx);
2988 av_freep(&s->cabac_state);
2990 for (i = 0; i < 3; i++) {
2991 av_freep(&s->sao_pixel_buffer_h[i]);
2992 av_freep(&s->sao_pixel_buffer_v[i]);
2994 av_frame_free(&s->output_frame);
/* drop every reference held by a DPB slot, then free its AVFrame */
2996 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2997 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2998 av_frame_free(&s->DPB[i].frame);
3001 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
3002 av_buffer_unref(&s->ps.vps_list[i]);
3003 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
3004 av_buffer_unref(&s->ps.sps_list[i]);
3005 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
3006 av_buffer_unref(&s->ps.pps_list[i]);
3011 av_freep(&s->sh.entry_point_offset);
3012 av_freep(&s->sh.offset);
3013 av_freep(&s->sh.size);
/* contexts of the additional threads start at index 1 */
3015 for (i = 1; i < s->threads_number; i++) {
3016 HEVCLocalContext *lc = s->HEVClcList[i];
3018 av_freep(&s->HEVClcList[i]);
3019 av_freep(&s->sList[i]);
/* s->HEVClc aliases HEVClcList[0]; the statement guarded by this test is on a
 * line missing from this listing */
3022 if (s->HEVClc == s->HEVClcList[0])
3024 av_freep(&s->HEVClcList[0]);
3026 for (i = 0; i < s->pkt.nals_allocated; i++) {
3027 av_freep(&s->pkt.nals[i].rbsp_buffer);
3028 av_freep(&s->pkt.nals[i].skipped_bytes_pos);
3030 av_freep(&s->pkt.nals);
3031 s->pkt.nals_allocated = 0;
/*
 * Allocate the long-lived parts of the decoder context: the local context
 * (also stored as HEVClcList[0]), the CABAC state, the output frame, one
 * AVFrame per DPB slot and the MD5 context; then mark the context as
 * initialised.
 *
 * On failure everything is released through hevc_decode_free() and
 * AVERROR(ENOMEM) is returned.
 * NOTE(review): the `goto fail` statements, the `fail:` label and the
 * success return are on lines missing from this listing.
 */
3036 static av_cold int hevc_init_context(AVCodecContext *avctx)
3038 HEVCContext *s = avctx->priv_data;
3043 s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3046 s->HEVClcList[0] = s->HEVClc;
3049 s->cabac_state = av_malloc(HEVC_CONTEXTS);
3050 if (!s->cabac_state)
3053 s->output_frame = av_frame_alloc();
3054 if (!s->output_frame)
3057 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3058 s->DPB[i].frame = av_frame_alloc();
3059 if (!s->DPB[i].frame)
/* the thread-frame wrapper points at the same AVFrame */
3061 s->DPB[i].tf.f = s->DPB[i].frame;
/* INT_MAX is the initial state tested by decode_nal_unit() */
3064 s->max_ra = INT_MAX;
3066 s->md5_ctx = av_md5_alloc();
3070 ff_bswapdsp_init(&s->bdsp);
3072 s->context_initialized = 1;
/* failure path */
3078 hevc_decode_free(avctx);
3079 return AVERROR(ENOMEM);
/*
 * Frame-threading hook: bring the decoder context of dst in line with src.
 * The DPB frames and the VPS / SPS / PPS lists are re-referenced, the active
 * SPS is re-applied when it differs, and the sequence / POC bookkeeping
 * fields are copied.
 *
 * Returns AVERROR(ENOMEM) when a parameter set buffer cannot be referenced.
 * NOTE(review): the error tests after hevc_init_context() / hevc_ref_frame()
 * / set_sps(), the statement guarded by the first SPS comparison, the test
 * guarding the last two assignments and the final return are on lines
 * missing from this listing.
 */
3082 static int hevc_update_thread_context(AVCodecContext *dst,
3083 const AVCodecContext *src)
3085 HEVCContext *s = dst->priv_data;
3086 HEVCContext *s0 = src->priv_data;
/* lazily set up the destination context on first use */
3089 if (!s->context_initialized) {
3090 ret = hevc_init_context(dst);
/* mirror the DPB: drop the old reference, then reference the source's frame
 * if that slot holds one */
3095 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3096 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3097 if (s0->DPB[i].frame->buf[0]) {
3098 ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3104 if (s->ps.sps != s0->ps.sps)
/* mirror the parameter set lists */
3106 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
3107 av_buffer_unref(&s->ps.vps_list[i]);
3108 if (s0->ps.vps_list[i]) {
3109 s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
3110 if (!s->ps.vps_list[i])
3111 return AVERROR(ENOMEM);
3115 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3116 av_buffer_unref(&s->ps.sps_list[i]);
3117 if (s0->ps.sps_list[i]) {
3118 s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
3119 if (!s->ps.sps_list[i])
3120 return AVERROR(ENOMEM);
3124 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
3125 av_buffer_unref(&s->ps.pps_list[i]);
3126 if (s0->ps.pps_list[i]) {
3127 s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
3128 if (!s->ps.pps_list[i])
3129 return AVERROR(ENOMEM);
/* activate the source's SPS when it differs from ours */
3133 if (s->ps.sps != s0->ps.sps)
3134 if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
/* plain state copied from the source context */
3137 s->seq_decode = s0->seq_decode;
3138 s->seq_output = s0->seq_output;
3139 s->pocTid0 = s0->pocTid0;
3140 s->max_ra = s0->max_ra;
3142 s->no_rasl_output_flag = s0->no_rasl_output_flag;
3144 s->is_nalff = s0->is_nalff;
3145 s->nal_length_size = s0->nal_length_size;
3147 s->threads_number = s0->threads_number;
3148 s->threads_type = s0->threads_type;
/* presumably executed only when the source context saw an end of sequence -
 * confirm; same update as in decode_nal_unit() */
3151 s->seq_decode = (s->seq_decode + 1) & 0xff;
3152 s->max_ra = INT_MAX;
/**
 * Parse avctx->extradata and feed the NAL units it contains to
 * decode_nal_units(), then export stream parameters from the SPS list.
 *
 * Extradata longer than 3 bytes whose first two bytes are non-zero, or
 * whose third byte is greater than 1, is treated as hvcC; the raw
 * extradata is also passed to decode_nal_units() further down,
 * presumably as the non-hvcC branch (the branch keyword is not visible).
 *
 * @return AVERROR_INVALIDDATA when an hvcC NAL unit claims more bytes
 *         than remain; other return paths are not visible here.
 */
3158 static int hevc_decode_extradata(HEVCContext *s)
3160 AVCodecContext *avctx = s->avctx;
3164 bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3166 if (avctx->extradata_size > 3 &&
3167 (avctx->extradata[0] || avctx->extradata[1] ||
3168 avctx->extradata[2] > 1)) {
3169 /* It seems the extradata is encoded as hvcC format.
3170 * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3171 * is finalized. When finalized, configurationVersion will be 1 and we
3172 * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3173 int i, j, num_arrays, nal_len_size;
/* Skip the first 21 header bytes; the low two bits of the next byte hold
 * (NAL length size - 1), and the byte after that is the array count. */
3177 bytestream2_skip(&gb, 21);
3178 nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3179 num_arrays = bytestream2_get_byte(&gb);
3181 /* nal units in the hvcC always have length coded with 2 bytes,
3182 * so put a fake nal_length_size = 2 while parsing them */
3183 s->nal_length_size = 2;
3185 /* Decode nal units from hvcC. */
3186 for (i = 0; i < num_arrays; i++) {
/* Array header: NAL type in the low 6 bits of one byte, followed by a
 * big-endian 16-bit count of NAL units. */
3187 int type = bytestream2_get_byte(&gb) & 0x3f;
3188 int cnt = bytestream2_get_be16(&gb);
3190 for (j = 0; j < cnt; j++) {
3191 // +2 for the nal size field
3192 int nalsize = bytestream2_peek_be16(&gb) + 2;
/* Reject a NAL unit that claims more bytes than are left. */
3193 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3194 av_log(s->avctx, AV_LOG_ERROR,
3195 "Invalid NAL unit size in extradata.\n");
3196 return AVERROR_INVALIDDATA;
/* Decode the unit together with its length prefix, then step past it. */
3199 ret = decode_nal_units(s, gb.buffer, nalsize);
3201 av_log(avctx, AV_LOG_ERROR,
3202 "Decoding nal unit %d %d from hvcC failed\n",
3206 bytestream2_skip(&gb, nalsize);
3210 /* Now store right nal length size, that will be used to parse
3212 s->nal_length_size = nal_len_size;
3215 ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3220 /* export stream parameters from the first SPS */
3221 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3222 if (s->ps.sps_list[i]) {
3223 const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3224 export_stream_params(s->avctx, &s->ps, sps);
/**
 * Codec .init callback: set up the decoder context, choose the
 * threading configuration and parse any extradata supplied by the
 * caller.
 */
3232 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3234 HEVCContext *s = avctx->priv_data;
3237 ff_init_cabac_states();
3239 avctx->internal->allocate_progress = 1;
3241 ret = hevc_init_context(avctx);
3245 s->enable_parallel_tiles = 0;
3246 s->picture_struct = 0;
/* With slice threading active, use the configured thread count; the
 * assignment of 1 is presumably the fallback branch. */
3249 if(avctx->active_thread_type & FF_THREAD_SLICE)
3250 s->threads_number = avctx->thread_count;
3252 s->threads_number = 1;
/* Parse extradata when present; the context is freed again on the
 * failure path. */
3254 if (avctx->extradata_size > 0 && avctx->extradata) {
3255 ret = hevc_decode_extradata(s);
3257 hevc_decode_free(avctx);
/* Frame threading is selected only when it is active and more than one
 * thread is configured; slice threading is the other value assigned. */
3262 if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3263 s->threads_type = FF_THREAD_FRAME;
3265 s->threads_type = FF_THREAD_SLICE;
/**
 * Codec .init_thread_copy callback: zero the private context of a
 * thread copy and initialise it through hevc_init_context().
 */
3270 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3272 HEVCContext *s = avctx->priv_data;
/* Wipe the whole context so the fresh initialisation below starts from
 * all-zero state. */
3275 memset(s, 0, sizeof(*s));
3277 ret = hevc_init_context(avctx);
/**
 * Codec .flush callback: flush the decoded picture buffer and reset
 * max_ra to INT_MAX, the same value hevc_init_context() assigns.
 */
3284 static void hevc_decode_flush(AVCodecContext *avctx)
3286 HEVCContext *s = avctx->priv_data;
3287 ff_hevc_flush_dpb(s);
3288 s->max_ra = INT_MAX;
/* Helpers for the AVOption table below: OFFSET(x) is the byte offset of
 * field x inside HEVCContext, PAR combines the decoding-parameter and
 * video-parameter option flags. */
3292 #define OFFSET(x) offsetof(HEVCContext, x)
3293 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* Profile identifiers and their display names, referenced by the
 * decoder's .profiles field; the FF_PROFILE_UNKNOWN entry closes the list. */
3295 static const AVProfile profiles[] = {
3296 { FF_PROFILE_HEVC_MAIN, "Main" },
3297 { FF_PROFILE_HEVC_MAIN_10, "Main 10" },
3298 { FF_PROFILE_HEVC_MAIN_STILL_PICTURE, "Main Still Picture" },
3299 { FF_PROFILE_HEVC_REXT, "Rext" },
3300 { FF_PROFILE_UNKNOWN },
/* Private decoder options. Both entries are AV_OPT_TYPE_INT and store
 * into the same HEVCContext field, apply_defdispwin, so either name sets
 * the same value.
 * NOTE(review): the help text of "strict-displaywin" contains the typo
 * "stricly"; it is a runtime string and is left unchanged here. */
3303 static const AVOption options[] = {
3304 { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3305 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3306 { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3307 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
/* AVClass for the decoder's private context; installed as .priv_class
 * of ff_hevc_decoder. */
3311 static const AVClass hevc_decoder_class = {
3312 .class_name = "HEVC decoder",
3313 .item_name = av_default_item_name,
3315 .version = LIBAVUTIL_VERSION_INT,
/* Codec descriptor for the HEVC video decoder: wires up the callbacks
 * defined in this file and declares its capabilities (DR1, delay,
 * slice threading and frame threading). */
3318 AVCodec ff_hevc_decoder = {
3320 .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3321 .type = AVMEDIA_TYPE_VIDEO,
3322 .id = AV_CODEC_ID_HEVC,
/* The private context is an HEVCContext configured via hevc_decoder_class. */
3323 .priv_data_size = sizeof(HEVCContext),
3324 .priv_class = &hevc_decoder_class,
3325 .init = hevc_decode_init,
3326 .close = hevc_decode_free,
3327 .decode = hevc_decode_frame,
3328 .flush = hevc_decode_flush,
/* Frame-threading hooks. */
3329 .update_thread_context = hevc_update_thread_context,
3330 .init_thread_copy = hevc_init_thread_copy,
3331 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3332 AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3333 .profiles = NULL_IF_CONFIG_SMALL(profiles),