4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 * Copyright (C) 2012 - 2013 Mickael Raulet
6 * Copyright (C) 2012 - 2013 Gildas Cocherel
7 * Copyright (C) 2012 - 2013 Wassim Hamidouche
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/atomic.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/common.h"
29 #include "libavutil/display.h"
30 #include "libavutil/internal.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
/* Lookup table mapping a prediction-block width (index 2..64) to a compact
 * 0..9 index.  Only the widths listed below are meaningful; all other
 * entries default to 0.  Presumably used to select per-width DSP routines
 * for weighted prediction — confirm against the callers. */
42 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
45 * NOTE: Each function hls_foo correspond to the function foo in the
46 * specification (HLS stands for High Level Syntax).
53 /* free everything allocated by pic_arrays_init() */
54 static void pic_arrays_free(HEVCContext *s)
/* Per-CTB filter-parameter arrays. */
57 av_freep(&s->deblock);
/* Per-CB coding-tree tables. */
59 av_freep(&s->skip_flag);
60 av_freep(&s->tab_ct_depth);
/* Per-PU / per-TU tables (intra modes, luma CBF). */
62 av_freep(&s->tab_ipm);
63 av_freep(&s->cbf_luma);
/* Tables indexed by CTB address. */
66 av_freep(&s->qp_y_tab);
67 av_freep(&s->tab_slice_address);
68 av_freep(&s->filter_slice_edges);
/* Deblocking boundary-strength maps. */
70 av_freep(&s->horizontal_bs);
71 av_freep(&s->vertical_bs);
/* Slice-header entry-point bookkeeping (WPP/tiles). */
73 av_freep(&s->sh.entry_point_offset);
74 av_freep(&s->sh.size);
75 av_freep(&s->sh.offset);
/* av_freep()/av_buffer_pool_uninit() NULL their argument, so this
 * function is safe to call repeatedly and on partially-initialized state. */
77 av_buffer_pool_uninit(&s->tab_mvf_pool);
78 av_buffer_pool_uninit(&s->rpl_tab_pool);
81 /* allocate arrays that depend on frame dimensions */
82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
84 int log2_min_cb_size = sps->log2_min_cb_size;
85 int width = sps->width;
86 int height = sps->height;
/* Number of minimum-CB positions, with one extra row/column of padding. */
87 int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
88 ((height >> log2_min_cb_size) + 1);
89 int ctb_count = sps->ctb_width * sps->ctb_height;
90 int min_pu_size = sps->min_pu_width * sps->min_pu_height;
/* Boundary-strength grid has 4-pixel granularity. */
92 s->bs_width = (width >> 2) + 1;
93 s->bs_height = (height >> 2) + 1;
/* NOTE(review): the bodies of the failure checks below are not visible in
 * this view; presumably each jumps to a shared cleanup path that frees the
 * partial allocations and ends in the AVERROR(ENOMEM) return at the
 * bottom — confirm against the full source. */
95 s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
96 s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
97 if (!s->sao || !s->deblock)
100 s->skip_flag = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
101 s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
102 if (!s->skip_flag || !s->tab_ct_depth)
105 s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
106 s->tab_ipm = av_mallocz(min_pu_size);
107 s->is_pcm = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
108 if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
111 s->filter_slice_edges = av_mallocz(ctb_count);
112 s->tab_slice_address = av_malloc_array(pic_size_in_ctb,
113 sizeof(*s->tab_slice_address));
114 s->qp_y_tab = av_malloc_array(pic_size_in_ctb,
115 sizeof(*s->qp_y_tab));
116 if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
119 s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
120 s->vertical_bs = av_mallocz_array(s->bs_width, s->bs_height);
121 if (!s->horizontal_bs || !s->vertical_bs)
/* Pools for per-frame MV fields and reference-picture-list tables; these
 * are recycled across frames instead of being reallocated each time. */
124 s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
126 s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
128 if (!s->tab_mvf_pool || !s->rpl_tab_pool)
135 return AVERROR(ENOMEM);
/* Parse the weighted-prediction table from the slice header
 * (pred_weight_table() syntax, H.265 section 7.3.6.3) into s->sh.
 * Fills luma/chroma weights and offsets for list L0, and for L1 when the
 * slice is a B slice. */
138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
142 uint8_t luma_weight_l0_flag[16];
143 uint8_t chroma_weight_l0_flag[16];
144 uint8_t luma_weight_l1_flag[16];
145 uint8_t chroma_weight_l1_flag[16];
146 int luma_log2_weight_denom;
148 luma_log2_weight_denom = get_ue_golomb_long(gb);
/* Out-of-range denominators are reported but then clamped rather than
 * rejected, so parsing continues. */
149 if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
150 av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
151 s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
/* Chroma denominator is coded as a delta against the luma one. */
152 if (s->ps.sps->chroma_format_idc != 0) {
153 int delta = get_se_golomb(gb);
154 s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
/* L0: per-reference luma weight flags; absent weights get the neutral
 * value (1 << denom) and zero offset. */
157 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
158 luma_weight_l0_flag[i] = get_bits1(gb);
159 if (!luma_weight_l0_flag[i]) {
160 s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
161 s->sh.luma_offset_l0[i] = 0;
/* L0: chroma weight flags (only coded when the stream has chroma). */
164 if (s->ps.sps->chroma_format_idc != 0) {
165 for (i = 0; i < s->sh.nb_refs[L0]; i++)
166 chroma_weight_l0_flag[i] = get_bits1(gb);
168 for (i = 0; i < s->sh.nb_refs[L0]; i++)
169 chroma_weight_l0_flag[i] = 0;
/* L0: explicit weights/offsets where flagged, neutral values otherwise. */
171 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
172 if (luma_weight_l0_flag[i]) {
173 int delta_luma_weight_l0 = get_se_golomb(gb);
174 s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
175 s->sh.luma_offset_l0[i] = get_se_golomb(gb);
177 if (chroma_weight_l0_flag[i]) {
178 for (j = 0; j < 2; j++) {
179 int delta_chroma_weight_l0 = get_se_golomb(gb);
180 int delta_chroma_offset_l0 = get_se_golomb(gb);
181 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
/* Chroma offset is reconstructed per the spec formula and clipped to
 * the valid [-128, 127] range. */
182 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
183 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
186 s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
187 s->sh.chroma_offset_l0[i][0] = 0;
188 s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
189 s->sh.chroma_offset_l0[i][1] = 0;
/* B slices: repeat the whole procedure for reference list L1. */
192 if (s->sh.slice_type == B_SLICE) {
193 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
194 luma_weight_l1_flag[i] = get_bits1(gb);
195 if (!luma_weight_l1_flag[i]) {
196 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
197 s->sh.luma_offset_l1[i] = 0;
200 if (s->ps.sps->chroma_format_idc != 0) {
201 for (i = 0; i < s->sh.nb_refs[L1]; i++)
202 chroma_weight_l1_flag[i] = get_bits1(gb);
204 for (i = 0; i < s->sh.nb_refs[L1]; i++)
205 chroma_weight_l1_flag[i] = 0;
207 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
208 if (luma_weight_l1_flag[i]) {
209 int delta_luma_weight_l1 = get_se_golomb(gb);
210 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
211 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
213 if (chroma_weight_l1_flag[i]) {
214 for (j = 0; j < 2; j++) {
215 int delta_chroma_weight_l1 = get_se_golomb(gb);
216 int delta_chroma_offset_l1 = get_se_golomb(gb);
217 s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
218 s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
219 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
222 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
223 s->sh.chroma_offset_l1[i][0] = 0;
224 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
225 s->sh.chroma_offset_l1[i][1] = 0;
/* Parse the long-term reference picture set from the slice header into
 * *rps.  Entries may come from the SPS-level LT-RPS list (nb_sps of them)
 * or be coded explicitly in the slice header (nb_sh of them).
 * Returns 0 on success or AVERROR_INVALIDDATA if the entry count exceeds
 * the rps->poc array. */
231 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
233 const HEVCSPS *sps = s->ps.sps;
234 int max_poc_lsb = 1 << sps->log2_max_poc_lsb;
235 int prev_delta_msb = 0;
236 unsigned int nb_sps = 0, nb_sh;
/* Nothing to do when the SPS does not signal long-term pictures. */
240 if (!sps->long_term_ref_pics_present_flag)
243 if (sps->num_long_term_ref_pics_sps > 0)
244 nb_sps = get_ue_golomb_long(gb);
245 nb_sh = get_ue_golomb_long(gb);
/* uint64_t addition avoids overflow when validating the combined count. */
247 if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
248 return AVERROR_INVALIDDATA;
250 rps->nb_refs = nb_sh + nb_sps;
252 for (i = 0; i < rps->nb_refs; i++) {
253 uint8_t delta_poc_msb_present;
256 uint8_t lt_idx_sps = 0;
/* SPS-sourced entry: an index into the SPS LT-RPS tables (only coded
 * when there is more than one candidate). */
258 if (sps->num_long_term_ref_pics_sps > 1)
259 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
261 rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
262 rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
/* Slice-coded entry: POC LSBs and used-by-current flag read directly. */
264 rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb);
265 rps->used[i] = get_bits1(gb);
/* Optional MSB cycle: deltas are cumulative within each of the two
 * groups (SPS entries and slice entries), hence the i != nb_sps reset. */
268 delta_poc_msb_present = get_bits1(gb);
269 if (delta_poc_msb_present) {
270 int delta = get_ue_golomb_long(gb);
272 if (i && i != nb_sps)
273 delta += prev_delta_msb;
275 rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
276 prev_delta_msb = delta;
/* Copy stream-level parameters (dimensions, profile/level, color info,
 * frame rate) from the active SPS/VPS into the AVCodecContext so they are
 * visible to the API user. */
283 static void export_stream_params(AVCodecContext *avctx,
284 const HEVCContext *s, const HEVCSPS *sps)
286 const HEVCVPS *vps = (const HEVCVPS*)s->ps.vps_list[sps->vps_id]->data;
287 unsigned int num = 0, den = 0;
289 avctx->pix_fmt = sps->pix_fmt;
290 avctx->coded_width = sps->width;
291 avctx->coded_height = sps->height;
/* Output (cropped) size may be smaller than the coded size. */
292 avctx->width = sps->output_width;
293 avctx->height = sps->output_height;
294 avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
295 avctx->profile = sps->ptl.general_ptl.profile_idc;
296 avctx->level = sps->ptl.general_ptl.level_idc;
298 ff_set_sar(avctx, sps->vui.sar);
/* VUI color signalling, with conservative defaults when absent. */
300 if (sps->vui.video_signal_type_present_flag)
301 avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
304 avctx->color_range = AVCOL_RANGE_MPEG;
306 if (sps->vui.colour_description_present_flag) {
307 avctx->color_primaries = sps->vui.colour_primaries;
308 avctx->color_trc = sps->vui.transfer_characteristic;
309 avctx->colorspace = sps->vui.matrix_coeffs;
311 avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
312 avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
313 avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
/* Timing info: VPS takes precedence over SPS VUI. */
316 if (vps->vps_timing_info_present_flag) {
317 num = vps->vps_num_units_in_tick;
318 den = vps->vps_time_scale;
319 } else if (sps->vui.vui_timing_info_present_flag) {
320 num = sps->vui.vui_num_units_in_tick;
321 den = sps->vui.vui_time_scale;
/* num/den are tick duration, so they land swapped in framerate. */
324 if (num != 0 && den != 0)
325 av_reduce(&avctx->framerate.den, &avctx->framerate.num,
/* Activate an SPS: export stream parameters, (re)allocate the per-frame
 * arrays, negotiate the output pixel format (including hwaccel formats),
 * initialize the DSP contexts and the SAO line buffers.
 * pix_fmt == AV_PIX_FMT_NONE means the format has not been chosen yet and
 * ff_thread_get_format() is consulted. */
329 static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fmt)
331 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
/* +2: one slot for the software format, one for the AV_PIX_FMT_NONE end marker. */
332 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
335 export_stream_params(s->avctx, s, sps);
338 ret = pic_arrays_init(s, sps);
/* Hardware acceleration is only offered for 8-bit 4:2:0 content. */
342 if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
343 #if CONFIG_HEVC_DXVA2_HWACCEL
344 *fmt++ = AV_PIX_FMT_DXVA2_VLD;
346 #if CONFIG_HEVC_D3D11VA_HWACCEL
347 *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
349 #if CONFIG_HEVC_VDPAU_HWACCEL
350 *fmt++ = AV_PIX_FMT_VDPAU;
354 if (pix_fmt == AV_PIX_FMT_NONE) {
355 *fmt++ = sps->pix_fmt;
356 *fmt = AV_PIX_FMT_NONE;
358 ret = ff_thread_get_format(s->avctx, pix_fmts);
361 s->avctx->pix_fmt = ret;
/* Caller already decided on a format; just adopt it. */
364 s->avctx->pix_fmt = pix_fmt;
/* Bit-depth-dependent DSP/prediction function tables. */
367 ff_hevc_pred_init(&s->hpc, sps->bit_depth);
368 ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
369 ff_videodsp_init (&s->vdsp, sps->bit_depth);
/* Drop any SAO line buffers from a previous SPS before reallocating. */
371 for (i = 0; i < 3; i++) {
372 av_freep(&s->sao_pixel_buffer_h[i]);
373 av_freep(&s->sao_pixel_buffer_v[i]);
376 if (sps->sao_enabled && !s->avctx->hwaccel) {
377 int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
380 for(c_idx = 0; c_idx < c_count; c_idx++) {
381 int w = sps->width >> sps->hshift[c_idx];
382 int h = sps->height >> sps->vshift[c_idx];
/* Two saved lines/columns per CTB row/column, scaled by pixel size. */
383 s->sao_pixel_buffer_h[c_idx] =
384 av_malloc((w * 2 * sps->ctb_height) <<
386 s->sao_pixel_buffer_v[c_idx] =
387 av_malloc((h * 2 * sps->ctb_width) <<
393 s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
/* Parse the slice segment header (H.265 section 7.3.6.1) from the current
 * NAL unit into s->sh, activating the referenced PPS/SPS as needed.
 * Returns 0 on success or AVERROR_INVALIDDATA on malformed input. */
403 static int hls_slice_header(HEVCContext *s)
405 GetBitContext *gb = &s->HEVClc->gb;
406 SliceHeader *sh = &s->sh;
/* --- Coded picture boundary / parameter set activation --- */
410 sh->first_slice_in_pic_flag = get_bits1(gb);
/* First slice of an IDR/BLA picture starts a new decode sequence and
 * flushes the reference picture buffer. */
411 if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
412 s->seq_decode = (s->seq_decode + 1) & 0xff;
415 ff_hevc_clear_refs(s);
417 sh->no_output_of_prior_pics_flag = 0;
419 sh->no_output_of_prior_pics_flag = get_bits1(gb);
421 sh->pps_id = get_ue_golomb_long(gb);
422 if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
423 av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
424 return AVERROR_INVALIDDATA;
/* All slices of one picture must reference the same PPS. */
426 if (!sh->first_slice_in_pic_flag &&
427 s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
428 av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
429 return AVERROR_INVALIDDATA;
431 s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
/* CRA after end-of-stream behaves like a broken-link access point. */
432 if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
433 sh->no_output_of_prior_pics_flag = 1;
/* SPS change: re-run set_sps() and bump the decode sequence counter. */
435 if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
436 const HEVCSPS* last_sps = s->ps.sps;
437 s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
438 if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
439 if (s->ps.sps->width != last_sps->width || s->ps.sps->height != last_sps->height ||
440 s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].max_dec_pic_buffering !=
441 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
442 sh->no_output_of_prior_pics_flag = 0;
444 ff_hevc_clear_refs(s);
445 ret = set_sps(s, s->ps.sps, AV_PIX_FMT_NONE);
449 s->seq_decode = (s->seq_decode + 1) & 0xff;
/* --- Slice segment address --- */
453 sh->dependent_slice_segment_flag = 0;
454 if (!sh->first_slice_in_pic_flag) {
455 int slice_address_length;
457 if (s->ps.pps->dependent_slice_segments_enabled_flag)
458 sh->dependent_slice_segment_flag = get_bits1(gb);
/* Address is coded with just enough bits to cover all CTBs. */
460 slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
461 s->ps.sps->ctb_height);
462 sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
463 if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
464 av_log(s->avctx, AV_LOG_ERROR,
465 "Invalid slice segment address: %u.\n",
466 sh->slice_segment_addr);
467 return AVERROR_INVALIDDATA;
470 if (!sh->dependent_slice_segment_flag) {
471 sh->slice_addr = sh->slice_segment_addr;
475 sh->slice_segment_addr = sh->slice_addr = 0;
477 s->slice_initialized = 0;
/* --- Independent slice segment: full header follows --- */
480 if (!sh->dependent_slice_segment_flag) {
481 s->slice_initialized = 0;
483 for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
484 skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
486 sh->slice_type = get_ue_golomb_long(gb);
487 if (!(sh->slice_type == I_SLICE ||
488 sh->slice_type == P_SLICE ||
489 sh->slice_type == B_SLICE)) {
490 av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
492 return AVERROR_INVALIDDATA;
/* IRAP pictures may only contain intra slices. */
494 if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
495 av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
496 return AVERROR_INVALIDDATA;
499 // when flag is not present, picture is inferred to be output
500 sh->pic_output_flag = 1;
501 if (s->ps.pps->output_flag_present_flag)
502 sh->pic_output_flag = get_bits1(gb);
504 if (s->ps.sps->separate_colour_plane_flag)
505 sh->colour_plane_id = get_bits(gb, 2);
/* --- POC and reference picture sets --- */
510 sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
511 poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
/* POC changing mid-picture is only fatal with AV_EF_EXPLODE. */
512 if (!sh->first_slice_in_pic_flag && poc != s->poc) {
513 av_log(s->avctx, AV_LOG_WARNING,
514 "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
515 if (s->avctx->err_recognition & AV_EF_EXPLODE)
516 return AVERROR_INVALIDDATA;
/* Short-term RPS: either coded inline in the slice header or selected
 * by index from the SPS list; pos bookkeeping records its bit size. */
521 sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
522 pos = get_bits_left(gb);
523 if (!sh->short_term_ref_pic_set_sps_flag) {
524 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
528 sh->short_term_rps = &sh->slice_rps;
530 int numbits, rps_idx;
532 if (!s->ps.sps->nb_st_rps) {
533 av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
534 return AVERROR_INVALIDDATA;
537 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
538 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
539 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
541 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
/* Long-term RPS; again only fatal with AV_EF_EXPLODE. */
543 pos = get_bits_left(gb);
544 ret = decode_lt_rps(s, &sh->long_term_rps, gb);
546 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
547 if (s->avctx->err_recognition & AV_EF_EXPLODE)
548 return AVERROR_INVALIDDATA;
550 sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
552 if (s->ps.sps->sps_temporal_mvp_enabled_flag)
553 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
555 sh->slice_temporal_mvp_enabled_flag = 0;
557 s->sh.short_term_rps = NULL;
/* Sub-layer non-reference NAL types are excluded from this check. */
562 if (s->temporal_id == 0 &&
563 s->nal_unit_type != NAL_TRAIL_N &&
564 s->nal_unit_type != NAL_TSA_N &&
565 s->nal_unit_type != NAL_STSA_N &&
566 s->nal_unit_type != NAL_RADL_N &&
567 s->nal_unit_type != NAL_RADL_R &&
568 s->nal_unit_type != NAL_RASL_N &&
569 s->nal_unit_type != NAL_RASL_R)
/* --- SAO flags --- */
572 if (s->ps.sps->sao_enabled) {
573 sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
/* One flag covers both chroma planes. */
574 if (s->ps.sps->chroma_format_idc) {
575 sh->slice_sample_adaptive_offset_flag[1] =
576 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
579 sh->slice_sample_adaptive_offset_flag[0] = 0;
580 sh->slice_sample_adaptive_offset_flag[1] = 0;
581 sh->slice_sample_adaptive_offset_flag[2] = 0;
/* --- Reference list sizes and modification --- */
584 sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
585 if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
588 sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
589 if (sh->slice_type == B_SLICE)
590 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
592 if (get_bits1(gb)) { // num_ref_idx_active_override_flag
593 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
594 if (sh->slice_type == B_SLICE)
595 sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
597 if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
598 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
599 sh->nb_refs[L0], sh->nb_refs[L1]);
600 return AVERROR_INVALIDDATA;
603 sh->rpl_modification_flag[0] = 0;
604 sh->rpl_modification_flag[1] = 0;
605 nb_refs = ff_hevc_frame_nb_refs(s);
607 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
608 return AVERROR_INVALIDDATA;
/* Explicit reference-list reordering. */
611 if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
612 sh->rpl_modification_flag[0] = get_bits1(gb);
613 if (sh->rpl_modification_flag[0]) {
614 for (i = 0; i < sh->nb_refs[L0]; i++)
615 sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
618 if (sh->slice_type == B_SLICE) {
619 sh->rpl_modification_flag[1] = get_bits1(gb);
620 if (sh->rpl_modification_flag[1] == 1)
621 for (i = 0; i < sh->nb_refs[L1]; i++)
622 sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
626 if (sh->slice_type == B_SLICE)
627 sh->mvd_l1_zero_flag = get_bits1(gb);
629 if (s->ps.pps->cabac_init_present_flag)
630 sh->cabac_init_flag = get_bits1(gb);
632 sh->cabac_init_flag = 0;
/* --- Temporal MVP collocated picture --- */
634 sh->collocated_ref_idx = 0;
635 if (sh->slice_temporal_mvp_enabled_flag) {
636 sh->collocated_list = L0;
637 if (sh->slice_type == B_SLICE)
638 sh->collocated_list = !get_bits1(gb);
640 if (sh->nb_refs[sh->collocated_list] > 1) {
641 sh->collocated_ref_idx = get_ue_golomb_long(gb);
642 if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
643 av_log(s->avctx, AV_LOG_ERROR,
644 "Invalid collocated_ref_idx: %d.\n",
645 sh->collocated_ref_idx);
646 return AVERROR_INVALIDDATA;
/* Weighted prediction tables, when enabled by the PPS for this slice type. */
651 if ((s->ps.pps->weighted_pred_flag && sh->slice_type == P_SLICE) ||
652 (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
653 pred_weight_table(s, gb);
/* Merge candidate count is coded as 5 minus the value. */
656 sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
657 if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
658 av_log(s->avctx, AV_LOG_ERROR,
659 "Invalid number of merging MVP candidates: %d.\n",
660 sh->max_num_merge_cand);
661 return AVERROR_INVALIDDATA;
/* --- QP deltas and deblocking control --- */
665 sh->slice_qp_delta = get_se_golomb(gb);
667 if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
668 sh->slice_cb_qp_offset = get_se_golomb(gb);
669 sh->slice_cr_qp_offset = get_se_golomb(gb);
671 sh->slice_cb_qp_offset = 0;
672 sh->slice_cr_qp_offset = 0;
675 if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
676 sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
678 sh->cu_chroma_qp_offset_enabled_flag = 0;
680 if (s->ps.pps->deblocking_filter_control_present_flag) {
681 int deblocking_filter_override_flag = 0;
683 if (s->ps.pps->deblocking_filter_override_enabled_flag)
684 deblocking_filter_override_flag = get_bits1(gb);
686 if (deblocking_filter_override_flag) {
687 sh->disable_deblocking_filter_flag = get_bits1(gb);
688 if (!sh->disable_deblocking_filter_flag) {
/* Offsets are signalled divided by two. */
689 sh->beta_offset = get_se_golomb(gb) * 2;
690 sh->tc_offset = get_se_golomb(gb) * 2;
693 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
694 sh->beta_offset = s->ps.pps->beta_offset;
695 sh->tc_offset = s->ps.pps->tc_offset;
698 sh->disable_deblocking_filter_flag = 0;
703 if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
704 (sh->slice_sample_adaptive_offset_flag[0] ||
705 sh->slice_sample_adaptive_offset_flag[1] ||
706 !sh->disable_deblocking_filter_flag)) {
707 sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
709 sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
/* Dependent slice segment without a preceding independent one is invalid. */
711 } else if (!s->slice_initialized) {
712 av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
713 return AVERROR_INVALIDDATA;
/* --- Entry points (tiles / wavefront parallel processing) --- */
716 sh->num_entry_point_offsets = 0;
717 if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
718 unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
719 // It would be possible to bound this tighter but this here is simpler
720 if (num_entry_point_offsets > get_bits_left(gb)) {
721 av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
722 return AVERROR_INVALIDDATA;
725 sh->num_entry_point_offsets = num_entry_point_offsets;
726 if (sh->num_entry_point_offsets > 0) {
727 int offset_len = get_ue_golomb_long(gb) + 1;
729 if (offset_len < 1 || offset_len > 32) {
730 sh->num_entry_point_offsets = 0;
731 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
732 return AVERROR_INVALIDDATA;
/* Free any arrays from a previous slice before reallocating. */
735 av_freep(&sh->entry_point_offset);
736 av_freep(&sh->offset);
738 sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
739 sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
740 sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
741 if (!sh->entry_point_offset || !sh->offset || !sh->size) {
742 sh->num_entry_point_offsets = 0;
743 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
744 return AVERROR(ENOMEM);
746 for (i = 0; i < sh->num_entry_point_offsets; i++) {
747 unsigned val = get_bits_long(gb, offset_len);
748 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
/* Parallel tile decoding is not implemented; fall back to one thread. */
750 if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
751 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
752 s->threads_number = 1;
754 s->enable_parallel_tiles = 0;
756 s->enable_parallel_tiles = 0;
/* Slice header extension: length-checked, then skipped. */
759 if (s->ps.pps->slice_header_extension_present_flag) {
760 unsigned int length = get_ue_golomb_long(gb);
761 if (length*8LL > get_bits_left(gb)) {
762 av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
763 return AVERROR_INVALIDDATA;
765 for (i = 0; i < length; i++)
766 skip_bits(gb, 8); // slice_header_extension_data_byte
769 // Inferred parameters
/* 26U forces unsigned arithmetic here; the result is range-checked below. */
770 sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
771 if (sh->slice_qp > 51 ||
772 sh->slice_qp < -s->ps.sps->qp_bd_offset) {
773 av_log(s->avctx, AV_LOG_ERROR,
774 "The slice_qp %d is outside the valid range "
777 -s->ps.sps->qp_bd_offset);
778 return AVERROR_INVALIDDATA;
781 sh->slice_ctb_addr_rs = sh->slice_segment_addr;
/* The first segment of a picture cannot be a dependent segment. */
783 if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
784 av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
785 return AVERROR_INVALIDDATA;
788 if (get_bits_left(gb) < 0) {
789 av_log(s->avctx, AV_LOG_ERROR,
790 "Overread slice header by %d bits\n", -get_bits_left(gb));
791 return AVERROR_INVALIDDATA;
794 s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
796 if (!s->ps.pps->cu_qp_delta_enabled_flag)
797 s->HEVClc->qp_y = s->sh.slice_qp;
799 s->slice_initialized = 1;
800 s->HEVClc->tu.cu_qp_offset_cb = 0;
801 s->HEVClc->tu.cu_qp_offset_cr = 0;
/* Address one CTB's entry in a raster-ordered per-CTB table; relies on a
 * variable 's' (the HEVCContext) being in scope at the expansion site. */
806 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
/* Set one SAO syntax element for the current CTB, honoring the SAO merge
 * flags: copy from the left or above neighbour when merged, otherwise use
 * the freshly decoded 'value'.  Relies on sao, s, rx, ry and the two
 * sao_merge_*_flag locals being in scope at the expansion site. */
808 #define SET_SAO(elem, value) \
810 if (!sao_merge_up_flag && !sao_merge_left_flag) \
812 else if (sao_merge_left_flag) \
813 sao->elem = CTB(s->sao, rx-1, ry).elem; \
814 else if (sao_merge_up_flag) \
815 sao->elem = CTB(s->sao, rx, ry-1).elem; \
/* Decode the SAO parameters for the CTB at (rx, ry) into s->sao, handling
 * merge-left/merge-up inheritance and computing the final signed, scaled
 * offset values. */
820 static void hls_sao_param(HEVCContext *s, int rx, int ry)
822 HEVCLocalContext *lc = s->HEVClc;
823 int sao_merge_left_flag = 0;
824 int sao_merge_up_flag = 0;
825 SAOParams *sao = &CTB(s->sao, rx, ry);
/* Merge flags are only coded when SAO is active for luma or chroma. */
828 if (s->sh.slice_sample_adaptive_offset_flag[0] ||
829 s->sh.slice_sample_adaptive_offset_flag[1]) {
831 if (lc->ctb_left_flag)
832 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
/* merge-up is mutually exclusive with merge-left. */
834 if (ry > 0 && !sao_merge_left_flag) {
836 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
/* One pass per color component (luma only for monochrome). */
840 for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
841 int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
842 s->ps.pps->log2_sao_offset_scale_chroma;
844 if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
845 sao->type_idx[c_idx] = SAO_NOT_APPLIED;
/* Cr (c_idx == 2) reuses Cb's type and EO class rather than decoding. */
850 sao->type_idx[2] = sao->type_idx[1];
851 sao->eo_class[2] = sao->eo_class[1];
853 SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
856 if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
/* Four offset magnitudes per component. */
859 for (i = 0; i < 4; i++)
860 SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
/* Band offset: explicit signs + band position; edge offset: EO class. */
862 if (sao->type_idx[c_idx] == SAO_BAND) {
863 for (i = 0; i < 4; i++) {
864 if (sao->offset_abs[c_idx][i]) {
865 SET_SAO(offset_sign[c_idx][i],
866 ff_hevc_sao_offset_sign_decode(s));
868 sao->offset_sign[c_idx][i] = 0;
871 SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
872 } else if (c_idx != 2) {
873 SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
876 // Inferred parameters
/* offset_val[0] is the fixed zero entry; the remaining entries get their
 * sign applied and are scaled by the PPS offset scale. */
877 sao->offset_val[c_idx][0] = 0;
878 for (i = 0; i < 4; i++) {
879 sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
880 if (sao->type_idx[c_idx] == SAO_EDGE) {
882 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
883 } else if (sao->offset_sign[c_idx][i]) {
884 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
886 sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
/* Decode the cross-component prediction parameters for chroma component
 * idx (0 = Cb, 1 = Cr) and store the resulting signed residual scale
 * factor in lc->tu.res_scale_val (0 disables the prediction). */
894 static int hls_cross_component_pred(HEVCContext *s, int idx) {
895 HEVCLocalContext *lc = s->HEVClc;
896 int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
898 if (log2_res_scale_abs_plus1 != 0) {
899 int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
/* scale = +/- 2^(log2_res_scale_abs_plus1 - 1). */
900 lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
901 (1 - 2 * res_scale_sign_flag);
903 lc->tu.res_scale_val = 0;
910 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
911 int xBase, int yBase, int cb_xBase, int cb_yBase,
912 int log2_cb_size, int log2_trafo_size,
913 int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
915 HEVCLocalContext *lc = s->HEVClc;
916 const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
919 if (lc->cu.pred_mode == MODE_INTRA) {
920 int trafo_size = 1 << log2_trafo_size;
921 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
923 s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
926 if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
927 (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
928 int scan_idx = SCAN_DIAG;
929 int scan_idx_c = SCAN_DIAG;
930 int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
931 (s->ps.sps->chroma_format_idc == 2 &&
932 (cbf_cb[1] || cbf_cr[1]));
934 if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
935 lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
936 if (lc->tu.cu_qp_delta != 0)
937 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
938 lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
939 lc->tu.is_cu_qp_delta_coded = 1;
941 if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
942 lc->tu.cu_qp_delta > (25 + s->ps.sps->qp_bd_offset / 2)) {
943 av_log(s->avctx, AV_LOG_ERROR,
944 "The cu_qp_delta %d is outside the valid range "
947 -(26 + s->ps.sps->qp_bd_offset / 2),
948 (25 + s->ps.sps->qp_bd_offset / 2));
949 return AVERROR_INVALIDDATA;
952 ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
955 if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
956 !lc->cu.cu_transquant_bypass_flag && !lc->tu.is_cu_chroma_qp_offset_coded) {
957 int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
958 if (cu_chroma_qp_offset_flag) {
959 int cu_chroma_qp_offset_idx = 0;
960 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
961 cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
962 av_log(s->avctx, AV_LOG_ERROR,
963 "cu_chroma_qp_offset_idx not yet tested.\n");
965 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
966 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
968 lc->tu.cu_qp_offset_cb = 0;
969 lc->tu.cu_qp_offset_cr = 0;
971 lc->tu.is_cu_chroma_qp_offset_coded = 1;
974 if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
975 if (lc->tu.intra_pred_mode >= 6 &&
976 lc->tu.intra_pred_mode <= 14) {
977 scan_idx = SCAN_VERT;
978 } else if (lc->tu.intra_pred_mode >= 22 &&
979 lc->tu.intra_pred_mode <= 30) {
980 scan_idx = SCAN_HORIZ;
983 if (lc->tu.intra_pred_mode_c >= 6 &&
984 lc->tu.intra_pred_mode_c <= 14) {
985 scan_idx_c = SCAN_VERT;
986 } else if (lc->tu.intra_pred_mode_c >= 22 &&
987 lc->tu.intra_pred_mode_c <= 30) {
988 scan_idx_c = SCAN_HORIZ;
995 ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
996 if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
997 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
998 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
999 lc->tu.cross_pf = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1000 (lc->cu.pred_mode == MODE_INTER ||
1001 (lc->tu.chroma_mode_c == 4)));
1003 if (lc->tu.cross_pf) {
1004 hls_cross_component_pred(s, 0);
1006 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1007 if (lc->cu.pred_mode == MODE_INTRA) {
1008 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1009 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1012 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1013 log2_trafo_size_c, scan_idx_c, 1);
1015 if (lc->tu.cross_pf) {
1016 ptrdiff_t stride = s->frame->linesize[1];
1017 int hshift = s->ps.sps->hshift[1];
1018 int vshift = s->ps.sps->vshift[1];
1019 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1020 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1021 int size = 1 << log2_trafo_size_c;
1023 uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1024 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1025 for (i = 0; i < (size * size); i++) {
1026 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1028 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1032 if (lc->tu.cross_pf) {
1033 hls_cross_component_pred(s, 1);
1035 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1036 if (lc->cu.pred_mode == MODE_INTRA) {
1037 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1038 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1041 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1042 log2_trafo_size_c, scan_idx_c, 2);
1044 if (lc->tu.cross_pf) {
1045 ptrdiff_t stride = s->frame->linesize[2];
1046 int hshift = s->ps.sps->hshift[2];
1047 int vshift = s->ps.sps->vshift[2];
1048 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1049 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1050 int size = 1 << log2_trafo_size_c;
1052 uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1053 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1054 for (i = 0; i < (size * size); i++) {
1055 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1057 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1060 } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1061 int trafo_size_h = 1 << (log2_trafo_size + 1);
1062 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1063 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1064 if (lc->cu.pred_mode == MODE_INTRA) {
1065 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1066 trafo_size_h, trafo_size_v);
1067 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1070 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1071 log2_trafo_size, scan_idx_c, 1);
1073 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1074 if (lc->cu.pred_mode == MODE_INTRA) {
1075 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1076 trafo_size_h, trafo_size_v);
1077 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1080 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1081 log2_trafo_size, scan_idx_c, 2);
1084 } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1085 if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1086 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1087 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1088 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1089 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1090 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1091 if (s->ps.sps->chroma_format_idc == 2) {
1092 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1093 trafo_size_h, trafo_size_v);
1094 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1095 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1097 } else if (blk_idx == 3) {
1098 int trafo_size_h = 1 << (log2_trafo_size + 1);
1099 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1100 ff_hevc_set_neighbour_available(s, xBase, yBase,
1101 trafo_size_h, trafo_size_v);
1102 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1103 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1104 if (s->ps.sps->chroma_format_idc == 2) {
1105 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1106 trafo_size_h, trafo_size_v);
1107 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1108 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
/* Mark every minimum-size PU covered by the coding block at (x0, y0) as
 * transquant-bypassed (is_pcm value 2) so the deblocking filter treats it
 * like PCM and leaves its samples untouched. */
static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
    int cb_size          = 1 << log2_cb_size;
    int log2_min_pu_size = s->ps.sps->log2_min_pu_size;

    int min_pu_width     = s->ps.sps->min_pu_width;
    /* clip to the picture edges so partially out-of-frame CBs do not
     * index past the end of the is_pcm array */
    int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
    int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);

    for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
        for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
            s->is_pcm[i + j * min_pu_width] = 2;
/* transform_tree() (spec 7.3.8.8): recursively parse the residual
 * quad-tree below one coding block.  Decodes — or infers — the split
 * flag and the chroma cbf flags at this depth, then either recurses
 * into the four child transform blocks or parses the leaf transform
 * unit via hls_transform_unit().
 *
 * Returns 0 on success, a negative error code on failure.
 *
 * NOTE(review): this excerpt is missing several structural lines
 * (braces / else keywords / local declarations); the visible
 * statements are kept exactly as-is below. */
static int hls_transform_tree(HEVCContext *s, int x0, int y0,
                              int xBase, int yBase, int cb_xBase, int cb_yBase,
                              int log2_cb_size, int log2_trafo_size,
                              int trafo_depth, int blk_idx,
                              const int *base_cbf_cb, const int *base_cbf_cr)
    HEVCLocalContext *lc = s->HEVClc;
    uint8_t split_transform_flag;

    /* chroma cbf flags inherited from the parent node; index [1] is the
     * second (lower) chroma block used only for 4:2:2 content */
    cbf_cb[0] = base_cbf_cb[0];
    cbf_cb[1] = base_cbf_cb[1];
    cbf_cr[0] = base_cbf_cr[0];
    cbf_cr[1] = base_cbf_cr[1];

    /* select the intra prediction modes for this node: with an intra
     * NxN split, depth 1 picks the per-quadrant (blk_idx) modes */
    if (lc->cu.intra_split_flag) {
        if (trafo_depth == 1) {
            lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
            if (s->ps.sps->chroma_format_idc == 3) {
                lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
                lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[blk_idx];
                /* (other chroma formats: single chroma mode of block 0) */
                lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
                lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
        /* (no intra split: all blocks share the mode of block 0) */
        lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[0];
        lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
        lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];

    /* split_transform_flag is coded only when the size is within the
     * legal TB range, the depth budget allows it and an intra NxN split
     * does not already force a split; otherwise it is inferred */
    if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
        log2_trafo_size > s->ps.sps->log2_min_tb_size &&
        trafo_depth < lc->cu.max_trafo_depth &&
        !(lc->cu.intra_split_flag && trafo_depth == 0)) {
        split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
        /* inferred case: inter CUs with non-2Nx2N partitions must split
         * at depth 0 when the inter hierarchy depth is 0 */
        int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
                          lc->cu.pred_mode == MODE_INTER &&
                          lc->cu.part_mode != PART_2Nx2N &&
        split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
                               (lc->cu.intra_split_flag && trafo_depth == 0) ||

    /* chroma cbf flags: present unless chroma is absent or the chroma TB
     * would fall below the minimum size (log2_trafo_size == 2 for
     * subsampled formats) */
    if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
        if (trafo_depth == 0 || cbf_cb[0]) {
            cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
            /* 4:2:2: a second cbf for the lower chroma block */
            if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
                cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);

        if (trafo_depth == 0 || cbf_cr[0]) {
            cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
            if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
                cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);

    if (split_transform_flag) {
        /* recurse into the four half-size children */
        const int trafo_size_split = 1 << (log2_trafo_size - 1);
        const int x1 = x0 + trafo_size_split;
        const int y1 = y0 + trafo_size_split;

#define SUBDIVIDE(x, y, idx) \
    ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
                             log2_trafo_size - 1, trafo_depth + 1, idx, \

        SUBDIVIDE(x0, y0, 0);
        SUBDIVIDE(x1, y0, 1);
        SUBDIVIDE(x0, y1, 2);
        SUBDIVIDE(x1, y1, 3);

        /* leaf: decode cbf_luma (inferred as 1 when no chroma cbf forces
         * it to be coded) and then the transform unit itself */
        int min_tu_size = 1 << s->ps.sps->log2_min_tb_size;
        int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
        int min_tu_width = s->ps.sps->min_tb_width;

        if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
            cbf_cb[0] || cbf_cr[0] ||
            (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
            cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);

        ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
                                 log2_cb_size, log2_trafo_size,
                                 blk_idx, cbf_luma, cbf_cb, cbf_cr);

        // TODO: store cbf_luma somewhere else
        /* record cbf_luma per min-TU for the deblocking filter */
        for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
            for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
                int x_tu = (x0 + j) >> log2_min_tu_size;
                int y_tu = (y0 + i) >> log2_min_tu_size;
                s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;

        if (!s->sh.disable_deblocking_filter_flag) {
            ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
            if (s->ps.pps->transquant_bypass_enable_flag &&
                lc->cu.cu_transquant_bypass_flag)
                set_deblocking_bypass(s, x0, y0, log2_trafo_size);
/* pcm_sample() (spec 7.3.8.7): an IPCM coding unit carries raw sample
 * values that bypass prediction and transform.  The byte-aligned PCM
 * payload is skipped in the CABAC bytestream and re-read with a plain
 * bit reader, then written directly into the frame planes.
 * Returns 0 on success or a negative error code from init_get_bits(). */
static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
    HEVCLocalContext *lc = s->HEVClc;
    int cb_size = 1 << log2_cb_size;
    int stride0 = s->frame->linesize[0];
    uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
    int stride1 = s->frame->linesize[1];
    uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
    int stride2 = s->frame->linesize[2];
    uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];

    /* payload size in bits: full-resolution luma block plus both
     * chroma blocks at their subsampled sizes */
    int length = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
                 (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
                  ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
                 s->ps.sps->pcm.bit_depth_chroma;
    const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);

    if (!s->sh.disable_deblocking_filter_flag)
        ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);

    ret = init_get_bits(&gb, pcm, length);

    s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size, &gb, s->ps.sps->pcm.bit_depth);
    /* chroma planes only exist for chroma_format_idc != 0 (not 4:0:0) */
    if (s->ps.sps->chroma_format_idc) {
        s->hevcdsp.put_pcm(dst1, stride1,
                           cb_size >> s->ps.sps->hshift[1],
                           cb_size >> s->ps.sps->vshift[1],
                           &gb, s->ps.sps->pcm.bit_depth_chroma);
        s->hevcdsp.put_pcm(dst2, stride2,
                           cb_size >> s->ps.sps->hshift[2],
                           cb_size >> s->ps.sps->vshift[2],
                           &gb, s->ps.sps->pcm.bit_depth_chroma);
1296 * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1298 * @param s HEVC decoding context
1299 * @param dst target buffer for block data at block position
1300 * @param dststride stride of the dst buffer
1301 * @param ref reference picture buffer at origin (0, 0)
1302 * @param mv motion vector (relative to block position) to get pixel data from
1303 * @param x_off horizontal position of block from origin (0, 0)
1304 * @param y_off vertical position of block from origin (0, 0)
1305 * @param block_w width of block
1306 * @param block_h height of block
1307 * @param luma_weight weighting factor applied to the luma prediction
1308 * @param luma_offset additive offset applied to the luma prediction value
static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
                        AVFrame *ref, const Mv *mv, int x_off, int y_off,
                        int block_w, int block_h, int luma_weight, int luma_offset)
    HEVCLocalContext *lc = s->HEVClc;
    uint8_t *src = ref->data[0];
    ptrdiff_t srcstride = ref->linesize[0];
    int pic_width = s->ps.sps->width;
    int pic_height = s->ps.sps->height;
    /* explicit weighted prediction applies to P slices via
     * weighted_pred_flag and to B slices via weighted_bipred_flag */
    int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
                      (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
    /* DSP function index for this block width */
    int idx = ff_hevc_pel_weight[block_w];

    /* add the integer part of the quarter-pel motion vector */
    x_off += mv->x >> 2;
    y_off += mv->y >> 2;
    src += y_off * srcstride + x_off * (1 << s->ps.sps->pixel_shift);

    /* if the interpolation window (block plus QPEL filter taps) reaches
     * outside the picture, build a padded copy in the edge-emulation
     * buffer and interpolate from that instead */
    if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
        x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
        y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
        int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);

        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
                                 edge_emu_stride, srcstride,
                                 block_w + QPEL_EXTRA,
                                 block_h + QPEL_EXTRA,
                                 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
                                 pic_width, pic_height);
        src = lc->edge_emu_buffer + buf_offset;
        srcstride = edge_emu_stride;

    /* unweighted vs. weighted qpel interpolation; the fractional MV part
     * mx/my selects the filter (presumably an if/else on weight_flag —
     * the selecting lines are not visible in this excerpt, confirm) */
    s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
                                                  block_h, mx, my, block_w);
    s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
                                                    block_h, s->sh.luma_log2_weight_denom,
                                                    luma_weight, luma_offset, mx, my, block_w);
1357 * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1359 * @param s HEVC decoding context
1360 * @param dst target buffer for block data at block position
1361 * @param dststride stride of the dst buffer
1362 * @param ref0 reference picture0 buffer at origin (0, 0)
1363 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1364 * @param x_off horizontal position of block from origin (0, 0)
1365 * @param y_off vertical position of block from origin (0, 0)
1366 * @param block_w width of block
1367 * @param block_h height of block
1368 * @param ref1 reference picture1 buffer at origin (0, 0)
1369 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1370 * @param current_mv current motion vector structure
static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
                       AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
                       int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
    HEVCLocalContext *lc = s->HEVClc;
    ptrdiff_t src0stride = ref0->linesize[0];
    ptrdiff_t src1stride = ref1->linesize[0];
    int pic_width = s->ps.sps->width;
    int pic_height = s->ps.sps->height;
    /* fractional (quarter-pel) parts of both motion vectors */
    int mx0 = mv0->x & 3;
    int my0 = mv0->y & 3;
    int mx1 = mv1->x & 3;
    int my1 = mv1->y & 3;
    /* explicit weighted prediction enabled for this slice type? */
    int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
                      (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
    /* block positions displaced by the integer parts of the MVs */
    int x_off0 = x_off + (mv0->x >> 2);
    int y_off0 = y_off + (mv0->y >> 2);
    int x_off1 = x_off + (mv1->x >> 2);
    int y_off1 = y_off + (mv1->y >> 2);
    int idx = ff_hevc_pel_weight[block_w];

    /* unsigned cast avoids UB on intermediate overflow of the shift */
    uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
    uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);

    /* pad the L0 source through the edge-emulation buffer if the
     * interpolation window leaves the picture */
    if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
        x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
        y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
        int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);

        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
                                 edge_emu_stride, src0stride,
                                 block_w + QPEL_EXTRA,
                                 block_h + QPEL_EXTRA,
                                 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
                                 pic_width, pic_height);
        src0 = lc->edge_emu_buffer + buf_offset;
        src0stride = edge_emu_stride;

    /* same for the L1 source, using the second emulation buffer so both
     * padded blocks can coexist */
    if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
        x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
        y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
        int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);

        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
                                 edge_emu_stride, src1stride,
                                 block_w + QPEL_EXTRA,
                                 block_h + QPEL_EXTRA,
                                 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
                                 pic_width, pic_height);
        src1 = lc->edge_emu_buffer2 + buf_offset;
        src1stride = edge_emu_stride;

    /* L0 prediction lands in lc->tmp, then the *_bi/_bi_w call averages
     * (or weights) it with the L1 prediction into dst (presumably an
     * if/else on weight_flag selects between the two calls below — the
     * selecting lines are not visible in this excerpt, confirm) */
    s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
                                                block_h, mx0, my0, block_w);
    s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
                                                   block_h, mx1, my1, block_w);
    s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
                                                     block_h, s->sh.luma_log2_weight_denom,
                                                     s->sh.luma_weight_l0[current_mv->ref_idx[0]],
                                                     s->sh.luma_weight_l1[current_mv->ref_idx[1]],
                                                     s->sh.luma_offset_l0[current_mv->ref_idx[0]],
                                                     s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1447 * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1449 * @param s HEVC decoding context
1450 * @param dst1 target buffer for block data at block position (U plane)
1451 * @param dst2 target buffer for block data at block position (V plane)
1452 * @param dststride stride of the dst1 and dst2 buffers
1453 * @param ref reference picture buffer at origin (0, 0)
1454 * @param mv motion vector (relative to block position) to get pixel data from
1455 * @param x_off horizontal position of block from origin (0, 0)
1456 * @param y_off vertical position of block from origin (0, 0)
1457 * @param block_w width of block
1458 * @param block_h height of block
1459 * @param chroma_weight weighting factor applied to the chroma prediction
1460 * @param chroma_offset additive offset applied to the chroma prediction value
1463 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1464 ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1465 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1467 HEVCLocalContext *lc = s->HEVClc;
1468 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1469 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1470 const Mv *mv = ¤t_mv->mv[reflist];
1471 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1472 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1473 int idx = ff_hevc_pel_weight[block_w];
1474 int hshift = s->ps.sps->hshift[1];
1475 int vshift = s->ps.sps->vshift[1];
1476 intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift);
1477 intptr_t my = av_mod_uintp2(mv->y, 2 + vshift);
1478 intptr_t _mx = mx << (1 - hshift);
1479 intptr_t _my = my << (1 - vshift);
1481 x_off += mv->x >> (2 + hshift);
1482 y_off += mv->y >> (2 + vshift);
1483 src0 += y_off * srcstride + x_off * (1 << s->ps.sps->pixel_shift);
1485 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1486 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1487 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1488 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1489 int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1490 int buf_offset0 = EPEL_EXTRA_BEFORE *
1491 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1492 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1493 edge_emu_stride, srcstride,
1494 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1495 x_off - EPEL_EXTRA_BEFORE,
1496 y_off - EPEL_EXTRA_BEFORE,
1497 pic_width, pic_height);
1499 src0 = lc->edge_emu_buffer + buf_offset0;
1500 srcstride = edge_emu_stride;
1503 s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1504 block_h, _mx, _my, block_w);
1506 s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1507 block_h, s->sh.chroma_log2_weight_denom,
1508 chroma_weight, chroma_offset, _mx, _my, block_w);
1512 * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1514 * @param s HEVC decoding context
1515 * @param dst target buffer for block data at block position
1516 * @param dststride stride of the dst buffer
1517 * @param ref0 reference picture0 buffer at origin (0, 0)
1518 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1519 * @param x_off horizontal position of block from origin (0, 0)
1520 * @param y_off vertical position of block from origin (0, 0)
1521 * @param block_w width of block
1522 * @param block_h height of block
1523 * @param ref1 reference picture1 buffer at origin (0, 0)
1524 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1525 * @param current_mv current motion vector structure
1526 * @param cidx chroma component(cb, cr)
1528 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1529 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1531 HEVCLocalContext *lc = s->HEVClc;
1532 uint8_t *src1 = ref0->data[cidx+1];
1533 uint8_t *src2 = ref1->data[cidx+1];
1534 ptrdiff_t src1stride = ref0->linesize[cidx+1];
1535 ptrdiff_t src2stride = ref1->linesize[cidx+1];
1536 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1537 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1538 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1539 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1540 Mv *mv0 = ¤t_mv->mv[0];
1541 Mv *mv1 = ¤t_mv->mv[1];
1542 int hshift = s->ps.sps->hshift[1];
1543 int vshift = s->ps.sps->vshift[1];
1545 intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1546 intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1547 intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1548 intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1549 intptr_t _mx0 = mx0 << (1 - hshift);
1550 intptr_t _my0 = my0 << (1 - vshift);
1551 intptr_t _mx1 = mx1 << (1 - hshift);
1552 intptr_t _my1 = my1 << (1 - vshift);
1554 int x_off0 = x_off + (mv0->x >> (2 + hshift));
1555 int y_off0 = y_off + (mv0->y >> (2 + vshift));
1556 int x_off1 = x_off + (mv1->x >> (2 + hshift));
1557 int y_off1 = y_off + (mv1->y >> (2 + vshift));
1558 int idx = ff_hevc_pel_weight[block_w];
1559 src1 += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1560 src2 += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1562 if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1563 x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1564 y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1565 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1566 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1567 int buf_offset1 = EPEL_EXTRA_BEFORE *
1568 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1570 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1571 edge_emu_stride, src1stride,
1572 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1573 x_off0 - EPEL_EXTRA_BEFORE,
1574 y_off0 - EPEL_EXTRA_BEFORE,
1575 pic_width, pic_height);
1577 src1 = lc->edge_emu_buffer + buf_offset1;
1578 src1stride = edge_emu_stride;
1581 if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1582 x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1583 y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1584 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1585 int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1586 int buf_offset1 = EPEL_EXTRA_BEFORE *
1587 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1589 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1590 edge_emu_stride, src2stride,
1591 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1592 x_off1 - EPEL_EXTRA_BEFORE,
1593 y_off1 - EPEL_EXTRA_BEFORE,
1594 pic_width, pic_height);
1596 src2 = lc->edge_emu_buffer2 + buf_offset1;
1597 src2stride = edge_emu_stride;
1600 s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1601 block_h, _mx0, _my0, block_w);
1603 s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1604 src2, src2stride, lc->tmp,
1605 block_h, _mx1, _my1, block_w);
1607 s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1608 src2, src2stride, lc->tmp,
1610 s->sh.chroma_log2_weight_denom,
1611 s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1612 s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1613 s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1614 s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1615 _mx1, _my1, block_w);
1618 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1619 const Mv *mv, int y0, int height)
1621 int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1623 if (s->threads_type == FF_THREAD_FRAME )
1624 ff_thread_await_progress(&ref->tf, y, 0);
/* Parse AMVP (non-merge) motion data for one PU: inter_pred_idc, the
 * per-list reference index, the motion vector difference and the MVP
 * flag, then form each final motion vector by adding the decoded mvd to
 * the selected predictor. */
static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
                                  int nPbH, int log2_cb_size, int part_idx,
                                  int merge_idx, MvField *mv)
    HEVCLocalContext *lc = s->HEVClc;
    enum InterPredIdc inter_pred_idc = PRED_L0;

    ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
    /* P slices are always PRED_L0; only B slices code inter_pred_idc */
    if (s->sh.slice_type == B_SLICE)
        inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);

    /* list 0 is used for PRED_L0 and PRED_BI */
    if (inter_pred_idc != PRED_L1) {
        if (s->sh.nb_refs[L0])
            mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);

        mv->pred_flag = PF_L0;
        ff_hevc_hls_mvd_coding(s, x0, y0, 0);
        mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
        ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
                                 part_idx, merge_idx, mv, mvp_flag, 0);
        /* final MV = predictor + decoded mvd */
        mv->mv[0].x += lc->pu.mvd.x;
        mv->mv[0].y += lc->pu.mvd.y;

    /* list 1 is used for PRED_L1 and PRED_BI */
    if (inter_pred_idc != PRED_L0) {
        if (s->sh.nb_refs[L1])
            mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);

        /* mvd_l1_zero_flag: for bi-prediction the L1 mvd is not coded
         * and is inferred to be zero */
        if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
            AV_ZERO32(&lc->pu.mvd);
            ff_hevc_hls_mvd_coding(s, x0, y0, 1);

        mv->pred_flag += PF_L1;
        mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
        ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
                                 part_idx, merge_idx, mv, mvp_flag, 1);
        mv->mv[1].x += lc->pu.mvd.x;
        mv->mv[1].y += lc->pu.mvd.y;
1672 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1674 int log2_cb_size, int partIdx, int idx)
1676 #define POS(c_idx, x, y) \
1677 &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1678 (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1679 HEVCLocalContext *lc = s->HEVClc;
1681 struct MvField current_mv = {{{ 0 }}};
1683 int min_pu_width = s->ps.sps->min_pu_width;
1685 MvField *tab_mvf = s->ref->tab_mvf;
1686 RefPicList *refPicList = s->ref->refPicList;
1687 HEVCFrame *ref0 = NULL, *ref1 = NULL;
1688 uint8_t *dst0 = POS(0, x0, y0);
1689 uint8_t *dst1 = POS(1, x0, y0);
1690 uint8_t *dst2 = POS(2, x0, y0);
1691 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1692 int min_cb_width = s->ps.sps->min_cb_width;
1693 int x_cb = x0 >> log2_min_cb_size;
1694 int y_cb = y0 >> log2_min_cb_size;
1698 int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1701 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1703 if (skip_flag || lc->pu.merge_flag) {
1704 if (s->sh.max_num_merge_cand > 1)
1705 merge_idx = ff_hevc_merge_idx_decode(s);
1709 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1710 partIdx, merge_idx, ¤t_mv);
1712 hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1713 partIdx, merge_idx, ¤t_mv);
1716 x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1717 y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1719 for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1720 for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1721 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1723 if (current_mv.pred_flag & PF_L0) {
1724 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1727 hevc_await_progress(s, ref0, ¤t_mv.mv[0], y0, nPbH);
1729 if (current_mv.pred_flag & PF_L1) {
1730 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1733 hevc_await_progress(s, ref1, ¤t_mv.mv[1], y0, nPbH);
1736 if (current_mv.pred_flag == PF_L0) {
1737 int x0_c = x0 >> s->ps.sps->hshift[1];
1738 int y0_c = y0 >> s->ps.sps->vshift[1];
1739 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1740 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1742 luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1743 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1744 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1745 s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1747 if (s->ps.sps->chroma_format_idc) {
1748 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1749 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1750 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1751 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1752 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1753 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1755 } else if (current_mv.pred_flag == PF_L1) {
1756 int x0_c = x0 >> s->ps.sps->hshift[1];
1757 int y0_c = y0 >> s->ps.sps->vshift[1];
1758 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1759 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1761 luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1762 ¤t_mv.mv[1], x0, y0, nPbW, nPbH,
1763 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1764 s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1766 if (s->ps.sps->chroma_format_idc) {
1767 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1768 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1769 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1771 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1772 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1773 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1775 } else if (current_mv.pred_flag == PF_BI) {
1776 int x0_c = x0 >> s->ps.sps->hshift[1];
1777 int y0_c = y0 >> s->ps.sps->vshift[1];
1778 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1779 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1781 luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1782 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1783 ref1->frame, ¤t_mv.mv[1], ¤t_mv);
1785 if (s->ps.sps->chroma_format_idc) {
1786 chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1787 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 0);
1789 chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1790 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 1);
/* Derive the luma intra prediction mode of one PU (spec 8.4.2): build
 * the 3-entry most-probable-mode candidate list from the left and up
 * neighbours, pick candidate[mpm_idx] when prev_intra_luma_pred_flag is
 * set, otherwise reconstruct the mode from rem_intra_luma_pred_mode.
 * The result (and PF_INTRA) is written into the per-PU tables.
 * Returns the derived intra prediction mode. */
static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
                                int prev_intra_luma_pred_flag)
    HEVCLocalContext *lc = s->HEVClc;
    int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
    int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
    int min_pu_width = s->ps.sps->min_pu_width;
    int size_in_pus = pu_size >> s->ps.sps->log2_min_pu_size;
    /* position within the CTB, to test neighbour availability */
    int x0b = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
    int y0b = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);

    /* neighbour modes default to INTRA_DC when unavailable */
    int cand_up = (lc->ctb_up_flag || y0b) ?
                  s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
    int cand_left = (lc->ctb_left_flag || x0b) ?
                    s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;

    int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);

    MvField *tab_mvf = s->ref->tab_mvf;
    int intra_pred_mode;

    // intra_pred_mode prediction does not cross vertical CTB boundaries
    if ((y0 - 1) < y_ctb)

    if (cand_left == cand_up) {
        if (cand_left < 2) {
            /* both neighbours non-angular: use the default MPM list */
            candidate[0] = INTRA_PLANAR;
            candidate[1] = INTRA_DC;
            candidate[2] = INTRA_ANGULAR_26;
            /* equal angular neighbours: the mode plus its two adjacent
             * angular modes, wrapping over the 32 angular modes */
            candidate[0] = cand_left;
            candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
            candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
        /* distinct neighbours: both, plus the first of
         * PLANAR/DC/ANGULAR_26 not already present */
        candidate[0] = cand_left;
        candidate[1] = cand_up;
        if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
            candidate[2] = INTRA_PLANAR;
        } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
            candidate[2] = INTRA_DC;
            candidate[2] = INTRA_ANGULAR_26;

    if (prev_intra_luma_pred_flag) {
        intra_pred_mode = candidate[lc->pu.mpm_idx];
        /* rem_intra_luma_pred_mode indexes the non-MPM modes: sort the
         * candidates, then bump the index past each candidate it
         * reaches */
        if (candidate[0] > candidate[1])
            FFSWAP(uint8_t, candidate[0], candidate[1]);
        if (candidate[0] > candidate[2])
            FFSWAP(uint8_t, candidate[0], candidate[2]);
        if (candidate[1] > candidate[2])
            FFSWAP(uint8_t, candidate[1], candidate[2]);

        intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
        for (i = 0; i < 3; i++)
            if (intra_pred_mode >= candidate[i])

    /* write the intra prediction units into the mv array */
    for (i = 0; i < size_in_pus; i++) {
        memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
               intra_pred_mode, size_in_pus);
        for (j = 0; j < size_in_pus; j++) {
            tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;

    return intra_pred_mode;
/* Record the coding-tree depth ct_depth for the coding block at (x0, y0)
 * into s->tab_ct_depth, one row of `length` min-CB entries per min-CB row
 * covered by the block.
 * NOTE(review): this listing is elided (gaps in embedded line numbers);
 * the memset value/size arguments and the closing braces are not visible. */
1878 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1879 int log2_cb_size, int ct_depth)
/* block width expressed in minimum coding blocks */
1881 int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1882 int x_cb = x0 >> s->ps.sps->log2_min_cb_size;
1883 int y_cb = y0 >> s->ps.sps->log2_min_cb_size;
1886 for (y = 0; y < length; y++)
1887 memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
/* Lookup table used by intra_prediction_unit() for 4:2:2 chroma
 * (chroma_format_idc == 2): maps a luma intra prediction mode index to the
 * chroma intra prediction mode actually used (see use via mode_idx below). */
1891 static const uint8_t tab_mode_idx[] = {
1892 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20,
1893 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
/* Parse the intra prediction modes for a coding unit: first the luma mode of
 * each prediction block (1 PB for PART_2Nx2N, 4 PBs of half size for
 * PART_NxN), then the chroma mode(s) depending on chroma_format_idc
 * (3 = 4:4:4 -> one chroma mode per PB; 2 = 4:2:2 -> one mode remapped
 * through tab_mode_idx; other non-zero -> one 4:2:0-style mode).
 * NOTE(review): elided listing — several else branches and closing braces
 * are missing from this view. */
1895 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1898 HEVCLocalContext *lc = s->HEVClc;
/* chroma_mode values 0..3 map to planar(0)/angular26/angular10/DC(1);
 * value 4 means "derived from the co-located luma mode" (DM mode) */
1899 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1900 uint8_t prev_intra_luma_pred_flag[4];
1901 int split = lc->cu.part_mode == PART_NxN;
1902 int pb_size = (1 << log2_cb_size) >> split;
1903 int side = split + 1;
/* first pass: read all prev_intra_luma_pred_flag bits for the 1 or 4 PBs */
1907 for (i = 0; i < side; i++)
1908 for (j = 0; j < side; j++)
1909 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
/* second pass: read mpm_idx or rem_intra_luma_pred_mode and derive the
 * final luma mode per PB */
1911 for (i = 0; i < side; i++) {
1912 for (j = 0; j < side; j++) {
1913 if (prev_intra_luma_pred_flag[2 * i + j])
1914 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1916 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1918 lc->pu.intra_pred_mode[2 * i + j] =
1919 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1920 prev_intra_luma_pred_flag[2 * i + j]);
/* 4:4:4: an independent chroma mode is signalled for every PB */
1924 if (s->ps.sps->chroma_format_idc == 3) {
1925 for (i = 0; i < side; i++) {
1926 for (j = 0; j < side; j++) {
1927 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1928 if (chroma_mode != 4) {
/* when the table entry collides with the luma mode, angular 34 is used */
1929 if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1930 lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1932 lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1934 lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
/* 4:2:2: single chroma mode, remapped through tab_mode_idx */
1938 } else if (s->ps.sps->chroma_format_idc == 2) {
1940 lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1941 if (chroma_mode != 4) {
1942 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1945 mode_idx = intra_chroma_table[chroma_mode];
1947 mode_idx = lc->pu.intra_pred_mode[0];
1949 lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
/* any other chroma format except monochrome (idc == 0): single mode */
1950 } else if (s->ps.sps->chroma_format_idc != 0) {
1951 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1952 if (chroma_mode != 4) {
1953 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1954 lc->pu.intra_pred_mode_c[0] = 34;
1956 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1958 lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
/* Fill default intra state for a CU that carries no explicit intra syntax
 * (e.g. skipped/PCM CUs): mark the covered min-PU area of tab_ipm as
 * INTRA_DC, and — only if the CU is actually intra — flag the corresponding
 * MvField entries as PF_INTRA.
 * NOTE(review): elided listing — a `return`/braces around the
 * size_in_pus == 0 guard are not visible here. */
1963 static void intra_prediction_unit_default_value(HEVCContext *s,
1967 HEVCLocalContext *lc = s->HEVClc;
1968 int pb_size = 1 << log2_cb_size;
1969 int size_in_pus = pb_size >> s->ps.sps->log2_min_pu_size;
1970 int min_pu_width = s->ps.sps->min_pu_width;
1971 MvField *tab_mvf = s->ref->tab_mvf;
1972 int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1973 int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
/* block smaller than one min PU: nothing to mark */
1976 if (size_in_pus == 0)
1978 for (j = 0; j < size_in_pus; j++)
1979 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1980 if (lc->cu.pred_mode == MODE_INTRA)
1981 for (j = 0; j < size_in_pus; j++)
1982 for (k = 0; k < size_in_pus; k++)
1983 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
/* Decode one coding unit at (x0, y0): skip flag, prediction mode, partition
 * mode, PCM or intra/inter prediction units, residual transform tree,
 * deblocking boundary strengths, and the per-CU bookkeeping tables
 * (skip_flag, qp_y_tab, ct_depth). Mirrors coding_unit() of the HEVC spec.
 * NOTE(review): elided listing — case labels, else branches, error checks
 * and closing braces are missing from this view. */
1986 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1988 int cb_size = 1 << log2_cb_size;
1989 HEVCLocalContext *lc = s->HEVClc;
1990 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1991 int length = cb_size >> log2_min_cb_size;
1992 int min_cb_width = s->ps.sps->min_cb_width;
1993 int x_cb = x0 >> log2_min_cb_size;
1994 int y_cb = y0 >> log2_min_cb_size;
/* idx selects the block-size-dependent entry for hls_prediction_unit() */
1995 int idx = log2_cb_size - 2;
1996 int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
/* defaults before any syntax is parsed */
2001 lc->cu.pred_mode = MODE_INTRA;
2002 lc->cu.part_mode = PART_2Nx2N;
2003 lc->cu.intra_split_flag = 0;
2005 SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2006 for (x = 0; x < 4; x++)
2007 lc->pu.intra_pred_mode[x] = 1;
2008 if (s->ps.pps->transquant_bypass_enable_flag) {
2009 lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2010 if (lc->cu.cu_transquant_bypass_flag)
2011 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2013 lc->cu.cu_transquant_bypass_flag = 0;
/* non-I slices signal cu_skip_flag; propagate it over the covered min CBs */
2015 if (s->sh.slice_type != I_SLICE) {
2016 uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2018 x = y_cb * min_cb_width + x_cb;
2019 for (y = 0; y < length; y++) {
2020 memset(&s->skip_flag[x], skip_flag, length);
2023 lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2025 x = y_cb * min_cb_width + x_cb;
2026 for (y = 0; y < length; y++) {
2027 memset(&s->skip_flag[x], 0, length);
/* skipped CU: single merge PU, no residual, defaults for intra tables */
2032 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2033 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2034 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2036 if (!s->sh.disable_deblocking_filter_flag)
2037 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2041 if (s->sh.slice_type != I_SLICE)
2042 lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
/* part_mode is only coded for inter CUs or at the minimum CB size */
2043 if (lc->cu.pred_mode != MODE_INTRA ||
2044 log2_cb_size == s->ps.sps->log2_min_cb_size) {
2045 lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
2046 lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2047 lc->cu.pred_mode == MODE_INTRA;
2050 if (lc->cu.pred_mode == MODE_INTRA) {
/* PCM is only possible for 2Nx2N intra CUs within the SPS size bounds */
2051 if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2052 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2053 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2054 pcm_flag = ff_hevc_pcm_flag_decode(s);
2057 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2058 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2059 if (s->ps.sps->pcm.loop_filter_disable_flag)
2060 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2065 intra_prediction_unit(s, x0, y0, log2_cb_size);
2068 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
/* inter CU: emit one hls_prediction_unit() call per partition; the
 * geometry of each call follows the PART_* shapes of the spec */
2069 switch (lc->cu.part_mode) {
2071 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2074 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0, idx);
2075 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2078 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2079 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2082 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0, idx);
2083 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2086 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2087 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1, idx);
2090 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0, idx - 2);
2091 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2094 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2095 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1, idx - 2);
2098 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2099 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2100 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2101 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
/* rqt_root_cbf: whether any residual is present; only coded for
 * non-intra, non-(2Nx2N merge) CUs */
2107 int rqt_root_cbf = 1;
2109 if (lc->cu.pred_mode != MODE_INTRA &&
2110 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2111 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
/* style nit: `static const` is the conventional ordering (`const static`
 * is legal but unidiomatic) — left untouched in this doc-only pass */
2114 const static int cbf[2] = { 0 };
2115 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2116 s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2117 s->ps.sps->max_transform_hierarchy_depth_inter;
2118 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2120 log2_cb_size, 0, 0, cbf, cbf);
2124 if (!s->sh.disable_deblocking_filter_flag)
2125 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
/* derive qp if no cu_qp_delta was coded for this quantization group */
2130 if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2131 ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2133 x = y_cb * min_cb_width + x_cb;
2134 for (y = 0; y < length; y++) {
2135 memset(&s->qp_y_tab[x], lc->qp_y, length);
/* at the end of a quantization group, update the qp predictor */
2139 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2140 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2141 lc->qPy_pred = lc->qp_y;
2144 set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
/* Recursively decode the coding quadtree rooted at (x0, y0): read (or infer)
 * split_cu_flag, reset per-quantization-group state, and either recurse into
 * the four sub-blocks or decode a leaf coding unit followed by the
 * end_of_slice_flag check. Returns >0 while more CTB data follows, 0 at the
 * end of the slice segment, <0 on error (error paths elided from this view).
 * NOTE(review): elided listing — else branches, error checks and closing
 * braces are missing. */
2149 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2150 int log2_cb_size, int cb_depth)
2152 HEVCLocalContext *lc = s->HEVClc;
2153 const int cb_size = 1 << log2_cb_size;
2157 lc->ct_depth = cb_depth;
/* split_cu_flag is only coded when the block fits inside the picture and
 * is larger than the minimum CB; otherwise the split is inferred */
2158 if (x0 + cb_size <= s->ps.sps->width &&
2159 y0 + cb_size <= s->ps.sps->height &&
2160 log2_cb_size > s->ps.sps->log2_min_cb_size) {
2161 split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2163 split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
/* entering a new quantization group: reset cu_qp_delta state */
2165 if (s->ps.pps->cu_qp_delta_enabled_flag &&
2166 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2167 lc->tu.is_cu_qp_delta_coded = 0;
2168 lc->tu.cu_qp_delta = 0;
2171 if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2172 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2173 lc->tu.is_cu_chroma_qp_offset_coded = 0;
2177 int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2178 const int cb_size_split = cb_size >> 1;
2179 const int x1 = x0 + cb_size_split;
2180 const int y1 = y0 + cb_size_split;
/* recurse into the four quadrants; quadrants outside the picture are
 * skipped, and recursion stops once a sub-call signals end of data */
2184 more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2188 if (more_data && x1 < s->ps.sps->width) {
2189 more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2193 if (more_data && y1 < s->ps.sps->height) {
2194 more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2198 if (more_data && x1 < s->ps.sps->width &&
2199 y1 < s->ps.sps->height) {
2200 more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
/* leaving a quantization group: refresh the qp predictor */
2205 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2206 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2207 lc->qPy_pred = lc->qp_y;
2210 return ((x1 + cb_size_split) < s->ps.sps->width ||
2211 (y1 + cb_size_split) < s->ps.sps->height);
/* leaf: decode the coding unit itself */
2215 ret = hls_coding_unit(s, x0, y0, log2_cb_size);
/* end_of_slice_flag is present when this CU completes a CTB (or reaches
 * the right/bottom picture border) */
2218 if ((!((x0 + cb_size) %
2219 (1 << (s->ps.sps->log2_ctb_size))) ||
2220 (x0 + cb_size >= s->ps.sps->width)) &&
2222 (1 << (s->ps.sps->log2_ctb_size))) ||
2223 (y0 + cb_size >= s->ps.sps->height))) {
2224 int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2225 return !end_of_slice_flag;
/* Set up per-CTB neighbourhood state before decoding the CTB at
 * (x_ctb, y_ctb): tile/WPP horizontal end position, slice/tile boundary
 * flags, and availability of the left/up/up-left/up-right neighbour CTBs
 * used by intra prediction and CABAC context derivation.
 * NOTE(review): elided listing — some else branches/braces are missing. */
2234 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2237 HEVCLocalContext *lc = s->HEVClc;
2238 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2239 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* offset of this CTB from the start of the current slice (raster order) */
2240 int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2242 s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2244 if (s->ps.pps->entropy_coding_sync_enabled_flag) {
/* WPP: each CTB row starts a new qp group */
2245 if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2246 lc->first_qp_group = 1;
2247 lc->end_of_tiles_x = s->ps.sps->width;
2248 } else if (s->ps.pps->tiles_enabled_flag) {
/* entering a new tile: recompute its right edge in samples */
2249 if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2250 int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2251 lc->end_of_tiles_x = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2252 lc->first_qp_group = 1;
2255 lc->end_of_tiles_x = s->ps.sps->width;
2258 lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2260 lc->boundary_flags = 0;
/* with tiles, both tile and slice boundaries must be checked explicitly */
2261 if (s->ps.pps->tiles_enabled_flag) {
2262 if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2263 lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2264 if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2265 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2266 if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2267 lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2268 if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2269 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
/* no tiles: slice boundaries follow directly from the in-slice offset */
2271 if (ctb_addr_in_slice <= 0)
2272 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2273 if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2274 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2277 lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2278 lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2279 lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2280 lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
/* Single-threaded slice decode entry point (run through avctx->execute):
 * walks the CTBs of the slice in tile-scan order, initializing CABAC and
 * per-CTB deblock parameters, decoding each coding quadtree, then applying
 * in-loop filters. Return value (elided here) is the number of decoded CTBs
 * or a negative error code, judging by the callers' checks in
 * decode_nal_unit().
 * NOTE(review): elided listing — loop increments, error returns and braces
 * are missing from this view. */
2283 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2285 HEVCContext *s = avctxt->priv_data;
2286 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2290 int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
/* a dependent slice segment cannot be the first segment of the picture */
2292 if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2293 av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2294 return AVERROR_INVALIDDATA;
/* dependent segment: its predecessor must belong to the same slice */
2297 if (s->sh.dependent_slice_segment_flag) {
2298 int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2299 if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2300 av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2301 return AVERROR_INVALIDDATA;
2305 while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2306 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2308 x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2309 y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2310 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2312 ff_hevc_cabac_init(s, ctb_addr_ts);
2314 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
/* per-CTB deblock parameters come from the current slice header */
2316 s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2317 s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2318 s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2320 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2321 if (more_data < 0) {
/* mark the CTB as broken so later consistency checks fail cleanly */
2322 s->tab_slice_address[ctb_addr_rs] = -1;
2328 ff_hevc_save_states(s, ctb_addr_ts);
2329 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* last CTB of the picture: run the remaining filter pass */
2332 if (x_ctb + ctb_size >= s->ps.sps->width &&
2333 y_ctb + ctb_size >= s->ps.sps->height)
2334 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
/* Decode the slice data without wavefront parallelism: dispatch a single
 * hls_decode_entry job through avctx->execute. (Setup of `arg`/`ret` and the
 * return statement are elided from this view.) */
2339 static int hls_slice_data(HEVCContext *s)
2347 s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
/* Wavefront (WPP) worker: decodes one CTB row using the per-thread context
 * s->sList[self_id]. Synchronizes with the row above via
 * ff_thread_await_progress2 / ff_thread_report_progress2 and aborts the
 * whole wavefront by setting s1->wpp_err when a row ends prematurely.
 * NOTE(review): elided listing — the ctb_row == 0 branch, some braces and
 * the return statements are not visible here. */
2350 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2352 HEVCContext *s1 = avctxt->priv_data, *s;
2353 HEVCLocalContext *lc;
2354 int ctb_size = 1<< s1->ps.sps->log2_ctb_size;
2356 int *ctb_row_p = input_ctb_row;
2357 int ctb_row = ctb_row_p[job];
2358 int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2359 int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2360 int thread = ctb_row % s1->threads_number;
/* each job uses its own cloned HEVCContext/HEVCLocalContext */
2363 s = s1->sList[self_id];
/* rows > 0 start at the byte offset recorded from the entry points */
2367 ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2371 ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2374 while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2375 int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2376 int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2378 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
/* wait until the row above is SHIFT_CTB_WPP CTBs ahead */
2380 ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
/* another row failed: report progress so nobody deadlocks, then bail */
2382 if (avpriv_atomic_int_get(&s1->wpp_err)){
2383 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2387 ff_hevc_cabac_init(s, ctb_addr_ts);
2388 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2389 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2391 if (more_data < 0) {
2392 s->tab_slice_address[ctb_addr_rs] = -1;
2398 ff_hevc_save_states(s, ctb_addr_ts);
2399 ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2400 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* row ended before the picture edge on a non-final row: flag wpp_err */
2402 if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2403 avpriv_atomic_int_set(&s1->wpp_err, 1);
2404 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2408 if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2409 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2410 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2413 ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* stop at the end of the CTB row */
2416 if(x_ctb >= s->ps.sps->width) {
2420 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/* Decode the slice data with wavefront parallel processing: clone one
 * HEVCContext per worker thread, translate the slice-header entry-point
 * offsets into per-row (offset, size) pairs — compensating for emulation-
 * prevention bytes recorded in skipped_bytes_pos — then dispatch one
 * hls_decode_entry_wpp job per CTB row via avctx->execute2.
 * NOTE(review): elided listing; also note the av_malloc/av_mallocz results
 * for sList/HEVClcList are memcpy'd/used without visible NULL checks —
 * verify error handling in the elided lines. */
2425 static int hls_slice_data_wpp(HEVCContext *s, const HEVCNAL *nal)
2427 const uint8_t *data = nal->data;
2428 int length = nal->size;
2429 HEVCLocalContext *lc = s->HEVClc;
2430 int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2431 int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2433 int startheader, cmpt = 0;
2439 return AVERROR(ENOMEM);
2444 ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
/* lazily create the per-thread context clones */
2447 for (i = 1; i < s->threads_number; i++) {
2448 s->sList[i] = av_malloc(sizeof(HEVCContext));
2449 memcpy(s->sList[i], s, sizeof(HEVCContext));
2450 s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2451 s->sList[i]->HEVClc = s->HEVClcList[i];
/* byte offset of the first entry point, past the slice header */
2455 offset = (lc->gb.index >> 3);
/* cmpt counts emulation-prevention bytes inside each entry-point span */
2457 for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2458 if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2464 for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2465 offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2466 for (j = 0, cmpt = 0, startheader = offset
2467 + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2468 if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2473 s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2474 s->sh.offset[i - 1] = offset;
/* last segment runs to the end of the NAL unit */
2477 if (s->sh.num_entry_point_offsets != 0) {
2478 offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2479 s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2480 s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
/* refresh clones with the now-complete slice state */
2485 for (i = 1; i < s->threads_number; i++) {
2486 s->sList[i]->HEVClc->first_qp_group = 1;
2487 s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2488 memcpy(s->sList[i], s, sizeof(HEVCContext));
2489 s->sList[i]->HEVClc = s->HEVClcList[i];
2492 avpriv_atomic_int_set(&s->wpp_err, 0);
2493 ff_reset_entries(s->avctx);
2495 for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2500 if (s->ps.pps->entropy_coding_sync_enabled_flag)
2501 s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2503 for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
/* Export SEI-derived side data on the output frame: stereo 3D information
 * from the frame-packing-arrangement SEI, and a display matrix from the
 * display-orientation SEI. Returns 0 or AVERROR(ENOMEM).
 * NOTE(review): elided listing — case labels and closing braces are
 * missing from this view. */
2510 static int set_side_data(HEVCContext *s)
2512 AVFrame *out = s->ref->frame;
/* only arrangement types 3..5 and interpretation types 1..2 map onto
 * AVStereo3D */
2514 if (s->sei_frame_packing_present &&
2515 s->frame_packing_arrangement_type >= 3 &&
2516 s->frame_packing_arrangement_type <= 5 &&
2517 s->content_interpretation_type > 0 &&
2518 s->content_interpretation_type < 3) {
2519 AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2521 return AVERROR(ENOMEM);
2523 switch (s->frame_packing_arrangement_type) {
2525 if (s->quincunx_subsampling)
2526 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2528 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2531 stereo->type = AV_STEREO3D_TOPBOTTOM;
2534 stereo->type = AV_STEREO3D_FRAMESEQUENCE;
/* interpretation type 2 means right view first */
2538 if (s->content_interpretation_type == 2)
2539 stereo->flags = AV_STEREO3D_FLAG_INVERT;
2542 if (s->sei_display_orientation_present &&
2543 (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
/* SEI rotation is in 1/65536-turn units; convert to degrees */
2544 double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2545 AVFrameSideData *rotation = av_frame_new_side_data(out,
2546 AV_FRAME_DATA_DISPLAYMATRIX,
2547 sizeof(int32_t) * 9);
2549 return AVERROR(ENOMEM);
2551 av_display_rotation_set((int32_t *)rotation->data, angle);
2552 av_display_matrix_flip((int32_t *)rotation->data,
2553 s->sei_hflip, s->sei_vflip);
/* Start decoding a new frame: clear the per-picture tables (boundary
 * strengths, cbf_luma, is_pcm, slice-address map), acquire a new reference
 * frame, build the frame RPS, attach side data, and immediately try to
 * output an already-finished frame from the DPB. On error the new reference
 * is released (fail path partially elided).
 * NOTE(review): elided listing — error-check lines between the calls are
 * missing from this view. */
2559 static int hevc_frame_start(HEVCContext *s)
2561 HEVCLocalContext *lc = s->HEVClc;
2562 int pic_size_in_ctb = ((s->ps.sps->width >> s->ps.sps->log2_min_cb_size) + 1) *
2563 ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
2566 memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2567 memset(s->vertical_bs, 0, s->bs_width * s->bs_height);
2568 memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2569 memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2570 memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2573 s->first_nal_type = s->nal_unit_type;
2575 if (s->ps.pps->tiles_enabled_flag)
2576 lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2578 ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2582 ret = ff_hevc_frame_rps(s);
2584 av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2588 s->ref->frame->key_frame = IS_IRAP(s);
2590 ret = set_side_data(s);
/* slice_type: 2=I,1=P,0=B -> AV_PICTURE_TYPE_{I,P,B} */
2594 s->frame->pict_type = 3 - s->sh.slice_type;
2597 ff_hevc_bump_frame(s);
2599 av_frame_unref(s->output_frame);
2600 ret = ff_hevc_output_frame(s, s->output_frame, 0);
/* frame-threading setup may be finished now unless a hwaccel is active */
2604 if (!s->avctx->hwaccel)
2605 ff_thread_finish_setup(s->avctx);
/* fail path: drop the newly acquired reference */
2611 ff_hevc_unref_frame(s, s->ref, ~0);
/* Decode a single NAL unit: dispatch parameter-set/SEI NALs to their
 * parsers, and for VCL (slice) NALs parse the slice header, handle random-
 * access (RASL skipping, max_ra), start the frame on the first slice, build
 * reference lists, then decode the slice data (hwaccel, WPP or single
 * threaded).
 * NOTE(review): elided listing — case labels, gotos and error checks are
 * missing from this view. */
2616 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2618 HEVCLocalContext *lc = s->HEVClc;
2619 GetBitContext *gb = &lc->gb;
2620 int ctb_addr_ts, ret;
2623 s->nal_unit_type = nal->type;
2624 s->temporal_id = nal->temporal_id;
2626 switch (s->nal_unit_type) {
2628 ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2633 ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2634 s->apply_defdispwin);
2639 ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2643 case NAL_SEI_PREFIX:
2644 case NAL_SEI_SUFFIX:
2645 ret = ff_hevc_decode_nal_sei(s);
/* VCL NAL unit types (slice segments) */
2656 case NAL_BLA_W_RADL:
2658 case NAL_IDR_W_RADL:
2665 ret = hls_slice_header(s);
/* first random-access point handling */
2669 if (s->max_ra == INT_MAX) {
2670 if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2674 s->max_ra = INT_MIN;
/* RASL pictures preceding the recovery point are skipped */
2678 if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2679 s->poc <= s->max_ra) {
2683 if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2684 s->max_ra = INT_MIN;
2687 if (s->sh.first_slice_in_pic_flag) {
2688 ret = hevc_frame_start(s);
2691 } else if (!s->ref) {
2692 av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
/* all VCL NALs of one picture must share the same type */
2696 if (s->nal_unit_type != s->first_nal_type) {
2697 av_log(s->avctx, AV_LOG_ERROR,
2698 "Non-matching NAL types of the VCL NALUs: %d %d\n",
2699 s->first_nal_type, s->nal_unit_type);
2700 return AVERROR_INVALIDDATA;
2703 if (!s->sh.dependent_slice_segment_flag &&
2704 s->sh.slice_type != I_SLICE) {
2705 ret = ff_hevc_slice_rpl(s);
2707 av_log(s->avctx, AV_LOG_WARNING,
2708 "Error constructing the reference lists for the current slice.\n");
2713 if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2714 ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2719 if (s->avctx->hwaccel) {
2720 ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
/* software path: WPP when threads and entry points are available */
2724 if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2725 ctb_addr_ts = hls_slice_data_wpp(s, nal);
2727 ctb_addr_ts = hls_slice_data(s);
2728 if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2732 if (ctb_addr_ts < 0) {
/* EOS/EOB: next sequence, reset random-access state */
2740 s->seq_decode = (s->seq_decode + 1) & 0xff;
2741 s->max_ra = INT_MAX;
2747 av_log(s->avctx, AV_LOG_INFO,
2748 "Skipping NAL unit %d\n", s->nal_unit_type);
2753 if (s->avctx->err_recognition & AV_EF_EXPLODE)
/* Split a packet into NAL units, detect EOS/EOB (to reset the sequence on
 * the next packet), then decode each NAL in order. On a per-NAL failure the
 * loop continues (warning only) unless elided error-recognition checks say
 * otherwise; finally report full progress on the current frame for frame
 * threading.
 * NOTE(review): elided listing — braces and some error handling are
 * missing from this view. */
2758 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2763 s->last_eos = s->eos;
2766 /* split the input packet into NAL units, so we know the upper bound on the
2767 * number of slices in the frame */
2768 ret = ff_hevc_split_packet(s, &s->pkt, buf, length, s->avctx, s->is_nalff,
2769 s->nal_length_size);
2771 av_log(s->avctx, AV_LOG_ERROR,
2772 "Error splitting the input into NAL units.\n");
/* pre-scan for end-of-sequence/bitstream NALs */
2776 for (i = 0; i < s->pkt.nb_nals; i++) {
2777 if (s->pkt.nals[i].type == NAL_EOB_NUT ||
2778 s->pkt.nals[i].type == NAL_EOS_NUT)
2782 /* decode the NAL units */
2783 for (i = 0; i < s->pkt.nb_nals; i++) {
2784 s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
2786 ret = decode_nal_unit(s, &s->pkt.nals[i]);
2788 av_log(s->avctx, AV_LOG_WARNING,
2789 "Error parsing NAL unit #%d.\n", i);
2795 if (s->ref && s->threads_type == FF_THREAD_FRAME)
2796 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/* Log a 16-byte MD5 digest as 32 lowercase hex characters at the given
 * log level. (Braces and the `int i;` declaration are elided here.) */
2801 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
2804 for (i = 0; i < 16; i++)
2805 av_log(log_ctx, level, "%02"PRIx8, md5[i]);
/* Verify the decoded frame against the per-plane MD5 checksums carried in
 * the picture-hash SEI (s->md5). For >8-bit formats the samples are
 * byteswapped into checksum_buf first, since the SEI checksums are computed
 * over little-endian data. Returns 0 on match, AVERROR_INVALIDDATA on
 * mismatch, AVERROR(EINVAL)/ENOMEM on setup failure.
 * NOTE(review): elided listing — some braces/locals are missing. */
2808 static int verify_md5(HEVCContext *s, AVFrame *frame)
2810 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2815 return AVERROR(EINVAL);
/* 1 when samples are wider than 8 bits (2 bytes per sample) */
2817 pixel_shift = desc->comp[0].depth_minus1 > 7;
2819 av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2822 /* the checksums are LE, so we have to byteswap for >8bpp formats
2825 if (pixel_shift && !s->checksum_buf) {
2826 av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2827 FFMAX3(frame->linesize[0], frame->linesize[1],
2828 frame->linesize[2]));
2829 if (!s->checksum_buf)
2830 return AVERROR(ENOMEM);
/* one MD5 per plane, over coded (not cropped) dimensions */
2834 for (i = 0; frame->data[i]; i++) {
2835 int width = s->avctx->coded_width;
2836 int height = s->avctx->coded_height;
2837 int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
2838 int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2841 av_md5_init(s->md5_ctx);
2842 for (j = 0; j < h; j++) {
2843 const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2846 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2847 (const uint16_t *) src, w);
2848 src = s->checksum_buf;
2851 av_md5_update(s->md5_ctx, src, w << pixel_shift);
2853 av_md5_final(s->md5_ctx, md5);
2855 if (!memcmp(md5, s->md5[i], 16)) {
2856 av_log (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2857 print_md5(s->avctx, AV_LOG_DEBUG, md5);
2858 av_log (s->avctx, AV_LOG_DEBUG, "; ");
2860 av_log (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2861 print_md5(s->avctx, AV_LOG_ERROR, md5);
2862 av_log (s->avctx, AV_LOG_ERROR, " != ");
2863 print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2864 av_log (s->avctx, AV_LOG_ERROR, "\n");
2865 return AVERROR_INVALIDDATA;
2869 av_log(s->avctx, AV_LOG_DEBUG, "\n");
/* Top-level decode callback: an empty packet flushes a buffered frame from
 * the DPB; otherwise decode the packet's NAL units, finish any hwaccel
 * frame, optionally verify the SEI MD5 checksum, and hand out the pending
 * output frame via *got_output (got_output assignments are elided here).
 * NOTE(review): elided listing — braces and some statements are missing. */
2874 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2878 HEVCContext *s = avctx->priv_data;
/* flush path: output a buffered frame even without new input */
2881 ret = ff_hevc_output_frame(s, data, 1);
2890 ret = decode_nal_units(s, avpkt->data, avpkt->size);
2894 if (avctx->hwaccel) {
/* hwaccel failure is logged but decoding of later frames continues */
2895 if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2896 av_log(avctx, AV_LOG_ERROR,
2897 "hardware accelerator failed to decode picture\n");
2899 /* verify the SEI checksum */
2900 if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2902 ret = verify_md5(s, s->ref->frame);
2903 if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2904 ff_hevc_unref_frame(s, s->ref, ~0);
2911 if (s->is_decoded) {
2912 av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2916 if (s->output_frame->buf[0]) {
2917 av_frame_move_ref(data, s->output_frame);
/* Create a new reference to src in dst for frame threading: ref the frame
 * buffers and the tab_mvf/rpl side buffers, copy the scalar metadata, and
 * ref the hwaccel private buffer if present. On any allocation failure the
 * partially-built dst is fully unreffed and ENOMEM returned.
 * NOTE(review): elided listing — `goto fail` lines and braces are missing
 * from this view. */
2924 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2928 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2932 dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2933 if (!dst->tab_mvf_buf)
2935 dst->tab_mvf = src->tab_mvf;
2937 dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2938 if (!dst->rpl_tab_buf)
2940 dst->rpl_tab = src->rpl_tab;
2942 dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2946 dst->poc = src->poc;
2947 dst->ctb_count = src->ctb_count;
2948 dst->window = src->window;
2949 dst->flags = src->flags;
2950 dst->sequence = src->sequence;
2952 if (src->hwaccel_picture_private) {
2953 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2954 if (!dst->hwaccel_priv_buf)
2956 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
/* failure path: release everything acquired so far */
2961 ff_hevc_unref_frame(s, dst, ~0);
2962 return AVERROR(ENOMEM);
/* Free all decoder state: MD5 context, skipped-bytes tracking, CABAC state,
 * SAO buffers, output frame, DPB frames, parameter-set lists, slice-header
 * arrays, per-thread contexts and the NAL packet buffers. Also used as the
 * error path of hevc_init_context(), so every field must be safe to free
 * when only partially initialized.
 * NOTE(review): elided listing — some braces/statements are missing. */
2965 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2967 HEVCContext *s = avctx->priv_data;
2972 av_freep(&s->md5_ctx);
2974 for(i=0; i < s->pkt.nals_allocated; i++) {
2975 av_freep(&s->skipped_bytes_pos_nal[i]);
2977 av_freep(&s->skipped_bytes_pos_nal);
2979 av_freep(&s->cabac_state);
2981 for (i = 0; i < 3; i++) {
2982 av_freep(&s->sao_pixel_buffer_h[i]);
2983 av_freep(&s->sao_pixel_buffer_v[i]);
2985 av_frame_free(&s->output_frame);
2987 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2988 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2989 av_frame_free(&s->DPB[i].frame);
2992 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
2993 av_buffer_unref(&s->ps.vps_list[i]);
2994 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
2995 av_buffer_unref(&s->ps.sps_list[i]);
2996 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
2997 av_buffer_unref(&s->ps.pps_list[i]);
3002 av_freep(&s->sh.entry_point_offset);
3003 av_freep(&s->sh.offset);
3004 av_freep(&s->sh.size);
/* per-thread WPP contexts (index 0 is the main context, freed below) */
3006 for (i = 1; i < s->threads_number; i++) {
3007 HEVCLocalContext *lc = s->HEVClcList[i];
3009 av_freep(&s->HEVClcList[i]);
3010 av_freep(&s->sList[i]);
3013 if (s->HEVClc == s->HEVClcList[0])
3015 av_freep(&s->HEVClcList[0]);
3017 for (i = 0; i < s->pkt.nals_allocated; i++)
3018 av_freep(&s->pkt.nals[i].rbsp_buffer);
3019 av_freep(&s->pkt.nals);
3020 s->pkt.nals_allocated = 0;
/* Allocate the long-lived decoder state: main HEVCLocalContext, CABAC state
 * array, output frame, DPB frame structs, MD5 context; initialize bswap DSP
 * and mark the context initialized. On any failure jumps to a cleanup label
 * (elided) that calls hevc_decode_free() and returns ENOMEM. */
3025 static av_cold int hevc_init_context(AVCodecContext *avctx)
3027 HEVCContext *s = avctx->priv_data;
3032 s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3035 s->HEVClcList[0] = s->HEVClc;
3038 s->cabac_state = av_malloc(HEVC_CONTEXTS);
3039 if (!s->cabac_state)
3042 s->output_frame = av_frame_alloc();
3043 if (!s->output_frame)
3046 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3047 s->DPB[i].frame = av_frame_alloc();
3048 if (!s->DPB[i].frame)
3050 s->DPB[i].tf.f = s->DPB[i].frame;
/* no random-access point seen yet */
3053 s->max_ra = INT_MAX;
3055 s->md5_ctx = av_md5_alloc();
3059 ff_bswapdsp_init(&s->bdsp);
3061 s->context_initialized = 1;
3067 hevc_decode_free(avctx);
3068 return AVERROR(ENOMEM);
/* Frame-threading context update: copy decoder state from the source thread
 * context s0 into s — DPB references, VPS/SPS/PPS buffer refs, sequence/
 * POC/random-access bookkeeping and threading parameters — re-applying the
 * SPS when it changed.
 * NOTE(review): elided listing — some statements (e.g. the eos handling
 * suggested by the seq_decode reset near the end) are missing from view. */
3071 static int hevc_update_thread_context(AVCodecContext *dst,
3072 const AVCodecContext *src)
3074 HEVCContext *s = dst->priv_data;
3075 HEVCContext *s0 = src->priv_data;
3078 if (!s->context_initialized) {
3079 ret = hevc_init_context(dst);
3084 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3085 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3086 if (s0->DPB[i].frame->buf[0]) {
3087 ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3093 if (s->ps.sps != s0->ps.sps)
/* re-ref all parameter set buffers from the source context */
3095 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
3096 av_buffer_unref(&s->ps.vps_list[i]);
3097 if (s0->ps.vps_list[i]) {
3098 s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
3099 if (!s->ps.vps_list[i])
3100 return AVERROR(ENOMEM);
3104 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3105 av_buffer_unref(&s->ps.sps_list[i]);
3106 if (s0->ps.sps_list[i]) {
3107 s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
3108 if (!s->ps.sps_list[i])
3109 return AVERROR(ENOMEM);
3113 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
3114 av_buffer_unref(&s->ps.pps_list[i]);
3115 if (s0->ps.pps_list[i]) {
3116 s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
3117 if (!s->ps.pps_list[i])
3118 return AVERROR(ENOMEM);
/* the active SPS changed in the source thread: re-apply it here */
3122 if (s->ps.sps != s0->ps.sps)
3123 if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
3126 s->seq_decode = s0->seq_decode;
3127 s->seq_output = s0->seq_output;
3128 s->pocTid0 = s0->pocTid0;
3129 s->max_ra = s0->max_ra;
3132 s->is_nalff = s0->is_nalff;
3133 s->nal_length_size = s0->nal_length_size;
3135 s->threads_number = s0->threads_number;
3136 s->threads_type = s0->threads_type;
/* presumably guarded by an (elided) eos check: start a new sequence */
3139 s->seq_decode = (s->seq_decode + 1) & 0xff;
3140 s->max_ra = INT_MAX;
3146 static int hevc_decode_extradata(HEVCContext *s)
3148 AVCodecContext *avctx = s->avctx;
3152 bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3154 if (avctx->extradata_size > 3 &&
3155 (avctx->extradata[0] || avctx->extradata[1] ||
3156 avctx->extradata[2] > 1)) {
3157 /* It seems the extradata is encoded as hvcC format.
3158 * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3159 * is finalized. When finalized, configurationVersion will be 1 and we
3160 * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3161 int i, j, num_arrays, nal_len_size;
3165 bytestream2_skip(&gb, 21);
3166 nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3167 num_arrays = bytestream2_get_byte(&gb);
3169 /* nal units in the hvcC always have length coded with 2 bytes,
3170 * so put a fake nal_length_size = 2 while parsing them */
3171 s->nal_length_size = 2;
3173 /* Decode nal units from hvcC. */
3174 for (i = 0; i < num_arrays; i++) {
3175 int type = bytestream2_get_byte(&gb) & 0x3f;
3176 int cnt = bytestream2_get_be16(&gb);
3178 for (j = 0; j < cnt; j++) {
3179 // +2 for the nal size field
3180 int nalsize = bytestream2_peek_be16(&gb) + 2;
3181 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3182 av_log(s->avctx, AV_LOG_ERROR,
3183 "Invalid NAL unit size in extradata.\n");
3184 return AVERROR_INVALIDDATA;
3187 ret = decode_nal_units(s, gb.buffer, nalsize);
3189 av_log(avctx, AV_LOG_ERROR,
3190 "Decoding nal unit %d %d from hvcC failed\n",
3194 bytestream2_skip(&gb, nalsize);
3198 /* Now store right nal length size, that will be used to parse
3200 s->nal_length_size = nal_len_size;
3203 ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3208 /* export stream parameters from the first SPS */
3209 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3210 if (s->ps.sps_list[i]) {
3211 const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3212 export_stream_params(s->avctx, s, sps);
3220 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3222 HEVCContext *s = avctx->priv_data;
3225 ff_init_cabac_states();
3227 avctx->internal->allocate_progress = 1;
3229 ret = hevc_init_context(avctx);
3233 s->enable_parallel_tiles = 0;
3234 s->picture_struct = 0;
3236 if(avctx->active_thread_type & FF_THREAD_SLICE)
3237 s->threads_number = avctx->thread_count;
3239 s->threads_number = 1;
3241 if (avctx->extradata_size > 0 && avctx->extradata) {
3242 ret = hevc_decode_extradata(s);
3244 hevc_decode_free(avctx);
3249 if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3250 s->threads_type = FF_THREAD_FRAME;
3252 s->threads_type = FF_THREAD_SLICE;
3257 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3259 HEVCContext *s = avctx->priv_data;
3262 memset(s, 0, sizeof(*s));
3264 ret = hevc_init_context(avctx);
3271 static void hevc_decode_flush(AVCodecContext *avctx)
3273 HEVCContext *s = avctx->priv_data;
3274 ff_hevc_flush_dpb(s);
3275 s->max_ra = INT_MAX;
/* helpers for the AVOption table below */
#define OFFSET(x) offsetof(HEVCContext, x)
#define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3281 static const AVProfile profiles[] = {
3282 { FF_PROFILE_HEVC_MAIN, "Main" },
3283 { FF_PROFILE_HEVC_MAIN_10, "Main 10" },
3284 { FF_PROFILE_HEVC_MAIN_STILL_PICTURE, "Main Still Picture" },
3285 { FF_PROFILE_HEVC_REXT, "Rext" },
3286 { FF_PROFILE_UNKNOWN },
3289 static const AVOption options[] = {
3290 { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3291 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3292 { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3293 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3297 static const AVClass hevc_decoder_class = {
3298 .class_name = "HEVC decoder",
3299 .item_name = av_default_item_name,
3301 .version = LIBAVUTIL_VERSION_INT,
3304 AVCodec ff_hevc_decoder = {
3306 .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3307 .type = AVMEDIA_TYPE_VIDEO,
3308 .id = AV_CODEC_ID_HEVC,
3309 .priv_data_size = sizeof(HEVCContext),
3310 .priv_class = &hevc_decoder_class,
3311 .init = hevc_decode_init,
3312 .close = hevc_decode_free,
3313 .decode = hevc_decode_frame,
3314 .flush = hevc_decode_flush,
3315 .update_thread_context = hevc_update_thread_context,
3316 .init_thread_copy = hevc_init_thread_copy,
3317 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3318 CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
3319 .profiles = NULL_IF_CONFIG_SMALL(profiles),