4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 * Copyright (C) 2012 - 2013 Mickael Raulet
6 * Copyright (C) 2012 - 2013 Gildas Cocherel
7 * Copyright (C) 2012 - 2013 Wassim Hamidouche
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/atomic.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/common.h"
29 #include "libavutil/display.h"
30 #include "libavutil/internal.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
/**
 * Lookup table mapping a prediction-block width in pixels (2..64) to a
 * compact function-table index (presumably indexing the pel/weighted
 * prediction DSP function arrays — only the listed widths are valid;
 * all other entries remain zero-initialized).
 */
const uint8_t ff_hevc_pel_weight[65] = {
    [2]  = 0,
    [4]  = 1,
    [6]  = 2,
    [8]  = 3,
    [12] = 4,
    [16] = 5,
    [24] = 6,
    [32] = 7,
    [48] = 8,
    [64] = 9,
};
45 * NOTE: Each function hls_foo correspond to the function foo in the
46 * specification (HLS stands for High Level Syntax).
53 /* free everything allocated by pic_arrays_init() */
54 static void pic_arrays_free(HEVCContext *s)
57 av_freep(&s->deblock);
59 av_freep(&s->skip_flag);
60 av_freep(&s->tab_ct_depth);
62 av_freep(&s->tab_ipm);
63 av_freep(&s->cbf_luma);
66 av_freep(&s->qp_y_tab);
67 av_freep(&s->tab_slice_address);
68 av_freep(&s->filter_slice_edges);
70 av_freep(&s->horizontal_bs);
71 av_freep(&s->vertical_bs);
73 av_freep(&s->sh.entry_point_offset);
74 av_freep(&s->sh.size);
75 av_freep(&s->sh.offset);
77 av_buffer_pool_uninit(&s->tab_mvf_pool);
78 av_buffer_pool_uninit(&s->rpl_tab_pool);
81 /* allocate arrays that depend on frame dimensions */
82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
84 int log2_min_cb_size = sps->log2_min_cb_size;
85 int width = sps->width;
86 int height = sps->height;
87 int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
88 ((height >> log2_min_cb_size) + 1);
89 int ctb_count = sps->ctb_width * sps->ctb_height;
90 int min_pu_size = sps->min_pu_width * sps->min_pu_height;
92 s->bs_width = (width >> 2) + 1;
93 s->bs_height = (height >> 2) + 1;
95 s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
96 s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
97 if (!s->sao || !s->deblock)
100 s->skip_flag = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
101 s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
102 if (!s->skip_flag || !s->tab_ct_depth)
105 s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
106 s->tab_ipm = av_mallocz(min_pu_size);
107 s->is_pcm = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
108 if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
111 s->filter_slice_edges = av_mallocz(ctb_count);
112 s->tab_slice_address = av_malloc_array(pic_size_in_ctb,
113 sizeof(*s->tab_slice_address));
114 s->qp_y_tab = av_malloc_array(pic_size_in_ctb,
115 sizeof(*s->qp_y_tab));
116 if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
119 s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
120 s->vertical_bs = av_mallocz_array(s->bs_width, s->bs_height);
121 if (!s->horizontal_bs || !s->vertical_bs)
124 s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
126 s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
128 if (!s->tab_mvf_pool || !s->rpl_tab_pool)
135 return AVERROR(ENOMEM);
138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
142 uint8_t luma_weight_l0_flag[16];
143 uint8_t chroma_weight_l0_flag[16];
144 uint8_t luma_weight_l1_flag[16];
145 uint8_t chroma_weight_l1_flag[16];
146 int luma_log2_weight_denom;
148 luma_log2_weight_denom = get_ue_golomb_long(gb);
149 if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
150 av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
151 s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
152 if (s->ps.sps->chroma_format_idc != 0) {
153 int delta = get_se_golomb(gb);
154 s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
157 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
158 luma_weight_l0_flag[i] = get_bits1(gb);
159 if (!luma_weight_l0_flag[i]) {
160 s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
161 s->sh.luma_offset_l0[i] = 0;
164 if (s->ps.sps->chroma_format_idc != 0) {
165 for (i = 0; i < s->sh.nb_refs[L0]; i++)
166 chroma_weight_l0_flag[i] = get_bits1(gb);
168 for (i = 0; i < s->sh.nb_refs[L0]; i++)
169 chroma_weight_l0_flag[i] = 0;
171 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
172 if (luma_weight_l0_flag[i]) {
173 int delta_luma_weight_l0 = get_se_golomb(gb);
174 s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
175 s->sh.luma_offset_l0[i] = get_se_golomb(gb);
177 if (chroma_weight_l0_flag[i]) {
178 for (j = 0; j < 2; j++) {
179 int delta_chroma_weight_l0 = get_se_golomb(gb);
180 int delta_chroma_offset_l0 = get_se_golomb(gb);
181 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
182 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
183 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
186 s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
187 s->sh.chroma_offset_l0[i][0] = 0;
188 s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
189 s->sh.chroma_offset_l0[i][1] = 0;
192 if (s->sh.slice_type == B_SLICE) {
193 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
194 luma_weight_l1_flag[i] = get_bits1(gb);
195 if (!luma_weight_l1_flag[i]) {
196 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
197 s->sh.luma_offset_l1[i] = 0;
200 if (s->ps.sps->chroma_format_idc != 0) {
201 for (i = 0; i < s->sh.nb_refs[L1]; i++)
202 chroma_weight_l1_flag[i] = get_bits1(gb);
204 for (i = 0; i < s->sh.nb_refs[L1]; i++)
205 chroma_weight_l1_flag[i] = 0;
207 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
208 if (luma_weight_l1_flag[i]) {
209 int delta_luma_weight_l1 = get_se_golomb(gb);
210 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
211 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
213 if (chroma_weight_l1_flag[i]) {
214 for (j = 0; j < 2; j++) {
215 int delta_chroma_weight_l1 = get_se_golomb(gb);
216 int delta_chroma_offset_l1 = get_se_golomb(gb);
217 s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
218 s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
219 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
222 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
223 s->sh.chroma_offset_l1[i][0] = 0;
224 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
225 s->sh.chroma_offset_l1[i][1] = 0;
231 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
233 const HEVCSPS *sps = s->ps.sps;
234 int max_poc_lsb = 1 << sps->log2_max_poc_lsb;
235 int prev_delta_msb = 0;
236 unsigned int nb_sps = 0, nb_sh;
240 if (!sps->long_term_ref_pics_present_flag)
243 if (sps->num_long_term_ref_pics_sps > 0)
244 nb_sps = get_ue_golomb_long(gb);
245 nb_sh = get_ue_golomb_long(gb);
247 if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
248 return AVERROR_INVALIDDATA;
250 rps->nb_refs = nb_sh + nb_sps;
252 for (i = 0; i < rps->nb_refs; i++) {
253 uint8_t delta_poc_msb_present;
256 uint8_t lt_idx_sps = 0;
258 if (sps->num_long_term_ref_pics_sps > 1)
259 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
261 rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
262 rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
264 rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb);
265 rps->used[i] = get_bits1(gb);
268 delta_poc_msb_present = get_bits1(gb);
269 if (delta_poc_msb_present) {
270 int delta = get_ue_golomb_long(gb);
272 if (i && i != nb_sps)
273 delta += prev_delta_msb;
275 rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
276 prev_delta_msb = delta;
283 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
286 const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
287 unsigned int num = 0, den = 0;
289 avctx->pix_fmt = sps->pix_fmt;
290 avctx->coded_width = sps->width;
291 avctx->coded_height = sps->height;
292 avctx->width = sps->output_width;
293 avctx->height = sps->output_height;
294 avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
295 avctx->profile = sps->ptl.general_ptl.profile_idc;
296 avctx->level = sps->ptl.general_ptl.level_idc;
298 ff_set_sar(avctx, sps->vui.sar);
300 if (sps->vui.video_signal_type_present_flag)
301 avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
304 avctx->color_range = AVCOL_RANGE_MPEG;
306 if (sps->vui.colour_description_present_flag) {
307 avctx->color_primaries = sps->vui.colour_primaries;
308 avctx->color_trc = sps->vui.transfer_characteristic;
309 avctx->colorspace = sps->vui.matrix_coeffs;
311 avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
312 avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
313 avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
316 if (vps->vps_timing_info_present_flag) {
317 num = vps->vps_num_units_in_tick;
318 den = vps->vps_time_scale;
319 } else if (sps->vui.vui_timing_info_present_flag) {
320 num = sps->vui.vui_num_units_in_tick;
321 den = sps->vui.vui_time_scale;
324 if (num != 0 && den != 0)
325 av_reduce(&avctx->framerate.den, &avctx->framerate.num,
329 static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fmt)
331 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
332 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
342 ret = pic_arrays_init(s, sps);
346 export_stream_params(s->avctx, &s->ps, sps);
348 if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
349 #if CONFIG_HEVC_DXVA2_HWACCEL
350 *fmt++ = AV_PIX_FMT_DXVA2_VLD;
352 #if CONFIG_HEVC_D3D11VA_HWACCEL
353 *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
355 #if CONFIG_HEVC_VDPAU_HWACCEL
356 *fmt++ = AV_PIX_FMT_VDPAU;
360 if (pix_fmt == AV_PIX_FMT_NONE) {
361 *fmt++ = sps->pix_fmt;
362 *fmt = AV_PIX_FMT_NONE;
364 ret = ff_thread_get_format(s->avctx, pix_fmts);
367 s->avctx->pix_fmt = ret;
370 s->avctx->pix_fmt = pix_fmt;
373 ff_hevc_pred_init(&s->hpc, sps->bit_depth);
374 ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
375 ff_videodsp_init (&s->vdsp, sps->bit_depth);
377 for (i = 0; i < 3; i++) {
378 av_freep(&s->sao_pixel_buffer_h[i]);
379 av_freep(&s->sao_pixel_buffer_v[i]);
382 if (sps->sao_enabled && !s->avctx->hwaccel) {
383 int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
386 for(c_idx = 0; c_idx < c_count; c_idx++) {
387 int w = sps->width >> sps->hshift[c_idx];
388 int h = sps->height >> sps->vshift[c_idx];
389 s->sao_pixel_buffer_h[c_idx] =
390 av_malloc((w * 2 * sps->ctb_height) <<
392 s->sao_pixel_buffer_v[c_idx] =
393 av_malloc((h * 2 * sps->ctb_width) <<
399 s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
/**
 * Parse a slice segment header (HLS: slice_segment_header()).
 * Fills s->sh and activates the PPS/SPS referenced by the slice.
 * Returns 0 on success or a negative AVERROR on invalid data.
 * NOTE(review): this span is a garbled extraction — brace/else lines and
 * some statements are missing from the visible text; the code below is
 * kept byte-identical and only comments were added.
 */
409 static int hls_slice_header(HEVCContext *s)
411 GetBitContext *gb = &s->HEVClc->gb;
412 SliceHeader *sh = &s->sh;
/* On the first slice of an IDR/BLA picture, bump the sequence counter and
 * flush the reference picture set. */
416 sh->first_slice_in_pic_flag = get_bits1(gb);
417 if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
418 s->seq_decode = (s->seq_decode + 1) & 0xff;
421 ff_hevc_clear_refs(s);
423 sh->no_output_of_prior_pics_flag = 0;
425 sh->no_output_of_prior_pics_flag = get_bits1(gb);
/* Resolve and activate the PPS; a PPS change between slices of the same
 * picture is an error. */
427 sh->pps_id = get_ue_golomb_long(gb);
428 if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
429 av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
430 return AVERROR_INVALIDDATA;
432 if (!sh->first_slice_in_pic_flag &&
433 s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
434 av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
435 return AVERROR_INVALIDDATA;
437 s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
438 if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
439 sh->no_output_of_prior_pics_flag = 1;
/* SPS switch: re-activate via set_sps() and start a new sequence. */
441 if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
442 const HEVCSPS* last_sps = s->ps.sps;
443 s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
444 if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
445 if (s->ps.sps->width != last_sps->width || s->ps.sps->height != last_sps->height ||
446 s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].max_dec_pic_buffering !=
447 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
448 sh->no_output_of_prior_pics_flag = 0;
450 ff_hevc_clear_refs(s);
451 ret = set_sps(s, s->ps.sps, AV_PIX_FMT_NONE);
455 s->seq_decode = (s->seq_decode + 1) & 0xff;
/* Non-first slice segments carry an explicit CTB address (and possibly a
 * dependent-slice-segment flag). */
459 sh->dependent_slice_segment_flag = 0;
460 if (!sh->first_slice_in_pic_flag) {
461 int slice_address_length;
463 if (s->ps.pps->dependent_slice_segments_enabled_flag)
464 sh->dependent_slice_segment_flag = get_bits1(gb);
466 slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
467 s->ps.sps->ctb_height);
468 sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
469 if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
470 av_log(s->avctx, AV_LOG_ERROR,
471 "Invalid slice segment address: %u.\n",
472 sh->slice_segment_addr);
473 return AVERROR_INVALIDDATA;
476 if (!sh->dependent_slice_segment_flag) {
477 sh->slice_addr = sh->slice_segment_addr;
481 sh->slice_segment_addr = sh->slice_addr = 0;
483 s->slice_initialized = 0;
/* Independent slice segment: parse the full header. */
486 if (!sh->dependent_slice_segment_flag) {
487 s->slice_initialized = 0;
489 for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
490 skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
492 sh->slice_type = get_ue_golomb_long(gb);
493 if (!(sh->slice_type == I_SLICE ||
494 sh->slice_type == P_SLICE ||
495 sh->slice_type == B_SLICE)) {
496 av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
498 return AVERROR_INVALIDDATA;
500 if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
501 av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
502 return AVERROR_INVALIDDATA;
505 // when flag is not present, picture is inferred to be output
506 sh->pic_output_flag = 1;
507 if (s->ps.pps->output_flag_present_flag)
508 sh->pic_output_flag = get_bits1(gb);
510 if (s->ps.sps->separate_colour_plane_flag)
511 sh->colour_plane_id = get_bits(gb, 2);
/* Picture order count: all slices of one picture must agree on the POC. */
516 sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
517 poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
518 if (!sh->first_slice_in_pic_flag && poc != s->poc) {
519 av_log(s->avctx, AV_LOG_WARNING,
520 "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
521 if (s->avctx->err_recognition & AV_EF_EXPLODE)
522 return AVERROR_INVALIDDATA;
/* Short-term RPS: either coded in the slice header or selected from the
 * SPS-signalled candidate sets. */
527 sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
528 pos = get_bits_left(gb);
529 if (!sh->short_term_ref_pic_set_sps_flag) {
530 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
534 sh->short_term_rps = &sh->slice_rps;
536 int numbits, rps_idx;
538 if (!s->ps.sps->nb_st_rps) {
539 av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
540 return AVERROR_INVALIDDATA;
543 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
544 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
545 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
547 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
/* Long-term RPS; only EXPLODE mode makes an invalid set fatal. */
549 pos = get_bits_left(gb);
550 ret = decode_lt_rps(s, &sh->long_term_rps, gb);
552 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
553 if (s->avctx->err_recognition & AV_EF_EXPLODE)
554 return AVERROR_INVALIDDATA;
556 sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
558 if (s->ps.sps->sps_temporal_mvp_enabled_flag)
559 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
561 sh->slice_temporal_mvp_enabled_flag = 0;
563 s->sh.short_term_rps = NULL;
/* Sub-layer non-reference NAL types never update the POC state. */
568 if (s->temporal_id == 0 &&
569 s->nal_unit_type != NAL_TRAIL_N &&
570 s->nal_unit_type != NAL_TSA_N &&
571 s->nal_unit_type != NAL_STSA_N &&
572 s->nal_unit_type != NAL_RADL_N &&
573 s->nal_unit_type != NAL_RADL_R &&
574 s->nal_unit_type != NAL_RASL_N &&
575 s->nal_unit_type != NAL_RASL_R)
/* SAO on/off per component; one flag covers both chroma planes. */
578 if (s->ps.sps->sao_enabled) {
579 sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
580 if (s->ps.sps->chroma_format_idc) {
581 sh->slice_sample_adaptive_offset_flag[1] =
582 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
585 sh->slice_sample_adaptive_offset_flag[0] = 0;
586 sh->slice_sample_adaptive_offset_flag[1] = 0;
587 sh->slice_sample_adaptive_offset_flag[2] = 0;
/* Active reference counts, optional RPL modification, merge candidates,
 * weighted prediction — P/B slices only. */
590 sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
591 if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
594 sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
595 if (sh->slice_type == B_SLICE)
596 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
598 if (get_bits1(gb)) { // num_ref_idx_active_override_flag
599 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
600 if (sh->slice_type == B_SLICE)
601 sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
603 if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
604 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
605 sh->nb_refs[L0], sh->nb_refs[L1]);
606 return AVERROR_INVALIDDATA;
609 sh->rpl_modification_flag[0] = 0;
610 sh->rpl_modification_flag[1] = 0;
611 nb_refs = ff_hevc_frame_nb_refs(s);
613 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
614 return AVERROR_INVALIDDATA;
617 if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
618 sh->rpl_modification_flag[0] = get_bits1(gb);
619 if (sh->rpl_modification_flag[0]) {
620 for (i = 0; i < sh->nb_refs[L0]; i++)
621 sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
624 if (sh->slice_type == B_SLICE) {
625 sh->rpl_modification_flag[1] = get_bits1(gb);
626 if (sh->rpl_modification_flag[1] == 1)
627 for (i = 0; i < sh->nb_refs[L1]; i++)
628 sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
632 if (sh->slice_type == B_SLICE)
633 sh->mvd_l1_zero_flag = get_bits1(gb);
635 if (s->ps.pps->cabac_init_present_flag)
636 sh->cabac_init_flag = get_bits1(gb);
638 sh->cabac_init_flag = 0;
/* Collocated picture selection for temporal MVP. */
640 sh->collocated_ref_idx = 0;
641 if (sh->slice_temporal_mvp_enabled_flag) {
642 sh->collocated_list = L0;
643 if (sh->slice_type == B_SLICE)
644 sh->collocated_list = !get_bits1(gb);
646 if (sh->nb_refs[sh->collocated_list] > 1) {
647 sh->collocated_ref_idx = get_ue_golomb_long(gb);
648 if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
649 av_log(s->avctx, AV_LOG_ERROR,
650 "Invalid collocated_ref_idx: %d.\n",
651 sh->collocated_ref_idx);
652 return AVERROR_INVALIDDATA;
657 if ((s->ps.pps->weighted_pred_flag && sh->slice_type == P_SLICE) ||
658 (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
659 pred_weight_table(s, gb);
662 sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
663 if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
664 av_log(s->avctx, AV_LOG_ERROR,
665 "Invalid number of merging MVP candidates: %d.\n",
666 sh->max_num_merge_cand);
667 return AVERROR_INVALIDDATA;
/* QP deltas and per-slice chroma QP offsets. */
671 sh->slice_qp_delta = get_se_golomb(gb);
673 if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
674 sh->slice_cb_qp_offset = get_se_golomb(gb);
675 sh->slice_cr_qp_offset = get_se_golomb(gb);
677 sh->slice_cb_qp_offset = 0;
678 sh->slice_cr_qp_offset = 0;
681 if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
682 sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
684 sh->cu_chroma_qp_offset_enabled_flag = 0;
/* Deblocking filter overrides; defaults come from the PPS. */
686 if (s->ps.pps->deblocking_filter_control_present_flag) {
687 int deblocking_filter_override_flag = 0;
689 if (s->ps.pps->deblocking_filter_override_enabled_flag)
690 deblocking_filter_override_flag = get_bits1(gb);
692 if (deblocking_filter_override_flag) {
693 sh->disable_deblocking_filter_flag = get_bits1(gb);
694 if (!sh->disable_deblocking_filter_flag) {
695 sh->beta_offset = get_se_golomb(gb) * 2;
696 sh->tc_offset = get_se_golomb(gb) * 2;
699 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
700 sh->beta_offset = s->ps.pps->beta_offset;
701 sh->tc_offset = s->ps.pps->tc_offset;
704 sh->disable_deblocking_filter_flag = 0;
709 if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
710 (sh->slice_sample_adaptive_offset_flag[0] ||
711 sh->slice_sample_adaptive_offset_flag[1] ||
712 !sh->disable_deblocking_filter_flag)) {
713 sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
715 sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
717 } else if (!s->slice_initialized) {
718 av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
719 return AVERROR_INVALIDDATA;
/* Entry point offsets for tiles / WPP substreams. */
722 sh->num_entry_point_offsets = 0;
723 if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
724 unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
725 // It would be possible to bound this tighter but this here is simpler
726 if (num_entry_point_offsets > get_bits_left(gb)) {
727 av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
728 return AVERROR_INVALIDDATA;
731 sh->num_entry_point_offsets = num_entry_point_offsets;
732 if (sh->num_entry_point_offsets > 0) {
733 int offset_len = get_ue_golomb_long(gb) + 1;
735 if (offset_len < 1 || offset_len > 32) {
736 sh->num_entry_point_offsets = 0;
737 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
738 return AVERROR_INVALIDDATA;
741 av_freep(&sh->entry_point_offset);
742 av_freep(&sh->offset);
744 sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
745 sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
746 sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
747 if (!sh->entry_point_offset || !sh->offset || !sh->size) {
748 sh->num_entry_point_offsets = 0;
749 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
750 return AVERROR(ENOMEM);
752 for (i = 0; i < sh->num_entry_point_offsets; i++) {
753 unsigned val = get_bits_long(gb, offset_len);
754 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
756 if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
757 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
758 s->threads_number = 1;
760 s->enable_parallel_tiles = 0;
762 s->enable_parallel_tiles = 0;
/* Skip any slice header extension bytes. */
765 if (s->ps.pps->slice_header_extension_present_flag) {
766 unsigned int length = get_ue_golomb_long(gb);
767 if (length*8LL > get_bits_left(gb)) {
768 av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
769 return AVERROR_INVALIDDATA;
771 for (i = 0; i < length; i++)
772 skip_bits(gb, 8); // slice_header_extension_data_byte
775 // Inferred parameters
776 sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
777 if (sh->slice_qp > 51 ||
778 sh->slice_qp < -s->ps.sps->qp_bd_offset) {
779 av_log(s->avctx, AV_LOG_ERROR,
780 "The slice_qp %d is outside the valid range "
783 -s->ps.sps->qp_bd_offset);
784 return AVERROR_INVALIDDATA;
787 sh->slice_ctb_addr_rs = sh->slice_segment_addr;
789 if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
790 av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
791 return AVERROR_INVALIDDATA;
/* Final sanity check: the header must not overread the NAL payload. */
794 if (get_bits_left(gb) < 0) {
795 av_log(s->avctx, AV_LOG_ERROR,
796 "Overread slice header by %d bits\n", -get_bits_left(gb));
797 return AVERROR_INVALIDDATA;
800 s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
802 if (!s->ps.pps->cu_qp_delta_enabled_flag)
803 s->HEVClc->qp_y = s->sh.slice_qp;
805 s->slice_initialized = 1;
806 s->HEVClc->tu.cu_qp_offset_cb = 0;
807 s->HEVClc->tu.cu_qp_offset_cr = 0;
/* CTB(tab, x, y): index into a per-CTB table laid out row-major with
 * ctb_width entries per row. */
812 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
/* SET_SAO(elem, value): store a freshly decoded SAO element, or copy it
 * from the left / upper neighbour CTB when the corresponding sao_merge
 * flag is set. Relies on locals sao_merge_left_flag, sao_merge_up_flag,
 * sao, rx and ry being in scope at the expansion site. */
814 #define SET_SAO(elem, value) \
816 if (!sao_merge_up_flag && !sao_merge_left_flag) \
818 else if (sao_merge_left_flag) \
819 sao->elem = CTB(s->sao, rx-1, ry).elem; \
820 else if (sao_merge_up_flag) \
821 sao->elem = CTB(s->sao, rx, ry-1).elem; \
826 static void hls_sao_param(HEVCContext *s, int rx, int ry)
828 HEVCLocalContext *lc = s->HEVClc;
829 int sao_merge_left_flag = 0;
830 int sao_merge_up_flag = 0;
831 SAOParams *sao = &CTB(s->sao, rx, ry);
834 if (s->sh.slice_sample_adaptive_offset_flag[0] ||
835 s->sh.slice_sample_adaptive_offset_flag[1]) {
837 if (lc->ctb_left_flag)
838 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
840 if (ry > 0 && !sao_merge_left_flag) {
842 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
846 for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
847 int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
848 s->ps.pps->log2_sao_offset_scale_chroma;
850 if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
851 sao->type_idx[c_idx] = SAO_NOT_APPLIED;
856 sao->type_idx[2] = sao->type_idx[1];
857 sao->eo_class[2] = sao->eo_class[1];
859 SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
862 if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
865 for (i = 0; i < 4; i++)
866 SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
868 if (sao->type_idx[c_idx] == SAO_BAND) {
869 for (i = 0; i < 4; i++) {
870 if (sao->offset_abs[c_idx][i]) {
871 SET_SAO(offset_sign[c_idx][i],
872 ff_hevc_sao_offset_sign_decode(s));
874 sao->offset_sign[c_idx][i] = 0;
877 SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
878 } else if (c_idx != 2) {
879 SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
882 // Inferred parameters
883 sao->offset_val[c_idx][0] = 0;
884 for (i = 0; i < 4; i++) {
885 sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
886 if (sao->type_idx[c_idx] == SAO_EDGE) {
888 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
889 } else if (sao->offset_sign[c_idx][i]) {
890 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
892 sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
900 static int hls_cross_component_pred(HEVCContext *s, int idx) {
901 HEVCLocalContext *lc = s->HEVClc;
902 int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
904 if (log2_res_scale_abs_plus1 != 0) {
905 int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
906 lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
907 (1 - 2 * res_scale_sign_flag);
909 lc->tu.res_scale_val = 0;
916 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
917 int xBase, int yBase, int cb_xBase, int cb_yBase,
918 int log2_cb_size, int log2_trafo_size,
919 int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
921 HEVCLocalContext *lc = s->HEVClc;
922 const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
925 if (lc->cu.pred_mode == MODE_INTRA) {
926 int trafo_size = 1 << log2_trafo_size;
927 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
929 s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
932 if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
933 (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
934 int scan_idx = SCAN_DIAG;
935 int scan_idx_c = SCAN_DIAG;
936 int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
937 (s->ps.sps->chroma_format_idc == 2 &&
938 (cbf_cb[1] || cbf_cr[1]));
940 if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
941 lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
942 if (lc->tu.cu_qp_delta != 0)
943 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
944 lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
945 lc->tu.is_cu_qp_delta_coded = 1;
947 if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
948 lc->tu.cu_qp_delta > (25 + s->ps.sps->qp_bd_offset / 2)) {
949 av_log(s->avctx, AV_LOG_ERROR,
950 "The cu_qp_delta %d is outside the valid range "
953 -(26 + s->ps.sps->qp_bd_offset / 2),
954 (25 + s->ps.sps->qp_bd_offset / 2));
955 return AVERROR_INVALIDDATA;
958 ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
961 if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
962 !lc->cu.cu_transquant_bypass_flag && !lc->tu.is_cu_chroma_qp_offset_coded) {
963 int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
964 if (cu_chroma_qp_offset_flag) {
965 int cu_chroma_qp_offset_idx = 0;
966 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
967 cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
968 av_log(s->avctx, AV_LOG_ERROR,
969 "cu_chroma_qp_offset_idx not yet tested.\n");
971 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
972 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
974 lc->tu.cu_qp_offset_cb = 0;
975 lc->tu.cu_qp_offset_cr = 0;
977 lc->tu.is_cu_chroma_qp_offset_coded = 1;
980 if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
981 if (lc->tu.intra_pred_mode >= 6 &&
982 lc->tu.intra_pred_mode <= 14) {
983 scan_idx = SCAN_VERT;
984 } else if (lc->tu.intra_pred_mode >= 22 &&
985 lc->tu.intra_pred_mode <= 30) {
986 scan_idx = SCAN_HORIZ;
989 if (lc->tu.intra_pred_mode_c >= 6 &&
990 lc->tu.intra_pred_mode_c <= 14) {
991 scan_idx_c = SCAN_VERT;
992 } else if (lc->tu.intra_pred_mode_c >= 22 &&
993 lc->tu.intra_pred_mode_c <= 30) {
994 scan_idx_c = SCAN_HORIZ;
1001 ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1002 if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1003 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1004 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1005 lc->tu.cross_pf = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1006 (lc->cu.pred_mode == MODE_INTER ||
1007 (lc->tu.chroma_mode_c == 4)));
1009 if (lc->tu.cross_pf) {
1010 hls_cross_component_pred(s, 0);
1012 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1013 if (lc->cu.pred_mode == MODE_INTRA) {
1014 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1015 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1018 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1019 log2_trafo_size_c, scan_idx_c, 1);
1021 if (lc->tu.cross_pf) {
1022 ptrdiff_t stride = s->frame->linesize[1];
1023 int hshift = s->ps.sps->hshift[1];
1024 int vshift = s->ps.sps->vshift[1];
1025 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1026 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1027 int size = 1 << log2_trafo_size_c;
1029 uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1030 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1031 for (i = 0; i < (size * size); i++) {
1032 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1034 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1038 if (lc->tu.cross_pf) {
1039 hls_cross_component_pred(s, 1);
1041 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1042 if (lc->cu.pred_mode == MODE_INTRA) {
1043 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1044 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1047 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1048 log2_trafo_size_c, scan_idx_c, 2);
1050 if (lc->tu.cross_pf) {
1051 ptrdiff_t stride = s->frame->linesize[2];
1052 int hshift = s->ps.sps->hshift[2];
1053 int vshift = s->ps.sps->vshift[2];
1054 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1055 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1056 int size = 1 << log2_trafo_size_c;
1058 uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1059 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1060 for (i = 0; i < (size * size); i++) {
1061 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1063 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1066 } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1067 int trafo_size_h = 1 << (log2_trafo_size + 1);
1068 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1069 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1070 if (lc->cu.pred_mode == MODE_INTRA) {
1071 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1072 trafo_size_h, trafo_size_v);
1073 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1076 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1077 log2_trafo_size, scan_idx_c, 1);
1079 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1080 if (lc->cu.pred_mode == MODE_INTRA) {
1081 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1082 trafo_size_h, trafo_size_v);
1083 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1086 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1087 log2_trafo_size, scan_idx_c, 2);
1090 } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1091 if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1092 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1093 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1094 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1095 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1096 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1097 if (s->ps.sps->chroma_format_idc == 2) {
1098 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1099 trafo_size_h, trafo_size_v);
1100 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1101 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1103 } else if (blk_idx == 3) {
1104 int trafo_size_h = 1 << (log2_trafo_size + 1);
1105 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1106 ff_hevc_set_neighbour_available(s, xBase, yBase,
1107 trafo_size_h, trafo_size_v);
1108 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1109 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1110 if (s->ps.sps->chroma_format_idc == 2) {
1111 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1112 trafo_size_h, trafo_size_v);
1113 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1114 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1122 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1124 int cb_size = 1 << log2_cb_size;
1125 int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1127 int min_pu_width = s->ps.sps->min_pu_width;
1128 int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1129 int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1132 for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1133 for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1134 s->is_pcm[i + j * min_pu_width] = 2;
/*
 * Recursively parse one transform tree (HEVC spec 7.3.8.8) rooted at
 * (x0, y0) with size 1 << log2_trafo_size, splitting into four quadrants
 * while split_transform_flag is set, and decoding one transform unit at
 * each leaf.  Returns 0 on success, a negative error code on failure.
 *
 * NOTE(review): this listing is a sampled extract — blank lines, closing
 * braces, `else` lines and several declarations (the cbf_cb/cbf_cr arrays,
 * ret, cbf_luma, i, j) from the original file are not shown here.
 */
1137 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1138 int xBase, int yBase, int cb_xBase, int cb_yBase,
1139 int log2_cb_size, int log2_trafo_size,
1140 int trafo_depth, int blk_idx,
1141 const int *base_cbf_cb, const int *base_cbf_cr)
1143 HEVCLocalContext *lc = s->HEVClc;
1144 uint8_t split_transform_flag;
/* Work on local copies of the parent's chroma coded-block flags; each
 * recursion level may override them from the bitstream below. */
1149 cbf_cb[0] = base_cbf_cb[0];
1150 cbf_cb[1] = base_cbf_cb[1];
1151 cbf_cr[0] = base_cbf_cr[0];
1152 cbf_cr[1] = base_cbf_cr[1];
/* For intra NxN CUs the per-PU intra modes become active at depth 1;
 * chroma gets a per-block mode only in 4:4:4 (chroma_format_idc == 3),
 * otherwise the single mode of PU 0 applies. */
1154 if (lc->cu.intra_split_flag) {
1155 if (trafo_depth == 1) {
1156 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1157 if (s->ps.sps->chroma_format_idc == 3) {
1158 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1159 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[blk_idx];
1161 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1162 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
1166 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[0];
1167 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1168 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
/* split_transform_flag is explicitly coded only when the size is within
 * [min_tb, max_trafo] bounds, depth allows it, and this is not the forced
 * first split of an intra NxN CU; otherwise it is inferred below. */
1171 if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1172 log2_trafo_size > s->ps.sps->log2_min_tb_size &&
1173 trafo_depth < lc->cu.max_trafo_depth &&
1174 !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1175 split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
/* Inferred split: inter CUs with non-2Nx2N partitions force a split at
 * depth 0 when max_transform_hierarchy_depth_inter == 0. */
1177 int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1178 lc->cu.pred_mode == MODE_INTER &&
1179 lc->cu.part_mode != PART_2Nx2N &&
1182 split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1183 (lc->cu.intra_split_flag && trafo_depth == 0) ||
/* Chroma cbf flags exist for this node unless chroma is 4:2:0/4:2:2 at
 * 4x4 luma size (then they are handled at the parent); 4:2:2 carries a
 * second flag per component for the lower half-TU. */
1187 if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1188 if (trafo_depth == 0 || cbf_cb[0]) {
1189 cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1190 if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1191 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1195 if (trafo_depth == 0 || cbf_cr[0]) {
1196 cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1197 if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1198 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
/* Split node: recurse into the four half-size quadrants in z-order. */
1203 if (split_transform_flag) {
1204 const int trafo_size_split = 1 << (log2_trafo_size - 1);
1205 const int x1 = x0 + trafo_size_split;
1206 const int y1 = y0 + trafo_size_split;
1208 #define SUBDIVIDE(x, y, idx) \
1210 ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1211 log2_trafo_size - 1, trafo_depth + 1, idx, \
1217 SUBDIVIDE(x0, y0, 0);
1218 SUBDIVIDE(x1, y0, 1);
1219 SUBDIVIDE(x0, y1, 2);
1220 SUBDIVIDE(x1, y1, 3);
/* Leaf node: decode cbf_luma (coded only when it cannot be inferred as 1)
 * and then the transform unit itself. */
1224 int min_tu_size = 1 << s->ps.sps->log2_min_tb_size;
1225 int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1226 int min_tu_width = s->ps.sps->min_tb_width;
1229 if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1230 cbf_cb[0] || cbf_cr[0] ||
1231 (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1232 cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1235 ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1236 log2_cb_size, log2_trafo_size,
1237 blk_idx, cbf_luma, cbf_cb, cbf_cr);
1240 // TODO: store cbf_luma somewhere else
/* Mark every min-TB cell of this TU in the cbf_luma map (used later by
 * the deblocking filter). */
1243 for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1244 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1245 int x_tu = (x0 + j) >> log2_min_tu_size;
1246 int y_tu = (y0 + i) >> log2_min_tu_size;
1247 s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
/* Pre-compute deblocking boundary strengths for this TU, and remember
 * transquant-bypass blocks so deblocking can special-case them. */
1250 if (!s->sh.disable_deblocking_filter_flag) {
1251 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1252 if (s->ps.pps->transquant_bypass_enable_flag &&
1253 lc->cu.cu_transquant_bypass_flag)
1254 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
/*
 * Decode a pcm_sample() coding block: read the raw PCM luma and chroma
 * samples straight from the bitstream (bypassing prediction/transform)
 * and write them into the current frame.  Returns 0 on success or a
 * negative error code (e.g. from init_get_bits).
 *
 * NOTE(review): blank lines, braces and the GetBitContext/ret declarations
 * of the original file are not shown in this sampled listing.
 */
1260 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1262 HEVCLocalContext *lc = s->HEVClc;
1264 int cb_size = 1 << log2_cb_size;
/* Destination pointers into the three frame planes, with chroma position
 * scaled by the per-plane subsampling shifts and pixel_shift (bytes per
 * sample minus one, as a shift). */
1265 int stride0 = s->frame->linesize[0];
1266 uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1267 int stride1 = s->frame->linesize[1];
1268 uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1269 int stride2 = s->frame->linesize[2];
1270 uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
/* Total PCM payload in bits: full-resolution luma plus subsampled chroma,
 * each at its own PCM bit depth; skip_bytes advances the CABAC reader past
 * the byte-aligned payload and returns a pointer to it. */
1272 int length = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1273 (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1274 ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1275 s->ps.sps->pcm.bit_depth_chroma;
1276 const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1279 if (!s->sh.disable_deblocking_filter_flag)
1280 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1282 ret = init_get_bits(&gb, pcm, length);
/* Copy the raw samples plane by plane via the DSP helper; chroma planes
 * only exist when chroma_format_idc is non-zero (not monochrome). */
1286 s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size, &gb, s->ps.sps->pcm.bit_depth);
1287 if (s->ps.sps->chroma_format_idc) {
1288 s->hevcdsp.put_pcm(dst1, stride1,
1289 cb_size >> s->ps.sps->hshift[1],
1290 cb_size >> s->ps.sps->vshift[1],
1291 &gb, s->ps.sps->pcm.bit_depth_chroma);
1292 s->hevcdsp.put_pcm(dst2, stride2,
1293 cb_size >> s->ps.sps->hshift[2],
1294 cb_size >> s->ps.sps->vshift[2],
1295 &gb, s->ps.sps->pcm.bit_depth_chroma);
1302 * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1304 * @param s HEVC decoding context
1305 * @param dst target buffer for block data at block position
1306 * @param dststride stride of the dst buffer
1307 * @param ref reference picture buffer at origin (0, 0)
1308 * @param mv motion vector (relative to block position) to get pixel data from
1309 * @param x_off horizontal position of block from origin (0, 0)
1310 * @param y_off vertical position of block from origin (0, 0)
1311 * @param block_w width of block
1312 * @param block_h height of block
1313 * @param luma_weight weighting factor applied to the luma prediction
1314 * @param luma_offset additive offset applied to the luma prediction value
1317 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1318 AVFrame *ref, const Mv *mv, int x_off, int y_off,
1319 int block_w, int block_h, int luma_weight, int luma_offset)
1321 HEVCLocalContext *lc = s->HEVClc;
1322 uint8_t *src = ref->data[0];
1323 ptrdiff_t srcstride = ref->linesize[0];
1324 int pic_width = s->ps.sps->width;
1325 int pic_height = s->ps.sps->height;
/* weight_flag selects between the plain and the weighted-prediction qpel
 * DSP entry points below (the if/else around the two calls is among the
 * lines missing from this sampled listing, as are the mx/my declarations
 * for the fractional quarter-pel MV components). */
1328 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1329 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1330 int idx = ff_hevc_pel_weight[block_w];
/* Apply the integer-pel part of the MV (quarter-pel units, hence >> 2)
 * to the block position and the source pointer. */
1332 x_off += mv->x >> 2;
1333 y_off += mv->y >> 2;
1334 src += y_off * srcstride + x_off * (1 << s->ps.sps->pixel_shift);
/* If the interpolation window (block plus QPEL filter margins) reaches
 * outside the picture, build a padded copy via emulated_edge_mc and
 * interpolate from that instead.
 * NOTE(review): the y lower bound tests QPEL_EXTRA_AFTER where the x bound
 * uses QPEL_EXTRA_BEFORE — apparently conservative (triggers emulation a
 * little more often); confirm against upstream before changing. */
1336 if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1337 x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1338 y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1339 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1340 int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1341 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1343 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1344 edge_emu_stride, srcstride,
1345 block_w + QPEL_EXTRA,
1346 block_h + QPEL_EXTRA,
1347 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1348 pic_width, pic_height);
1349 src = lc->edge_emu_buffer + buf_offset;
1350 srcstride = edge_emu_stride;
/* Unweighted vs. weighted unidirectional quarter-pel interpolation. */
1354 s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1355 block_h, mx, my, block_w);
1357 s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1358 block_h, s->sh.luma_log2_weight_denom,
1359 luma_weight, luma_offset, mx, my, block_w);
1363 * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1365 * @param s HEVC decoding context
1366 * @param dst target buffer for block data at block position
1367 * @param dststride stride of the dst buffer
1368 * @param ref0 reference picture0 buffer at origin (0, 0)
1369 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1370 * @param x_off horizontal position of block from origin (0, 0)
1371 * @param y_off vertical position of block from origin (0, 0)
1372 * @param block_w width of block
1373 * @param block_h height of block
1374 * @param ref1 reference picture1 buffer at origin (0, 0)
1375 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1376 * @param current_mv current motion vector structure
1378 static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1379 AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1380 int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1382 HEVCLocalContext *lc = s->HEVClc;
1383 ptrdiff_t src0stride = ref0->linesize[0];
1384 ptrdiff_t src1stride = ref1->linesize[0];
1385 int pic_width = s->ps.sps->width;
1386 int pic_height = s->ps.sps->height;
/* Fractional (quarter-pel) and integer parts of both motion vectors. */
1387 int mx0 = mv0->x & 3;
1388 int my0 = mv0->y & 3;
1389 int mx1 = mv1->x & 3;
1390 int my1 = mv1->y & 3;
1391 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1392 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1393 int x_off0 = x_off + (mv0->x >> 2);
1394 int y_off0 = y_off + (mv0->y >> 2);
1395 int x_off1 = x_off + (mv1->x >> 2);
1396 int y_off1 = y_off + (mv1->y >> 2);
1397 int idx = ff_hevc_pel_weight[block_w];
/* Unsigned shift then cast avoids UB on a negative x_off << pixel_shift. */
1399 uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1400 uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
/* Edge emulation for the list-0 source when the interpolation window
 * (block + QPEL margins) leaves the picture. */
1402 if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1403 x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1404 y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1405 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1406 int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1407 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1409 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1410 edge_emu_stride, src0stride,
1411 block_w + QPEL_EXTRA,
1412 block_h + QPEL_EXTRA,
1413 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1414 pic_width, pic_height);
1415 src0 = lc->edge_emu_buffer + buf_offset;
1416 src0stride = edge_emu_stride;
/* Same for the list-1 source, using the second emulation buffer so both
 * padded copies can coexist. */
1419 if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1420 x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1421 y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1422 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1423 int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1424 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1426 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1427 edge_emu_stride, src1stride,
1428 block_w + QPEL_EXTRA,
1429 block_h + QPEL_EXTRA,
1430 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1431 pic_width, pic_height);
1432 src1 = lc->edge_emu_buffer2 + buf_offset;
1433 src1stride = edge_emu_stride;
/* First pass: interpolate the list-0 prediction into lc->tmp; second pass
 * combines it with the list-1 interpolation, either averaged (put_..._bi)
 * or with explicit weights (put_..._bi_w) depending on weight_flag (the
 * selecting if/else lines are missing from this sampled listing). */
1436 s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1437 block_h, mx0, my0, block_w);
1439 s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1440 block_h, mx1, my1, block_w);
1442 s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1443 block_h, s->sh.luma_log2_weight_denom,
1444 s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1445 s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1446 s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1447 s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1453 * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1455 * @param s HEVC decoding context
1456 * @param dst0 target buffer for block data at block position (one chroma plane)
1457 * @param dststride stride of the dst0 buffer
1458 * @param src0 source buffer of the reference chroma plane at origin (0, 0)
1459 * @param srcstride stride of the src0 buffer
1460 * @param reflist reference list index (0 = L0, 1 = L1) selecting the motion vector in current_mv
1461 * @param x_off horizontal position of block from origin (0, 0)
1462 * @param y_off vertical position of block from origin (0, 0)
1463 * @param block_w width of block
1464 * @param block_h height of block
1465 * @param current_mv current motion vector structure
1466 * @param chroma_weight weighting factor applied to the chroma prediction
1467 * @param chroma_offset additive offset applied to the chroma prediction value
/* NOTE(review): the token "¤t_mv" below is mis-encoded text — the
 * original source almost certainly reads "&current_mv" ("&curren" was
 * HTML-entity-decoded to '¤').  The same artifact recurs throughout this
 * extract wherever current_mv is passed by address. */
1469 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1470 ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1471 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1473 HEVCLocalContext *lc = s->HEVClc;
/* Picture dimensions in chroma samples (luma size >> subsampling shift). */
1474 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1475 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1476 const Mv *mv = ¤t_mv->mv[reflist];
1477 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1478 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1479 int idx = ff_hevc_pel_weight[block_w];
1480 int hshift = s->ps.sps->hshift[1];
1481 int vshift = s->ps.sps->vshift[1];
/* Fractional MV part in chroma units (eighth-pel for 4:2:0), then scaled
 * to the 1/8-pel convention the epel DSP functions expect. */
1482 intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift);
1483 intptr_t my = av_mod_uintp2(mv->y, 2 + vshift);
1484 intptr_t _mx = mx << (1 - hshift);
1485 intptr_t _my = my << (1 - vshift);
/* Integer MV part, applied to the block position and source pointer. */
1487 x_off += mv->x >> (2 + hshift);
1488 y_off += mv->y >> (2 + vshift);
1489 src0 += y_off * srcstride + x_off * (1 << s->ps.sps->pixel_shift);
/* Edge emulation when the EPEL interpolation window leaves the picture. */
1491 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1492 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1493 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1494 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1495 int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1496 int buf_offset0 = EPEL_EXTRA_BEFORE *
1497 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1498 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1499 edge_emu_stride, srcstride,
1500 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1501 x_off - EPEL_EXTRA_BEFORE,
1502 y_off - EPEL_EXTRA_BEFORE,
1503 pic_width, pic_height);
1505 src0 = lc->edge_emu_buffer + buf_offset0;
1506 srcstride = edge_emu_stride;
/* Unweighted vs. weighted unidirectional chroma interpolation (the
 * if (!weight_flag)/else lines are missing from this sampled listing). */
1509 s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1510 block_h, _mx, _my, block_w);
1512 s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1513 block_h, s->sh.chroma_log2_weight_denom,
1514 chroma_weight, chroma_offset, _mx, _my, block_w);
1518 * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1520 * @param s HEVC decoding context
1521 * @param dst0 target buffer for block data at block position
1522 * @param dststride stride of the dst0 buffer
1523 * @param ref0 reference picture0 buffer at origin (0, 0)
1524 * @param ref1 reference picture1 buffer at origin (0, 0)
1525 * @param x_off horizontal position of block from origin (0, 0)
1526 * @param y_off vertical position of block from origin (0, 0)
1527 * @param block_w width of block
1528 * @param block_h height of block
1530 * @param current_mv current motion vector structure (supplies both list-0 and list-1 motion vectors and reference indices)
1532 * @param cidx chroma component index (0 = Cb, 1 = Cr)
/* NOTE(review): "¤t_mv" below is mis-encoded "&current_mv"
 * (HTML entity "&curren" decoded to '¤') — same artifact as elsewhere
 * in this extract. */
1534 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1535 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1537 HEVCLocalContext *lc = s->HEVClc;
/* Chroma plane cidx+1 (1 = Cb, 2 = Cr) of both reference frames. */
1538 uint8_t *src1 = ref0->data[cidx+1];
1539 uint8_t *src2 = ref1->data[cidx+1];
1540 ptrdiff_t src1stride = ref0->linesize[cidx+1];
1541 ptrdiff_t src2stride = ref1->linesize[cidx+1];
1542 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1543 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1544 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1545 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1546 Mv *mv0 = ¤t_mv->mv[0];
1547 Mv *mv1 = ¤t_mv->mv[1];
1548 int hshift = s->ps.sps->hshift[1];
1549 int vshift = s->ps.sps->vshift[1];
/* Fractional chroma MV parts for both lists, scaled to the 1/8-pel
 * convention of the epel DSP functions. */
1551 intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1552 intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1553 intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1554 intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1555 intptr_t _mx0 = mx0 << (1 - hshift);
1556 intptr_t _my0 = my0 << (1 - vshift);
1557 intptr_t _mx1 = mx1 << (1 - hshift);
1558 intptr_t _my1 = my1 << (1 - vshift);
/* Integer chroma MV parts applied to the block position. */
1560 int x_off0 = x_off + (mv0->x >> (2 + hshift));
1561 int y_off0 = y_off + (mv0->y >> (2 + vshift));
1562 int x_off1 = x_off + (mv1->x >> (2 + hshift));
1563 int y_off1 = y_off + (mv1->y >> (2 + vshift));
1564 int idx = ff_hevc_pel_weight[block_w];
1565 src1 += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1566 src2 += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
/* Edge emulation for the list-0 chroma source. */
1568 if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1569 x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1570 y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1571 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1572 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1573 int buf_offset1 = EPEL_EXTRA_BEFORE *
1574 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1576 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1577 edge_emu_stride, src1stride,
1578 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1579 x_off0 - EPEL_EXTRA_BEFORE,
1580 y_off0 - EPEL_EXTRA_BEFORE,
1581 pic_width, pic_height);
1583 src1 = lc->edge_emu_buffer + buf_offset1;
1584 src1stride = edge_emu_stride;
/* Edge emulation for the list-1 chroma source, in the second buffer. */
1587 if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1588 x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1589 y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1590 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1591 int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1592 int buf_offset1 = EPEL_EXTRA_BEFORE *
1593 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1595 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1596 edge_emu_stride, src2stride,
1597 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1598 x_off1 - EPEL_EXTRA_BEFORE,
1599 y_off1 - EPEL_EXTRA_BEFORE,
1600 pic_width, pic_height);
1602 src2 = lc->edge_emu_buffer2 + buf_offset1;
1603 src2stride = edge_emu_stride;
/* List-0 interpolation into lc->tmp, then combined with list-1: averaged
 * (put_..._bi) or explicitly weighted (put_..._bi_w) depending on
 * weight_flag (the selecting if/else lines are missing from this listing). */
1606 s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1607 block_h, _mx0, _my0, block_w);
1609 s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1610 src2, src2stride, lc->tmp,
1611 block_h, _mx1, _my1, block_w);
1613 s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1614 src2, src2stride, lc->tmp,
1616 s->sh.chroma_log2_weight_denom,
1617 s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1618 s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1619 s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1620 s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1621 _mx1, _my1, block_w);
1624 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1625 const Mv *mv, int y0, int height)
1627 int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1629 if (s->threads_type == FF_THREAD_FRAME )
1630 ff_thread_await_progress(&ref->tf, y, 0);
/*
 * Parse the explicit (AMVP, non-merge) motion data for a prediction unit:
 * inter_pred_idc, per-list reference indices, MVDs and MVP flags, then
 * derive the final MV for each active list as predictor + difference.
 * Results are written into *mv (ref_idx, pred_flag, mv[0]/mv[1]).
 *
 * NOTE(review): braces, blank lines and the `int mvp_flag;` declaration
 * are missing from this sampled listing.
 */
1633 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1634 int nPbH, int log2_cb_size, int part_idx,
1635 int merge_idx, MvField *mv)
1637 HEVCLocalContext *lc = s->HEVClc;
/* inter_pred_idc is only coded in B slices; P slices are always list 0. */
1638 enum InterPredIdc inter_pred_idc = PRED_L0;
1641 ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1643 if (s->sh.slice_type == B_SLICE)
1644 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
/* List 0 (runs for PRED_L0 and PRED_BI): ref_idx is coded only when the
 * list has more than the implicit single entry. */
1646 if (inter_pred_idc != PRED_L1) {
1647 if (s->sh.nb_refs[L0])
1648 mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1650 mv->pred_flag = PF_L0;
1651 ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1652 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1653 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1654 part_idx, merge_idx, mv, mvp_flag, 0);
1655 mv->mv[0].x += lc->pu.mvd.x;
1656 mv->mv[0].y += lc->pu.mvd.y;
/* List 1 (runs for PRED_L1 and PRED_BI): with mvd_l1_zero_flag set on a
 * bi-predicted PU the list-1 MVD is inferred as zero instead of parsed. */
1659 if (inter_pred_idc != PRED_L0) {
1660 if (s->sh.nb_refs[L1])
1661 mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1663 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1664 AV_ZERO32(&lc->pu.mvd);
1666 ff_hevc_hls_mvd_coding(s, x0, y0, 1);
/* += so that PF_L0 + PF_L1 yields PF_BI for bi-predicted PUs. */
1669 mv->pred_flag += PF_L1;
1670 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1671 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1672 part_idx, merge_idx, mv, mvp_flag, 1);
1673 mv->mv[1].x += lc->pu.mvd.x;
1674 mv->mv[1].y += lc->pu.mvd.y;
/*
 * Parse and reconstruct one inter prediction unit: decode its motion data
 * (merge/skip or explicit AMVP), store the resulting MvField over the PU's
 * min-PU grid cells, wait for reference-frame rows under frame threading,
 * and run luma + chroma motion compensation (uni- or bi-directional).
 *
 * NOTE(review): "¤t_mv" is mis-encoded "&current_mv" throughout
 * (HTML entity damage); braces, blank lines and several declarations
 * (merge_idx, x_pu/y_pu, i, j, nPbW/nPbH parameters on the trimmed line
 * 1679) are missing from this sampled listing.
 */
1678 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1680 int log2_cb_size, int partIdx, int idx)
/* POS: address of sample (x, y) in plane c_idx, honouring per-plane
 * subsampling shifts and pixel_shift. */
1682 #define POS(c_idx, x, y) \
1683 &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1684 (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1685 HEVCLocalContext *lc = s->HEVClc;
1687 struct MvField current_mv = {{{ 0 }}};
1689 int min_pu_width = s->ps.sps->min_pu_width;
1691 MvField *tab_mvf = s->ref->tab_mvf;
1692 RefPicList *refPicList = s->ref->refPicList;
1693 HEVCFrame *ref0 = NULL, *ref1 = NULL;
1694 uint8_t *dst0 = POS(0, x0, y0);
1695 uint8_t *dst1 = POS(1, x0, y0);
1696 uint8_t *dst2 = POS(2, x0, y0);
1697 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1698 int min_cb_width = s->ps.sps->min_cb_width;
1699 int x_cb = x0 >> log2_min_cb_size;
1700 int y_cb = y0 >> log2_min_cb_size;
1704 int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
/* Motion data: skip CUs are implicitly merge; otherwise merge_flag is
 * coded, and non-merge PUs take the explicit AMVP path. */
1707 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1709 if (skip_flag || lc->pu.merge_flag) {
1710 if (s->sh.max_num_merge_cand > 1)
1711 merge_idx = ff_hevc_merge_idx_decode(s);
1715 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1716 partIdx, merge_idx, ¤t_mv);
1718 hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1719 partIdx, merge_idx, ¤t_mv);
/* Replicate the final motion field into every min-PU cell of this PU so
 * later prediction/deblocking can look it up per position. */
1722 x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1723 y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1725 for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1726 for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1727 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
/* Resolve reference frames for each active list and (under frame
 * threading) wait until the rows this PU reads are decoded. */
1729 if (current_mv.pred_flag & PF_L0) {
1730 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1733 hevc_await_progress(s, ref0, ¤t_mv.mv[0], y0, nPbH);
1735 if (current_mv.pred_flag & PF_L1) {
1736 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1739 hevc_await_progress(s, ref1, ¤t_mv.mv[1], y0, nPbH);
/* Motion compensation proper: one branch per prediction kind (L0-only,
 * L1-only, bi-directional), luma first, then both chroma planes when the
 * stream is not monochrome. */
1742 if (current_mv.pred_flag == PF_L0) {
1743 int x0_c = x0 >> s->ps.sps->hshift[1];
1744 int y0_c = y0 >> s->ps.sps->vshift[1];
1745 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1746 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1748 luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1749 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1750 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1751 s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1753 if (s->ps.sps->chroma_format_idc) {
1754 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1755 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1756 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1757 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1758 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1759 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1761 } else if (current_mv.pred_flag == PF_L1) {
1762 int x0_c = x0 >> s->ps.sps->hshift[1];
1763 int y0_c = y0 >> s->ps.sps->vshift[1];
1764 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1765 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1767 luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1768 ¤t_mv.mv[1], x0, y0, nPbW, nPbH,
1769 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1770 s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1772 if (s->ps.sps->chroma_format_idc) {
1773 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1774 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1775 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1777 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1778 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1779 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1781 } else if (current_mv.pred_flag == PF_BI) {
1782 int x0_c = x0 >> s->ps.sps->hshift[1];
1783 int y0_c = y0 >> s->ps.sps->vshift[1];
1784 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1785 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1787 luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1788 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1789 ref1->frame, ¤t_mv.mv[1], ¤t_mv);
1791 if (s->ps.sps->chroma_format_idc) {
1792 chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1793 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 0);
1795 chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1796 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 1);
/*
 * Derive the luma intra prediction mode for a PU (HEVC spec 8.4.2): build
 * the three most-probable-mode candidates from the left and above
 * neighbours, select via mpm_idx or reconstruct from
 * rem_intra_luma_pred_mode, then record the mode and PF_INTRA over the
 * PU's min-PU grid cells.  Returns the derived mode (0..34).
 *
 * NOTE(review): braces, blank lines and a few statements are missing from
 * this sampled listing — e.g. the candidate[3] declaration, the `else`
 * lines, and the body of the CTB-boundary reset at original line 1828/1829
 * (presumably `cand_up = INTRA_DC;` — confirm against upstream).
 */
1804 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1805 int prev_intra_luma_pred_flag)
1807 HEVCLocalContext *lc = s->HEVClc;
1808 int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1809 int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1810 int min_pu_width = s->ps.sps->min_pu_width;
1811 int size_in_pus = pu_size >> s->ps.sps->log2_min_pu_size;
/* Position within the current CTB; zero means we sit on a CTB edge and
 * the neighbour (if any) lies in another CTB. */
1812 int x0b = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1813 int y0b = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
/* Neighbour modes default to INTRA_DC when the neighbour is unavailable. */
1815 int cand_up = (lc->ctb_up_flag || y0b) ?
1816 s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1817 int cand_left = (lc->ctb_left_flag || x0b) ?
1818 s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;
1820 int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1822 MvField *tab_mvf = s->ref->tab_mvf;
1823 int intra_pred_mode;
1827 // intra_pred_mode prediction does not cross vertical CTB boundaries
1828 if ((y0 - 1) < y_ctb)
/* MPM candidate construction: equal neighbours yield either the fixed
 * {PLANAR, DC, ANGULAR_26} set (for non-angular modes < 2) or the mode
 * plus its two angular neighbours (mod-32 wrap over modes 2..33). */
1831 if (cand_left == cand_up) {
1832 if (cand_left < 2) {
1833 candidate[0] = INTRA_PLANAR;
1834 candidate[1] = INTRA_DC;
1835 candidate[2] = INTRA_ANGULAR_26;
1837 candidate[0] = cand_left;
1838 candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1839 candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1842 candidate[0] = cand_left;
1843 candidate[1] = cand_up;
1844 if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1845 candidate[2] = INTRA_PLANAR;
1846 } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1847 candidate[2] = INTRA_DC;
1849 candidate[2] = INTRA_ANGULAR_26;
/* Either pick the signalled MPM directly, or sort the candidates and
 * remap rem_intra_luma_pred_mode past each candidate <= it. */
1853 if (prev_intra_luma_pred_flag) {
1854 intra_pred_mode = candidate[lc->pu.mpm_idx];
1856 if (candidate[0] > candidate[1])
1857 FFSWAP(uint8_t, candidate[0], candidate[1]);
1858 if (candidate[0] > candidate[2])
1859 FFSWAP(uint8_t, candidate[0], candidate[2]);
1860 if (candidate[1] > candidate[2])
1861 FFSWAP(uint8_t, candidate[1], candidate[2]);
1863 intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1864 for (i = 0; i < 3; i++)
1865 if (intra_pred_mode >= candidate[i])
1869 /* write the intra prediction units into the mv array */
1872 for (i = 0; i < size_in_pus; i++) {
1873 memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1874 intra_pred_mode, size_in_pus);
1876 for (j = 0; j < size_in_pus; j++) {
1877 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1881 return intra_pred_mode;
/* Record the coding-tree depth 'ct_depth' for every min-CB-sized cell covered
 * by the coding block at (x0, y0) of size (1 << log2_cb_size). tab_ct_depth
 * is a per-picture grid with one byte per minimum coding block. */
1884 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1885 int log2_cb_size, int ct_depth)
/* width/height of the CB measured in minimum coding blocks */
1887 int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
/* top-left of the CB in min-CB grid coordinates */
1888 int x_cb = x0 >> s->ps.sps->log2_min_cb_size;
1889 int y_cb = y0 >> s->ps.sps->log2_min_cb_size;
/* fill one row of the grid per iteration; the memset value/length
 * arguments continue on the following (elided) source line */
1892 for (y = 0; y < length; y++)
1893 memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
/* Chroma intra mode remapping table for 4:2:2 video: indexed by the derived
 * luma-based mode (0..34), yields the mode actually used for chroma.
 * Consumed in intra_prediction_unit() when chroma_format_idc == 2. */
1897 static const uint8_t tab_mode_idx[] = {
1898 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20,
1899 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
/* Parse intra prediction modes for a CU at (x0, y0).
 * For PART_NxN the CU is split into a 2x2 grid of prediction blocks
 * (side == 2, pb_size halved); otherwise a single block is used.
 * Luma: a prev_intra_luma_pred_flag per PB selects MPM-index vs. remainder
 * coding; luma_intra_pred_mode() then derives the final mode.
 * Chroma: mode 4 means "same as luma (DM)"; otherwise intra_chroma_table
 * maps the coded index, with 34 substituted when it would collide with
 * the luma mode. The 4:2:2 case additionally remaps via tab_mode_idx. */
1901 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1904 HEVCLocalContext *lc = s->HEVClc;
/* candidate chroma modes: planar(0), angular-26, angular-10, DC(1) */
1905 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1906 uint8_t prev_intra_luma_pred_flag[4];
1907 int split = lc->cu.part_mode == PART_NxN;
1908 int pb_size = (1 << log2_cb_size) >> split;
1909 int side = split + 1;
/* first pass: read all prev_intra_luma_pred_flag bits (they are coded
 * together, before any mpm_idx/rem values) */
1913 for (i = 0; i < side; i++)
1914 for (j = 0; j < side; j++)
1915 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
/* second pass: read mpm_idx or rem_intra_luma_pred_mode and derive the
 * luma mode for each prediction block */
1917 for (i = 0; i < side; i++) {
1918 for (j = 0; j < side; j++) {
1919 if (prev_intra_luma_pred_flag[2 * i + j])
1920 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1922 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1924 lc->pu.intra_pred_mode[2 * i + j] =
1925 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1926 prev_intra_luma_pred_flag[2 * i + j]);
/* 4:4:4: one chroma mode per prediction block */
1930 if (s->ps.sps->chroma_format_idc == 3) {
1931 for (i = 0; i < side; i++) {
1932 for (j = 0; j < side; j++) {
1933 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1934 if (chroma_mode != 4) {
/* table entry equals the luma mode: use angular-34 instead */
1935 if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1936 lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1938 lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
/* chroma_mode == 4: derived-from-luma (DM) mode */
1940 lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
/* 4:2:2: single chroma mode, remapped through tab_mode_idx */
1944 } else if (s->ps.sps->chroma_format_idc == 2) {
1946 lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1947 if (chroma_mode != 4) {
1948 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1951 mode_idx = intra_chroma_table[chroma_mode];
1953 mode_idx = lc->pu.intra_pred_mode[0];
1955 lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
/* 4:2:0 (any non-monochrome format other than the two above) */
1956 } else if (s->ps.sps->chroma_format_idc != 0) {
1957 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1958 if (chroma_mode != 4) {
1959 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1960 lc->pu.intra_pred_mode_c[0] = 34;
1962 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1964 lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
/* Initialise default intra state for a CU that carries no coded intra
 * syntax (skip / inter / PCM): fill tab_ipm with INTRA_DC over the CU's
 * footprint, and, for intra CUs, mark every covered minimum prediction
 * unit as PF_INTRA in the motion-vector field array. */
1969 static void intra_prediction_unit_default_value(HEVCContext *s,
1973 HEVCLocalContext *lc = s->HEVClc;
1974 int pb_size = 1 << log2_cb_size;
/* CU size measured in minimum prediction units */
1975 int size_in_pus = pb_size >> s->ps.sps->log2_min_pu_size;
1976 int min_pu_width = s->ps.sps->min_pu_width;
1977 MvField *tab_mvf = s->ref->tab_mvf;
1978 int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1979 int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
/* CU smaller than one min PU: nothing to record */
1982 if (size_in_pus == 0)
1984 for (j = 0; j < size_in_pus; j++)
1985 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1986 if (lc->cu.pred_mode == MODE_INTRA)
1987 for (j = 0; j < size_in_pus; j++)
1988 for (k = 0; k < size_in_pus; k++)
1989 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
/* Parse and decode one coding unit at (x0, y0), size 1 << log2_cb_size.
 * Implements the coding_unit() syntax: skip flag, prediction mode,
 * partition mode, PCM, intra/inter prediction units, transform tree,
 * deblocking boundary strengths and QP bookkeeping.
 * Returns 0 on success or a negative error code. */
1992 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1994 int cb_size = 1 << log2_cb_size;
1995 HEVCLocalContext *lc = s->HEVClc;
1996 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
/* CU size in minimum coding blocks, and top-left in min-CB coords */
1997 int length = cb_size >> log2_min_cb_size;
1998 int min_cb_width = s->ps.sps->min_cb_width;
1999 int x_cb = x0 >> log2_min_cb_size;
2000 int y_cb = y0 >> log2_min_cb_size;
/* idx selects size-dependent DSP function tables in hls_prediction_unit */
2001 int idx = log2_cb_size - 2;
/* mask of the QP-group size derived from diff_cu_qp_delta_depth */
2002 int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
/* defaults before any syntax is parsed */
2007 lc->cu.pred_mode = MODE_INTRA;
2008 lc->cu.part_mode = PART_2Nx2N;
2009 lc->cu.intra_split_flag = 0;
2011 SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2012 for (x = 0; x < 4; x++)
2013 lc->pu.intra_pred_mode[x] = 1;
2014 if (s->ps.pps->transquant_bypass_enable_flag) {
2015 lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
/* bypassed CUs must not be deblocked */
2016 if (lc->cu.cu_transquant_bypass_flag)
2017 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2019 lc->cu.cu_transquant_bypass_flag = 0;
/* cu_skip_flag is only present outside I slices; propagate it over the
 * CU's footprint in the skip_flag grid */
2021 if (s->sh.slice_type != I_SLICE) {
2022 uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2024 x = y_cb * min_cb_width + x_cb;
2025 for (y = 0; y < length; y++) {
2026 memset(&s->skip_flag[x], skip_flag, length);
2029 lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2031 x = y_cb * min_cb_width + x_cb;
2032 for (y = 0; y < length; y++) {
2033 memset(&s->skip_flag[x], 0, length);
/* skipped CU: a single merge-mode PU, no residual */
2038 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2039 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2040 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2042 if (!s->sh.disable_deblocking_filter_flag)
2043 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2047 if (s->sh.slice_type != I_SLICE)
2048 lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
/* part_mode is coded for inter CUs and for intra CUs at minimum size */
2049 if (lc->cu.pred_mode != MODE_INTRA ||
2050 log2_cb_size == s->ps.sps->log2_min_cb_size) {
2051 lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
2052 lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2053 lc->cu.pred_mode == MODE_INTRA;
2056 if (lc->cu.pred_mode == MODE_INTRA) {
/* PCM is only allowed for 2Nx2N within the SPS-signalled size range */
2057 if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2058 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2059 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2060 pcm_flag = ff_hevc_pcm_flag_decode(s);
2063 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2064 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2065 if (s->ps.sps->pcm.loop_filter_disable_flag)
2066 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2071 intra_prediction_unit(s, x0, y0, log2_cb_size);
2074 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
/* inter CU: emit one hls_prediction_unit call per partition; the
 * case labels (PART_2Nx2N, PART_2NxN, ...) are on elided lines */
2075 switch (lc->cu.part_mode) {
2077 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2080 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0, idx);
2081 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2084 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2085 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2088 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0, idx);
2089 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2092 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2093 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1, idx);
2096 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0, idx - 2);
2097 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2100 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2101 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1, idx - 2);
2104 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2105 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2106 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2107 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
/* residual: rqt_root_cbf defaults to 1; for non-intra, non-merge-2Nx2N
 * CUs it is explicitly coded */
2113 int rqt_root_cbf = 1;
2115 if (lc->cu.pred_mode != MODE_INTRA &&
2116 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2117 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
/* note: 'const static' is legal but unconventional ordering */
2120 const static int cbf[2] = { 0 };
2121 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2122 s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2123 s->ps.sps->max_transform_hierarchy_depth_inter;
2124 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2126 log2_cb_size, 0, 0, cbf, cbf);
2130 if (!s->sh.disable_deblocking_filter_flag)
2131 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
/* no cu_qp_delta was coded for this QP group: derive qPy now */
2136 if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2137 ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
/* propagate the final luma QP over the CU footprint */
2139 x = y_cb * min_cb_width + x_cb;
2140 for (y = 0; y < length; y++) {
2141 memset(&s->qp_y_tab[x], lc->qp_y, length);
/* at the end of a QP group, update the QP predictor */
2145 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2146 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2147 lc->qPy_pred = lc->qp_y;
2150 set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
/* Recursively parse the coding quadtree rooted at (x0, y0).
 * Splits while split_cu_flag is set (or implicitly when the block
 * crosses the picture border), then decodes leaf CUs via
 * hls_coding_unit(). Returns >0 if more CTB data follows, 0 at
 * end of slice, or a negative error code. */
2155 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2156 int log2_cb_size, int cb_depth)
2158 HEVCLocalContext *lc = s->HEVClc;
2159 const int cb_size = 1 << log2_cb_size;
2163 lc->ct_depth = cb_depth;
/* split_cu_flag is only coded when the block lies fully inside the
 * picture and is larger than the minimum CB size ... */
2164 if (x0 + cb_size <= s->ps.sps->width &&
2165 y0 + cb_size <= s->ps.sps->height &&
2166 log2_cb_size > s->ps.sps->log2_min_cb_size) {
2167 split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
/* ... otherwise splitting is forced for any block above min size */
2169 split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
/* entering a new QP group: reset delta-QP state */
2171 if (s->ps.pps->cu_qp_delta_enabled_flag &&
2172 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2173 lc->tu.is_cu_qp_delta_coded = 0;
2174 lc->tu.cu_qp_delta = 0;
2177 if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2178 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2179 lc->tu.is_cu_chroma_qp_offset_coded = 0;
2183 int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2184 const int cb_size_split = cb_size >> 1;
2185 const int x1 = x0 + cb_size_split;
2186 const int y1 = y0 + cb_size_split;
/* recurse into the four sub-blocks; sub-blocks outside the picture
 * are skipped. A negative/zero more_data short-circuits recursion */
2190 more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2194 if (more_data && x1 < s->ps.sps->width) {
2195 more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2199 if (more_data && y1 < s->ps.sps->height) {
2200 more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2204 if (more_data && x1 < s->ps.sps->width &&
2205 y1 < s->ps.sps->height) {
2206 more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
/* leaving a QP group: refresh the predictor */
2211 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2212 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2213 lc->qPy_pred = lc->qp_y;
/* more data follows unless this subtree reached the picture edge */
2216 return ((x1 + cb_size_split) < s->ps.sps->width ||
2217 (y1 + cb_size_split) < s->ps.sps->height);
/* leaf: decode the CU, then check end_of_slice at the last CTB column
 * and row of the picture */
2221 ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2224 if ((!((x0 + cb_size) %
2225 (1 << (s->ps.sps->log2_ctb_size))) ||
2226 (x0 + cb_size >= s->ps.sps->width)) &&
2228 (1 << (s->ps.sps->log2_ctb_size))) ||
2229 (y0 + cb_size >= s->ps.sps->height))) {
2230 int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2231 return !end_of_slice_flag;
/* Set up per-CTB neighbourhood state before decoding the CTB at
 * (x_ctb, y_ctb): slice address bookkeeping, tile/WPP row extents,
 * and availability flags for the left/up/up-left/up-right neighbours
 * used by intra prediction and CABAC context derivation. */
2240 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2243 HEVCLocalContext *lc = s->HEVClc;
2244 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2245 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* CTB address relative to the start of the current slice; <= 0 means
 * this is the first CTB of the slice */
2246 int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2248 s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
/* WPP: each CTB row starts a fresh QP group */
2250 if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2251 if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2252 lc->first_qp_group = 1;
2253 lc->end_of_tiles_x = s->ps.sps->width;
2254 } else if (s->ps.pps->tiles_enabled_flag) {
/* crossing into a new tile: recompute its right edge */
2255 if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2256 int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2257 lc->end_of_tiles_x = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2258 lc->first_qp_group = 1;
2261 lc->end_of_tiles_x = s->ps.sps->width;
2264 lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
/* classify the left/upper CTB boundaries as tile and/or slice edges */
2266 lc->boundary_flags = 0;
2267 if (s->ps.pps->tiles_enabled_flag) {
2268 if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2269 lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2270 if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2271 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2272 if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2273 lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2274 if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2275 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
/* no tiles: only slice boundaries are possible */
2277 if (ctb_addr_in_slice <= 0)
2278 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2279 if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2280 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
/* neighbour availability: must exist in the picture, in this slice,
 * and not be separated by a tile boundary */
2283 lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2284 lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2285 lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2286 lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
/* Single-threaded slice decoding entry point (run via avctx->execute).
 * Walks the slice's CTBs in tile-scan order: initialises neighbour
 * state and CABAC per CTB, parses SAO parameters and the coding
 * quadtree, then runs the in-loop filters. The return value (elided
 * here) is the final CTB address in tile-scan order. */
2289 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2291 HEVCContext *s = avctxt->priv_data;
2292 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2296 int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
/* a dependent slice segment cannot be the first segment of a picture */
2298 if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2299 av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2300 return AVERROR_INVALIDDATA;
/* the previous segment must belong to the same slice */
2303 if (s->sh.dependent_slice_segment_flag) {
2304 int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2305 if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2306 av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2307 return AVERROR_INVALIDDATA;
2311 while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2312 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* convert raster-scan CTB address to pixel coordinates */
2314 x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2315 y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2316 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2318 ff_hevc_cabac_init(s, ctb_addr_ts);
2320 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
/* per-CTB deblocking parameters from the slice header */
2322 s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2323 s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2324 s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2326 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
/* parse error: invalidate this CTB's slice address and bail out */
2327 if (more_data < 0) {
2328 s->tab_slice_address[ctb_addr_rs] = -1;
2334 ff_hevc_save_states(s, ctb_addr_ts);
2335 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* last CTB of the picture: run the trailing filter pass */
2338 if (x_ctb + ctb_size >= s->ps.sps->width &&
2339 y_ctb + ctb_size >= s->ps.sps->height)
2340 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
/* Decode the current slice without wavefront parallelism by running
 * hls_decode_entry once through avctx->execute. The ret/arg setup and
 * final return are on elided lines. */
2345 static int hls_slice_data(HEVCContext *s)
2353 s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
/* Wavefront (WPP) worker: decodes one CTB row of the slice.
 * Each job gets its own HEVCContext copy (s1->sList[self_id]); rows
 * synchronise through ff_thread_await_progress2/report_progress2 so a
 * row stays at least SHIFT_CTB_WPP CTBs behind the row above it, and
 * through the shared s1->wpp_err abort flag. */
2356 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2358 HEVCContext *s1 = avctxt->priv_data, *s;
2359 HEVCLocalContext *lc;
2360 int ctb_size = 1<< s1->ps.sps->log2_ctb_size;
2362 int *ctb_row_p = input_ctb_row;
2363 int ctb_row = ctb_row_p[job];
2364 int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2365 int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2366 int thread = ctb_row % s1->threads_number;
2369 s = s1->sList[self_id];
/* rows after the first start at their slice-header entry point */
2373 ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2377 ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2380 while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2381 int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2382 int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2384 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
/* wait until the row above is far enough ahead */
2386 ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
/* another row hit an error: release waiters and abort this row */
2388 if (avpriv_atomic_int_get(&s1->wpp_err)){
2389 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2393 ff_hevc_cabac_init(s, ctb_addr_ts);
2394 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2395 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2397 if (more_data < 0) {
2398 s->tab_slice_address[ctb_addr_rs] = -1;
2404 ff_hevc_save_states(s, ctb_addr_ts);
/* let the row below advance by one CTB */
2405 ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2406 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* premature end-of-row: flag a wavefront error for all rows */
2408 if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2409 avpriv_atomic_int_set(&s1->wpp_err, 1);
2410 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/* bottom-right CTB of the picture: trailing filter pass */
2414 if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2415 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2416 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2419 ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* row complete once the next CTB would start past the right edge */
2422 if(x_ctb >= s->ps.sps->width) {
2426 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/* Decode the current slice with wavefront parallel processing.
 * Computes per-row substream offsets/sizes from the slice header's
 * entry points (adjusting for emulation-prevention bytes that were
 * stripped from the RBSP), clones the decoder context for each worker
 * thread, and dispatches hls_decode_entry_wpp via avctx->execute2. */
2431 static int hls_slice_data_wpp(HEVCContext *s, const HEVCNAL *nal)
2433 const uint8_t *data = nal->data;
2434 int length = nal->size;
2435 HEVCLocalContext *lc = s->HEVClc;
2436 int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2437 int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
/* cmpt counts skipped (emulation-prevention) bytes inside a substream */
2439 int startheader, cmpt = 0;
2445 return AVERROR(ENOMEM);
2450 ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
/* first call with this thread count: allocate per-thread contexts */
2453 for (i = 1; i < s->threads_number; i++) {
2454 s->sList[i] = av_malloc(sizeof(HEVCContext));
2455 memcpy(s->sList[i], s, sizeof(HEVCContext));
2456 s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2457 s->sList[i]->HEVClc = s->HEVClcList[i];
/* byte position after the slice header in the RBSP */
2461 offset = (lc->gb.index >> 3);
2463 for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2464 if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
/* translate each entry point into an (offset, size) pair, subtracting
 * the emulation-prevention bytes that fall inside the substream */
2470 for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2471 offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2472 for (j = 0, cmpt = 0, startheader = offset
2473 + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2474 if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2479 s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2480 s->sh.offset[i - 1] = offset;
/* the last substream runs to the end of the NAL unit */
2483 if (s->sh.num_entry_point_offsets != 0) {
2484 offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2485 s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2486 s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
/* refresh the worker contexts from the main context for this slice */
2491 for (i = 1; i < s->threads_number; i++) {
2492 s->sList[i]->HEVClc->first_qp_group = 1;
2493 s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2494 memcpy(s->sList[i], s, sizeof(HEVCContext));
2495 s->sList[i]->HEVClc = s->HEVClcList[i];
2498 avpriv_atomic_int_set(&s->wpp_err, 0);
2499 ff_reset_entries(s->avctx);
2501 for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2506 if (s->ps.pps->entropy_coding_sync_enabled_flag)
2507 s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2509 for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
/* Attach SEI-derived side data to the output frame: stereo 3D packing
 * from the frame-packing-arrangement SEI and a display matrix from the
 * display-orientation SEI. Returns 0 or AVERROR(ENOMEM). */
2516 static int set_side_data(HEVCContext *s)
2518 AVFrame *out = s->ref->frame;
/* types 3..5 (side-by-side, top-bottom, frame sequence) are the only
 * packing arrangements representable as AVStereo3D here */
2520 if (s->sei_frame_packing_present &&
2521 s->frame_packing_arrangement_type >= 3 &&
2522 s->frame_packing_arrangement_type <= 5 &&
2523 s->content_interpretation_type > 0 &&
2524 s->content_interpretation_type < 3) {
2525 AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2527 return AVERROR(ENOMEM);
2529 switch (s->frame_packing_arrangement_type) {
2531 if (s->quincunx_subsampling)
2532 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2534 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2537 stereo->type = AV_STEREO3D_TOPBOTTOM;
2540 stereo->type = AV_STEREO3D_FRAMESEQUENCE;
/* content_interpretation_type 2 means right view first */
2544 if (s->content_interpretation_type == 2)
2545 stereo->flags = AV_STEREO3D_FLAG_INVERT;
2548 if (s->sei_display_orientation_present &&
2549 (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
/* SEI rotation is in units of 2^-16 turns; convert to degrees */
2550 double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2551 AVFrameSideData *rotation = av_frame_new_side_data(out,
2552 AV_FRAME_DATA_DISPLAYMATRIX,
2553 sizeof(int32_t) * 9);
2555 return AVERROR(ENOMEM);
2557 av_display_rotation_set((int32_t *)rotation->data, angle);
2558 av_display_matrix_flip((int32_t *)rotation->data,
2559 s->sei_hflip, s->sei_vflip);
/* Per-picture initialisation, called for the first slice of a frame:
 * clears the per-picture decode tables, acquires a new reference frame,
 * builds the frame RPS, attaches side data and bumps/outputs frames
 * from the DPB. On failure the new reference is released (fail path,
 * elided label). */
2565 static int hevc_frame_start(HEVCContext *s)
2567 HEVCLocalContext *lc = s->HEVClc;
2568 int pic_size_in_ctb = ((s->ps.sps->width >> s->ps.sps->log2_min_cb_size) + 1) *
2569 ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
/* reset deblocking strengths, CBF, PCM and slice-address tables */
2572 memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2573 memset(s->vertical_bs, 0, s->bs_width * s->bs_height);
2574 memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2575 memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2576 memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
/* remember the first VCL NAL type to verify later slices match */
2579 s->first_nal_type = s->nal_unit_type;
2581 if (s->ps.pps->tiles_enabled_flag)
2582 lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2584 ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2588 ret = ff_hevc_frame_rps(s);
2590 av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2594 s->ref->frame->key_frame = IS_IRAP(s);
2596 ret = set_side_data(s);
/* map HEVC slice type (B=0,P=1,I=2) to AVPictureType */
2600 s->frame->pict_type = 3 - s->sh.slice_type;
2603 ff_hevc_bump_frame(s);
2605 av_frame_unref(s->output_frame);
2606 ret = ff_hevc_output_frame(s, s->output_frame, 0);
/* hwaccel setup may still change the frame, so delay the
 * frame-threading handoff in that case */
2610 if (!s->avctx->hwaccel)
2611 ff_thread_finish_setup(s->avctx);
/* failure path: drop the newly acquired reference frame */
2617 ff_hevc_unref_frame(s, s->ref, ~0);
/* Decode a single NAL unit: dispatch on nal->type to the parameter-set
 * and SEI parsers, or, for VCL NAL units, parse the slice header,
 * start the frame if needed, and decode the slice data (optionally
 * through a hardware accelerator or the WPP path).
 * Returns 0 on success; errors are fatal only with AV_EF_EXPLODE. */
2622 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2624 HEVCLocalContext *lc = s->HEVClc;
2625 GetBitContext *gb = &lc->gb;
2626 int ctb_addr_ts, ret;
2629 s->nal_unit_type = nal->type;
2630 s->temporal_id = nal->temporal_id;
2632 switch (s->nal_unit_type) {
/* parameter sets (the case labels are on elided lines) */
2634 ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2639 ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2640 s->apply_defdispwin);
2645 ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2649 case NAL_SEI_PREFIX:
2650 case NAL_SEI_SUFFIX:
2651 ret = ff_hevc_decode_nal_sei(s);
/* VCL NAL unit types fall through to shared slice handling */
2662 case NAL_BLA_W_RADL:
2664 case NAL_IDR_W_RADL:
2671 ret = hls_slice_header(s);
/* random-access handling: after a seek (max_ra == INT_MAX), wait for
 * a CRA/BLA picture and drop leading RASL pictures */
2675 if (s->max_ra == INT_MAX) {
2676 if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2680 s->max_ra = INT_MIN;
2684 if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2685 s->poc <= s->max_ra) {
2689 if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2690 s->max_ra = INT_MIN;
2693 if (s->sh.first_slice_in_pic_flag) {
2694 ret = hevc_frame_start(s);
2697 } else if (!s->ref) {
2698 av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
/* all VCL NALUs of one picture must share the same type */
2702 if (s->nal_unit_type != s->first_nal_type) {
2703 av_log(s->avctx, AV_LOG_ERROR,
2704 "Non-matching NAL types of the VCL NALUs: %d %d\n",
2705 s->first_nal_type, s->nal_unit_type);
2706 return AVERROR_INVALIDDATA;
2709 if (!s->sh.dependent_slice_segment_flag &&
2710 s->sh.slice_type != I_SLICE) {
2711 ret = ff_hevc_slice_rpl(s);
2713 av_log(s->avctx, AV_LOG_WARNING,
2714 "Error constructing the reference lists for the current slice.\n");
2719 if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2720 ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2725 if (s->avctx->hwaccel) {
2726 ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
/* software path: WPP when multiple threads and entry points exist */
2730 if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2731 ctb_addr_ts = hls_slice_data_wpp(s, nal);
2733 ctb_addr_ts = hls_slice_data(s);
/* picture fully decoded when the last CTB was reached */
2734 if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2738 if (ctb_addr_ts < 0) {
/* EOS/EOB (case labels elided): start a new sequence */
2746 s->seq_decode = (s->seq_decode + 1) & 0xff;
2747 s->max_ra = INT_MAX;
2753 av_log(s->avctx, AV_LOG_INFO,
2754 "Skipping NAL unit %d\n", s->nal_unit_type);
2759 if (s->avctx->err_recognition & AV_EF_EXPLODE)
/* Split an input packet into NAL units and decode them in order.
 * A NALU error is only fatal with AV_EF_EXPLODE (handled in the elided
 * 'fail' path); otherwise decoding continues with the next NALU. */
2764 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2769 s->last_eos = s->eos;
2772 /* split the input packet into NAL units, so we know the upper bound on the
2773 * number of slices in the frame */
2774 ret = ff_hevc_split_packet(s, &s->pkt, buf, length, s->avctx, s->is_nalff,
2775 s->nal_length_size);
2777 av_log(s->avctx, AV_LOG_ERROR,
2778 "Error splitting the input into NAL units.\n");
/* pre-scan for end-of-sequence / end-of-bitstream markers */
2782 for (i = 0; i < s->pkt.nb_nals; i++) {
2783 if (s->pkt.nals[i].type == NAL_EOB_NUT ||
2784 s->pkt.nals[i].type == NAL_EOS_NUT)
2788 /* decode the NAL units */
2789 for (i = 0; i < s->pkt.nb_nals; i++) {
2790 ret = decode_nal_unit(s, &s->pkt.nals[i]);
2792 av_log(s->avctx, AV_LOG_WARNING,
2793 "Error parsing NAL unit #%d.\n", i);
/* frame threading: unblock any consumer waiting on this picture */
2799 if (s->ref && s->threads_type == FF_THREAD_FRAME)
2800 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/* Log a 16-byte MD5 digest as 32 lowercase hex characters, without a
 * trailing newline (callers append separators themselves). */
2805 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
2808 for (i = 0; i < 16; i++)
2809 av_log(log_ctx, level, "%02"PRIx8, md5[i]);
/* Verify the decoded frame against the per-plane MD5 checksums carried
 * in the picture-hash SEI (stored in s->md5). Returns 0 when all
 * planes match, AVERROR_INVALIDDATA on mismatch, or another negative
 * error code. */
2812 static int verify_md5(HEVCContext *s, AVFrame *frame)
2814 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2819 return AVERROR(EINVAL);
/* >8-bit formats use two bytes per sample */
2821 pixel_shift = desc->comp[0].depth_minus1 > 7;
2823 av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2826 /* the checksums are LE, so we have to byteswap for >8bpp formats
2829 if (pixel_shift && !s->checksum_buf) {
/* scratch buffer large enough for the widest plane line */
2830 av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2831 FFMAX3(frame->linesize[0], frame->linesize[1],
2832 frame->linesize[2]));
2833 if (!s->checksum_buf)
2834 return AVERROR(ENOMEM);
/* hash each plane line by line (strides may exceed the visible width) */
2838 for (i = 0; frame->data[i]; i++) {
2839 int width = s->avctx->coded_width;
2840 int height = s->avctx->coded_height;
2841 int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
2842 int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2845 av_md5_init(s->md5_ctx);
2846 for (j = 0; j < h; j++) {
2847 const uint8_t *src = frame->data[i] + j * frame->linesize[i];
/* byteswap into the scratch buffer so the hash sees LE samples */
2850 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2851 (const uint16_t *) src, w);
2852 src = s->checksum_buf;
2855 av_md5_update(s->md5_ctx, src, w << pixel_shift);
2857 av_md5_final(s->md5_ctx, md5);
2859 if (!memcmp(md5, s->md5[i], 16)) {
2860 av_log (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2861 print_md5(s->avctx, AV_LOG_DEBUG, md5);
2862 av_log (s->avctx, AV_LOG_DEBUG, "; ");
2864 av_log (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2865 print_md5(s->avctx, AV_LOG_ERROR, md5);
2866 av_log (s->avctx, AV_LOG_ERROR, " != ");
2867 print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2868 av_log (s->avctx, AV_LOG_ERROR, "\n");
2869 return AVERROR_INVALIDDATA;
2873 av_log(s->avctx, AV_LOG_DEBUG, "\n");
/* AVCodec.decode() entry point. An empty packet flushes buffered
 * pictures from the DPB; otherwise the packet's NAL units are decoded,
 * the SEI checksum optionally verified, and a finished frame (if any)
 * is returned through *data / *got_output. */
2878 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2882 HEVCContext *s = avctx->priv_data;
/* flush path: drain one buffered output frame per call */
2885 ret = ff_hevc_output_frame(s, data, 1);
2894 ret = decode_nal_units(s, avpkt->data, avpkt->size);
2898 if (avctx->hwaccel) {
/* best-effort: a hwaccel end_frame failure is logged, not fatal */
2899 if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2900 av_log(avctx, AV_LOG_ERROR,
2901 "hardware accelerator failed to decode picture\n");
2903 /* verify the SEI checksum */
2904 if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2906 ret = verify_md5(s, s->ref->frame);
2907 if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2908 ff_hevc_unref_frame(s, s->ref, ~0);
2915 if (s->is_decoded) {
2916 av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
/* hand over the frame queued for output, if any */
2920 if (s->output_frame->buf[0]) {
2921 av_frame_move_ref(data, s->output_frame);
/* Create a new reference to 'src' in 'dst' for frame threading: the
 * frame buffers and per-frame metadata buffers are ref-counted, not
 * copied. On any allocation failure dst is fully unreferenced and
 * AVERROR(ENOMEM) returned. */
2928 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2932 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2936 dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2937 if (!dst->tab_mvf_buf)
2939 dst->tab_mvf = src->tab_mvf;
2941 dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2942 if (!dst->rpl_tab_buf)
2944 dst->rpl_tab = src->rpl_tab;
2946 dst->rpl_buf = av_buffer_ref(src->rpl_buf);
/* plain-value metadata is copied directly */
2950 dst->poc = src->poc;
2951 dst->ctb_count = src->ctb_count;
2952 dst->window = src->window;
2953 dst->flags = src->flags;
2954 dst->sequence = src->sequence;
2956 if (src->hwaccel_picture_private) {
2957 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2958 if (!dst->hwaccel_priv_buf)
2960 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
/* fail path: release any references taken so far */
2965 ff_hevc_unref_frame(s, dst, ~0);
2966 return AVERROR(ENOMEM);
/* AVCodec.close(): release everything owned by the decoder context —
 * scratch buffers, DPB frames, parameter-set lists, per-thread
 * contexts and the parsed-NAL buffers. */
2969 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2971 HEVCContext *s = avctx->priv_data;
2976 av_freep(&s->md5_ctx);
2978 av_freep(&s->cabac_state);
2980 for (i = 0; i < 3; i++) {
2981 av_freep(&s->sao_pixel_buffer_h[i]);
2982 av_freep(&s->sao_pixel_buffer_v[i]);
2984 av_frame_free(&s->output_frame);
/* unref then free every DPB slot */
2986 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2987 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2988 av_frame_free(&s->DPB[i].frame);
2991 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
2992 av_buffer_unref(&s->ps.vps_list[i]);
2993 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
2994 av_buffer_unref(&s->ps.sps_list[i]);
2995 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
2996 av_buffer_unref(&s->ps.pps_list[i]);
3001 av_freep(&s->sh.entry_point_offset);
3002 av_freep(&s->sh.offset);
3003 av_freep(&s->sh.size);
/* worker-thread contexts allocated by hls_slice_data_wpp() */
3005 for (i = 1; i < s->threads_number; i++) {
3006 HEVCLocalContext *lc = s->HEVClcList[i];
3008 av_freep(&s->HEVClcList[i]);
3009 av_freep(&s->sList[i]);
/* HEVClc aliases HEVClcList[0]; avoid a dangling pointer after free */
3012 if (s->HEVClc == s->HEVClcList[0])
3014 av_freep(&s->HEVClcList[0]);
3016 for (i = 0; i < s->pkt.nals_allocated; i++) {
3017 av_freep(&s->pkt.nals[i].rbsp_buffer);
3018 av_freep(&s->pkt.nals[i].skipped_bytes_pos);
3020 av_freep(&s->pkt.nals);
3021 s->pkt.nals_allocated = 0;
/* Allocate the long-lived pieces of the decoder context: the local
 * context, CABAC state, output frame, DPB frames and MD5 context.
 * On any failure everything is torn down via hevc_decode_free() and
 * AVERROR(ENOMEM) is returned (fail path at the bottom). */
3026 static av_cold int hevc_init_context(AVCodecContext *avctx)
3028 HEVCContext *s = avctx->priv_data;
3033 s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3036 s->HEVClcList[0] = s->HEVClc;
3039 s->cabac_state = av_malloc(HEVC_CONTEXTS);
3040 if (!s->cabac_state)
3043 s->output_frame = av_frame_alloc();
3044 if (!s->output_frame)
3047 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3048 s->DPB[i].frame = av_frame_alloc();
3049 if (!s->DPB[i].frame)
3051 s->DPB[i].tf.f = s->DPB[i].frame;
/* INT_MAX: no recovery point seen yet (see decode_nal_unit) */
3054 s->max_ra = INT_MAX;
3056 s->md5_ctx = av_md5_alloc();
3060 ff_bswapdsp_init(&s->bdsp);
3062 s->context_initialized = 1;
3068 hevc_decode_free(avctx);
3069 return AVERROR(ENOMEM);
/* Frame-threading: synchronise this worker's context with the source
 * context s0 — re-reference DPB frames and parameter-set buffers, and
 * copy the scalar decoding state. */
3072 static int hevc_update_thread_context(AVCodecContext *dst,
3073 const AVCodecContext *src)
3075 HEVCContext *s = dst->priv_data;
3076 HEVCContext *s0 = src->priv_data;
3079 if (!s->context_initialized) {
3080 ret = hevc_init_context(dst);
/* mirror the source DPB: drop our refs, re-reference active frames */
3085 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3086 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3087 if (s0->DPB[i].frame->buf[0]) {
3088 ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3094 if (s->ps.sps != s0->ps.sps)
/* re-reference all parameter-set buffers from the source context */
3096 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
3097 av_buffer_unref(&s->ps.vps_list[i]);
3098 if (s0->ps.vps_list[i]) {
3099 s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
3100 if (!s->ps.vps_list[i])
3101 return AVERROR(ENOMEM);
3105 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3106 av_buffer_unref(&s->ps.sps_list[i]);
3107 if (s0->ps.sps_list[i]) {
3108 s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
3109 if (!s->ps.sps_list[i])
3110 return AVERROR(ENOMEM);
3114 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
3115 av_buffer_unref(&s->ps.pps_list[i]);
3116 if (s0->ps.pps_list[i]) {
3117 s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
3118 if (!s->ps.pps_list[i])
3119 return AVERROR(ENOMEM);
/* the active SPS changed: reinitialise size-dependent state */
3123 if (s->ps.sps != s0->ps.sps)
3124 if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
/* scalar decoding state */
3127 s->seq_decode = s0->seq_decode;
3128 s->seq_output = s0->seq_output;
3129 s->pocTid0 = s0->pocTid0;
3130 s->max_ra = s0->max_ra;
3133 s->is_nalff = s0->is_nalff;
3134 s->nal_length_size = s0->nal_length_size;
3136 s->threads_number = s0->threads_number;
3137 s->threads_type = s0->threads_type;
/* new sequence in the source (condition elided): reset RA state */
3140 s->seq_decode = (s->seq_decode + 1) & 0xff;
3141 s->max_ra = INT_MAX;
/* Parse codec extradata: either an hvcC configuration record (MP4-style)
 * or raw Annex-B NAL units, feeding the contained parameter sets through
 * decode_nal_units().  Afterwards, stream-level parameters are exported
 * from the first SPS found.
 * NOTE(review): some error-handling lines and closing braces are elided
 * in this excerpt. */
3147 static int hevc_decode_extradata(HEVCContext *s)
3149     AVCodecContext *avctx = s->avctx;
3153     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
/* Heuristic for hvcC: Annex-B extradata starts with a 00 00 0x start-code
 * prefix, so a nonzero first/second byte or a third byte > 1 means hvcC. */
3155     if (avctx->extradata_size > 3 &&
3156         (avctx->extradata[0] || avctx->extradata[1] ||
3157          avctx->extradata[2] > 1)) {
3158         /* It seems the extradata is encoded as hvcC format.
3159          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3160          * is finalized. When finalized, configurationVersion will be 1 and we
3161          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3162         int i, j, num_arrays, nal_len_size;
/* Skip the 21-byte fixed part of the HEVCDecoderConfigurationRecord,
 * then read lengthSizeMinusOne (low 2 bits) and the array count. */
3166         bytestream2_skip(&gb, 21);
3167         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3168         num_arrays   = bytestream2_get_byte(&gb);
3170         /* nal units in the hvcC always have length coded with 2 bytes,
3171          * so put a fake nal_length_size = 2 while parsing them */
3172         s->nal_length_size = 2;
3174         /* Decode nal units from hvcC. */
3175         for (i = 0; i < num_arrays; i++) {
3176             int type = bytestream2_get_byte(&gb) & 0x3f;
3177             int cnt  = bytestream2_get_be16(&gb);
3179             for (j = 0; j < cnt; j++) {
3180                 // +2 for the nal size field
3181                 int nalsize = bytestream2_peek_be16(&gb) + 2;
/* Reject truncated extradata before handing the buffer to the parser. */
3182                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3183                     av_log(s->avctx, AV_LOG_ERROR,
3184                            "Invalid NAL unit size in extradata.\n");
3185                     return AVERROR_INVALIDDATA;
3188                 ret = decode_nal_units(s, gb.buffer, nalsize);
3190                     av_log(avctx, AV_LOG_ERROR,
3191                            "Decoding nal unit %d %d from hvcC failed\n",
3195                 bytestream2_skip(&gb, nalsize);
3199         /* Now store right nal length size, that will be used to parse
/* Restore the real NAL length-field size for the packet bitstream. */
3201         s->nal_length_size = nal_len_size;
/* Annex-B path: the whole extradata is a plain NAL unit stream. */
3204         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3209     /* export stream parameters from the first SPS */
3210     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3211         if (s->ps.sps_list[i]) {
3212             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3213             export_stream_params(s->avctx, &s->ps, sps);
/* Codec init callback: set up CABAC lookup tables, allocate the decoder
 * context, pick the threading configuration and parse any extradata.
 * NOTE(review): 'else' keywords and error-check lines between the visible
 * statements are elided in this excerpt. */
3221 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3223     HEVCContext *s = avctx->priv_data;
/* One-time global init of the shared CABAC state tables. */
3226     ff_init_cabac_states();
/* Required for ff_thread_report/await_progress() with frame threads. */
3228     avctx->internal->allocate_progress = 1;
3230     ret = hevc_init_context(avctx);
3234     s->enable_parallel_tiles = 0;
3235     s->picture_struct = 0;
/* Slice threading uses avctx->thread_count workers; otherwise run single
 * threaded (the line selecting 1 is the elided else branch). */
3237     if(avctx->active_thread_type & FF_THREAD_SLICE)
3238         s->threads_number = avctx->thread_count;
3240         s->threads_number = 1;
/* Parameter sets may arrive out-of-band; parse them now so the first
 * packet can be decoded immediately. */
3242     if (avctx->extradata_size > 0 && avctx->extradata) {
3243         ret = hevc_decode_extradata(s);
3245             hevc_decode_free(avctx);
/* Prefer frame threading when it is enabled and more than one thread is
 * available; fall back to slice threading otherwise. */
3250     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3251             s->threads_type = FF_THREAD_FRAME;
3253             s->threads_type = FF_THREAD_SLICE;
/* Frame-threading callback: initialize a worker thread's copy of the
 * context.  The shallow memcpy made by the generic threading code is
 * wiped so hevc_init_context() can allocate fresh, unshared state. */
3258 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3260     HEVCContext *s = avctx->priv_data;
3263     memset(s, 0, sizeof(*s));
3265     ret = hevc_init_context(avctx);
/* Flush callback (e.g. on seek): drop every reference held in the DPB and
 * re-arm RASL-picture skipping until the next random access point. */
3272 static void hevc_decode_flush(AVCodecContext *avctx)
3274     HEVCContext *s = avctx->priv_data;
3275     ff_hevc_flush_dpb(s);
3276     s->max_ra = INT_MAX;
/* Helpers for the AVOption table below: field offset within HEVCContext,
 * and the common decoder/video option flags. */
3279 #define OFFSET(x) offsetof(HEVCContext, x)
3280 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* Profiles this decoder can report; terminated by FF_PROFILE_UNKNOWN
 * (the closing brace of the array is elided in this excerpt). */
3282 static const AVProfile profiles[] = {
3283     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3284     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3285     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3286     { FF_PROFILE_HEVC_REXT,                 "Rext"                },
3287     { FF_PROFILE_UNKNOWN },
/* User-settable decoder options.  Both entries write the same field
 * (apply_defdispwin), so either flag enables cropping to the VUI default
 * display window.  The NULL terminator of the table is elided here. */
3290 static const AVOption options[] = {
3291     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3292         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3293     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3294         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
/* AVClass tying the options table above to the decoder's private context
 * (the .option member and closing brace are elided in this excerpt). */
3298 static const AVClass hevc_decoder_class = {
3299     .class_name = "HEVC decoder",
3300     .item_name  = av_default_item_name,
3302     .version    = LIBAVUTIL_VERSION_INT,
3305 AVCodec ff_hevc_decoder = {
3307 .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3308 .type = AVMEDIA_TYPE_VIDEO,
3309 .id = AV_CODEC_ID_HEVC,
3310 .priv_data_size = sizeof(HEVCContext),
3311 .priv_class = &hevc_decoder_class,
3312 .init = hevc_decode_init,
3313 .close = hevc_decode_free,
3314 .decode = hevc_decode_frame,
3315 .flush = hevc_decode_flush,
3316 .update_thread_context = hevc_update_thread_context,
3317 .init_thread_copy = hevc_init_thread_copy,
3318 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3319 CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
3320 .profiles = NULL_IF_CONFIG_SMALL(profiles),