X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fhevc.c;h=4c06fb8d620ab5ff886ddef0939178e6e5495a58;hb=338ed3ed33c412c2828446c4e2a76949161fec6a;hp=b7e7757fae088b807bbf5109df6f97ec6f4b7711;hpb=b769cf4b44c8112827c2fdfcab74bd95600fd6d3;p=ffmpeg diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c index b7e7757fae0..4c06fb8d620 100644 --- a/libavcodec/hevc.c +++ b/libavcodec/hevc.c @@ -25,15 +25,16 @@ #include "libavutil/attributes.h" #include "libavutil/common.h" +#include "libavutil/display.h" #include "libavutil/internal.h" #include "libavutil/md5.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" #include "libavutil/stereo3d.h" +#include "bswapdsp.h" #include "bytestream.h" #include "cabac_functions.h" -#include "dsputil.h" #include "golomb.h" #include "hevc.h" @@ -165,7 +166,6 @@ static void pic_arrays_free(HEVCContext *s) { av_freep(&s->sao); av_freep(&s->deblock); - av_freep(&s->split_cu_flag); av_freep(&s->skip_flag); av_freep(&s->tab_ct_depth); @@ -191,7 +191,6 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) int log2_min_cb_size = sps->log2_min_cb_size; int width = sps->width; int height = sps->height; - int pic_size = width * height; int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) * ((height >> log2_min_cb_size) + 1); int ctb_count = sps->ctb_width * sps->ctb_height; @@ -202,8 +201,7 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao)); s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock)); - s->split_cu_flag = av_malloc(pic_size); - if (!s->sao || !s->deblock || !s->split_cu_flag) + if (!s->sao || !s->deblock) goto fail; s->skip_flag = av_malloc(pic_size_in_ctb); @@ -212,7 +210,7 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) goto fail; s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height); - s->tab_ipm = av_malloc(min_pu_size); + s->tab_ipm = av_mallocz(min_pu_size); s->is_pcm = av_malloc(min_pu_size); if 
(!s->tab_ipm || !s->cbf_luma || !s->is_pcm) goto fail; @@ -253,10 +251,10 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb) uint8_t luma_weight_l1_flag[16]; uint8_t chroma_weight_l1_flag[16]; - s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb); + s->sh.luma_log2_weight_denom = av_clip(get_ue_golomb_long(gb), 0, 7); if (s->sps->chroma_format_idc != 0) { int delta = get_se_golomb(gb); - s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7); + s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7); } for (i = 0; i < s->sh.nb_refs[L0]; i++) { @@ -284,7 +282,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb) int delta_chroma_weight_l0 = get_se_golomb(gb); int delta_chroma_offset_l0 = get_se_golomb(gb); s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0; - s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j]) + s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j]) >> s->sh.chroma_log2_weight_denom) + 128), -128, 127); } } else { @@ -320,7 +318,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb) int delta_chroma_weight_l1 = get_se_golomb(gb); int delta_chroma_offset_l1 = get_se_golomb(gb); s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1; - s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j]) + s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j]) >> s->sh.chroma_log2_weight_denom) + 128), -128, 127); } } else { @@ -338,7 +336,7 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb) const HEVCSPS *sps = s->sps; int max_poc_lsb = 1 << sps->log2_max_poc_lsb; int prev_delta_msb = 0; - int nb_sps = 0, nb_sh; + unsigned int nb_sps = 0, nb_sh; int 
i; rps->nb_refs = 0; @@ -385,44 +383,84 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb) return 0; } +static void export_stream_params(AVCodecContext *avctx, + const HEVCContext *s, const HEVCSPS *sps) +{ + const HEVCVPS *vps = (const HEVCVPS*)s->vps_list[sps->vps_id]->data; + unsigned int num = 0, den = 0; + + avctx->pix_fmt = sps->pix_fmt; + avctx->coded_width = sps->width; + avctx->coded_height = sps->height; + avctx->width = sps->output_width; + avctx->height = sps->output_height; + avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics; + avctx->profile = sps->ptl.general_ptl.profile_idc; + avctx->level = sps->ptl.general_ptl.level_idc; + + ff_set_sar(avctx, sps->vui.sar); + + if (sps->vui.video_signal_type_present_flag) + avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG + : AVCOL_RANGE_MPEG; + else + avctx->color_range = AVCOL_RANGE_MPEG; + + if (sps->vui.colour_description_present_flag) { + avctx->color_primaries = sps->vui.colour_primaries; + avctx->color_trc = sps->vui.transfer_characteristic; + avctx->colorspace = sps->vui.matrix_coeffs; + } else { + avctx->color_primaries = AVCOL_PRI_UNSPECIFIED; + avctx->color_trc = AVCOL_TRC_UNSPECIFIED; + avctx->colorspace = AVCOL_SPC_UNSPECIFIED; + } + + if (vps->vps_timing_info_present_flag) { + num = vps->vps_num_units_in_tick; + den = vps->vps_time_scale; + } else if (sps->vui.vui_timing_info_present_flag) { + num = sps->vui.vui_num_units_in_tick; + den = sps->vui.vui_time_scale; + } + + if (num != 0 && den != 0) + av_reduce(&avctx->framerate.den, &avctx->framerate.num, + num, den, 1 << 30); +} + static int set_sps(HEVCContext *s, const HEVCSPS *sps) { + #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL) + enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; int ret; + export_stream_params(s->avctx, s, sps); + pic_arrays_free(s); ret = pic_arrays_init(s, sps); if (ret < 0) goto fail; - s->avctx->coded_width = sps->width; - 
s->avctx->coded_height = sps->height; - s->avctx->width = sps->output_width; - s->avctx->height = sps->output_height; - s->avctx->pix_fmt = sps->pix_fmt; - s->avctx->sample_aspect_ratio = sps->vui.sar; - s->avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics; + if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) { +#if CONFIG_HEVC_DXVA2_HWACCEL + *fmt++ = AV_PIX_FMT_DXVA2_VLD; +#endif + } - if (sps->vui.video_signal_type_present_flag) - s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG - : AVCOL_RANGE_MPEG; - else - s->avctx->color_range = AVCOL_RANGE_MPEG; + *fmt++ = sps->pix_fmt; + *fmt = AV_PIX_FMT_NONE; - if (sps->vui.colour_description_present_flag) { - s->avctx->color_primaries = sps->vui.colour_primaries; - s->avctx->color_trc = sps->vui.transfer_characteristic; - s->avctx->colorspace = sps->vui.matrix_coeffs; - } else { - s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED; - s->avctx->color_trc = AVCOL_TRC_UNSPECIFIED; - s->avctx->colorspace = AVCOL_SPC_UNSPECIFIED; - } + ret = ff_get_format(s->avctx, pix_fmts); + if (ret < 0) + goto fail; + s->avctx->pix_fmt = ret; ff_hevc_pred_init(&s->hpc, sps->bit_depth); ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth); ff_videodsp_init (&s->vdsp, sps->bit_depth); - if (sps->sao_enabled) { + if (sps->sao_enabled && !s->avctx->hwaccel) { av_frame_unref(s->tmp_frame); ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF); if (ret < 0) @@ -431,7 +469,8 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps) } s->sps = sps; - s->vps = s->vps_list[s->sps->vps_id]; + s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data; + return 0; fail: @@ -454,7 +493,7 @@ static int hls_slice_header(HEVCContext *s) if (IS_IDR(s)) ff_hevc_clear_refs(s); } - if (s->nal_unit_type >= 16 && s->nal_unit_type <= 23) + if (IS_IRAP(s)) sh->no_output_of_prior_pics_flag = get_bits1(gb); sh->pps_id = get_ue_golomb_long(gb); @@ -527,6 +566,8 @@ static int 
hls_slice_header(HEVCContext *s) return AVERROR_INVALIDDATA; } + // when flag is not present, picture is inferred to be output + sh->pic_output_flag = 1; if (s->pps->output_flag_present_flag) sh->pic_output_flag = get_bits1(gb); @@ -534,7 +575,7 @@ static int hls_slice_header(HEVCContext *s) sh->colour_plane_id = get_bits(gb, 2); if (!IS_IDR(s)) { - int short_term_ref_pic_set_sps_flag, poc; + int poc; sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb); poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb); @@ -547,12 +588,14 @@ static int hls_slice_header(HEVCContext *s) } s->poc = poc; - short_term_ref_pic_set_sps_flag = get_bits1(gb); - if (!short_term_ref_pic_set_sps_flag) { + sh->short_term_ref_pic_set_sps_flag = get_bits1(gb); + if (!sh->short_term_ref_pic_set_sps_flag) { + int pos = get_bits_left(gb); ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1); if (ret < 0) return ret; + sh->short_term_ref_pic_set_size = pos - get_bits_left(gb); sh->short_term_rps = &sh->slice_rps; } else { int numbits, rps_idx; @@ -686,6 +729,7 @@ static int hls_slice_header(HEVCContext *s) } sh->slice_qp_delta = get_se_golomb(gb); + if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) { sh->slice_cb_qp_offset = get_se_golomb(gb); sh->slice_cr_qp_offset = get_se_golomb(gb); @@ -742,20 +786,35 @@ static int hls_slice_header(HEVCContext *s) } if (s->pps->slice_header_extension_present_flag) { - int length = get_ue_golomb_long(gb); + unsigned int length = get_ue_golomb_long(gb); for (i = 0; i < length; i++) skip_bits(gb, 8); // slice_header_extension_data_byte } // Inferred parameters - sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta; + sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta; + if (sh->slice_qp > 51 || + sh->slice_qp < -s->sps->qp_bd_offset) { + av_log(s->avctx, AV_LOG_ERROR, + "The slice_qp %d is outside the valid range " + "[%d, 51].\n", + sh->slice_qp, + -s->sps->qp_bd_offset); + return 
AVERROR_INVALIDDATA; + } + sh->slice_ctb_addr_rs = sh->slice_segment_addr; + if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) { + av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n"); + return AVERROR_INVALIDDATA; + } + s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag; if (!s->pps->cu_qp_delta_enabled_flag) - s->HEVClc.qp_y = ((s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset) % - (52 + s->sps->qp_bd_offset)) - s->sps->qp_bd_offset; + s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset, + 52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset; s->slice_initialized = 1; @@ -913,7 +972,7 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0, else offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset; - qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57); + qp_i = av_clip(qp_y + offset, -s->sps->qp_bd_offset, 57); if (qp_i < 30) qp = qp_i; else if (qp_i > 43) @@ -1196,10 +1255,10 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0, } } -static void hls_transform_unit(HEVCContext *s, int x0, int y0, - int xBase, int yBase, int cb_xBase, int cb_yBase, - int log2_cb_size, int log2_trafo_size, - int trafo_depth, int blk_idx) +static int hls_transform_unit(HEVCContext *s, int x0, int y0, + int xBase, int yBase, int cb_xBase, int cb_yBase, + int log2_cb_size, int log2_trafo_size, + int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr) { HEVCLocalContext *lc = &s->HEVClc; @@ -1207,24 +1266,22 @@ static void hls_transform_unit(HEVCContext *s, int x0, int y0, int trafo_size = 1 << log2_trafo_size; ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size); - s->hpc.intra_pred(s, x0, y0, log2_trafo_size, 0); + s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0); if (log2_trafo_size > 2) { trafo_size = trafo_size << (s->sps->hshift[1] - 1); ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size); - s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 1); - s->hpc.intra_pred(s, 
x0, y0, log2_trafo_size - 1, 2); + s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1); + s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2); } else if (blk_idx == 3) { trafo_size = trafo_size << s->sps->hshift[1]; ff_hevc_set_neighbour_available(s, xBase, yBase, trafo_size, trafo_size); - s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 1); - s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 2); + s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1); + s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2); } } - if (lc->tt.cbf_luma || - SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) || - SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) { + if (cbf_luma || cbf_cb || cbf_cr) { int scan_idx = SCAN_DIAG; int scan_idx_c = SCAN_DIAG; @@ -1234,6 +1291,18 @@ static void hls_transform_unit(HEVCContext *s, int x0, int y0, if (ff_hevc_cu_qp_delta_sign_flag(s) == 1) lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta; lc->tu.is_cu_qp_delta_coded = 1; + + if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) || + lc->tu.cu_qp_delta > (25 + s->sps->qp_bd_offset / 2)) { + av_log(s->avctx, AV_LOG_ERROR, + "The cu_qp_delta %d is outside the valid range " + "[%d, %d].\n", + lc->tu.cu_qp_delta, + -(26 + s->sps->qp_bd_offset / 2), + (25 + s->sps->qp_bd_offset / 2)); + return AVERROR_INVALIDDATA; + } + ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size); } @@ -1255,20 +1324,21 @@ static void hls_transform_unit(HEVCContext *s, int x0, int y0, } } - if (lc->tt.cbf_luma) + if (cbf_luma) hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0); if (log2_trafo_size > 2) { - if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0)) + if (cbf_cb) hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1); - if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) + if (cbf_cr) hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2); } else if (blk_idx == 3) { - if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase)) + if (cbf_cb) hls_residual_coding(s, 
xBase, yBase, log2_trafo_size, scan_idx_c, 1); - if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase)) + if (cbf_cr) hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2); } } + return 0; } static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size) @@ -1286,23 +1356,15 @@ static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_si s->is_pcm[i + j * min_pu_width] = 2; } -static void hls_transform_tree(HEVCContext *s, int x0, int y0, - int xBase, int yBase, int cb_xBase, int cb_yBase, - int log2_cb_size, int log2_trafo_size, - int trafo_depth, int blk_idx) +static int hls_transform_tree(HEVCContext *s, int x0, int y0, + int xBase, int yBase, int cb_xBase, int cb_yBase, + int log2_cb_size, int log2_trafo_size, + int trafo_depth, int blk_idx, + int cbf_cb, int cbf_cr) { HEVCLocalContext *lc = &s->HEVClc; uint8_t split_transform_flag; - - if (trafo_depth > 0 && log2_trafo_size == 2) { - SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) = - SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase); - SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = - SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase); - } else { - SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) = - SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0; - } + int ret; if (lc->cu.intra_split_flag) { if (trafo_depth == 1) @@ -1311,66 +1373,68 @@ static void hls_transform_tree(HEVCContext *s, int x0, int y0, lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0]; } - lc->tt.cbf_luma = 1; - - lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 && - lc->cu.pred_mode == MODE_INTER && - lc->cu.part_mode != PART_2Nx2N && - trafo_depth == 0; - if (log2_trafo_size <= s->sps->log2_max_trafo_size && log2_trafo_size > s->sps->log2_min_tb_size && trafo_depth < lc->cu.max_trafo_depth && !(lc->cu.intra_split_flag && trafo_depth == 0)) { split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size); } else { + int 
inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 && + lc->cu.pred_mode == MODE_INTER && + lc->cu.part_mode != PART_2Nx2N && + trafo_depth == 0; + split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size || (lc->cu.intra_split_flag && trafo_depth == 0) || - lc->tt.inter_split_flag; + inter_split; } - if (log2_trafo_size > 2) { - if (trafo_depth == 0 || - SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) { - SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) = - ff_hevc_cbf_cb_cr_decode(s, trafo_depth); - } - - if (trafo_depth == 0 || - SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) { - SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = - ff_hevc_cbf_cb_cr_decode(s, trafo_depth); - } - } + if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb)) + cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth); + else if (log2_trafo_size > 2 || trafo_depth == 0) + cbf_cb = 0; + if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr)) + cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth); + else if (log2_trafo_size > 2 || trafo_depth == 0) + cbf_cr = 0; if (split_transform_flag) { - int x1 = x0 + ((1 << log2_trafo_size) >> 1); - int y1 = y0 + ((1 << log2_trafo_size) >> 1); - - hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size, - log2_trafo_size - 1, trafo_depth + 1, 0); - hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size, - log2_trafo_size - 1, trafo_depth + 1, 1); - hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size, - log2_trafo_size - 1, trafo_depth + 1, 2); - hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size, - log2_trafo_size - 1, trafo_depth + 1, 3); + const int trafo_size_split = 1 << (log2_trafo_size - 1); + const int x1 = x0 + trafo_size_split; + const int y1 = y0 + trafo_size_split; + +#define SUBDIVIDE(x, y, idx) \ +do { \ + ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \ + log2_trafo_size - 1, trafo_depth + 1, idx, \ + 
cbf_cb, cbf_cr); \ + if (ret < 0) \ + return ret; \ +} while (0) + + SUBDIVIDE(x0, y0, 0); + SUBDIVIDE(x1, y0, 1); + SUBDIVIDE(x0, y1, 2); + SUBDIVIDE(x1, y1, 3); + +#undef SUBDIVIDE } else { int min_tu_size = 1 << s->sps->log2_min_tb_size; int log2_min_tu_size = s->sps->log2_min_tb_size; int min_tu_width = s->sps->min_tb_width; + int cbf_luma = 1; if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 || - SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) || - SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0)) { - lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth); - } - - hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase, - log2_cb_size, log2_trafo_size, trafo_depth, blk_idx); + cbf_cb || cbf_cr) + cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth); + ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase, + log2_cb_size, log2_trafo_size, + blk_idx, cbf_luma, cbf_cb, cbf_cr); + if (ret < 0) + return ret; // TODO: store cbf_luma somewhere else - if (lc->tt.cbf_luma) { + if (cbf_luma) { int i, j; for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size) for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) { @@ -1380,14 +1444,13 @@ static void hls_transform_tree(HEVCContext *s, int x0, int y0, } } if (!s->sh.disable_deblocking_filter_flag) { - ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size, - lc->slice_or_tiles_up_boundary, - lc->slice_or_tiles_left_boundary); + ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size); if (s->pps->transquant_bypass_enable_flag && lc->cu.cu_transquant_bypass_flag) set_deblocking_bypass(s, x0, y0, log2_trafo_size); } } + return 0; } static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size) @@ -1403,21 +1466,20 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size) int stride2 = s->frame->linesize[2]; uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)]; - int 
length = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth; - const uint8_t *pcm = skip_bytes(&s->HEVClc.cc, (length + 7) >> 3); + int length = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma; + const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3); int ret; - ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size, - lc->slice_or_tiles_up_boundary, - lc->slice_or_tiles_left_boundary); + if (!s->sh.disable_deblocking_filter_flag) + ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size); ret = init_get_bits(&gb, pcm, length); if (ret < 0) return ret; s->hevcdsp.put_pcm(dst0, stride0, cb_size, &gb, s->sps->pcm.bit_depth); - s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth); - s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth); + s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma); + s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma); return 0; } @@ -1480,15 +1542,19 @@ static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride, if (x_off < extra_left || y_off < extra_top || x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] || y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) { + const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift; int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift); + int buf_offset = extra_top * + edge_emu_stride + (extra_left << s->sps->pixel_shift); s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset, - srcstride, srcstride, + edge_emu_stride, srcstride, block_w + ff_hevc_qpel_extra[mx], block_h + ff_hevc_qpel_extra[my], x_off - extra_left, y_off - extra_top, pic_width, pic_height); - src = lc->edge_emu_buffer + offset; + src = lc->edge_emu_buffer + buf_offset; + srcstride = edge_emu_stride; } s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, 
src, srcstride, block_w, block_h, lc->mc_buffer); @@ -1531,27 +1597,35 @@ static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2, if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER || x_off >= pic_width - block_w - EPEL_EXTRA_AFTER || y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) { + const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift; int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift)); + int buf_offset1 = EPEL_EXTRA_BEFORE * + (edge_emu_stride + (1 << s->sps->pixel_shift)); int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift)); + int buf_offset2 = EPEL_EXTRA_BEFORE * + (edge_emu_stride + (1 << s->sps->pixel_shift)); s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1, - src1stride, src1stride, + edge_emu_stride, src1stride, block_w + EPEL_EXTRA, block_h + EPEL_EXTRA, x_off - EPEL_EXTRA_BEFORE, y_off - EPEL_EXTRA_BEFORE, pic_width, pic_height); - src1 = lc->edge_emu_buffer + offset1; + src1 = lc->edge_emu_buffer + buf_offset1; + src1stride = edge_emu_stride; s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride, block_w, block_h, mx, my, lc->mc_buffer); s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2, - src2stride, src2stride, + edge_emu_stride, src2stride, block_w + EPEL_EXTRA, block_h + EPEL_EXTRA, x_off - EPEL_EXTRA_BEFORE, y_off - EPEL_EXTRA_BEFORE, pic_width, pic_height); - src2 = lc->edge_emu_buffer + offset2; + src2 = lc->edge_emu_buffer + buf_offset2; + src2stride = edge_emu_stride; + s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride, block_w, block_h, mx, my, lc->mc_buffer); @@ -1572,6 +1646,50 @@ static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref, ff_thread_await_progress(&ref->tf, y, 0); } +static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW, + int nPbH, int log2_cb_size, int part_idx, + int merge_idx, MvField *mv) +{ + HEVCLocalContext *lc = &s->HEVClc; + enum 
InterPredIdc inter_pred_idc = PRED_L0; + int mvp_flag; + + ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH); + if (s->sh.slice_type == B_SLICE) + inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH); + + if (inter_pred_idc != PRED_L1) { + if (s->sh.nb_refs[L0]) + mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]); + + mv->pred_flag[0] = 1; + hls_mvd_coding(s, x0, y0, 0); + mvp_flag = ff_hevc_mvp_lx_flag_decode(s); + ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size, + part_idx, merge_idx, mv, mvp_flag, 0); + mv->mv[0].x += lc->pu.mvd.x; + mv->mv[0].y += lc->pu.mvd.y; + } + + if (inter_pred_idc != PRED_L0) { + if (s->sh.nb_refs[L1]) + mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]); + + if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) { + AV_ZERO32(&lc->pu.mvd); + } else { + hls_mvd_coding(s, x0, y0, 1); + } + + mv->pred_flag[1] = 1; + mvp_flag = ff_hevc_mvp_lx_flag_decode(s); + ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size, + part_idx, merge_idx, mv, mvp_flag, 1); + mv->mv[1].x += lc->pu.mvd.x; + mv->mv[1].y += lc->pu.mvd.y; + } +} + static void hls_prediction_unit(HEVCContext *s, int x0, int y0, int nPbW, int nPbH, int log2_cb_size, int partIdx) @@ -1598,95 +1716,33 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, int min_cb_width = s->sps->min_cb_width; int x_cb = x0 >> log2_min_cb_size; int y_cb = y0 >> log2_min_cb_size; - int ref_idx[2]; - int mvp_flag[2]; int x_pu, y_pu; int i, j; - if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) { + int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb); + + if (!skip_flag) + lc->pu.merge_flag = ff_hevc_merge_flag_decode(s); + + if (skip_flag || lc->pu.merge_flag) { if (s->sh.max_num_merge_cand > 1) merge_idx = ff_hevc_merge_idx_decode(s); else merge_idx = 0; - ff_hevc_luma_mv_merge_mode(s, x0, y0, - 1 << log2_cb_size, - 1 << log2_cb_size, - log2_cb_size, partIdx, - merge_idx, &current_mv); - x_pu = x0 >> s->sps->log2_min_pu_size; - y_pu = 
y0 >> s->sps->log2_min_pu_size; - - for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++) - for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++) - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv; - } else { /* MODE_INTER */ - lc->pu.merge_flag = ff_hevc_merge_flag_decode(s); - if (lc->pu.merge_flag) { - if (s->sh.max_num_merge_cand > 1) - merge_idx = ff_hevc_merge_idx_decode(s); - else - merge_idx = 0; - - ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size, - partIdx, merge_idx, &current_mv); - x_pu = x0 >> s->sps->log2_min_pu_size; - y_pu = y0 >> s->sps->log2_min_pu_size; - - for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++) - for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++) - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv; - } else { - enum InterPredIdc inter_pred_idc = PRED_L0; - ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH); - if (s->sh.slice_type == B_SLICE) - inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH); - - if (inter_pred_idc != PRED_L1) { - if (s->sh.nb_refs[L0]) { - ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]); - current_mv.ref_idx[0] = ref_idx[0]; - } - current_mv.pred_flag[0] = 1; - hls_mvd_coding(s, x0, y0, 0); - mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s); - ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size, - partIdx, merge_idx, &current_mv, - mvp_flag[0], 0); - current_mv.mv[0].x += lc->pu.mvd.x; - current_mv.mv[0].y += lc->pu.mvd.y; - } - - if (inter_pred_idc != PRED_L0) { - if (s->sh.nb_refs[L1]) { - ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]); - current_mv.ref_idx[1] = ref_idx[1]; - } - - if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) { - lc->pu.mvd.x = 0; - lc->pu.mvd.y = 0; - } else { - hls_mvd_coding(s, x0, y0, 1); - } - - current_mv.pred_flag[1] = 1; - mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s); - ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size, - partIdx, merge_idx, &current_mv, - mvp_flag[1], 1); - 
current_mv.mv[1].x += lc->pu.mvd.x; - current_mv.mv[1].y += lc->pu.mvd.y; - } + ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size, + partIdx, merge_idx, &current_mv); + } else { + hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size, + partIdx, merge_idx, &current_mv); + } - x_pu = x0 >> s->sps->log2_min_pu_size; - y_pu = y0 >> s->sps->log2_min_pu_size; + x_pu = x0 >> s->sps->log2_min_pu_size; + y_pu = y0 >> s->sps->log2_min_pu_size; - for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++) - for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++) - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv; - } - } + for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++) + for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++) + tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv; if (current_mv.pred_flag[0]) { ref0 = refPicList[0].ref[current_mv.ref_idx[0]]; @@ -1741,9 +1797,6 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]); DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]); - if (!ref1) - return; - luma_mc(s, tmp, tmpstride, ref1->frame, &current_mv.mv[1], x0, y0, nPbW, nPbH); @@ -1780,11 +1833,6 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]); DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]); DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]); - HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]]; - HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]]; - - if (!ref0 || !ref1) - return; luma_mc(s, tmp, tmpstride, ref0->frame, &current_mv.mv[0], x0, y0, nPbW, nPbH); @@ -2008,15 +2056,13 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) int min_cb_width = s->sps->min_cb_width; int x_cb = x0 >> log2_min_cb_size; int y_cb = y0 >> log2_min_cb_size; - int x, y; + int x, y, ret; lc->cu.x = x0; lc->cu.y = y0; - lc->cu.rqt_root_cbf = 1; 
lc->cu.pred_mode = MODE_INTRA; lc->cu.part_mode = PART_2Nx2N; lc->cu.intra_split_flag = 0; - lc->cu.pcm_flag = 0; SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0; for (x = 0; x < 4; x++) @@ -2031,7 +2077,6 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) if (s->sh.slice_type != I_SLICE) { uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb); - lc->cu.pred_mode = MODE_SKIP; x = y_cb * min_cb_width + x_cb; for (y = 0; y < length; y++) { memset(&s->skip_flag[x], skip_flag, length); @@ -2045,10 +2090,10 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) intra_prediction_unit_default_value(s, x0, y0, log2_cb_size); if (!s->sh.disable_deblocking_filter_flag) - ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size, - lc->slice_or_tiles_up_boundary, - lc->slice_or_tiles_left_boundary); + ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size); } else { + int pcm_flag = 0; + if (s->sh.slice_type != I_SLICE) lc->cu.pred_mode = ff_hevc_pred_mode_decode(s); if (lc->cu.pred_mode != MODE_INTRA || @@ -2062,10 +2107,9 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag && log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size && log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) { - lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s); + pcm_flag = ff_hevc_pcm_flag_decode(s); } - if (lc->cu.pcm_flag) { - int ret; + if (pcm_flag) { intra_prediction_unit_default_value(s, x0, y0, log2_cb_size); ret = hls_pcm_sample(s, x0, y0, log2_cb_size); if (s->sps->pcm.loop_filter_disable_flag) @@ -2115,22 +2159,25 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) } } - if (!lc->cu.pcm_flag) { + if (!pcm_flag) { + int rqt_root_cbf = 1; + if (lc->cu.pred_mode != MODE_INTRA && !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) { - lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s); + 
rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s); } - if (lc->cu.rqt_root_cbf) { + if (rqt_root_cbf) { lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ? s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag : s->sps->max_transform_hierarchy_depth_inter; - hls_transform_tree(s, x0, y0, x0, y0, x0, y0, log2_cb_size, - log2_cb_size, 0, 0); + ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0, + log2_cb_size, + log2_cb_size, 0, 0, 0, 0); + if (ret < 0) + return ret; } else { if (!s->sh.disable_deblocking_filter_flag) - ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size, - lc->slice_or_tiles_up_boundary, - lc->slice_or_tiles_left_boundary); + ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size); } } } @@ -2154,16 +2201,15 @@ static int hls_coding_quadtree(HEVCContext *s, int x0, int y0, { HEVCLocalContext *lc = &s->HEVClc; const int cb_size = 1 << log2_cb_size; + int split_cu; lc->ct.depth = cb_depth; if (x0 + cb_size <= s->sps->width && y0 + cb_size <= s->sps->height && log2_cb_size > s->sps->log2_min_cb_size) { - SAMPLE(s->split_cu_flag, x0, y0) = - ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0); + split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0); } else { - SAMPLE(s->split_cu_flag, x0, y0) = - (log2_cb_size > s->sps->log2_min_cb_size); + split_cu = (log2_cb_size > s->sps->log2_min_cb_size); } if (s->pps->cu_qp_delta_enabled_flag && log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) { @@ -2171,7 +2217,7 @@ static int hls_coding_quadtree(HEVCContext *s, int x0, int y0, lc->tu.cu_qp_delta = 0; } - if (SAMPLE(s->split_cu_flag, x0, y0)) { + if (split_cu) { const int cb_size_split = cb_size >> 1; const int x1 = x0 + cb_size_split; const int y1 = y0 + cb_size_split; @@ -2209,9 +2255,6 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb, int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts]; int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr; 
- int tile_left_boundary, tile_up_boundary; - int slice_left_boundary, slice_up_boundary; - s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr; if (s->pps->entropy_coding_sync_enabled_flag) { @@ -2231,25 +2274,25 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb, lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height); + lc->boundary_flags = 0; if (s->pps->tiles_enabled_flag) { - tile_left_boundary = x_ctb > 0 && - s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]]; - slice_left_boundary = x_ctb > 0 && - s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1]; - tile_up_boundary = y_ctb > 0 && - s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]]; - slice_up_boundary = y_ctb > 0 && - s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width]; + if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]]) + lc->boundary_flags |= BOUNDARY_LEFT_TILE; + if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1]) + lc->boundary_flags |= BOUNDARY_LEFT_SLICE; + if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]]) + lc->boundary_flags |= BOUNDARY_UPPER_TILE; + if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width]) + lc->boundary_flags |= BOUNDARY_UPPER_SLICE; } else { - tile_left_boundary = - tile_up_boundary = 1; - slice_left_boundary = ctb_addr_in_slice > 0; - slice_up_boundary = ctb_addr_in_slice >= s->sps->ctb_width; - } - lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1); - lc->slice_or_tiles_up_boundary = (!slice_up_boundary + (!tile_up_boundary << 1)); - lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary); - 
lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary); + if (!ctb_addr_in_slice > 0) + lc->boundary_flags |= BOUNDARY_LEFT_SLICE; + if (ctb_addr_in_slice < s->sps->ctb_width) + lc->boundary_flags |= BOUNDARY_UPPER_SLICE; + } + + lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE)); + lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE)); lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]])); lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]])); } @@ -2380,6 +2423,20 @@ static int set_side_data(HEVCContext *s) stereo->flags = AV_STEREO3D_FLAG_INVERT; } + if (s->sei_display_orientation_present && + (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) { + double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16); + AVFrameSideData *rotation = av_frame_new_side_data(out, + AV_FRAME_DATA_DISPLAYMATRIX, + sizeof(int32_t) * 9); + if (!rotation) + return AVERROR(ENOMEM); + + av_display_rotation_set((int32_t *)rotation->data, angle); + av_display_matrix_flip((int32_t *)rotation->data, + s->sei_hflip, s->sei_vflip); + } + return 0; } @@ -2395,6 +2452,7 @@ static int hevc_frame_start(HEVCContext *s) lc->start_of_tiles_x = 0; s->is_decoded = 0; + s->first_nal_type = s->nal_unit_type; if (s->pps->tiles_enabled_flag) lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size; @@ -2404,19 +2462,14 @@ static int hevc_frame_start(HEVCContext *s) if (ret < 0) goto fail; - av_fast_malloc(&lc->edge_emu_buffer, &lc->edge_emu_buffer_size, - (MAX_PB_SIZE + 7) * s->ref->frame->linesize[0]); 
- if (!lc->edge_emu_buffer) { - ret = AVERROR(ENOMEM); - goto fail; - } - ret = ff_hevc_frame_rps(s); if (ret < 0) { av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n"); goto fail; } + s->ref->frame->key_frame = IS_IRAP(s); + ret = set_side_data(s); if (ret < 0) goto fail; @@ -2432,18 +2485,18 @@ static int hevc_frame_start(HEVCContext *s) fail: if (s->ref) - ff_thread_report_progress(&s->ref->tf, INT_MAX, 0); + ff_hevc_unref_frame(s, s->ref, ~0); s->ref = NULL; return ret; } -static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length) +static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal) { HEVCLocalContext *lc = &s->HEVClc; GetBitContext *gb = &lc->gb; int ctb_addr_ts, ret; - ret = init_get_bits8(gb, nal, length); + ret = init_get_bits8(gb, nal->data, nal->size); if (ret < 0) return ret; @@ -2451,9 +2504,7 @@ static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length) if (ret < 0) { av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n", s->nal_unit_type); - if (s->avctx->err_recognition & AV_EF_EXPLODE) - return ret; - return 0; + goto fail; } else if (!ret) return 0; @@ -2461,23 +2512,23 @@ static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length) case NAL_VPS: ret = ff_hevc_decode_nal_vps(s); if (ret < 0) - return ret; + goto fail; break; case NAL_SPS: ret = ff_hevc_decode_nal_sps(s); if (ret < 0) - return ret; + goto fail; break; case NAL_PPS: ret = ff_hevc_decode_nal_pps(s); if (ret < 0) - return ret; + goto fail; break; case NAL_SEI_PREFIX: case NAL_SEI_SUFFIX: ret = ff_hevc_decode_nal_sei(s); if (ret < 0) - return ret; + goto fail; break; case NAL_TRAIL_R: case NAL_TRAIL_N: @@ -2523,6 +2574,13 @@ static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length) return ret; } else if (!s->ref) { av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n"); + goto fail; + } + + if (s->nal_unit_type != s->first_nal_type) { + av_log(s->avctx, AV_LOG_ERROR, + 
"Non-matching NAL types of the VCL NALUs: %d %d\n", + s->first_nal_type, s->nal_unit_type); return AVERROR_INVALIDDATA; } @@ -2532,22 +2590,35 @@ static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length) if (ret < 0) { av_log(s->avctx, AV_LOG_WARNING, "Error constructing the reference lists for the current slice.\n"); - if (s->avctx->err_recognition & AV_EF_EXPLODE) - return ret; + goto fail; } } - ctb_addr_ts = hls_slice_data(s); - if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) { - s->is_decoded = 1; - if ((s->pps->transquant_bypass_enable_flag || - (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) && - s->sps->sao_enabled) - restore_tqb_pixels(s); + if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) { + ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0); + if (ret < 0) + goto fail; } - if (ctb_addr_ts < 0) - return ctb_addr_ts; + if (s->avctx->hwaccel) { + ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size); + if (ret < 0) + goto fail; + } else { + ctb_addr_ts = hls_slice_data(s); + if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) { + s->is_decoded = 1; + if ((s->pps->transquant_bypass_enable_flag || + (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) && + s->sps->sao_enabled) + restore_tqb_pixels(s); + } + + if (ctb_addr_ts < 0) { + ret = ctb_addr_ts; + goto fail; + } + } break; case NAL_EOS_NUT: case NAL_EOB_NUT: @@ -2563,6 +2634,10 @@ static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length) } return 0; +fail: + if (s->avctx->err_recognition & AV_EF_EXPLODE) + return ret; + return 0; } /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication @@ -2619,8 +2694,10 @@ static int extract_rbsp(const uint8_t *src, int length, #endif /* HAVE_FAST_UNALIGNED */ if (i >= length - 1) { // no escaped 0 - nal->data = src; - nal->size = length; + nal->data = + nal->raw_data = src; + nal->size = + nal->raw_size = length; return 
length; } @@ -2659,6 +2736,8 @@ nsc: nal->data = dst; nal->size = di; + nal->raw_data = src; + nal->raw_size = si; return si; } @@ -2739,12 +2818,11 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length) /* parse the NAL units */ for (i = 0; i < s->nb_nals; i++) { - int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size); + int ret = decode_nal_unit(s, &s->nals[i]); if (ret < 0) { av_log(s->avctx, AV_LOG_WARNING, "Error parsing NAL unit #%d.\n", i); - if (s->avctx->err_recognition & AV_EF_EXPLODE) - goto fail; + goto fail; } } @@ -2800,8 +2878,8 @@ static int verify_md5(HEVCContext *s, AVFrame *frame) const uint8_t *src = frame->data[i] + j * frame->linesize[i]; #if HAVE_BIGENDIAN if (pixel_shift) { - s->dsp.bswap16_buf((uint16_t*)s->checksum_buf, - (const uint16_t*)src, w); + s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf, + (const uint16_t *) src, w); src = s->checksum_buf; } #endif @@ -2848,13 +2926,19 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output, if (ret < 0) return ret; - /* verify the SEI checksum */ - if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded && - s->is_md5) { - ret = verify_md5(s, s->ref->frame); - if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) { - ff_hevc_unref_frame(s, s->ref, ~0); - return ret; + if (avctx->hwaccel) { + if (s->ref && avctx->hwaccel->end_frame(avctx) < 0) + av_log(avctx, AV_LOG_ERROR, + "hardware accelerator failed to decode picture\n"); + } else { + /* verify the SEI checksum */ + if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded && + s->is_md5) { + ret = verify_md5(s, s->ref->frame); + if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) { + ff_hevc_unref_frame(s, s->ref, ~0); + return ret; + } } } s->is_md5 = 0; @@ -2898,6 +2982,13 @@ static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src) dst->flags = src->flags; dst->sequence = src->sequence; + if (src->hwaccel_picture_private) { + dst->hwaccel_priv_buf = 
av_buffer_ref(src->hwaccel_priv_buf); + if (!dst->hwaccel_priv_buf) + goto fail; + dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data; + } + return 0; fail: ff_hevc_unref_frame(s, dst, ~0); @@ -2907,12 +2998,10 @@ fail: static av_cold int hevc_decode_free(AVCodecContext *avctx) { HEVCContext *s = avctx->priv_data; - HEVCLocalContext *lc = &s->HEVClc; int i; pic_arrays_free(s); - av_freep(&lc->edge_emu_buffer); av_freep(&s->md5_ctx); av_frame_free(&s->tmp_frame); @@ -2924,7 +3013,7 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) } for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) - av_freep(&s->vps_list[i]); + av_buffer_unref(&s->vps_list[i]); for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) av_buffer_unref(&s->sps_list[i]); for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) @@ -2966,7 +3055,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) if (!s->md5_ctx) goto fail; - ff_dsputil_init(&s->dsp, avctx); + ff_bswapdsp_init(&s->bdsp); s->context_initialized = 1; @@ -2999,6 +3088,15 @@ static int hevc_update_thread_context(AVCodecContext *dst, } } + for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) { + av_buffer_unref(&s->vps_list[i]); + if (s0->vps_list[i]) { + s->vps_list[i] = av_buffer_ref(s0->vps_list[i]); + if (!s->vps_list[i]) + return AVERROR(ENOMEM); + } + } + for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) { av_buffer_unref(&s->sps_list[i]); if (s0->sps_list[i]) { @@ -3040,7 +3138,7 @@ static int hevc_decode_extradata(HEVCContext *s) { AVCodecContext *avctx = s->avctx; GetByteContext gb; - int ret; + int ret, i; bytestream2_init(&gb, avctx->extradata, avctx->extradata_size); @@ -3097,6 +3195,16 @@ static int hevc_decode_extradata(HEVCContext *s) if (ret < 0) return ret; } + + /* export stream parameters from the first SPS */ + for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) { + if (s->sps_list[i]) { + const HEVCSPS *sps = (const HEVCSPS*)s->sps_list[i]->data; + export_stream_params(s->avctx, s, sps); + break; + } + } + 
return 0; } @@ -3147,6 +3255,14 @@ static void hevc_decode_flush(AVCodecContext *avctx) #define OFFSET(x) offsetof(HEVCContext, x) #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) + +static const AVProfile profiles[] = { + { FF_PROFILE_HEVC_MAIN, "Main" }, + { FF_PROFILE_HEVC_MAIN_10, "Main 10" }, + { FF_PROFILE_HEVC_MAIN_STILL_PICTURE, "Main Still Picture" }, + { FF_PROFILE_UNKNOWN }, +}; + static const AVOption options[] = { { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR }, @@ -3175,4 +3291,5 @@ AVCodec ff_hevc_decoder = { .init_thread_copy = hevc_init_thread_copy, .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_FRAME_THREADS, + .profiles = NULL_IF_CONFIG_SMALL(profiles), };