X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fvaapi_encode.c;h=2fe8501287108389542962a8b7a191d21fa63a56;hb=506839a3e9cc34c8f719937430008fc12d132fce;hp=e9eeb6eb835688d9f11f1e0e403707871e7416c1;hpb=8479f99c7dd227d9cb94d262602f1298f71cf33b;p=ffmpeg
# Unified diff of libavcodec/vaapi_encode.c. Line breaks and leading
# indentation were reconstructed from a whitespace-collapsed copy; hunk line
# counts match the @@ headers, but indentation may differ from the blob.
#
# Summary of what the hunks below show:
#  * vaapi_encode_issue: pic->nb_slices defaults to ctx->nb_slices; each slice
#    gets row_size/row_start/block_start/block_size, with the difference
#    between slice_block_rows and nb_slices * slice_size spread over slices.
#  * vaapi_encode_get_next, vaapi_encode_create_recon_frames and
#    ff_vaapi_encode_init read ctx->gop_size / ctx->b_per_p instead of
#    avctx->gop_size / avctx->max_b_frames.
#  * vaapi_encode_config_attributes is removed; vaapi_encode_init_gop_structure,
#    vaapi_encode_init_slice_structure and vaapi_encode_init_packed_headers are
#    added and called from ff_vaapi_encode_init in that order.
#  * vaapi_encode_init_rate_control: FLAG_CONSTANT_QUALITY_ONLY is added to the
#    condition that leads to the "Using constant-quality mode" path.
#  * ff_vaapi_encode_init: the block declaring data/bit_len additionally
#    requires AV_CODEC_FLAG_GLOBAL_HEADER.
#
# NOTE(review): in the "rounding < 0" branch of vaapi_encode_issue,
#   av_assert0(rounding < ctx->slice_size) is always true for a negative
#   rounding and non-negative slice_size; presumably the magnitude of rounding
#   was meant to be checked -- confirm.
# NOTE(review): the log string "pictures as multiple slices.\n." carries a
#   stray '.' after the newline.
# NOTE(review): the "Multiple slices were requested" warning is emitted for
#   avctx->slices > 0, which includes a request for exactly one slice.
# NOTE(review): ref_l0/ref_l1 are uint32_t but are logged with %d.

diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index e9eeb6eb835..2fe85012871 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -319,16 +319,60 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
         }
     }
 
+    if (pic->nb_slices == 0)
+        pic->nb_slices = ctx->nb_slices;
     if (pic->nb_slices > 0) {
+        int rounding;
+
         pic->slices = av_mallocz_array(pic->nb_slices, sizeof(*pic->slices));
         if (!pic->slices) {
             err = AVERROR(ENOMEM);
             goto fail;
         }
+
+        for (i = 0; i < pic->nb_slices; i++)
+            pic->slices[i].row_size = ctx->slice_size;
+
+        rounding = ctx->slice_block_rows - ctx->nb_slices * ctx->slice_size;
+        if (rounding > 0) {
+            // Place rounding error at top and bottom of frame.
+            av_assert0(rounding < pic->nb_slices);
+            // Some Intel drivers contain a bug where the encoder will fail
+            // if the last slice is smaller than the one before it. Since
+            // that's straightforward to avoid here, just do so.
+            if (rounding <= 2) {
+                for (i = 0; i < rounding; i++)
+                    ++pic->slices[i].row_size;
+            } else {
+                for (i = 0; i < (rounding + 1) / 2; i++)
+                    ++pic->slices[pic->nb_slices - i - 1].row_size;
+                for (i = 0; i < rounding / 2; i++)
+                    ++pic->slices[i].row_size;
+            }
+        } else if (rounding < 0) {
+            // Remove rounding error from last slice only.
+            av_assert0(rounding < ctx->slice_size);
+            pic->slices[pic->nb_slices - 1].row_size += rounding;
+        }
     }
     for (i = 0; i < pic->nb_slices; i++) {
         slice = &pic->slices[i];
         slice->index = i;
+        if (i == 0) {
+            slice->row_start = 0;
+            slice->block_start = 0;
+        } else {
+            const VAAPIEncodeSlice *prev = &pic->slices[i - 1];
+            slice->row_start = prev->row_start + prev->row_size;
+            slice->block_start = prev->block_start + prev->block_size;
+        }
+        slice->block_size = slice->row_size * ctx->slice_block_cols;
+
+        av_log(avctx, AV_LOG_DEBUG, "Slice %d: %d-%d (%d rows), "
+               "%d-%d (%d blocks).\n", i, slice->row_start,
+               slice->row_start + slice->row_size - 1, slice->row_size,
+               slice->block_start, slice->block_start + slice->block_size - 1,
+               slice->block_size);
 
         if (ctx->codec->slice_params_size > 0) {
             slice->codec_slice_params = av_mallocz(ctx->codec->slice_params_size);
@@ -671,7 +715,7 @@ static int vaapi_encode_get_next(AVCodecContext *avctx,
         return AVERROR(ENOMEM);
 
     if (ctx->input_order == 0 || ctx->force_idr ||
-        ctx->gop_counter >= avctx->gop_size) {
+        ctx->gop_counter >= ctx->gop_size) {
         pic->type = PICTURE_TYPE_IDR;
         ctx->force_idr = 0;
         ctx->gop_counter = 1;
@@ -694,7 +738,7 @@ static int vaapi_encode_get_next(AVCodecContext *avctx,
             // encode-after it, but not exceeding the GOP size.
 
             for (i = 0; i < ctx->b_per_p &&
-                 ctx->gop_counter < avctx->gop_size; i++) {
+                 ctx->gop_counter < ctx->gop_size; i++) {
                 pic = vaapi_encode_alloc();
                 if (!pic)
                     goto fail;
@@ -1206,79 +1250,6 @@ fail:
     return err;
 }
 
-static av_cold int vaapi_encode_config_attributes(AVCodecContext *avctx)
-{
-    VAAPIEncodeContext *ctx = avctx->priv_data;
-    VAStatus vas;
-    int i;
-
-    VAConfigAttrib attr[] = {
-        { VAConfigAttribEncMaxRefFrames },
-        { VAConfigAttribEncPackedHeaders },
-    };
-
-    vas = vaGetConfigAttributes(ctx->hwctx->display,
-                                ctx->va_profile, ctx->va_entrypoint,
-                                attr, FF_ARRAY_ELEMS(attr));
-    if (vas != VA_STATUS_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "Failed to fetch config "
-               "attributes: %d (%s).\n", vas, vaErrorStr(vas));
-        return AVERROR(EINVAL);
-    }
-
-    for (i = 0; i < FF_ARRAY_ELEMS(attr); i++) {
-        if (attr[i].value == VA_ATTRIB_NOT_SUPPORTED) {
-            // Unfortunately we have to treat this as "don't know" and hope
-            // for the best, because the Intel MJPEG encoder returns this
-            // for all the interesting attributes.
-            av_log(avctx, AV_LOG_DEBUG, "Attribute (%d) is not supported.\n",
-                   attr[i].type);
-            continue;
-        }
-        switch (attr[i].type) {
-        case VAConfigAttribEncMaxRefFrames:
-        {
-            unsigned int ref_l0 = attr[i].value & 0xffff;
-            unsigned int ref_l1 = (attr[i].value >> 16) & 0xffff;
-
-            if (avctx->gop_size > 1 && ref_l0 < 1) {
-                av_log(avctx, AV_LOG_ERROR, "P frames are not "
-                       "supported (%#x).\n", attr[i].value);
-                return AVERROR(EINVAL);
-            }
-            if (avctx->max_b_frames > 0 && ref_l1 < 1) {
-                av_log(avctx, AV_LOG_WARNING, "B frames are not "
-                       "supported (%#x) by the underlying driver.\n",
-                       attr[i].value);
-                avctx->max_b_frames = 0;
-            }
-        }
-        break;
-        case VAConfigAttribEncPackedHeaders:
-            if (ctx->va_packed_headers & ~attr[i].value) {
-                // This isn't fatal, but packed headers are always
-                // preferable because they are under our control.
-                // When absent, the driver is generating them and some
-                // features may not work (e.g. VUI or SEI in H.264).
-                av_log(avctx, AV_LOG_WARNING, "Warning: some packed "
-                       "headers are not supported (want %#x, got %#x).\n",
-                       ctx->va_packed_headers, attr[i].value);
-                ctx->va_packed_headers &= attr[i].value;
-            }
-            ctx->config_attributes[ctx->nb_config_attributes++] =
-                (VAConfigAttrib) {
-                .type = VAConfigAttribEncPackedHeaders,
-                .value = ctx->va_packed_headers,
-            };
-            break;
-        default:
-            av_assert0(0 && "Unexpected config attribute.");
-        }
-    }
-
-    return 0;
-}
-
 static av_cold int vaapi_encode_init_rate_control(AVCodecContext *avctx)
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
@@ -1306,7 +1277,8 @@ static av_cold int vaapi_encode_init_rate_control(AVCodecContext *avctx)
         ctx->va_rc_mode = VA_RC_CQP;
         return 0;
     }
-    if (avctx->flags & AV_CODEC_FLAG_QSCALE ||
+    if (ctx->codec->flags & FLAG_CONSTANT_QUALITY_ONLY ||
+        avctx->flags & AV_CODEC_FLAG_QSCALE ||
         avctx->bit_rate <= 0) {
         if (rc_attr.value & VA_RC_CQP) {
             av_log(avctx, AV_LOG_VERBOSE, "Using constant-quality mode.\n");
@@ -1469,6 +1441,214 @@ static av_cold int vaapi_encode_init_rate_control(AVCodecContext *avctx)
     return 0;
 }
 
+static av_cold int vaapi_encode_init_gop_structure(AVCodecContext *avctx)
+{
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAStatus vas;
+    VAConfigAttrib attr = { VAConfigAttribEncMaxRefFrames };
+    uint32_t ref_l0, ref_l1;
+
+    vas = vaGetConfigAttributes(ctx->hwctx->display,
+                                ctx->va_profile,
+                                ctx->va_entrypoint,
+                                &attr, 1);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query reference frames "
+               "attribute: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
+        ref_l0 = ref_l1 = 0;
+    } else {
+        ref_l0 = attr.value & 0xffff;
+        ref_l1 = attr.value >> 16 & 0xffff;
+    }
+
+    if (avctx->gop_size <= 1) {
+        av_log(avctx, AV_LOG_VERBOSE, "Using intra frames only.\n");
+        ctx->gop_size = 1;
+    } else if (ref_l0 < 1) {
+        av_log(avctx, AV_LOG_ERROR, "Driver does not support any "
+               "reference frames.\n");
+        return AVERROR(EINVAL);
+    } else if (ref_l1 < 1 || avctx->max_b_frames < 1) {
+        av_log(avctx, AV_LOG_VERBOSE, "Using intra and P-frames "
+               "(supported references: %d / %d).\n", ref_l0, ref_l1);
+        ctx->gop_size = avctx->gop_size;
+        ctx->p_per_i = INT_MAX;
+        ctx->b_per_p = 0;
+    } else {
+        av_log(avctx, AV_LOG_VERBOSE, "Using intra, P- and B-frames "
+               "(supported references: %d / %d).\n", ref_l0, ref_l1);
+        ctx->gop_size = avctx->gop_size;
+        ctx->p_per_i = INT_MAX;
+        ctx->b_per_p = avctx->max_b_frames;
+    }
+
+    return 0;
+}
+
+static av_cold int vaapi_encode_init_slice_structure(AVCodecContext *avctx)
+{
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAConfigAttrib attr[2] = { { VAConfigAttribEncMaxSlices },
+                               { VAConfigAttribEncSliceStructure } };
+    VAStatus vas;
+    uint32_t max_slices, slice_structure;
+    int req_slices;
+
+    if (!(ctx->codec->flags & FLAG_SLICE_CONTROL)) {
+        if (avctx->slices > 0) {
+            av_log(avctx, AV_LOG_WARNING, "Multiple slices were requested "
+                   "but this codec does not support controlling slices.\n");
+        }
+        return 0;
+    }
+
+    ctx->slice_block_rows = (avctx->height + ctx->slice_block_height - 1) /
+                             ctx->slice_block_height;
+    ctx->slice_block_cols = (avctx->width + ctx->slice_block_width - 1) /
+                             ctx->slice_block_width;
+
+    if (avctx->slices <= 1) {
+        ctx->nb_slices = 1;
+        ctx->slice_size = ctx->slice_block_rows;
+        return 0;
+    }
+
+    vas = vaGetConfigAttributes(ctx->hwctx->display,
+                                ctx->va_profile,
+                                ctx->va_entrypoint,
+                                attr, FF_ARRAY_ELEMS(attr));
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query slice "
+               "attributes: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+    max_slices = attr[0].value;
+    slice_structure = attr[1].value;
+    if (max_slices == VA_ATTRIB_NOT_SUPPORTED ||
+        slice_structure == VA_ATTRIB_NOT_SUPPORTED) {
+        av_log(avctx, AV_LOG_ERROR, "Driver does not support encoding "
+               "pictures as multiple slices.\n.");
+        return AVERROR(EINVAL);
+    }
+
+    // For fixed-size slices currently we only support whole rows, making
+    // rectangular slices. This could be extended to arbitrary runs of
+    // blocks, but since slices tend to be a conformance requirement and
+    // most cases (such as broadcast or bluray) want rectangular slices
+    // only it would need to be gated behind another option.
+    if (avctx->slices > ctx->slice_block_rows) {
+        av_log(avctx, AV_LOG_WARNING, "Not enough rows to use "
+               "configured number of slices (%d < %d); using "
+               "maximum.\n", ctx->slice_block_rows, avctx->slices);
+        req_slices = ctx->slice_block_rows;
+    } else {
+        req_slices = avctx->slices;
+    }
+    if (slice_structure & VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS ||
+        slice_structure & VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS) {
+        ctx->nb_slices = req_slices;
+        ctx->slice_size = ctx->slice_block_rows / ctx->nb_slices;
+    } else if (slice_structure & VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS) {
+        int k;
+        for (k = 1;; k *= 2) {
+            if (2 * k * (req_slices - 1) + 1 >= ctx->slice_block_rows)
+                break;
+        }
+        ctx->nb_slices = (ctx->slice_block_rows + k - 1) / k;
+        ctx->slice_size = k;
+#if VA_CHECK_VERSION(1, 0, 0)
+    } else if (slice_structure & VA_ENC_SLICE_STRUCTURE_EQUAL_ROWS) {
+        ctx->nb_slices = ctx->slice_block_rows;
+        ctx->slice_size = 1;
+#endif
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "Driver does not support any usable "
+               "slice structure modes (%#x).\n", slice_structure);
+        return AVERROR(EINVAL);
+    }
+
+    if (ctx->nb_slices > avctx->slices) {
+        av_log(avctx, AV_LOG_WARNING, "Slice count rounded up to "
+               "%d (from %d) due to driver constraints on slice "
+               "structure.\n", ctx->nb_slices, avctx->slices);
+    }
+    if (ctx->nb_slices > max_slices) {
+        av_log(avctx, AV_LOG_ERROR, "Driver does not support "
+               "encoding with %d slices (max %"PRIu32").\n",
+               ctx->nb_slices, max_slices);
+        return AVERROR(EINVAL);
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "Encoding pictures with %d slices "
+           "(default size %d block rows).\n",
+           ctx->nb_slices, ctx->slice_size);
+    return 0;
+}
+
+static av_cold int vaapi_encode_init_packed_headers(AVCodecContext *avctx)
+{
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAStatus vas;
+    VAConfigAttrib attr = { VAConfigAttribEncPackedHeaders };
+
+    vas = vaGetConfigAttributes(ctx->hwctx->display,
+                                ctx->va_profile,
+                                ctx->va_entrypoint,
+                                &attr, 1);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query packed headers "
+               "attribute: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
+        if (ctx->desired_packed_headers) {
+            av_log(avctx, AV_LOG_WARNING, "Driver does not support any "
+                   "packed headers (wanted %#x).\n",
+                   ctx->desired_packed_headers);
+        } else {
+            av_log(avctx, AV_LOG_VERBOSE, "Driver does not support any "
+                   "packed headers (none wanted).\n");
+        }
+        ctx->va_packed_headers = 0;
+    } else {
+        if (ctx->desired_packed_headers & ~attr.value) {
+            av_log(avctx, AV_LOG_WARNING, "Driver does not support some "
+                   "wanted packed headers (wanted %#x, found %#x).\n",
+                   ctx->desired_packed_headers, attr.value);
+        } else {
+            av_log(avctx, AV_LOG_VERBOSE, "All wanted packed headers "
+                   "available (wanted %#x, found %#x).\n",
+                   ctx->desired_packed_headers, attr.value);
+        }
+        ctx->va_packed_headers = ctx->desired_packed_headers & attr.value;
+    }
+
+    if (ctx->va_packed_headers) {
+        ctx->config_attributes[ctx->nb_config_attributes++] =
+            (VAConfigAttrib) {
+            .type = VAConfigAttribEncPackedHeaders,
+            .value = ctx->va_packed_headers,
+        };
+    }
+
+    if ( (ctx->desired_packed_headers & VA_ENC_PACKED_HEADER_SEQUENCE) &&
+        !(ctx->va_packed_headers & VA_ENC_PACKED_HEADER_SEQUENCE) &&
+         (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)) {
+        av_log(avctx, AV_LOG_WARNING, "Driver does not support packed "
+               "sequence headers, but a global header is requested.\n");
+        av_log(avctx, AV_LOG_WARNING, "No global header will be written: "
+               "this may result in a stream which is not usable for some "
+               "purposes (e.g. not muxable to some containers).\n");
+    }
+
+    return 0;
+}
+
 static av_cold int vaapi_encode_init_quality(AVCodecContext *avctx)
 {
 #if VA_CHECK_VERSION(0, 36, 0)
@@ -1640,7 +1820,7 @@ static av_cold int vaapi_encode_create_recon_frames(AVCodecContext *avctx)
     ctx->recon_frames->height = ctx->surface_height;
     // At most three IDR/I/P frames and two runs of B frames can be in
     // flight at any one time.
-    ctx->recon_frames->initial_pool_size = 3 + 2 * avctx->max_b_frames;
+    ctx->recon_frames->initial_pool_size = 3 + 2 * ctx->b_per_p;
 
     err = av_hwframe_ctx_init(ctx->recon_frames_ref);
     if (err < 0) {
@@ -1695,7 +1875,15 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
     if (err < 0)
         goto fail;
 
-    err = vaapi_encode_config_attributes(avctx);
+    err = vaapi_encode_init_gop_structure(avctx);
+    if (err < 0)
+        goto fail;
+
+    err = vaapi_encode_init_slice_structure(avctx);
+    if (err < 0)
+        goto fail;
+
+    err = vaapi_encode_init_packed_headers(avctx);
     if (err < 0)
         goto fail;
 
@@ -1749,14 +1937,10 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
     }
 
     ctx->input_order = 0;
-    ctx->output_delay = avctx->max_b_frames;
+    ctx->output_delay = ctx->b_per_p;
     ctx->decode_delay = 1;
     ctx->output_order = - ctx->output_delay - 1;
 
-    // Currently we never generate I frames, only IDR.
-    ctx->p_per_i = INT_MAX;
-    ctx->b_per_p = avctx->max_b_frames;
-
     if (ctx->codec->sequence_params_size > 0) {
         ctx->codec_sequence_params =
             av_mallocz(ctx->codec->sequence_params_size);
@@ -1788,7 +1972,8 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
         ctx->issue_mode = ISSUE_MODE_MAXIMISE_THROUGHPUT;
 
     if (ctx->va_packed_headers & VA_ENC_PACKED_HEADER_SEQUENCE &&
-        ctx->codec->write_sequence_header) {
+        ctx->codec->write_sequence_header &&
+        avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
         char data[MAX_PARAM_BUFFER_SIZE];
         size_t bit_len = 8 * sizeof(data);
 