From f052ef30ef6534c811b9f349e8f17e32afbc9148 Mon Sep 17 00:00:00 2001 From: Andrey Turkin Date: Sun, 29 May 2016 13:07:34 +0300 Subject: [PATCH] avcodec/nvenc: allow configuring number of surfaces Signed-off-by: Timo Rothenpieler --- libavcodec/nvenc.c | 34 +++++++++++++++------------------- libavcodec/nvenc.h | 4 ++-- libavcodec/nvenc_h264.c | 3 ++- libavcodec/nvenc_hevc.c | 3 ++- 4 files changed, 21 insertions(+), 23 deletions(-) diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c index 600054f2fb5..76a54a1a6f0 100644 --- a/libavcodec/nvenc.c +++ b/libavcodec/nvenc.c @@ -784,7 +784,6 @@ static av_cold int nvenc_setup_encoder(AVCodecContext *avctx) NV_ENC_PRESET_CONFIG preset_config = { 0 }; NVENCSTATUS nv_status = NV_ENC_SUCCESS; AVCPBProperties *cpb_props; - int num_mbs; int res = 0; int dw, dh; @@ -842,12 +841,6 @@ static av_cold int nvenc_setup_encoder(AVCodecContext *avctx) ctx->init_encode_params.frameRateNum = avctx->time_base.den; ctx->init_encode_params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame; - num_mbs = ((avctx->width + 15) >> 4) * ((avctx->height + 15) >> 4); - ctx->max_surface_count = (num_mbs >= 8160) ? 32 : 48; - - if (ctx->buffer_delay >= ctx->max_surface_count) - ctx->buffer_delay = ctx->max_surface_count - 1; - ctx->init_encode_params.enableEncodeAsync = 0; ctx->init_encode_params.enablePTD = 1; @@ -976,24 +969,27 @@ static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx) { NvencContext *ctx = avctx->priv_data; int i, res; + int num_mbs = ((avctx->width + 15) >> 4) * ((avctx->height + 15) >> 4); + ctx->nb_surfaces = FFMAX((num_mbs >= 8160) ? 32 : 48, + ctx->nb_surfaces); + ctx->async_depth = FFMIN(ctx->async_depth, ctx->nb_surfaces - 1); - ctx->surfaces = av_malloc(ctx->max_surface_count * sizeof(*ctx->surfaces)); - if (!ctx->surfaces) { + ctx->surfaces = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->surfaces)); + if (!ctx->surfaces) return AVERROR(ENOMEM); - } - ctx->timestamp_list = av_fifo_alloc(ctx->max_surface_count * sizeof(int64_t)); + ctx->timestamp_list = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t)); if (!ctx->timestamp_list) return AVERROR(ENOMEM); - ctx->output_surface_queue = av_fifo_alloc(ctx->max_surface_count * sizeof(NvencSurface*)); + ctx->output_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*)); if (!ctx->output_surface_queue) return AVERROR(ENOMEM); - ctx->output_surface_ready_queue = av_fifo_alloc(ctx->max_surface_count * sizeof(NvencSurface*)); + ctx->output_surface_ready_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*)); if (!ctx->output_surface_ready_queue) return AVERROR(ENOMEM); - for (i = 0; i < ctx->max_surface_count; i++) { + for (i = 0; i < ctx->nb_surfaces; i++) { if ((res = nvenc_alloc_surface(avctx, i)) < 0) return res; } @@ -1054,7 +1050,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx) av_fifo_freep(&ctx->output_surface_queue); if (ctx->surfaces && avctx->pix_fmt == AV_PIX_FMT_CUDA) { - for (i = 0; i < ctx->max_surface_count; ++i) { + for (i = 0; i < ctx->nb_surfaces; ++i) { if (ctx->surfaces[i].input_surface) { p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, ctx->surfaces[i].in_map.mappedResource); } @@ -1067,7 +1063,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx) } if (ctx->surfaces) { - for (i = 0; i < ctx->max_surface_count; ++i) { + for (i = 0; i < ctx->nb_surfaces; ++i) { if (avctx->pix_fmt != AV_PIX_FMT_CUDA) p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[i].input_surface); av_frame_free(&ctx->surfaces[i].in_ref); @@ -1075,7 +1071,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx) } } av_freep(&ctx->surfaces); - ctx->max_surface_count = 0; + ctx->nb_surfaces = 0; if (ctx->nvencoder) p_nvenc->nvEncDestroyEncoder(ctx->nvencoder); @@ -1140,7 +1136,7 @@ static NvencSurface *get_free_frame(NvencContext *ctx) { int i; - for (i = 0; i < ctx->max_surface_count; ++i) { + for (i = 0; i < ctx->nb_surfaces; ++i) { if (!ctx->surfaces[i].lockCount) { ctx->surfaces[i].lockCount = 1; return &ctx->surfaces[i]; @@ -1470,7 +1466,7 @@ static int output_ready(NvencContext *ctx, int flush) nb_ready = av_fifo_size(ctx->output_surface_ready_queue) / sizeof(NvencSurface*); nb_pending = av_fifo_size(ctx->output_surface_queue) / sizeof(NvencSurface*); - return nb_ready > 0 && (flush || nb_ready + nb_pending >= ctx->buffer_delay); + return nb_ready > 0 && (flush || nb_ready + nb_pending >= ctx->async_depth); } int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt, diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h index fd44ebc9afd..2c1d8d49d5a 100644 --- a/libavcodec/nvenc.h +++ b/libavcodec/nvenc.h @@ -144,7 +144,7 @@ typedef struct NvencContext CUcontext cu_context; CUcontext cu_context_internal; - int max_surface_count; + int nb_surfaces; NvencSurface *surfaces; AVFifoBuffer *output_surface_queue; @@ -175,7 +175,7 @@ typedef struct NvencContext int twopass; int gpu; int flags; - int buffer_delay; + int async_depth; } NvencContext; int ff_nvenc_encode_init(AVCodecContext *avctx); diff --git a/libavcodec/nvenc_h264.c b/libavcodec/nvenc_h264.c index 19709c6ae6f..2fab3bfae55 100644 --- a/libavcodec/nvenc_h264.c +++ b/libavcodec/nvenc_h264.c @@ -76,10 +76,11 @@ static const AVOption options[] = { { "ll_2pass_quality", "Multi-pass optimized for image quality (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_QUALITY }, 0, 0, VE, "rc" }, { "ll_2pass_size", "Multi-pass optimized for constant frame size (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" }, { "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" }, + { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, INT_MAX, VE }, { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, { "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE }, { "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.", OFFSET(gpu), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, - { "delay", "Delays frame output by the given amount of frames.", OFFSET(buffer_delay), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE }, + { "delay", "Delay frame output by the given amount of frames", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE }, { NULL } }; diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c index 659e1c94559..a1c88dbaa24 100644 --- a/libavcodec/nvenc_hevc.c +++ b/libavcodec/nvenc_hevc.c @@ -73,10 +73,11 @@ static const AVOption options[] = { { "ll_2pass_quality", "Multi-pass optimized for image quality (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_QUALITY }, 0, 0, VE, "rc" }, { "ll_2pass_size", "Multi-pass optimized for constant frame size (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" }, { "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" }, + { "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, INT_MAX, VE }, { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, { "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE }, { "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.", OFFSET(gpu), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, - { "delay", "Delays frame output by the given amount of frames.", OFFSET(buffer_delay), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE }, + { "delay", "Delay frame output by the given amount of frames", OFFSET(async_depth), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE }, { NULL } }; -- 2.39.5