#include "libavutil/hwcontext_cuda.h"
#include "libavutil/hwcontext.h"
+#include "libavutil/cuda_check.h"
#include "libavutil/imgutils.h"
#include "libavutil/avassert.h"
#include "libavutil/mem.h"
#include "libavutil/pixdesc.h"
#include "internal.h"
/* Shorthand for FF_CUDA_CHECK_DL. The expansion names `avctx` and `dl_fn`
 * directly, so both identifiers must be in scope wherever CHECK_CU is used.
 * Callers in this file treat a negative result as failure. */
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, dl_fn->cuda_dl, x)
+
#define NVENC_CAP 0x30
#define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR || \
rc == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ || \
return AVERROR_UNKNOWN;
}
-static int nvenc_print_error(void *log_ctx, NVENCSTATUS err,
+/**
+ * Log an NVENC failure and return the value nvenc_map_error() yields for it.
+ *
+ * @param avctx        codec context; used as the log context and, when
+ *                     NVENC_HAVE_GETLASTERRORSTRING is defined, to reach the
+ *                     encoder session for an extra driver-supplied message
+ * @param err          NVENC status code being reported
+ * @param error_string caller-supplied prefix describing the failed operation
+ * @return the result of nvenc_map_error() for err
+ */
+static int nvenc_print_error(AVCodecContext *avctx, NVENCSTATUS err,
const char *error_string)
{
const char *desc;
- int ret;
- ret = nvenc_map_error(err, &desc);
- av_log(log_ctx, AV_LOG_ERROR, "%s: %s (%d)\n", error_string, desc, err);
+    const char *details = "(no details)";
+    int ret = nvenc_map_error(err, &desc);
+
+#ifdef NVENC_HAVE_GETLASTERRORSTRING
+    NvencContext *ctx = avctx->priv_data;
+    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
+
+    /* p_nvenc is the address of a struct member and can never be NULL; what
+     * can actually be missing is the session or the entry point itself. */
+    if (ctx->nvencoder && p_nvenc->nvEncGetLastErrorString) {
+        const char *last = p_nvenc->nvEncGetLastErrorString(ctx->nvencoder);
+
+        /* Keep the placeholder rather than passing NULL to a "%s" conversion. */
+        if (last)
+            details = last;
+    }
+#endif
+
+    av_log(avctx, AV_LOG_ERROR, "%s: %s (%d): %s\n", error_string, desc, err, details);
+
return ret;
}
static void nvenc_print_driver_requirement(AVCodecContext *avctx, int level)
{
-#if NVENCAPI_CHECK_VERSION(8, 1)
+#if NVENCAPI_CHECK_VERSION(9, 2)
+ const char *minver = "(unknown)";
+#elif NVENCAPI_CHECK_VERSION(9, 1)
+# if defined(_WIN32) || defined(__CYGWIN__)
+ const char *minver = "436.15";
+# else
+ const char *minver = "435.21";
+# endif
+#elif NVENCAPI_CHECK_VERSION(9, 0)
+# if defined(_WIN32) || defined(__CYGWIN__)
+ const char *minver = "418.81";
+# else
+ const char *minver = "418.30";
+# endif
+#elif NVENCAPI_CHECK_VERSION(8, 2)
+# if defined(_WIN32) || defined(__CYGWIN__)
+ const char *minver = "397.93";
+# else
+ const char *minver = "396.24";
+#endif
+#elif NVENCAPI_CHECK_VERSION(8, 1)
# if defined(_WIN32) || defined(__CYGWIN__)
const char *minver = "390.77";
# else
{
NvencContext *ctx = avctx->priv_data;
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
- CUresult cu_res;
if (ctx->d3d11_device)
return 0;
- cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
-
- return 0;
+ return CHECK_CU(dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context));
}
/* Pop the current CUDA context with cuCtxPopCurrent().
 * Returns 0 without touching CUDA when ctx->d3d11_device is set; otherwise
 * returns whatever CHECK_CU() evaluates to for the pop call. */
static int nvenc_pop_context(AVCodecContext *avctx)
{
NvencContext *ctx = avctx->priv_data;
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
- CUresult cu_res;
/* Receives the popped context handle; it is never read afterwards. */
CUcontext dummy;
/* With a D3D11 device set, no CUDA call is made here. */
if (ctx->d3d11_device)
return 0;
- cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
-
- return 0;
/* CHECK_CU picks up avctx and dl_fn from this scope. */
+ return CHECK_CU(dl_fn->cuda_dl->cuCtxPopCurrent(&dummy));
}
static av_cold int nvenc_open_session(AVCodecContext *avctx)
}
#endif
+#ifdef NVENC_HAVE_MULTIPLE_REF_FRAMES
+ ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES);
+ if(avctx->refs != NV_ENC_NUM_REF_FRAMES_AUTOSELECT && ret <= 0) {
+ av_log(avctx, AV_LOG_VERBOSE, "Multiple reference frames are not supported\n");
+ return AVERROR(ENOSYS);
+ }
+#else
+ if(avctx->refs != 0) {
+ av_log(avctx, AV_LOG_VERBOSE, "Multiple reference frames need SDK 9.1 at build time\n");
+ return AVERROR(ENOSYS);
+ }
+#endif
+
ctx->support_dyn_bitrate = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE);
return 0;
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
char name[128] = { 0};
int major, minor, ret;
- CUresult cu_res;
CUdevice cu_device;
int loglevel = AV_LOG_VERBOSE;
if (ctx->device == LIST_DEVICES)
loglevel = AV_LOG_INFO;
- cu_res = dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR,
- "Cannot access the CUDA device %d\n",
- idx);
- return -1;
- }
+ ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx));
+ if (ret < 0)
+ return ret;
- cu_res = dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuDeviceGetName failed on device %d\n", idx);
- return -1;
- }
+ ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device));
+ if (ret < 0)
+ return ret;
- cu_res = dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuDeviceComputeCapability failed on device %d\n", idx);
- return -1;
- }
+ ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device));
+ if (ret < 0)
+ return ret;
av_log(avctx, loglevel, "[ GPU #%d - < %s > has Compute SM %d.%d ]\n", idx, name, major, minor);
if (((major << 4) | minor) < NVENC_CAP) {
if (ctx->device != idx && ctx->device != ANY_DEVICE)
return -1;
- cu_res = dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res);
+ ret = CHECK_CU(dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device));
+ if (ret < 0)
goto fail;
- }
ctx->cu_context = ctx->cu_context_internal;
+ ctx->cu_stream = NULL;
if ((ret = nvenc_pop_context(avctx)) < 0)
goto fail2;
return ret;
fail2:
- dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
+ CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
ctx->cu_context_internal = NULL;
fail:
if (cuda_device_hwctx) {
ctx->cu_context = cuda_device_hwctx->cuda_ctx;
+ ctx->cu_stream = cuda_device_hwctx->stream;
}
#if CONFIG_D3D11VA
else if (d3d11_device_hwctx) {
} else {
int i, nb_devices = 0;
- if ((dl_fn->cuda_dl->cuInit(0)) != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR,
- "Cannot init CUDA\n");
+ if (CHECK_CU(dl_fn->cuda_dl->cuInit(0)) < 0)
return AVERROR_UNKNOWN;
- }
- if ((dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR,
- "Cannot enumerate the CUDA devices\n");
+ if (CHECK_CU(dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) < 0)
return AVERROR_UNKNOWN;
- }
if (!nb_devices) {
av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
h264->repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
h264->outputAUD = ctx->aud;
- if (avctx->refs >= 0) {
+ if (ctx->dpb_size >= 0) {
/* 0 means "let the hardware decide" */
- h264->maxNumRefFrames = avctx->refs;
+ h264->maxNumRefFrames = ctx->dpb_size;
}
if (avctx->gop_size >= 0) {
h264->idrPeriod = cc->gopLength;
h264->useBFramesAsRef = ctx->b_ref_mode;
#endif
+#ifdef NVENC_HAVE_MULTIPLE_REF_FRAMES
+ h264->numRefL0 = avctx->refs;
+ h264->numRefL1 = avctx->refs;
+#endif
+
return 0;
}
hevc->repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
hevc->outputAUD = ctx->aud;
- if (avctx->refs >= 0) {
+ if (ctx->dpb_size >= 0) {
/* 0 means "let the hardware decide" */
- hevc->maxNumRefFramesInDPB = avctx->refs;
+ hevc->maxNumRefFramesInDPB = ctx->dpb_size;
}
if (avctx->gop_size >= 0) {
hevc->idrPeriod = cc->gopLength;
hevc->tier = ctx->tier;
+#ifdef NVENC_HAVE_HEVC_BFRAME_REF_MODE
+ hevc->useBFramesAsRef = ctx->b_ref_mode;
+#endif
+
+#ifdef NVENC_HAVE_MULTIPLE_REF_FRAMES
+ hevc->numRefL0 = avctx->refs;
+ hevc->numRefL1 = avctx->refs;
+#endif
+
return 0;
}
if (ctx->bluray_compat) {
ctx->aud = 1;
- avctx->refs = FFMIN(FFMAX(avctx->refs, 0), 6);
+ ctx->dpb_size = FFMIN(FFMAX(avctx->refs, 0), 6);
avctx->max_b_frames = FFMIN(avctx->max_b_frames, 3);
switch (avctx->codec->id) {
case AV_CODEC_ID_H264:
return res;
nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
+ if (nv_status != NV_ENC_SUCCESS) {
+ nvenc_pop_context(avctx);
+ return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed");
+ }
+
+#ifdef NVENC_HAVE_CUSTREAM_PTR
+ if (ctx->cu_context) {
+ nv_status = p_nvenc->nvEncSetIOCudaStreams(ctx->nvencoder, &ctx->cu_stream, &ctx->cu_stream);
+ if (nv_status != NV_ENC_SUCCESS) {
+ nvenc_pop_context(avctx);
+ return nvenc_print_error(avctx, nv_status, "SetIOCudaStreams failed");
+ }
+ }
+#endif
res = nvenc_pop_context(avctx);
if (res < 0)
return res;
- if (nv_status != NV_ENC_SUCCESS) {
- return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed");
- }
-
if (ctx->encode_config.frameIntervalP > 1)
avctx->has_b_frames = 2;
ctx->nvencoder = NULL;
if (ctx->cu_context_internal)
- dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
+ CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
ctx->cu_context = ctx->cu_context_internal = NULL;
#if CONFIG_D3D11VA
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
NVENCSTATUS nv_status;
- int i;
+ int i, first_round;
if (ctx->nb_registered_frames == FF_ARRAY_ELEMS(ctx->registered_frames)) {
- for (i = 0; i < ctx->nb_registered_frames; i++) {
- if (!ctx->registered_frames[i].mapped) {
- if (ctx->registered_frames[i].regptr) {
- nv_status = p_nvenc->nvEncUnregisterResource(ctx->nvencoder, ctx->registered_frames[i].regptr);
- if (nv_status != NV_ENC_SUCCESS)
- return nvenc_print_error(avctx, nv_status, "Failed unregistering unused input resource");
- ctx->registered_frames[i].ptr = NULL;
- ctx->registered_frames[i].regptr = NULL;
+ for (first_round = 1; first_round >= 0; first_round--) {
+ for (i = 0; i < ctx->nb_registered_frames; i++) {
+ if (!ctx->registered_frames[i].mapped) {
+ if (ctx->registered_frames[i].regptr) {
+ if (first_round)
+ continue;
+ nv_status = p_nvenc->nvEncUnregisterResource(ctx->nvencoder, ctx->registered_frames[i].regptr);
+ if (nv_status != NV_ENC_SUCCESS)
+ return nvenc_print_error(avctx, nv_status, "Failed unregistering unused input resource");
+ ctx->registered_frames[i].ptr = NULL;
+ ctx->registered_frames[i].regptr = NULL;
+ }
+ return i;
}
- return i;
}
}
} else {
res = nvenc_print_error(avctx, nv_status, "Failed unmapping input resource");
goto error;
}
- nv_status = p_nvenc->nvEncUnregisterResource(ctx->nvencoder, ctx->registered_frames[tmpoutsurf->reg_idx].regptr);
- if (nv_status != NV_ENC_SUCCESS) {
- res = nvenc_print_error(avctx, nv_status, "Failed unregistering input resource");
- goto error;
- }
- ctx->registered_frames[tmpoutsurf->reg_idx].ptr = NULL;
- ctx->registered_frames[tmpoutsurf->reg_idx].regptr = NULL;
} else if (ctx->registered_frames[tmpoutsurf->reg_idx].mapped < 0) {
res = AVERROR_BUG;
goto error;