#include "internal.h"
#define NVENC_CAP 0x30
-#define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR || \
- rc == NV_ENC_PARAMS_RC_2_PASS_QUALITY || \
- rc == NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP)
+#define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR || \
+ rc == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ || \
+ rc == NV_ENC_PARAMS_RC_CBR_HQ)
const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
AV_PIX_FMT_YUV420P,
return ret;
}
+/**
+ * Log the minimum Nvidia driver version required for NVENC support.
+ *
+ * The minimum driver version differs between the Windows/Cygwin driver
+ * package and the Linux/Unix one, hence the compile-time switch.
+ * Called from the library-loading / API-version-check error paths so the
+ * user sees an actionable hint instead of only a bare load failure.
+ *
+ * @param avctx codec context, used as the av_log() context
+ * @param level av_log() severity (e.g. AV_LOG_ERROR)
+ */
+static void nvenc_print_driver_requirement(AVCodecContext *avctx, int level)
+{
+#if defined(_WIN32) || defined(__CYGWIN__)
+ const char *minver = "378.66";
+#else
+ const char *minver = "378.13";
+#endif
+ av_log(avctx, level, "The minimum required Nvidia driver for nvenc is %s or newer\n", minver);
+}
+
static av_cold int nvenc_load_libraries(AVCodecContext *avctx)
{
NvencContext *ctx = avctx->priv_data;
return ret;
ret = nvenc_load_functions(&dl_fn->nvenc_dl);
- if (ret < 0)
+ if (ret < 0) {
+ nvenc_print_driver_requirement(avctx, AV_LOG_ERROR);
return ret;
+ }
err = dl_fn->nvenc_dl->NvEncodeAPIGetMaxSupportedVersion(&nvenc_max_ver);
if (err != NV_ENC_SUCCESS)
"Required: %d.%d Found: %d.%d\n",
NVENCAPI_MAJOR_VERSION, NVENCAPI_MINOR_VERSION,
nvenc_max_ver >> 4, nvenc_max_ver & 0xf);
+ nvenc_print_driver_requirement(avctx, AV_LOG_ERROR);
return AVERROR(ENOSYS);
}
return AVERROR(ENOSYS);
}
+ ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_WEIGHTED_PREDICTION);
+ if (ctx->weighted_pred > 0 && ret <= 0) {
+ av_log (avctx, AV_LOG_VERBOSE, "Weighted Prediction not supported\n");
+ return AVERROR(ENOSYS);
+ }
+
+ ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_CABAC);
+ if (ctx->coder == NV_ENC_H264_ENTROPY_CODING_MODE_CABAC && ret <= 0) {
+ av_log(avctx, AV_LOG_VERBOSE, "CABAC entropy coding not supported\n");
+ return AVERROR(ENOSYS);
+ }
+
return 0;
}
return 0;
fail3:
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
ctx->nvencoder = NULL;
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
fail2:
dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
ctx->cu_context_internal = NULL;
return;
}
/* fall through */
- case NV_ENC_PARAMS_RC_2_PASS_VBR:
+ case NV_ENC_PARAMS_RC_VBR_HQ:
case NV_ENC_PARAMS_RC_VBR:
set_vbr(avctx);
break;
case NV_ENC_PARAMS_RC_CBR:
- case NV_ENC_PARAMS_RC_2_PASS_QUALITY:
- case NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP:
+ case NV_ENC_PARAMS_RC_CBR_HQ:
+ case NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ:
break;
}
if (ctx->cbr) {
if (ctx->twopass) {
- ctx->rc = NV_ENC_PARAMS_RC_2_PASS_QUALITY;
+ ctx->rc = NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ;
} else {
ctx->rc = NV_ENC_PARAMS_RC_CBR;
}
} else if (ctx->cqp >= 0) {
ctx->rc = NV_ENC_PARAMS_RC_CONSTQP;
} else if (ctx->twopass) {
- ctx->rc = NV_ENC_PARAMS_RC_2_PASS_VBR;
+ ctx->rc = NV_ENC_PARAMS_RC_VBR_HQ;
} else if (avctx->qmin >= 0 && avctx->qmax >= 0) {
ctx->rc = NV_ENC_PARAMS_RC_VBR_MINQP;
}
}
+ if (ctx->rc >= 0 && ctx->rc & RC_MODE_DEPRECATED) {
+ av_log(avctx, AV_LOG_WARNING, "Specified rc mode is deprecated.\n");
+ av_log(avctx, AV_LOG_WARNING, "\tll_2pass_quality -> cbr_ld_hq\n");
+ av_log(avctx, AV_LOG_WARNING, "\tll_2pass_size -> cbr_hq\n");
+ av_log(avctx, AV_LOG_WARNING, "\tvbr_2pass -> vbr_hq\n");
+ av_log(avctx, AV_LOG_WARNING, "\tvbr_minqp -> (no replacement)\n");
+
+ ctx->rc &= ~RC_MODE_DEPRECATED;
+ }
+
if (ctx->flags & NVENC_LOSSLESS) {
set_lossless(avctx);
} else if (ctx->rc >= 0) {
ctx->encode_config.rcParams.zeroReorderDelay = 1;
if (ctx->quality)
- ctx->encode_config.rcParams.targetQuality = ctx->quality;
+ {
+ //convert from float to fixed point 8.8
+ int tmp_quality = (int)(ctx->quality * 256.0f);
+ ctx->encode_config.rcParams.targetQuality = (uint8_t)(tmp_quality >> 8);
+ ctx->encode_config.rcParams.targetQualityLSB = (uint8_t)(tmp_quality & 0xff);
+ }
}
static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx)
if (IS_CBR(cc->rcParams.rateControlMode)) {
h264->outputBufferingPeriodSEI = 1;
- h264->outputPictureTimingSEI = 1;
}
- if (cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_2_PASS_QUALITY ||
- cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP ||
- cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_2_PASS_VBR) {
+ h264->outputPictureTimingSEI = 1;
+
+ if (cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ ||
+ cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CBR_HQ ||
+ cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_VBR_HQ) {
h264->adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
h264->fmoMode = NV_ENC_H264_FMO_DISABLE;
}
h264->level = ctx->level;
+ if (ctx->coder >= 0)
+ h264->entropyCodingMode = ctx->coder;
+
return 0;
}
if (IS_CBR(cc->rcParams.rateControlMode)) {
hevc->outputBufferingPeriodSEI = 1;
- hevc->outputPictureTimingSEI = 1;
}
+ hevc->outputPictureTimingSEI = 1;
+
switch (ctx->profile) {
case NV_ENC_HEVC_PROFILE_MAIN:
cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
NV_ENC_PRESET_CONFIG preset_config = { 0 };
NVENCSTATUS nv_status = NV_ENC_SUCCESS;
AVCPBProperties *cpb_props;
+ CUresult cu_res;
+ CUcontext dummy;
int res = 0;
int dw, dh;
ctx->init_encode_params.enableEncodeAsync = 0;
ctx->init_encode_params.enablePTD = 1;
+ if (ctx->weighted_pred == 1)
+ ctx->init_encode_params.enableWeightedPrediction = 1;
+
if (ctx->bluray_compat) {
ctx->aud = 1;
avctx->refs = FFMIN(FFMAX(avctx->refs, 0), 6);
if (res)
return res;
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
+
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
if (nv_status != NV_ENC_SUCCESS) {
return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed");
}
}
allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
- allocSurf.width = (avctx->width + 31) & ~31;
- allocSurf.height = (avctx->height + 31) & ~31;
- allocSurf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
+ allocSurf.width = avctx->width;
+ allocSurf.height = avctx->height;
allocSurf.bufferFmt = ctx->surfaces[idx].format;
nv_status = p_nvenc->nvEncCreateInputBuffer(ctx->nvencoder, &allocSurf);
ctx->surfaces[idx].height = allocSurf.height;
}
- /* 1MB is large enough to hold most output frames.
- * NVENC increases this automaticaly if it is not enough. */
- allocOut.size = 1024 * 1024;
-
- allocOut.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
-
nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut);
if (nv_status != NV_ENC_SUCCESS) {
int err = nvenc_print_error(avctx, nv_status, "CreateBitstreamBuffer failed");
static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
{
NvencContext *ctx = avctx->priv_data;
+ NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
+ CUresult cu_res;
+ CUcontext dummy;
int i, res;
ctx->surfaces = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->surfaces));
if (!ctx->output_surface_ready_queue)
return AVERROR(ENOMEM);
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
for (i = 0; i < ctx->nb_surfaces; i++) {
if ((res = nvenc_alloc_surface(avctx, i)) < 0)
+ {
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
return res;
+ }
+ }
+
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
}
return 0;
NvencContext *ctx = avctx->priv_data;
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
+ CUresult cu_res;
+ CUcontext dummy;
int i;
/* the encoder has to be flushed before it can be closed */
NV_ENC_PIC_PARAMS params = { .version = NV_ENC_PIC_PARAMS_VER,
.encodePicFlags = NV_ENC_PIC_FLAG_EOS };
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
p_nvenc->nvEncEncodePicture(ctx->nvencoder, ¶ms);
}
av_freep(&ctx->surfaces);
ctx->nb_surfaces = 0;
- if (ctx->nvencoder)
+ if (ctx->nvencoder) {
p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
+
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+ }
ctx->nvencoder = NULL;
if (ctx->cu_context_internal)
return (nb_ready > 0) && (nb_ready + nb_pending >= ctx->async_depth);
}
-int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
- const AVFrame *frame, int *got_packet)
+int ff_nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
{
NVENCSTATUS nv_status;
CUresult cu_res;
CUcontext dummy;
- NvencSurface *tmpoutsurf, *inSurf;
+ NvencSurface *tmp_out_surf, *in_surf;
int res;
NvencContext *ctx = avctx->priv_data;
NV_ENC_PIC_PARAMS pic_params = { 0 };
pic_params.version = NV_ENC_PIC_PARAMS_VER;
+ if (!ctx->cu_context || !ctx->nvencoder)
+ return AVERROR(EINVAL);
+
+ if (ctx->encoder_flushing)
+ return AVERROR_EOF;
+
if (frame) {
- inSurf = get_free_frame(ctx);
- if (!inSurf) {
- av_log(avctx, AV_LOG_ERROR, "No free surfaces\n");
- return AVERROR_BUG;
- }
+ in_surf = get_free_frame(ctx);
+ if (!in_surf)
+ return AVERROR(EAGAIN);
cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
if (cu_res != CUDA_SUCCESS) {
return AVERROR_EXTERNAL;
}
- res = nvenc_upload_frame(avctx, frame, inSurf);
+ res = nvenc_upload_frame(avctx, frame, in_surf);
cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
if (cu_res != CUDA_SUCCESS) {
return AVERROR_EXTERNAL;
}
- if (res) {
+ if (res)
return res;
- }
- pic_params.inputBuffer = inSurf->input_surface;
- pic_params.bufferFmt = inSurf->format;
- pic_params.inputWidth = avctx->width;
- pic_params.inputHeight = avctx->height;
- pic_params.inputPitch = inSurf->pitch;
- pic_params.outputBitstream = inSurf->output_surface;
+ pic_params.inputBuffer = in_surf->input_surface;
+ pic_params.bufferFmt = in_surf->format;
+ pic_params.inputWidth = in_surf->width;
+ pic_params.inputHeight = in_surf->height;
+ pic_params.inputPitch = in_surf->pitch;
+ pic_params.outputBitstream = in_surf->output_surface;
if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
if (frame->top_field_first)
nvenc_codec_specific_pic_params(avctx, &pic_params);
} else {
pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
+ ctx->encoder_flushing = 1;
}
cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
return nvenc_print_error(avctx, nv_status, "EncodePicture failed!");
if (frame) {
- av_fifo_generic_write(ctx->output_surface_queue, &inSurf, sizeof(inSurf), NULL);
+ av_fifo_generic_write(ctx->output_surface_queue, &in_surf, sizeof(in_surf), NULL);
timestamp_queue_enqueue(ctx->timestamp_list, frame->pts);
if (ctx->initial_pts[0] == AV_NOPTS_VALUE)
/* all the pending buffers are now ready for output */
if (nv_status == NV_ENC_SUCCESS) {
while (av_fifo_size(ctx->output_surface_queue) > 0) {
- av_fifo_generic_read(ctx->output_surface_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
- av_fifo_generic_write(ctx->output_surface_ready_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
+ av_fifo_generic_read(ctx->output_surface_queue, &tmp_out_surf, sizeof(tmp_out_surf), NULL);
+ av_fifo_generic_write(ctx->output_surface_ready_queue, &tmp_out_surf, sizeof(tmp_out_surf), NULL);
}
}
- if (output_ready(avctx, !frame)) {
- av_fifo_generic_read(ctx->output_surface_ready_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
+ return 0;
+}
- res = process_output_surface(avctx, pkt, tmpoutsurf);
+int ff_nvenc_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
+{
+ CUresult cu_res;
+ CUcontext dummy;
+ NvencSurface *tmp_out_surf;
+ int res;
+
+ NvencContext *ctx = avctx->priv_data;
+ NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
+
+ if (!ctx->cu_context || !ctx->nvencoder)
+ return AVERROR(EINVAL);
+
+ if (output_ready(avctx, ctx->encoder_flushing)) {
+ av_fifo_generic_read(ctx->output_surface_ready_queue, &tmp_out_surf, sizeof(tmp_out_surf), NULL);
+
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ res = process_output_surface(avctx, pkt, tmp_out_surf);
+
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
if (res)
return res;
- av_fifo_generic_write(ctx->unused_surface_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
-
- *got_packet = 1;
+ av_fifo_generic_write(ctx->unused_surface_queue, &tmp_out_surf, sizeof(tmp_out_surf), NULL);
+ } else if (ctx->encoder_flushing) {
+ return AVERROR_EOF;
} else {
+ return AVERROR(EAGAIN);
+ }
+
+ return 0;
+}
+
+int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+ const AVFrame *frame, int *got_packet)
+{
+ NvencContext *ctx = avctx->priv_data;
+ int res;
+
+ if (!ctx->encoder_flushing) {
+ res = ff_nvenc_send_frame(avctx, frame);
+ if (res < 0)
+ return res;
+ }
+
+ res = ff_nvenc_receive_packet(avctx, pkt);
+ if (res == AVERROR(EAGAIN) || res == AVERROR_EOF) {
*got_packet = 0;
+ } else if (res < 0) {
+ return res;
+ } else {
+ *got_packet = 1;
}
return 0;