3 * Copyright (C) 2015 Luca Barbato
4 * Copyright (C) 2015 Philip Langdale <philipl@overt.org>
5 * Copyright (C) 2014 Timo Rothenpieler <timo@rothenpieler.org>
7 * This file is part of Libav.
9 * Libav is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * Libav is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with Libav; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include <nvEncodeAPI.h>
/* Shared-library file names for the CUDA driver and the NVENC runtime.
 * Several alternative NVENC_LIBNAME definitions are visible; presumably
 * platform conditionals that are not part of this view select one of
 * them -- TODO confirm. */
29 #define CUDA_LIBNAME "libcuda.so"
34 #define NVENC_LIBNAME "libnvidia-encode.so"
40 #define NVENC_LIBNAME "nvEncodeAPI64.dll"
42 #define NVENC_LIBNAME "nvEncodeAPI.dll"
/* Map the dlopen()/dlsym()/dlclose() names onto LoadLibrary(),
 * GetProcAddress() and FreeLibrary(); the flags argument of dlopen() is
 * discarded by the mapping. */
45 #define dlopen(filename, flags) LoadLibrary((filename))
46 #define dlsym(handle, symbol) GetProcAddress(handle, symbol)
47 #define dlclose(handle) FreeLibrary(handle)
50 #include "libavutil/common.h"
51 #include "libavutil/hwcontext.h"
52 #include "libavutil/imgutils.h"
53 #include "libavutil/mem.h"
59 #include "libavutil/hwcontext_cuda.h"
/* Threshold compared against a device's compute capability packed as
 * (major << 4) | minor when devices are probed. */
#define NVENC_CAP 0x30

/* Size in bytes requested for each output bitstream buffer (1 MiB).
 * The expansion is parenthesized so that the macro behaves as a single
 * value inside larger expressions such as `x / BITSTREAM_BUFFER_SIZE`. */
#define BITSTREAM_BUFFER_SIZE (1024 * 1024)

/* Non-zero when rc is the CBR mode or one of the two 2-pass modes listed
 * below. The argument is parenthesized but evaluated up to three times, so
 * it must not have side effects. */
#define IS_CBR(rc) ((rc) == NV_ENC_PARAMS_RC_CBR ||                    \
                    (rc) == NV_ENC_PARAMS_RC_2_PASS_QUALITY ||         \
                    (rc) == NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP)
/* Open the shared library `path` with dlopen() and store the handle in `l`.
 * On failure an error is logged through `avctx` (which must be in scope at
 * the expansion site) and the enclosing function returns AVERROR_UNKNOWN.
 * NOTE(review): several lines of this macro body are not visible here. */
68 #define LOAD_LIBRARY(l, path) \
70 if (!((l) = dlopen(path, RTLD_LAZY))) { \
71 av_log(avctx, AV_LOG_ERROR, \
74 return AVERROR_UNKNOWN; \
/* Look up `symbol` in the library handle `lib` with dlsym() and store the
 * result in `fun`. On failure an error is logged through `avctx` (which
 * must be in scope at the expansion site) and the enclosing function
 * returns AVERROR_UNKNOWN.
 * NOTE(review): several lines of this macro body are not visible here. */
78 #define LOAD_SYMBOL(fun, lib, symbol) \
80 if (!((fun) = dlsym(lib, symbol))) { \
81 av_log(avctx, AV_LOG_ERROR, \
84 return AVERROR_UNKNOWN; \
/* Table of pixel formats exported under the ff_nvenc_pix_fmts name. Part of
 * the table is guarded by a check for NVENC API major version 7 or later.
 * NOTE(review): the individual entries are not visible in this view. */
88 const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
92 #if NVENCAPI_MAJOR_VERSION >= 7
/* Non-zero when pix_fmt is AV_PIX_FMT_P010 or AV_PIX_FMT_YUV444P16.
 * The argument is parenthesized so that compound expressions are compared
 * as a whole; it is evaluated twice and must not have side effects. */
#define IS_10BIT(pix_fmt)  ((pix_fmt) == AV_PIX_FMT_P010 ||    \
                            (pix_fmt) == AV_PIX_FMT_YUV444P16)

/* Non-zero when pix_fmt is AV_PIX_FMT_YUV444P or AV_PIX_FMT_YUV444P16.
 * Same evaluation caveats as IS_10BIT(). */
#define IS_YUV444(pix_fmt) ((pix_fmt) == AV_PIX_FMT_YUV444P || \
                            (pix_fmt) == AV_PIX_FMT_YUV444P16)
/* Lookup table pairing each NVENC status code with the libav error code it
 * is reported as and a short human-readable description. It is scanned
 * linearly by nvenc_map_error().
 * NOTE(review): the struct member declarations and the closing of the
 * table are not visible in this view. */
108 static const struct {
113 { NV_ENC_SUCCESS, 0, "success" },
114 { NV_ENC_ERR_NO_ENCODE_DEVICE, AVERROR(ENOENT), "no encode device" },
115 { NV_ENC_ERR_UNSUPPORTED_DEVICE, AVERROR(ENOSYS), "unsupported device" },
116 { NV_ENC_ERR_INVALID_ENCODERDEVICE, AVERROR(EINVAL), "invalid encoder device" },
117 { NV_ENC_ERR_INVALID_DEVICE, AVERROR(EINVAL), "invalid device" },
118 { NV_ENC_ERR_DEVICE_NOT_EXIST, AVERROR(EIO), "device does not exist" },
119 { NV_ENC_ERR_INVALID_PTR, AVERROR(EFAULT), "invalid ptr" },
120 { NV_ENC_ERR_INVALID_EVENT, AVERROR(EINVAL), "invalid event" },
121 { NV_ENC_ERR_INVALID_PARAM, AVERROR(EINVAL), "invalid param" },
122 { NV_ENC_ERR_INVALID_CALL, AVERROR(EINVAL), "invalid call" },
123 { NV_ENC_ERR_OUT_OF_MEMORY, AVERROR(ENOMEM), "out of memory" },
124 { NV_ENC_ERR_ENCODER_NOT_INITIALIZED, AVERROR(EINVAL), "encoder not initialized" },
125 { NV_ENC_ERR_UNSUPPORTED_PARAM, AVERROR(ENOSYS), "unsupported param" },
/* The three "try again" conditions (lock busy, need more input, encoder
 * busy) are all reported as EAGAIN. */
126 { NV_ENC_ERR_LOCK_BUSY, AVERROR(EAGAIN), "lock busy" },
127 { NV_ENC_ERR_NOT_ENOUGH_BUFFER, AVERROR(ENOBUFS), "not enough buffer" },
128 { NV_ENC_ERR_INVALID_VERSION, AVERROR(EINVAL), "invalid version" },
129 { NV_ENC_ERR_MAP_FAILED, AVERROR(EIO), "map failed" },
130 { NV_ENC_ERR_NEED_MORE_INPUT, AVERROR(EAGAIN), "need more input" },
131 { NV_ENC_ERR_ENCODER_BUSY, AVERROR(EAGAIN), "encoder busy" },
132 { NV_ENC_ERR_EVENT_NOT_REGISTERD, AVERROR(EBADF), "event not registered" },
133 { NV_ENC_ERR_GENERIC, AVERROR_UNKNOWN, "generic error" },
134 { NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY, AVERROR(EINVAL), "incompatible client key" },
135 { NV_ENC_ERR_UNIMPLEMENTED, AVERROR(ENOSYS), "unimplemented" },
136 { NV_ENC_ERR_RESOURCE_REGISTER_FAILED, AVERROR(EIO), "resource register failed" },
137 { NV_ENC_ERR_RESOURCE_NOT_REGISTERED, AVERROR(EBADF), "resource not registered" },
138 { NV_ENC_ERR_RESOURCE_NOT_MAPPED, AVERROR(EBADF), "resource not mapped" },
141 static int nvenc_map_error(NVENCSTATUS err, const char **desc)
144 for (i = 0; i < FF_ARRAY_ELEMS(nvenc_errors); i++) {
145 if (nvenc_errors[i].nverr == err) {
147 *desc = nvenc_errors[i].desc;
148 return nvenc_errors[i].averr;
152 *desc = "unknown error";
153 return AVERROR_UNKNOWN;
156 static int nvenc_print_error(void *log_ctx, NVENCSTATUS err,
157 const char *error_string)
161 ret = nvenc_map_error(err, &desc);
162 av_log(log_ctx, AV_LOG_ERROR, "%s: %s (%d)\n", error_string, desc, err);
/* Fill the library context in the codec's private data with the CUDA
 * driver entry points, then obtain the NVENC function list.
 * Both a direct assignment of the cu* functions and a LOAD_LIBRARY /
 * LOAD_SYMBOL sequence for the same pointers are visible; presumably a
 * build-time conditional that is not part of this view selects one of
 * them -- TODO confirm.
 * The LOAD_* macros return AVERROR_UNKNOWN from this function on failure;
 * a failing NvEncodeAPICreateInstance call is logged and returned through
 * nvenc_print_error(). */
166 static av_cold int nvenc_load_libraries(AVCodecContext *avctx)
168 NVENCContext *ctx = avctx->priv_data;
169 NVENCLibraryContext *nvel = &ctx->nvel;
170 PNVENCODEAPICREATEINSTANCE nvenc_create_instance;
/* Variant 1: take the CUDA driver functions directly by name. */
174 nvel->cu_init = cuInit;
175 nvel->cu_device_get_count = cuDeviceGetCount;
176 nvel->cu_device_get = cuDeviceGet;
177 nvel->cu_device_get_name = cuDeviceGetName;
178 nvel->cu_device_compute_capability = cuDeviceComputeCapability;
179 nvel->cu_ctx_create = cuCtxCreate_v2;
180 nvel->cu_ctx_pop_current = cuCtxPopCurrent_v2;
181 nvel->cu_ctx_destroy = cuCtxDestroy_v2;
/* Variant 2: open the CUDA library at run time and resolve each symbol. */
183 LOAD_LIBRARY(nvel->cuda, CUDA_LIBNAME);
185 LOAD_SYMBOL(nvel->cu_init, nvel->cuda, "cuInit");
186 LOAD_SYMBOL(nvel->cu_device_get_count, nvel->cuda, "cuDeviceGetCount");
187 LOAD_SYMBOL(nvel->cu_device_get, nvel->cuda, "cuDeviceGet");
188 LOAD_SYMBOL(nvel->cu_device_get_name, nvel->cuda, "cuDeviceGetName");
189 LOAD_SYMBOL(nvel->cu_device_compute_capability, nvel->cuda,
190 "cuDeviceComputeCapability");
191 LOAD_SYMBOL(nvel->cu_ctx_create, nvel->cuda, "cuCtxCreate_v2");
192 LOAD_SYMBOL(nvel->cu_ctx_pop_current, nvel->cuda, "cuCtxPopCurrent_v2");
193 LOAD_SYMBOL(nvel->cu_ctx_destroy, nvel->cuda, "cuCtxDestroy_v2");
/* The NVENC library is always opened at run time. */
196 LOAD_LIBRARY(nvel->nvenc, NVENC_LIBNAME);
198 LOAD_SYMBOL(nvenc_create_instance, nvel->nvenc,
199 "NvEncodeAPICreateInstance");
/* The function list must carry its structure version before it is
 * handed to the instance-creation call. */
201 nvel->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
203 err = nvenc_create_instance(&nvel->nvenc_funcs);
204 if (err != NV_ENC_SUCCESS)
205 return nvenc_print_error(avctx, err, "Cannot create the NVENC instance");
210 static int nvenc_open_session(AVCodecContext *avctx)
212 NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { 0 };
213 NVENCContext *ctx = avctx->priv_data;
214 NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
217 params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
218 params.apiVersion = NVENCAPI_VERSION;
219 params.device = ctx->cu_context;
220 params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
222 ret = nv->nvEncOpenEncodeSessionEx(¶ms, &ctx->nvenc_ctx);
223 if (ret != NV_ENC_SUCCESS) {
224 ctx->nvenc_ctx = NULL;
225 return nvenc_print_error(avctx, ret, "Cannot open the NVENC Session");
231 static int nvenc_check_codec_support(AVCodecContext *avctx)
233 NVENCContext *ctx = avctx->priv_data;
234 NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
235 int i, ret, count = 0;
238 ret = nv->nvEncGetEncodeGUIDCount(ctx->nvenc_ctx, &count);
240 if (ret != NV_ENC_SUCCESS || !count)
241 return AVERROR(ENOSYS);
243 guids = av_malloc(count * sizeof(GUID));
245 return AVERROR(ENOMEM);
247 ret = nv->nvEncGetEncodeGUIDs(ctx->nvenc_ctx, guids, count, &count);
248 if (ret != NV_ENC_SUCCESS) {
249 ret = AVERROR(ENOSYS);
253 ret = AVERROR(ENOSYS);
254 for (i = 0; i < count; i++) {
255 if (!memcmp(&guids[i], &ctx->params.encodeGUID, sizeof(*guids))) {
267 static int nvenc_check_cap(AVCodecContext *avctx, NV_ENC_CAPS cap)
269 NVENCContext *ctx = avctx->priv_data;
270 NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
271 NV_ENC_CAPS_PARAM params = { 0 };
274 params.version = NV_ENC_CAPS_PARAM_VER;
275 params.capsToQuery = cap;
277 ret = nv->nvEncGetEncodeCaps(ctx->nvenc_ctx, ctx->params.encodeGUID, ¶ms, &val);
279 if (ret == NV_ENC_SUCCESS)
/* Verify that the open session can encode the configured stream: the codec
 * itself, YUV444 input when the data pixel format is AV_PIX_FMT_YUV444P,
 * the frame width and height, and the requested number of B-frames.
 * Each failed check is logged at verbose level and reported as
 * AVERROR(ENOSYS).
 * NOTE(review): only AV_PIX_FMT_YUV444P triggers the 4:4:4 capability
 * check in the visible code. */
284 static int nvenc_check_capabilities(AVCodecContext *avctx)
286 NVENCContext *ctx = avctx->priv_data;
289 ret = nvenc_check_codec_support(avctx);
291 av_log(avctx, AV_LOG_VERBOSE, "Codec not supported\n");
295 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_YUV444_ENCODE);
296 if (ctx->data_pix_fmt == AV_PIX_FMT_YUV444P && ret <= 0) {
297 av_log(avctx, AV_LOG_VERBOSE, "YUV444P not supported\n");
298 return AVERROR(ENOSYS);
/* For the limits below, the capability value is the maximum supported. */
301 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_WIDTH_MAX);
302 if (ret < avctx->width) {
303 av_log(avctx, AV_LOG_VERBOSE, "Width %d exceeds %d\n",
305 return AVERROR(ENOSYS);
308 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_HEIGHT_MAX);
309 if (ret < avctx->height) {
310 av_log(avctx, AV_LOG_VERBOSE, "Height %d exceeds %d\n",
312 return AVERROR(ENOSYS);
315 ret = nvenc_check_cap(avctx, NV_ENC_CAPS_NUM_MAX_BFRAMES);
316 if (ret < avctx->max_b_frames) {
317 av_log(avctx, AV_LOG_VERBOSE, "Max B-frames %d exceed %d\n",
318 avctx->max_b_frames, ret);
320 return AVERROR(ENOSYS);
/* Probe the CUDA device with ordinal idx: fetch its name and compute
 * capability, create a CUDA context on it, open an NVENC session and run
 * the capability checks.
 * Progress is logged at verbose level, or at info level when the user
 * asked for a device listing (ctx->device == LIST_DEVICES).
 * The function ends with AVERROR(ENOSYS) after logging that the device
 * does not support NVENC.
 * NOTE(review): the goto targets and the success return are not visible in
 * this view, so the exact cleanup order is not documented here. */
326 static int nvenc_check_device(AVCodecContext *avctx, int idx)
328 NVENCContext *ctx = avctx->priv_data;
329 NVENCLibraryContext *nvel = &ctx->nvel;
330 char name[128] = { 0 };
331 int major, minor, ret;
334 int loglevel = AV_LOG_VERBOSE;
336 if (ctx->device == LIST_DEVICES)
337 loglevel = AV_LOG_INFO;
339 ret = nvel->cu_device_get(&cu_device, idx);
340 if (ret != CUDA_SUCCESS) {
341 av_log(avctx, AV_LOG_ERROR,
342 "Cannot access the CUDA device %d\n",
347 ret = nvel->cu_device_get_name(name, sizeof(name), cu_device);
348 if (ret != CUDA_SUCCESS)
351 ret = nvel->cu_device_compute_capability(&major, &minor, cu_device);
352 if (ret != CUDA_SUCCESS)
355 av_log(avctx, loglevel, "Device %d [%s] ", cu_device, name);
/* Compute capability is packed as (major << 4) | minor for the
 * comparison against NVENC_CAP. */
357 if (((major << 4) | minor) < NVENC_CAP)
360 ret = nvel->cu_ctx_create(&ctx->cu_context_internal, 0, cu_device);
361 if (ret != CUDA_SUCCESS)
/* The context created here is the one the session is opened on. */
364 ctx->cu_context = ctx->cu_context_internal;
366 ret = nvel->cu_ctx_pop_current(&dummy);
367 if (ret != CUDA_SUCCESS)
370 if ((ret = nvenc_open_session(avctx)) < 0)
373 if ((ret = nvenc_check_capabilities(avctx)) < 0)
376 av_log(avctx, loglevel, "supports NVENC\n");
/* The device is accepted when it is the requested one or when any
 * device is acceptable. */
378 if (ctx->device == cu_device || ctx->device == ANY_DEVICE)
/* Tear down the session and the internally created CUDA context. */
382 nvel->nvenc_funcs.nvEncDestroyEncoder(ctx->nvenc_ctx);
383 ctx->nvenc_ctx = NULL;
386 nvel->cu_ctx_destroy(ctx->cu_context_internal);
387 ctx->cu_context_internal = NULL;
391 av_log(avctx, loglevel, "does not support NVENC (major %d minor %d)\n",
394 return AVERROR(ENOSYS);
/* Select the codec GUID from avctx->codec->id and obtain an NVENC session.
 * With AV_PIX_FMT_CUDA input the CUDA context is taken from the device
 * context behind avctx->hw_frames_ctx (AVERROR(EINVAL) if that is not
 * set); otherwise CUDA is initialised and the devices are probed one by
 * one with nvenc_check_device().
 * Returns AVERROR_UNKNOWN when CUDA cannot be initialised or enumerated and
 * AVERROR(ENOSYS) at the end of the probing path.
 * NOTE(review): the success returns are not visible in this view. */
397 static int nvenc_setup_device(AVCodecContext *avctx)
399 NVENCContext *ctx = avctx->priv_data;
400 NVENCLibraryContext *nvel = &ctx->nvel;
402 switch (avctx->codec->id) {
403 case AV_CODEC_ID_H264:
404 ctx->params.encodeGUID = NV_ENC_CODEC_H264_GUID;
406 case AV_CODEC_ID_HEVC:
407 ctx->params.encodeGUID = NV_ENC_CODEC_HEVC_GUID;
/* GPU frames as input: reuse the caller's CUDA context. */
413 if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
415 AVHWFramesContext *frames_ctx;
416 AVCUDADeviceContext *device_hwctx;
419 if (!avctx->hw_frames_ctx)
420 return AVERROR(EINVAL);
422 frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
423 device_hwctx = frames_ctx->device_ctx->hwctx;
425 ctx->cu_context = device_hwctx->cuda_ctx;
427 ret = nvenc_open_session(avctx);
431 ret = nvenc_check_capabilities(avctx);
/* System-memory input: enumerate the CUDA devices and probe each. */
438 int i, nb_devices = 0;
440 if ((nvel->cu_init(0)) != CUDA_SUCCESS) {
441 av_log(avctx, AV_LOG_ERROR,
442 "Cannot init CUDA\n");
443 return AVERROR_UNKNOWN;
446 if ((nvel->cu_device_get_count(&nb_devices)) != CUDA_SUCCESS) {
447 av_log(avctx, AV_LOG_ERROR,
448 "Cannot enumerate the CUDA devices\n");
449 return AVERROR_UNKNOWN;
/* When listing devices, a successful probe does not end the loop
 * condition below, so every device gets reported. */
453 for (i = 0; i < nb_devices; ++i) {
454 if ((nvenc_check_device(avctx, i)) >= 0 && ctx->device != LIST_DEVICES)
458 if (ctx->device == LIST_DEVICES)
461 return AVERROR(ENOSYS);
/* Pairing used by the preset table; nvec_map_preset() reads its `guid` and
 * `flags` members.
 * NOTE(review): the member declarations are not visible in this view. */
467 typedef struct GUIDTuple {
/* Designated initializer for table slot PRESET_<alias>, using the GUID
 * NV_ENC_PRESET_<name>_GUID followed by any extra initializers. */
472 #define PRESET_ALIAS(alias, name, ...) \
473 [PRESET_ ## alias] = { NV_ENC_PRESET_ ## name ## _GUID, __VA_ARGS__ }
/* Shorthand for a preset whose table slot and GUID share the same name. */
475 #define PRESET(name, ...) PRESET_ALIAS(name, name, __VA_ARGS__)
/* Translate ctx->preset into the NVENC preset GUID and the internal flag
 * bits, storing them in ctx->params.presetGUID and ctx->flags.
 * NOTE(review): ctx->preset is used as an array index without a visible
 * range check, and the only visible return statement yields
 * AVERROR(EINVAL) even after the fields were filled in -- confirm whether
 * any caller relies on the return value. */
477 static int nvec_map_preset(NVENCContext *ctx)
479 GUIDTuple presets[] = {
/* Low-latency and lossless presets carry the matching flag bit. */
484 PRESET(LOW_LATENCY_DEFAULT, NVENC_LOWLATENCY),
485 PRESET(LOW_LATENCY_HP, NVENC_LOWLATENCY),
486 PRESET(LOW_LATENCY_HQ, NVENC_LOWLATENCY),
487 PRESET(LOSSLESS_DEFAULT, NVENC_LOSSLESS),
488 PRESET(LOSSLESS_HP, NVENC_LOSSLESS),
/* SLOW/MEDIUM/FAST are aliases onto the HQ and HP preset GUIDs. */
489 PRESET_ALIAS(SLOW, HQ, NVENC_TWO_PASSES),
490 PRESET_ALIAS(MEDIUM, HQ, NVENC_ONE_PASS),
491 PRESET_ALIAS(FAST, HP, NVENC_ONE_PASS),
495 GUIDTuple *t = &presets[ctx->preset];
497 ctx->params.presetGUID = t->guid;
498 ctx->flags = t->flags;
500 return AVERROR(EINVAL);
506 static void set_constqp(AVCodecContext *avctx, NV_ENC_RC_PARAMS *rc)
508 rc->rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
509 rc->constQP.qpInterB = avctx->global_quality;
510 rc->constQP.qpInterP = avctx->global_quality;
511 rc->constQP.qpIntra = avctx->global_quality;
/* Copy the user's QP limits into the rate-control parameters: a
 * non-negative avctx->qmin is written to all three minQP fields and a
 * non-negative avctx->qmax to all three maxQP fields.
 * NOTE(review): one line inside each branch is not visible in this view. */
514 static void set_vbr(AVCodecContext *avctx, NV_ENC_RC_PARAMS *rc)
516 if (avctx->qmin >= 0) {
518 rc->minQP.qpInterB = avctx->qmin;
519 rc->minQP.qpInterP = avctx->qmin;
520 rc->minQP.qpIntra = avctx->qmin;
523 if (avctx->qmax >= 0) {
525 rc->maxQP.qpInterB = avctx->qmax;
526 rc->maxQP.qpInterP = avctx->qmax;
527 rc->maxQP.qpIntra = avctx->qmax;
531 static void set_lossless(AVCodecContext *avctx, NV_ENC_RC_PARAMS *rc)
533 rc->rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
534 rc->constQP.qpInterB = 0;
535 rc->constQP.qpInterP = 0;
536 rc->constQP.qpIntra = 0;
/* Apply a rate-control mode that was requested explicitly. Each mode first
 * checks that the options it depends on were supplied and logs a warning
 * otherwise; at the end ctx->rc is written to rc->rateControlMode.
 * NOTE(review): the switch header and the statements following each
 * warning are not visible in this view, so what happens after a warning is
 * not documented here. */
539 static void nvenc_override_rate_control(AVCodecContext *avctx,
540 NV_ENC_RC_PARAMS *rc)
542 NVENCContext *ctx = avctx->priv_data;
/* Constant QP needs global_quality. */
545 case NV_ENC_PARAMS_RC_CONSTQP:
546 if (avctx->global_quality < 0) {
547 av_log(avctx, AV_LOG_WARNING,
548 "The constant quality rate-control requires "
549 "the 'global_quality' option set.\n");
552 set_constqp(avctx, rc);
/* The VBR modes need at least one of qmin / qmax. */
554 case NV_ENC_PARAMS_RC_2_PASS_VBR:
555 case NV_ENC_PARAMS_RC_VBR:
556 if (avctx->qmin < 0 && avctx->qmax < 0) {
557 av_log(avctx, AV_LOG_WARNING,
558 "The variable bitrate rate-control requires "
559 "the 'qmin' and/or 'qmax' option set.\n");
/* VBR with a minimum QP needs qmin specifically. */
562 case NV_ENC_PARAMS_RC_VBR_MINQP:
563 if (avctx->qmin < 0) {
564 av_log(avctx, AV_LOG_WARNING,
565 "The variable bitrate rate-control requires "
566 "the 'qmin' option set.\n");
571 case NV_ENC_PARAMS_RC_CBR:
/* The 2-pass modes below are checked against the low-latency flag. */
573 case NV_ENC_PARAMS_RC_2_PASS_QUALITY:
574 case NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP:
575 if (!(ctx->flags & NVENC_LOWLATENCY)) {
576 av_log(avctx, AV_LOG_WARNING,
577 "The multipass rate-control requires "
578 "a low-latency preset.\n");
583 rc->rateControlMode = ctx->rc;
/* Fill ctx->config.rcParams from the codec context and private options:
 * bitrates, the rate-control mode (explicit override, lossless, constant
 * QP or VBR, tried in that order), the VBV buffer size and, for NVENC API
 * major version 7 or later, the AQ, lookahead, strict-GOP, non-reference-P,
 * zero-latency and target-quality settings.
 * NOTE(review): several condition lines are not visible in this view. */
586 static void nvenc_setup_rate_control(AVCodecContext *avctx)
588 NVENCContext *ctx = avctx->priv_data;
589 NV_ENC_RC_PARAMS *rc = &ctx->config.rcParams;
591 if (avctx->bit_rate > 0)
592 rc->averageBitRate = avctx->bit_rate;
594 if (avctx->rc_max_rate > 0)
595 rc->maxBitRate = avctx->rc_max_rate;
598 nvenc_override_rate_control(avctx, rc);
599 } else if (ctx->flags & NVENC_LOSSLESS) {
600 set_lossless(avctx, rc);
601 } else if (avctx->global_quality > 0) {
602 set_constqp(avctx, rc);
603 } else if (avctx->qmin >= 0 && avctx->qmax >= 0) {
604 rc->rateControlMode = NV_ENC_PARAMS_RC_VBR;
608 if (avctx->rc_buffer_size > 0)
609 rc->vbvBufferSize = avctx->rc_buffer_size;
/* Report the bitrate that is in effect back to the codec context. */
611 if (rc->averageBitRate > 0)
612 avctx->bit_rate = rc->averageBitRate;
614 #if NVENCAPI_MAJOR_VERSION >= 7
616 ctx->config.rcParams.enableAQ = 1;
617 ctx->config.rcParams.aqStrength = ctx->aq_strength;
618 av_log(avctx, AV_LOG_VERBOSE, "AQ enabled.\n");
621 if (ctx->temporal_aq) {
622 ctx->config.rcParams.enableTemporalAQ = 1;
623 av_log(avctx, AV_LOG_VERBOSE, "Temporal AQ enabled.\n");
626 if (ctx->rc_lookahead) {
/* Upper bound for the lookahead depth, derived from the smaller of the
 * surface count and the async depth, less the P-frame interval and a
 * constant of 4. */
627 int lkd_bound = FFMIN(ctx->nb_surfaces, ctx->async_depth) -
628 ctx->config.frameIntervalP - 4;
631 av_log(avctx, AV_LOG_WARNING,
632 "Lookahead not enabled. Increase buffer delay (-delay).\n");
634 ctx->config.rcParams.enableLookahead = 1;
635 ctx->config.rcParams.lookaheadDepth = av_clip(ctx->rc_lookahead, 0, lkd_bound);
636 ctx->config.rcParams.disableIadapt = ctx->no_scenecut;
637 ctx->config.rcParams.disableBadapt = !ctx->b_adapt;
638 av_log(avctx, AV_LOG_VERBOSE,
639 "Lookahead enabled: depth %d, scenecut %s, B-adapt %s.\n",
640 ctx->config.rcParams.lookaheadDepth,
641 ctx->config.rcParams.disableIadapt ? "disabled" : "enabled",
642 ctx->config.rcParams.disableBadapt ? "disabled" : "enabled");
646 if (ctx->strict_gop) {
647 ctx->config.rcParams.strictGOPTarget = 1;
648 av_log(avctx, AV_LOG_VERBOSE, "Strict GOP target enabled.\n");
652 ctx->config.rcParams.enableNonRefP = 1;
654 if (ctx->zerolatency)
655 ctx->config.rcParams.zeroReorderDelay = 1;
658 ctx->config.rcParams.targetQuality = ctx->quality;
659 #endif /* NVENCAPI_MAJOR_VERSION >= 7 */
/* Fill the H.264-specific part of ctx->config: VUI colour signalling,
 * SPS/PPS emission, reference frame count, IDR period, slice data, SEI
 * output for CBR-type rate control, chroma format, profile GUID and level.
 * NOTE(review): the return statement is not visible in this view. */
662 static int nvenc_setup_h264_config(AVCodecContext *avctx)
664 NVENCContext *ctx = avctx->priv_data;
665 NV_ENC_CONFIG *cc = &ctx->config;
666 NV_ENC_CONFIG_H264 *h264 = &cc->encodeCodecConfig.h264Config;
667 NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui = &h264->h264VUIParameters;
/* A colour description is signalled as soon as any of matrix, primaries
 * or transfer characteristic is specified. */
669 vui->colourDescriptionPresentFlag = avctx->colorspace != AVCOL_SPC_UNSPECIFIED ||
670 avctx->color_primaries != AVCOL_PRI_UNSPECIFIED ||
671 avctx->color_trc != AVCOL_TRC_UNSPECIFIED;
673 vui->colourMatrix = avctx->colorspace;
674 vui->colourPrimaries = avctx->color_primaries;
675 vui->transferCharacteristics = avctx->color_trc;
677 vui->videoFullRangeFlag = avctx->color_range == AVCOL_RANGE_JPEG;
679 vui->videoSignalTypePresentFlag = vui->colourDescriptionPresentFlag ||
680 vui->videoFullRangeFlag;
/* With global headers SPS/PPS are neither emitted nor repeated in the
 * stream; without them they are repeated. */
682 h264->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
683 h264->repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
686 h264->maxNumRefFrames = avctx->refs;
687 h264->idrPeriod = cc->gopLength;
690 h264->sliceModeData = FFMAX(avctx->slices, 1);
692 if (ctx->flags & NVENC_LOSSLESS)
693 h264->qpPrimeYZeroTransformBypassFlag = 1;
695 if (IS_CBR(cc->rcParams.rateControlMode)) {
696 h264->outputBufferingPeriodSEI = 1;
697 h264->outputPictureTimingSEI = 1;
701 avctx->profile = ctx->profile;
/* chromaFormatIDC 3 is used for YUV444P input, 1 otherwise. */
703 if (ctx->data_pix_fmt == AV_PIX_FMT_YUV444P)
704 h264->chromaFormatIDC = 3;
706 h264->chromaFormatIDC = 1;
708 switch (ctx->profile) {
709 case NV_ENC_H264_PROFILE_BASELINE:
710 cc->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
712 case NV_ENC_H264_PROFILE_MAIN:
713 cc->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
715 case NV_ENC_H264_PROFILE_HIGH:
716 cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
718 case NV_ENC_H264_PROFILE_HIGH_444:
719 cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
721 case NV_ENC_H264_PROFILE_CONSTRAINED_HIGH:
722 cc->profileGUID = NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID;
/* YUV444P input overrides whatever profile was selected above. */
726 if (ctx->data_pix_fmt == AV_PIX_FMT_YUV444P) {
727 cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
728 avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
731 h264->level = ctx->level;
/* Fill the HEVC-specific part of ctx->config: VUI colour signalling,
 * SPS/PPS emission, DPB reference count, IDR period, SEI output for
 * CBR-type rate control, profile GUID (first from ctx->profile, then
 * forced from the data pixel format), chroma format and bit depth for
 * NVENC API major version 7 or later, slice data, level and tier.
 * NOTE(review): the return statement and the condition selecting between
 * the two level assignments are not visible in this view. */
736 static int nvenc_setup_hevc_config(AVCodecContext *avctx)
738 NVENCContext *ctx = avctx->priv_data;
739 NV_ENC_CONFIG *cc = &ctx->config;
740 NV_ENC_CONFIG_HEVC *hevc = &cc->encodeCodecConfig.hevcConfig;
741 NV_ENC_CONFIG_HEVC_VUI_PARAMETERS *vui = &hevc->hevcVUIParameters;
/* A colour description is signalled as soon as any of matrix, primaries
 * or transfer characteristic is specified. */
743 vui->colourDescriptionPresentFlag = avctx->colorspace != AVCOL_SPC_UNSPECIFIED ||
744 avctx->color_primaries != AVCOL_PRI_UNSPECIFIED ||
745 avctx->color_trc != AVCOL_TRC_UNSPECIFIED;
747 vui->colourMatrix = avctx->colorspace;
748 vui->colourPrimaries = avctx->color_primaries;
749 vui->transferCharacteristics = avctx->color_trc;
751 vui->videoFullRangeFlag = avctx->color_range == AVCOL_RANGE_JPEG;
753 vui->videoSignalTypePresentFlag = vui->colourDescriptionPresentFlag ||
754 vui->videoFullRangeFlag;
756 hevc->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
757 hevc->repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
760 hevc->maxNumRefFramesInDPB = avctx->refs;
761 hevc->idrPeriod = cc->gopLength;
763 if (IS_CBR(cc->rcParams.rateControlMode)) {
764 hevc->outputBufferingPeriodSEI = 1;
765 hevc->outputPictureTimingSEI = 1;
768 switch (ctx->profile) {
769 case NV_ENC_HEVC_PROFILE_MAIN:
770 cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
771 avctx->profile = FF_PROFILE_HEVC_MAIN;
773 #if NVENCAPI_MAJOR_VERSION >= 7
774 case NV_ENC_HEVC_PROFILE_MAIN_10:
775 cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
776 avctx->profile = FF_PROFILE_HEVC_MAIN_10;
778 case NV_ENC_HEVC_PROFILE_REXT:
779 cc->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
780 avctx->profile = FF_PROFILE_HEVC_REXT;
782 #endif /* NVENCAPI_MAJOR_VERSION >= 7 */
785 // force setting profile for various input formats
786 switch (ctx->data_pix_fmt) {
787 case AV_PIX_FMT_YUV420P:
788 case AV_PIX_FMT_NV12:
789 cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
790 avctx->profile = FF_PROFILE_HEVC_MAIN;
792 #if NVENCAPI_MAJOR_VERSION >= 7
793 case AV_PIX_FMT_P010:
794 cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
795 avctx->profile = FF_PROFILE_HEVC_MAIN_10;
797 case AV_PIX_FMT_YUV444P:
798 case AV_PIX_FMT_YUV444P16:
799 cc->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
800 avctx->profile = FF_PROFILE_HEVC_REXT;
802 #endif /* NVENCAPI_MAJOR_VERSION >= 7 */
805 #if NVENCAPI_MAJOR_VERSION >= 7
/* 4:4:4 formats use chromaFormatIDC 3, everything else 1; the formats
 * matched by IS_10BIT() are signalled as 8 + 2 bits. */
806 hevc->chromaFormatIDC = IS_YUV444(ctx->data_pix_fmt) ? 3 : 1;
807 hevc->pixelBitDepthMinus8 = IS_10BIT(ctx->data_pix_fmt) ? 2 : 0;
808 #endif /* NVENCAPI_MAJOR_VERSION >= 7 */
811 hevc->sliceModeData = FFMAX(avctx->slices, 1);
814 hevc->level = ctx->level;
816 hevc->level = NV_ENC_LEVEL_AUTOSELECT;
820 hevc->tier = ctx->tier;
825 static int nvenc_setup_codec_config(AVCodecContext *avctx)
827 switch (avctx->codec->id) {
828 case AV_CODEC_ID_H264:
829 return nvenc_setup_h264_config(avctx);
830 case AV_CODEC_ID_HEVC:
831 return nvenc_setup_hevc_config(avctx);
/* Build the encoder initialisation parameters and initialise the NVENC
 * encoder: picture size and display aspect ratio, frame rate, preset
 * configuration, GOP structure, rate control, field mode and the
 * codec-specific configuration. Afterwards the CPB side data of the codec
 * context is filled in.
 * Failing NVENC calls are logged and returned through nvenc_print_error();
 * AVERROR(ENOMEM) is returned on the CPB side-data path.
 * NOTE(review): some condition lines and the final return are not visible
 * in this view. */
836 static int nvenc_setup_encoder(AVCodecContext *avctx)
838 NVENCContext *ctx = avctx->priv_data;
839 NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
840 NV_ENC_PRESET_CONFIG preset_cfg = { 0 };
841 AVCPBProperties *cpb_props;
844 ctx->params.version = NV_ENC_INITIALIZE_PARAMS_VER;
846 ctx->params.encodeHeight = avctx->height;
847 ctx->params.encodeWidth = avctx->width;
/* With a non-square sample aspect ratio the display aspect ratio is the
 * reduced fraction (width * sar.num) / (height * sar.den); otherwise the
 * plain picture dimensions are used. */
849 if (avctx->sample_aspect_ratio.num &&
850 avctx->sample_aspect_ratio.den &&
851 (avctx->sample_aspect_ratio.num != 1 ||
852 avctx->sample_aspect_ratio.den != 1)) {
853 av_reduce(&ctx->params.darWidth,
854 &ctx->params.darHeight,
855 avctx->width * avctx->sample_aspect_ratio.num,
856 avctx->height * avctx->sample_aspect_ratio.den,
859 ctx->params.darHeight = avctx->height;
860 ctx->params.darWidth = avctx->width;
863 // De-compensate for hardware, dubiously, trying to compensate for
864 // playback at 704 pixel width.
865 if (avctx->width == 720 && (avctx->height == 480 || avctx->height == 576)) {
866 av_reduce(&ctx->params.darWidth, &ctx->params.darHeight,
867 ctx->params.darWidth * 44,
868 ctx->params.darHeight * 45,
/* The frame rate is the inverse of time_base scaled by ticks_per_frame. */
872 ctx->params.frameRateNum = avctx->time_base.den;
873 ctx->params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;
875 ctx->params.enableEncodeAsync = 0;
876 ctx->params.enablePTD = 1;
878 ctx->params.encodeConfig = &ctx->config;
/* The return value of the preset mapping is not checked here. */
880 nvec_map_preset(ctx);
882 preset_cfg.version = NV_ENC_PRESET_CONFIG_VER;
883 preset_cfg.presetCfg.version = NV_ENC_CONFIG_VER;
885 ret = nv->nvEncGetEncodePresetConfig(ctx->nvenc_ctx,
886 ctx->params.encodeGUID,
887 ctx->params.presetGUID,
889 if (ret != NV_ENC_SUCCESS)
890 return nvenc_print_error(avctx, ret, "Cannot get the preset configuration");
/* Start from the preset's configuration; the settings below override it. */
892 memcpy(&ctx->config, &preset_cfg.presetCfg, sizeof(ctx->config));
894 ctx->config.version = NV_ENC_CONFIG_VER;
896 if (avctx->gop_size > 0) {
897 if (avctx->max_b_frames > 0) {
901 * 3 two B-frames, and so on. */
902 ctx->config.frameIntervalP = avctx->max_b_frames + 1;
903 } else if (avctx->max_b_frames == 0) {
904 ctx->config.frameIntervalP = 1;
906 ctx->config.gopLength = avctx->gop_size;
907 } else if (avctx->gop_size == 0) {
908 ctx->config.frameIntervalP = 0;
909 ctx->config.gopLength = 1;
/* Write the effective B-frame count back to the codec context. */
912 if (ctx->config.frameIntervalP > 1)
913 avctx->max_b_frames = ctx->config.frameIntervalP - 1;
915 ctx->initial_pts[0] = AV_NOPTS_VALUE;
916 ctx->initial_pts[1] = AV_NOPTS_VALUE;
918 nvenc_setup_rate_control(avctx);
920 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
921 ctx->config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
923 ctx->config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
926 if ((ret = nvenc_setup_codec_config(avctx)) < 0)
929 ret = nv->nvEncInitializeEncoder(ctx->nvenc_ctx, &ctx->params);
930 if (ret != NV_ENC_SUCCESS)
931 return nvenc_print_error(avctx, ret, "InitializeEncoder failed");
933 cpb_props = ff_add_cpb_side_data(avctx);
935 return AVERROR(ENOMEM);
936 cpb_props->max_bitrate = avctx->rc_max_rate;
937 cpb_props->min_bitrate = avctx->rc_min_rate;
938 cpb_props->avg_bitrate = avctx->bit_rate;
939 cpb_props->buffer_size = avctx->rc_buffer_size;
/* Prepare surface slot idx in ctx->frames: record the NVENC buffer format
 * matching the data pixel format, then either allocate an AVFrame
 * reference (AV_PIX_FMT_CUDA input) or create an NVENC input buffer, and
 * finally create the output bitstream buffer.
 * Returns AVERROR(ENOMEM) when the frame reference cannot be allocated;
 * failing NVENC calls are logged and returned through nvenc_print_error().
 * NOTE(review): the default case of the format switch and the final return
 * are not visible in this view. */
944 static int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
946 NVENCContext *ctx = avctx->priv_data;
947 NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
949 NV_ENC_CREATE_BITSTREAM_BUFFER out_buffer = { 0 };
951 switch (ctx->data_pix_fmt) {
/* Planar YUV420P is handed over using the YV12 buffer format. */
952 case AV_PIX_FMT_YUV420P:
953 ctx->frames[idx].format = NV_ENC_BUFFER_FORMAT_YV12_PL;
955 case AV_PIX_FMT_NV12:
956 ctx->frames[idx].format = NV_ENC_BUFFER_FORMAT_NV12_PL;
958 case AV_PIX_FMT_YUV444P:
959 ctx->frames[idx].format = NV_ENC_BUFFER_FORMAT_YUV444_PL;
961 #if NVENCAPI_MAJOR_VERSION >= 7
962 case AV_PIX_FMT_P010:
963 ctx->frames[idx].format = NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
965 case AV_PIX_FMT_YUV444P16:
966 ctx->frames[idx].format = NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
968 #endif /* NVENCAPI_MAJOR_VERSION >= 7 */
/* GPU input: only an AVFrame reference holder is needed per surface. */
973 if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
974 ctx->frames[idx].in_ref = av_frame_alloc();
975 if (!ctx->frames[idx].in_ref)
976 return AVERROR(ENOMEM);
/* System-memory input: create an NVENC input buffer of picture size. */
978 NV_ENC_CREATE_INPUT_BUFFER in_buffer = { 0 };
980 in_buffer.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
982 in_buffer.width = avctx->width;
983 in_buffer.height = avctx->height;
985 in_buffer.bufferFmt = ctx->frames[idx].format;
986 in_buffer.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_UNCACHED;
988 ret = nv->nvEncCreateInputBuffer(ctx->nvenc_ctx, &in_buffer);
989 if (ret != NV_ENC_SUCCESS)
990 return nvenc_print_error(avctx, ret, "CreateInputBuffer failed");
992 ctx->frames[idx].in = in_buffer.inputBuffer;
995 out_buffer.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
996 /* 1MB is large enough to hold most output frames.
997 * NVENC increases this automatically if it is not enough. */
998 out_buffer.size = BITSTREAM_BUFFER_SIZE;
1000 out_buffer.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_UNCACHED;
1002 ret = nv->nvEncCreateBitstreamBuffer(ctx->nvenc_ctx, &out_buffer);
1003 if (ret != NV_ENC_SUCCESS)
1004 return nvenc_print_error(avctx, ret, "CreateBitstreamBuffer failed");
1006 ctx->frames[idx].out = out_buffer.bitstreamBuffer;
/* Decide how many surfaces to use, allocate the surface array and the
 * timestamp / pending / ready FIFOs, and set up every surface with
 * nvenc_alloc_surface().
 * Returns AVERROR(ENOMEM) when an allocation fails.
 * NOTE(review): the second argument of the FFMAX() below, the allocation
 * checks for frames/pending/ready and the final return are not visible in
 * this view. */
1011 static int nvenc_setup_surfaces(AVCodecContext *avctx)
1013 NVENCContext *ctx = avctx->priv_data;
1016 ctx->nb_surfaces = FFMAX(4 + avctx->max_b_frames,
/* The async depth is capped at one less than the surface count. */
1018 ctx->async_depth = FFMIN(ctx->async_depth, ctx->nb_surfaces - 1);
1021 ctx->frames = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->frames));
1023 return AVERROR(ENOMEM);
/* Each FIFO is sized for one entry per surface. */
1025 ctx->timestamps = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t));
1026 if (!ctx->timestamps)
1027 return AVERROR(ENOMEM);
1028 ctx->pending = av_fifo_alloc(ctx->nb_surfaces * sizeof(*ctx->frames));
1030 return AVERROR(ENOMEM);
1031 ctx->ready = av_fifo_alloc(ctx->nb_surfaces * sizeof(*ctx->frames));
1033 return AVERROR(ENOMEM);
1035 for (i = 0; i < ctx->nb_surfaces; i++) {
1036 if ((ret = nvenc_alloc_surface(avctx, i)) < 0)
/* Capacity in bytes offered to NVENC for the sequence parameter payload. */
1043 #define EXTRADATA_SIZE 512
/* Allocate avctx->extradata and ask NVENC to write the SPS/PPS payload into
 * it; the payload size is written to avctx->extradata_size.
 * Returns AVERROR(ENOMEM) when the buffer cannot be allocated; a failing
 * NVENC call is logged and returned through nvenc_print_error().
 * NOTE(review): the final return is not visible in this view. */
1045 static int nvenc_setup_extradata(AVCodecContext *avctx)
1047 NVENCContext *ctx = avctx->priv_data;
1048 NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
1049 NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
/* The buffer is zeroed and carries the input padding after the payload
 * capacity. */
1052 avctx->extradata = av_mallocz(EXTRADATA_SIZE + AV_INPUT_BUFFER_PADDING_SIZE);
1053 if (!avctx->extradata)
1054 return AVERROR(ENOMEM);
1056 payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
1057 payload.spsppsBuffer = avctx->extradata;
1058 payload.inBufferSize = EXTRADATA_SIZE;
1059 payload.outSPSPPSPayloadSize = &avctx->extradata_size;
1061 ret = nv->nvEncGetSequenceParams(ctx->nvenc_ctx, &payload);
1062 if (ret != NV_ENC_SUCCESS)
1063 return nvenc_print_error(avctx, ret, "Cannot get the extradata");
1068 av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
1070 NVENCContext *ctx = avctx->priv_data;
1071 NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
1074 /* the encoder has to be flushed before it can be closed */
1075 if (ctx->nvenc_ctx) {
1076 NV_ENC_PIC_PARAMS params = { .version = NV_ENC_PIC_PARAMS_VER,
1077 .encodePicFlags = NV_ENC_PIC_FLAG_EOS };
1079 nv->nvEncEncodePicture(ctx->nvenc_ctx, ¶ms);
1082 av_fifo_free(ctx->timestamps);
1083 av_fifo_free(ctx->pending);
1084 av_fifo_free(ctx->ready);
1087 for (i = 0; i < ctx->nb_surfaces; ++i) {
1088 if (avctx->pix_fmt != AV_PIX_FMT_CUDA) {
1089 nv->nvEncDestroyInputBuffer(ctx->nvenc_ctx, ctx->frames[i].in);
1090 } else if (ctx->frames[i].in) {
1091 nv->nvEncUnmapInputResource(ctx->nvenc_ctx, ctx->frames[i].in_map.mappedResource);
1094 av_frame_free(&ctx->frames[i].in_ref);
1095 nv->nvEncDestroyBitstreamBuffer(ctx->nvenc_ctx, ctx->frames[i].out);
1098 for (i = 0; i < ctx->nb_registered_frames; i++) {
1099 if (ctx->registered_frames[i].regptr)
1100 nv->nvEncUnregisterResource(ctx->nvenc_ctx, ctx->registered_frames[i].regptr);
1102 ctx->nb_registered_frames = 0;
1104 av_freep(&ctx->frames);
1107 nv->nvEncDestroyEncoder(ctx->nvenc_ctx);
1109 if (ctx->cu_context_internal)
1110 ctx->nvel.cu_ctx_destroy(ctx->cu_context_internal);
1112 if (ctx->nvel.nvenc)
1113 dlclose(ctx->nvel.nvenc);
1117 dlclose(ctx->nvel.cuda);
/* Encoder entry point: determine the pixel format of the frame data, then
 * load the libraries, set up the device, the encoder and the surfaces,
 * and fetch the extradata when global headers are requested.
 * For AV_PIX_FMT_CUDA input avctx->hw_frames_ctx must be set
 * (AVERROR(EINVAL) otherwise) and the data format is its sw_format.
 * NOTE(review): the statements executed when a setup step fails and the
 * final return are not visible in this view. */
1123 av_cold int ff_nvenc_encode_init(AVCodecContext *avctx)
1125 NVENCContext *ctx = avctx->priv_data;
1128 if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
1129 AVHWFramesContext *frames_ctx;
1130 if (!avctx->hw_frames_ctx) {
1131 av_log(avctx, AV_LOG_ERROR,
1132 "hw_frames_ctx must be set when using GPU frames as input\n");
1133 return AVERROR(EINVAL);
1135 frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
1136 ctx->data_pix_fmt = frames_ctx->sw_format;
1138 ctx->data_pix_fmt = avctx->pix_fmt;
1141 if ((ret = nvenc_load_libraries(avctx)) < 0)
1144 if ((ret = nvenc_setup_device(avctx)) < 0)
1147 if ((ret = nvenc_setup_encoder(avctx)) < 0)
1150 if ((ret = nvenc_setup_surfaces(avctx)) < 0)
/* Extradata is only produced when global headers are requested. */
1153 if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
1154 if ((ret = nvenc_setup_extradata(avctx)) < 0)
1161 static NVENCFrame *get_free_frame(NVENCContext *ctx)
1165 for (i = 0; i < ctx->nb_surfaces; i++) {
1166 if (!ctx->frames[i].locked) {
1167 ctx->frames[i].locked = 1;
1168 return &ctx->frames[i];
/* Copy the planes of frame into the locked NVENC input buffer `in`, plane
 * by plane, using in->pitch as the destination stride. The layout depends
 * on frame->format.
 * NOTE(review): the statements that advance `buf` between planes, the
 * default case and the return are not visible in this view; `off` (one
 * luma plane, height * pitch bytes) is presumably the step used --
 * TODO confirm. */
1175 static int nvenc_copy_frame(NV_ENC_LOCK_INPUT_BUFFER *in, const AVFrame *frame)
1177 uint8_t *buf = in->bufferDataPtr;
1178 int off = frame->height * in->pitch;
1180 switch (frame->format) {
/* Planar 4:2:0: the chroma planes are half size in both directions and
 * use half the pitch; data[2] is copied before data[1]. */
1181 case AV_PIX_FMT_YUV420P:
1182 av_image_copy_plane(buf, in->pitch,
1183 frame->data[0], frame->linesize[0],
1184 frame->width, frame->height);
1187 av_image_copy_plane(buf, in->pitch >> 1,
1188 frame->data[2], frame->linesize[2],
1189 frame->width >> 1, frame->height >> 1);
1193 av_image_copy_plane(buf, in->pitch >> 1,
1194 frame->data[1], frame->linesize[1],
1195 frame->width >> 1, frame->height >> 1);
/* NV12: the second plane is copied at full width and half height. */
1197 case AV_PIX_FMT_NV12:
1198 av_image_copy_plane(buf, in->pitch,
1199 frame->data[0], frame->linesize[0],
1200 frame->width, frame->height);
1203 av_image_copy_plane(buf, in->pitch,
1204 frame->data[1], frame->linesize[1],
1205 frame->width, frame->height >> 1);
/* P010: same plane layout as NV12 with twice the bytes per row. */
1207 case AV_PIX_FMT_P010:
1208 av_image_copy_plane(buf, in->pitch,
1209 frame->data[0], frame->linesize[0],
1210 frame->width << 1, frame->height);
1213 av_image_copy_plane(buf, in->pitch,
1214 frame->data[1], frame->linesize[1],
1215 frame->width << 1, frame->height >> 1);
/* Planar 4:4:4: three full-size planes in data[0..2] order. */
1217 case AV_PIX_FMT_YUV444P:
1218 av_image_copy_plane(buf, in->pitch,
1219 frame->data[0], frame->linesize[0],
1220 frame->width, frame->height);
1223 av_image_copy_plane(buf, in->pitch,
1224 frame->data[1], frame->linesize[1],
1225 frame->width, frame->height);
1228 av_image_copy_plane(buf, in->pitch,
1229 frame->data[2], frame->linesize[2],
1230 frame->width, frame->height);
/* 16-bit planar 4:4:4: as above with twice the bytes per row. */
1232 case AV_PIX_FMT_YUV444P16:
1233 av_image_copy_plane(buf, in->pitch,
1234 frame->data[0], frame->linesize[0],
1235 frame->width << 1, frame->height);
1238 av_image_copy_plane(buf, in->pitch,
1239 frame->data[1], frame->linesize[1],
1240 frame->width << 1, frame->height);
1243 av_image_copy_plane(buf, in->pitch,
1244 frame->data[2], frame->linesize[2],
1245 frame->width << 1, frame->height);
/*
 * Pick a slot in ctx->registered_frames for a new resource registration.
 *
 * When ctx->nb_registered_frames has reached the size of the array, the
 * table is scanned for an entry that is not currently mapped. If that entry
 * still holds a registration (regptr is set) it is released with
 * nvEncUnregisterResource() and regptr is cleared.
 *
 * The post-increment return hands out index nb_registered_frames and grows
 * the count by one.
 *
 * If no slot is produced, "Too many registered CUDA frames" is logged and
 * AVERROR(ENOMEM) is returned.
 *
 * NOTE(review): the return that follows a successful eviction and the
 * branch structure around the post-increment return are not visible in this
 * block. Presumably the evicted index is returned, and the post-increment
 * path is taken only while the table is not yet full -- confirm.
 */
1254 static int nvenc_find_free_reg_resource(AVCodecContext *avctx)
1256 NVENCContext *ctx = avctx->priv_data;
1257 NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
/* table full: look for an entry that can be recycled */
1260 if (ctx->nb_registered_frames == FF_ARRAY_ELEMS(ctx->registered_frames)) {
1261 for (i = 0; i < ctx->nb_registered_frames; i++) {
1262 if (!ctx->registered_frames[i].mapped) {
/* drop the stale registration; the result of the unregister call is
 * not checked */
1263 if (ctx->registered_frames[i].regptr) {
1264 nv->nvEncUnregisterResource(ctx->nvenc_ctx,
1265 ctx->registered_frames[i].regptr);
1266 ctx->registered_frames[i].regptr = NULL;
1272 return ctx->nb_registered_frames++;
1275 av_log(avctx, AV_LOG_ERROR, "Too many registered CUDA frames\n");
1276 return AVERROR(ENOMEM);
/*
 * Register the CUDA device pointer of a frame as an encoder input resource.
 *
 * The device pointer is frame->data[0]. The table of registered frames is
 * first searched for an entry with the same pointer. Otherwise a slot is
 * requested from nvenc_find_free_reg_resource(), an
 * NV_ENC_REGISTER_RESOURCE request is filled in and passed to
 * nvEncRegisterResource(), and the pointer together with the returned
 * registration handle is stored in that slot.
 *
 * On a registration failure the error is reported through
 * nvenc_print_error() and AVERROR_UNKNOWN is returned.
 *
 * NOTE(review): the return statements for the "already registered" and
 * success paths, and the check of `idx`, are not visible in this block.
 * Presumably the slot index is returned and a negative `idx` is propagated
 * -- confirm.
 *
 * NOTE(review): `reg` is declared without an initializer, and only the
 * fields assigned in the visible lines are set before it is handed to the
 * driver. Consider zero-initializing it.
 *
 * NOTE(review): the failure path discards the value returned by
 * nvenc_print_error() and returns AVERROR_UNKNOWN, whereas
 * nvenc_upload_frame() returns the nvenc_print_error() result directly.
 */
1279 static int nvenc_register_frame(AVCodecContext *avctx, const AVFrame *frame)
1281 NVENCContext *ctx = avctx->priv_data;
1282 NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
1283 AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
1284 NV_ENC_REGISTER_RESOURCE reg;
/* look for an existing entry that refers to the same device pointer */
1287 for (i = 0; i < ctx->nb_registered_frames; i++) {
1288 if (ctx->registered_frames[i].ptr == (CUdeviceptr)frame->data[0])
1292 idx = nvenc_find_free_reg_resource(avctx);
/* dimensions come from the hw frames context, the buffer format from the
 * first encoder surface, the pitch from the frame itself */
1296 reg.version = NV_ENC_REGISTER_RESOURCE_VER;
1297 reg.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
1298 reg.width = frames_ctx->width;
1299 reg.height = frames_ctx->height;
1300 reg.bufferFormat = ctx->frames[0].format;
1301 reg.pitch = frame->linesize[0];
1302 reg.resourceToRegister = frame->data[0];
/* NOTE(review): the last argument below looks like a mis-encoded `&reg`
 * -- confirm against the upstream file */
1304 ret = nv->nvEncRegisterResource(ctx->nvenc_ctx, ®);
1305 if (ret != NV_ENC_SUCCESS) {
1306 nvenc_print_error(avctx, ret, "Error registering an input resource");
1307 return AVERROR_UNKNOWN;
/* remember which device pointer this slot's registration belongs to */
1310 ctx->registered_frames[idx].ptr = (CUdeviceptr)frame->data[0];
1311 ctx->registered_frames[idx].regptr = reg.registeredResource;
/*
 * Make the contents of `frame` available to the encoder through
 * nvenc_frame->in.
 *
 * When avctx->pix_fmt is AV_PIX_FMT_CUDA, the frame is registered with
 * nvenc_register_frame(), a reference to it is stored in
 * nvenc_frame->in_ref, and the registered resource is mapped with
 * nvEncMapInputResource(). On success the table entry is flagged as mapped,
 * and its index and the mapped resource are recorded in nvenc_frame.
 *
 * The remaining visible lines lock nvenc_frame->in with
 * nvEncLockInputBuffer(), fill it through nvenc_copy_frame() and unlock it
 * again.
 *
 * Encoder API failures are reported via nvenc_print_error(), whose result is
 * returned.
 *
 * NOTE(review): several short lines (the declarations of `ret` and
 * `reg_idx`, the checks on `ret`, the branch separating the two paths and
 * the final return) are not visible in this block. Presumably the
 * lock/copy/unlock sequence is the non-CUDA branch -- confirm.
 */
1315 static int nvenc_upload_frame(AVCodecContext *avctx, const AVFrame *frame,
1316 NVENCFrame *nvenc_frame)
1318 NVENCContext *ctx = avctx->priv_data;
1319 NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
1322 if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
1325 ret = nvenc_register_frame(avctx, frame);
1327 av_log(avctx, AV_LOG_ERROR, "Could not register an input CUDA frame\n");
/* store a reference to the input frame in the surface's in_ref */
1332 ret = av_frame_ref(nvenc_frame->in_ref, frame);
1336 nvenc_frame->in_map.version = NV_ENC_MAP_INPUT_RESOURCE_VER;
1337 nvenc_frame->in_map.registeredResource = ctx->registered_frames[reg_idx].regptr;
1339 ret = nv->nvEncMapInputResource(ctx->nvenc_ctx, &nvenc_frame->in_map);
1340 if (ret != NV_ENC_SUCCESS) {
/* mapping failed: release the reference taken above */
1341 av_frame_unref(nvenc_frame->in_ref);
1342 return nvenc_print_error(avctx, ret, "Error mapping an input resource");
1345 ctx->registered_frames[reg_idx].mapped = 1;
1346 nvenc_frame->reg_idx = reg_idx;
1347 nvenc_frame->in = nvenc_frame->in_map.mappedResource;
1349 NV_ENC_LOCK_INPUT_BUFFER params = { 0 };
1351 params.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
1352 params.inputBuffer = nvenc_frame->in;
/* NOTE(review): `¶ms` in the two calls below looks like a mis-encoded
 * `&params` -- confirm against the upstream file */
1354 ret = nv->nvEncLockInputBuffer(ctx->nvenc_ctx, ¶ms);
1355 if (ret != NV_ENC_SUCCESS)
1356 return nvenc_print_error(avctx, ret, "Cannot lock the buffer");
1358 ret = nvenc_copy_frame(¶ms, frame);
/* NOTE(review): this unchecked unlock is presumably the clean-up for a
 * failed copy; the guarding condition is not visible here */
1360 nv->nvEncUnlockInputBuffer(ctx->nvenc_ctx, nvenc_frame->in);
1364 ret = nv->nvEncUnlockInputBuffer(ctx->nvenc_ctx, nvenc_frame->in);
1365 if (ret != NV_ENC_SUCCESS)
1366 return nvenc_print_error(avctx, ret, "Cannot unlock the buffer");
/*
 * Fill the codec-dependent part of the per-picture encode parameters.
 *
 * For H.264 and HEVC the sliceMode and sliceModeData values stored in the
 * encoder configuration (ctx->config.encodeCodecConfig) are copied into the
 * matching member of params->codecPicParams. The codec is selected by
 * avctx->codec->id; no other codec id is handled in the visible lines.
 */
1372 static void nvenc_codec_specific_pic_params(AVCodecContext *avctx,
1373 NV_ENC_PIC_PARAMS *params)
1375 NVENCContext *ctx = avctx->priv_data;
1377 switch (avctx->codec->id) {
1378 case AV_CODEC_ID_H264:
1379 params->codecPicParams.h264PicParams.sliceMode =
1380 ctx->config.encodeCodecConfig.h264Config.sliceMode;
1381 params->codecPicParams.h264PicParams.sliceModeData =
1382 ctx->config.encodeCodecConfig.h264Config.sliceModeData;
1384 case AV_CODEC_ID_HEVC:
1385 params->codecPicParams.hevcPicParams.sliceMode =
1386 ctx->config.encodeCodecConfig.hevcConfig.sliceMode;
1387 params->codecPicParams.hevcPicParams.sliceModeData =
1388 ctx->config.encodeCodecConfig.hevcConfig.sliceModeData;
/*
 * Append one timestamp to FIFO `f`.
 *
 * Writes the sizeof(pts) bytes of `pts` with av_fifo_generic_write() and
 * returns that call's result unchanged.
 */
1393 static inline int nvenc_enqueue_timestamp(AVFifoBuffer *f, int64_t pts)
1395 return av_fifo_generic_write(f, &pts, sizeof(pts), NULL);
/*
 * Remove the oldest timestamp from FIFO `f` and store it in *pts.
 *
 * Reads sizeof(*pts) bytes with av_fifo_generic_read() and returns that
 * call's result unchanged.
 */
1398 static inline int nvenc_dequeue_timestamp(AVFifoBuffer *f, int64_t *pts)
1400 return av_fifo_generic_read(f, pts, sizeof(*pts), NULL);
/*
 * Fill in pts, duration and dts of an output packet.
 *
 * pts and duration are taken from the locked bitstream description. For the
 * first packet of a stream with B-frames enabled, once the second initial
 * pts is known, dts is derived from ctx->initial_pts[0] and
 * ctx->initial_pts[1], and AVERROR(ERANGE) is returned if the arithmetic
 * would overflow int64_t. In every other case dts is taken from the
 * ctx->timestamps FIFO and the result of that read is returned.
 *
 * NOTE(review): the declaration and assignment of `delta`, and the return
 * that ends the first-packet branch, are not visible in this block. `delta`
 * is presumably ts1 - ts0 -- confirm.
 */
1403 static int nvenc_set_timestamp(AVCodecContext *avctx,
1404 NV_ENC_LOCK_BITSTREAM *params,
1407 NVENCContext *ctx = avctx->priv_data;
1409 pkt->pts = params->outputTimeStamp;
1410 pkt->duration = params->outputDuration;
1412 /* generate the first dts by linearly extrapolating the
1413 * first two pts values to the past */
1414 if (avctx->max_b_frames > 0 && !ctx->first_packet_output &&
1415 ctx->initial_pts[1] != AV_NOPTS_VALUE) {
1416 int64_t ts0 = ctx->initial_pts[0], ts1 = ctx->initial_pts[1];
/* reject pts pairs whose difference ts1 - ts0 does not fit in int64_t */
1419 if ((ts0 < 0 && ts1 > INT64_MAX + ts0) ||
1420 (ts0 > 0 && ts1 < INT64_MIN + ts0))
1421 return AVERROR(ERANGE);
/* likewise make sure ts0 - delta is representable before computing it */
1424 if ((delta < 0 && ts0 > INT64_MAX + delta) ||
1425 (delta > 0 && ts0 < INT64_MIN + delta))
1426 return AVERROR(ERANGE);
1427 pkt->dts = ts0 - delta;
/* the extrapolation is only performed while this flag is clear */
1429 ctx->first_packet_output = 1;
1432 return nvenc_dequeue_timestamp(ctx->timestamps, &pkt->dts);
/*
 * Turn the oldest ready surface into an output packet.
 *
 * Visible steps, in order:
 *  - take one NVENCFrame pointer from the ctx->ready FIFO;
 *  - lock its output bitstream with nvEncLockBitstream();
 *  - allocate `pkt` with params.bitstreamSizeInBytes bytes and copy
 *    pkt->size bytes from the locked bitstream into it;
 *  - unlock the bitstream;
 *  - for CUDA input, unmap the input resource, drop the frame reference
 *    held in frame->in_ref and clear the `mapped` flag of the table entry;
 *  - set the packet timestamps with nvenc_set_timestamp();
 *  - translate params.pictureType into the key-frame flag and, when
 *    FF_API_CODED_FRAME is enabled, into avctx->coded_frame->pict_type.
 *
 * Encoder API failures are reported via nvenc_print_error(), whose result is
 * returned.
 *
 * NOTE(review): the declarations of `frame` and `ret`, the conditions
 * guarding the error returns, any statements between the CUDA clean-up and
 * the timestamp call, and the final return are not visible in this block.
 *
 * NOTE(review): `¶ms` in the calls below looks like a mis-encoded
 * `&params` -- confirm against the upstream file.
 */
1435 static int nvenc_get_output(AVCodecContext *avctx, AVPacket *pkt)
1437 NVENCContext *ctx = avctx->priv_data;
1438 NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
1439 NV_ENC_LOCK_BITSTREAM params = { 0 };
1443 ret = av_fifo_generic_read(ctx->ready, &frame, sizeof(frame), NULL);
1447 params.version = NV_ENC_LOCK_BITSTREAM_VER;
1448 params.outputBitstream = frame->out;
1450 ret = nv->nvEncLockBitstream(ctx->nvenc_ctx, ¶ms);
1452 return nvenc_print_error(avctx, ret, "Cannot lock the bitstream");
1454 ret = ff_alloc_packet(pkt, params.bitstreamSizeInBytes);
1458 memcpy(pkt->data, params.bitstreamBufferPtr, pkt->size);
1460 ret = nv->nvEncUnlockBitstream(ctx->nvenc_ctx, frame->out);
1462 return nvenc_print_error(avctx, ret, "Cannot unlock the bitstream");
/* CUDA input: undo the mapping and the frame reference set up at upload
 * time; the result of the unmap call is not checked */
1464 if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
1465 nv->nvEncUnmapInputResource(ctx->nvenc_ctx, frame->in_map.mappedResource);
1466 av_frame_unref(frame->in_ref);
1467 ctx->registered_frames[frame->reg_idx].mapped = 0;
1474 ret = nvenc_set_timestamp(avctx, ¶ms, pkt);
1478 switch (params.pictureType) {
/* no terminator follows the IDR case in the visible lines, so with
 * FF_API_CODED_FRAME enabled it continues into the intra case below */
1479 case NV_ENC_PIC_TYPE_IDR:
1480 pkt->flags |= AV_PKT_FLAG_KEY;
1481 #if FF_API_CODED_FRAME
1482 FF_DISABLE_DEPRECATION_WARNINGS
1483 case NV_ENC_PIC_TYPE_INTRA_REFRESH:
1484 case NV_ENC_PIC_TYPE_I:
1485 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
1487 case NV_ENC_PIC_TYPE_P:
1488 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_P;
1490 case NV_ENC_PIC_TYPE_B:
1491 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_B;
1493 case NV_ENC_PIC_TYPE_BI:
1494 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_BI;
1496 FF_ENABLE_DEPRECATION_WARNINGS
/*
 * Decide whether a packet can be produced now.
 *
 * `flush` is non-zero when the caller is draining the encoder. The number
 * of ready and pending surfaces is derived from the byte size of the
 * ctx->ready and ctx->pending FIFOs, which store NVENCFrame pointers.
 *
 * The result is non-zero either when at least one surface is ready, or when
 * at least one surface is ready and ready plus pending surfaces reach
 * ctx->async_depth.
 *
 * NOTE(review): the statement executed when the initial-pts check below
 * matches, and the condition selecting between the two final returns, are
 * not visible in this block. Presumably the check returns 0 and the simpler
 * return is used when `flush` is set -- confirm.
 */
1503 static int output_ready(AVCodecContext *avctx, int flush)
1505 NVENCContext *ctx = avctx->priv_data;
1506 int nb_ready, nb_pending;
1508 /* when B-frames are enabled, we wait for two initial timestamps to
1509 * calculate the first dts */
1510 if (!flush && avctx->max_b_frames > 0 &&
1511 (ctx->initial_pts[0] == AV_NOPTS_VALUE || ctx->initial_pts[1] == AV_NOPTS_VALUE))
/* FIFO sizes are in bytes; convert them to element counts */
1514 nb_ready = av_fifo_size(ctx->ready) / sizeof(NVENCFrame*);
1515 nb_pending = av_fifo_size(ctx->pending) / sizeof(NVENCFrame*);
1517 return nb_ready > 0;
1518 return (nb_ready > 0) && (nb_ready + nb_pending >= ctx->async_depth);
1521 int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1522 const AVFrame *frame, int *got_packet)
1524 NVENCContext *ctx = avctx->priv_data;
1525 NV_ENCODE_API_FUNCTION_LIST *nv = &ctx->nvel.nvenc_funcs;
1526 NV_ENC_PIC_PARAMS params = { 0 };
1527 NVENCFrame *nvenc_frame = NULL;
1530 params.version = NV_ENC_PIC_PARAMS_VER;
1533 nvenc_frame = get_free_frame(ctx);
1535 av_log(avctx, AV_LOG_ERROR, "No free surfaces\n");
1539 ret = nvenc_upload_frame(avctx, frame, nvenc_frame);
1543 params.inputBuffer = nvenc_frame->in;
1544 params.bufferFmt = nvenc_frame->format;
1545 params.inputWidth = frame->width;
1546 params.inputHeight = frame->height;
1547 params.outputBitstream = nvenc_frame->out;
1548 params.inputTimeStamp = frame->pts;
1550 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
1551 if (frame->top_field_first)
1552 params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
1554 params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
1556 params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
1559 nvenc_codec_specific_pic_params(avctx, ¶ms);
1561 ret = nvenc_enqueue_timestamp(ctx->timestamps, frame->pts);
1565 if (ctx->initial_pts[0] == AV_NOPTS_VALUE)
1566 ctx->initial_pts[0] = frame->pts;
1567 else if (ctx->initial_pts[1] == AV_NOPTS_VALUE)
1568 ctx->initial_pts[1] = frame->pts;
1570 params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
1573 enc_ret = nv->nvEncEncodePicture(ctx->nvenc_ctx, ¶ms);
1574 if (enc_ret != NV_ENC_SUCCESS &&
1575 enc_ret != NV_ENC_ERR_NEED_MORE_INPUT)
1576 return nvenc_print_error(avctx, enc_ret, "Error encoding the frame");
1579 ret = av_fifo_generic_write(ctx->pending, &nvenc_frame, sizeof(nvenc_frame), NULL);
1584 /* all the pending buffers are now ready for output */
1585 if (enc_ret == NV_ENC_SUCCESS) {
1586 while (av_fifo_size(ctx->pending) > 0) {
1587 av_fifo_generic_read(ctx->pending, &nvenc_frame, sizeof(nvenc_frame), NULL);
1588 av_fifo_generic_write(ctx->ready, &nvenc_frame, sizeof(nvenc_frame), NULL);
1592 if (output_ready(avctx, !frame)) {
1593 ret = nvenc_get_output(avctx, pkt);