AV_PIX_FMT_0RGB32,
AV_PIX_FMT_0BGR32,
AV_PIX_FMT_CUDA,
+#if CONFIG_D3D11VA
+ AV_PIX_FMT_D3D11,
+#endif
AV_PIX_FMT_NONE
};
uint32_t nvenc_max_ver;
int ret;
- ret = cuda_load_functions(&dl_fn->cuda_dl);
+ ret = cuda_load_functions(&dl_fn->cuda_dl, avctx);
if (ret < 0)
return ret;
- ret = nvenc_load_functions(&dl_fn->nvenc_dl);
+ ret = nvenc_load_functions(&dl_fn->nvenc_dl, avctx);
if (ret < 0) {
nvenc_print_driver_requirement(avctx, AV_LOG_ERROR);
return ret;
return 0;
}
+static int nvenc_push_context(AVCodecContext *avctx)
+{ /*
+   * Push the encoder's CUDA context (ctx->cu_context) with cuCtxPushCurrent.
+   * Returns 0 on success, and also 0 without touching CUDA when
+   * ctx->d3d11_device is set; returns AVERROR_EXTERNAL, after logging an
+   * error, if cuCtxPushCurrent does not report CUDA_SUCCESS.
+   */
+ NvencContext *ctx = avctx->priv_data;
+ NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
+ CUresult cu_res;
+ /* With a D3D11 device set, the CUDA push is skipped entirely. */
+ if (ctx->d3d11_device)
+ return 0;
+ /* cuCtxPushCurrent is called through the dynamically loaded CUDA function table. */
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+ /* Push succeeded. */
+ return 0;
+}
+
+static int nvenc_pop_context(AVCodecContext *avctx)
+{ /*
+   * Pop the current CUDA context with cuCtxPopCurrent; the counterpart of
+   * nvenc_push_context(). Returns 0 on success, and also 0 without touching
+   * CUDA when ctx->d3d11_device is set; returns AVERROR_EXTERNAL, after
+   * logging an error, if cuCtxPopCurrent does not report CUDA_SUCCESS.
+   */
+ NvencContext *ctx = avctx->priv_data;
+ NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
+ CUresult cu_res;
+ CUcontext dummy;
+ /* nvenc_push_context() pushes nothing when a D3D11 device is set, so there is nothing to pop. */
+ if (ctx->d3d11_device)
+ return 0;
+ /* dummy only receives the popped context handle; it is never read afterwards. */
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+ /* Pop succeeded. */
+ return 0;
+}
+
static av_cold int nvenc_open_session(AVCodecContext *avctx)
{
NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { 0 };
params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
params.apiVersion = NVENCAPI_VERSION;
- params.device = ctx->cu_context;
- params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
+ if (ctx->d3d11_device) {
+ params.device = ctx->d3d11_device;
+ params.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX;
+ } else {
+ params.device = ctx->cu_context;
+ params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
+ }
ret = p_nvenc->nvEncOpenEncodeSessionEx(¶ms, &ctx->nvencoder);
if (ret != NV_ENC_SUCCESS) {
int major, minor, ret;
CUresult cu_res;
CUdevice cu_device;
- CUcontext dummy;
int loglevel = AV_LOG_VERBOSE;
if (ctx->device == LIST_DEVICES)
ctx->cu_context = ctx->cu_context_internal;
- cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_FATAL, "Failed popping CUDA context: 0x%x\n", (int)cu_res);
+ if ((ret = nvenc_pop_context(avctx)) < 0)
goto fail2;
- }
if ((ret = nvenc_open_session(avctx)) < 0)
goto fail2;
return 0;
fail3:
- cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
+ if ((ret = nvenc_push_context(avctx)) < 0)
+ return ret;
p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
ctx->nvencoder = NULL;
- cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
+ if ((ret = nvenc_pop_context(avctx)) < 0)
+ return ret;
fail2:
dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
return AVERROR_BUG;
}
- if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->hw_frames_ctx || avctx->hw_device_ctx) {
+ if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11 || avctx->hw_frames_ctx || avctx->hw_device_ctx) {
AVHWFramesContext *frames_ctx;
AVHWDeviceContext *hwdev_ctx;
- AVCUDADeviceContext *device_hwctx;
+ AVCUDADeviceContext *cuda_device_hwctx = NULL;
+#if CONFIG_D3D11VA
+ AVD3D11VADeviceContext *d3d11_device_hwctx = NULL;
+#endif
int ret;
if (avctx->hw_frames_ctx) {
frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
- device_hwctx = frames_ctx->device_ctx->hwctx;
+ if (frames_ctx->format == AV_PIX_FMT_CUDA)
+ cuda_device_hwctx = frames_ctx->device_ctx->hwctx;
+#if CONFIG_D3D11VA
+ else if (frames_ctx->format == AV_PIX_FMT_D3D11)
+ d3d11_device_hwctx = frames_ctx->device_ctx->hwctx;
+#endif
+ else
+ return AVERROR(EINVAL);
} else if (avctx->hw_device_ctx) {
hwdev_ctx = (AVHWDeviceContext*)avctx->hw_device_ctx->data;
- device_hwctx = hwdev_ctx->hwctx;
+ if (hwdev_ctx->type == AV_HWDEVICE_TYPE_CUDA)
+ cuda_device_hwctx = hwdev_ctx->hwctx;
+#if CONFIG_D3D11VA
+ else if (hwdev_ctx->type == AV_HWDEVICE_TYPE_D3D11VA)
+ d3d11_device_hwctx = hwdev_ctx->hwctx;
+#endif
+ else
+ return AVERROR(EINVAL);
} else {
return AVERROR(EINVAL);
}
- ctx->cu_context = device_hwctx->cuda_ctx;
+ if (cuda_device_hwctx) {
+ ctx->cu_context = cuda_device_hwctx->cuda_ctx;
+ }
+#if CONFIG_D3D11VA
+ else if (d3d11_device_hwctx) {
+ ctx->d3d11_device = d3d11_device_hwctx->device;
+ ID3D11Device_AddRef(ctx->d3d11_device);
+ }
+#endif
ret = nvenc_open_session(avctx);
if (ret < 0)
NV_ENC_PRESET_CONFIG preset_config = { 0 };
NVENCSTATUS nv_status = NV_ENC_SUCCESS;
AVCPBProperties *cpb_props;
- CUresult cu_res;
- CUcontext dummy;
int res = 0;
int dw, dh;
if (res)
return res;
- cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
+ res = nvenc_push_context(avctx);
+ if (res < 0)
+ return res;
nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
- cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
+ res = nvenc_pop_context(avctx);
+ if (res < 0)
+ return res;
if (nv_status != NV_ENC_SUCCESS) {
return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed");
NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
- if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
+ if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
ctx->surfaces[idx].in_ref = av_frame_alloc();
if (!ctx->surfaces[idx].in_ref)
return AVERROR(ENOMEM);
nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut);
if (nv_status != NV_ENC_SUCCESS) {
int err = nvenc_print_error(avctx, nv_status, "CreateBitstreamBuffer failed");
- if (avctx->pix_fmt != AV_PIX_FMT_CUDA)
+ if (avctx->pix_fmt != AV_PIX_FMT_CUDA && avctx->pix_fmt != AV_PIX_FMT_D3D11)
p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[idx].input_surface);
av_frame_free(&ctx->surfaces[idx].in_ref);
return err;
static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
{
NvencContext *ctx = avctx->priv_data;
- NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
- CUresult cu_res;
- CUcontext dummy;
- int i, res;
+ int i, res = 0, res2;
ctx->surfaces = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->surfaces));
if (!ctx->surfaces)
if (!ctx->output_surface_ready_queue)
return AVERROR(ENOMEM);
- cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
+ res = nvenc_push_context(avctx);
+ if (res < 0)
+ return res;
for (i = 0; i < ctx->nb_surfaces; i++) {
if ((res = nvenc_alloc_surface(avctx, i)) < 0)
- {
- cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
- return res;
- }
+ goto fail;
}
- cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
+fail:
+ res2 = nvenc_pop_context(avctx);
+ if (res2 < 0)
+ return res2;
- return 0;
+ return res;
}
static av_cold int nvenc_setup_extradata(AVCodecContext *avctx)
NvencContext *ctx = avctx->priv_data;
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
- CUresult cu_res;
- CUcontext dummy;
- int i;
+ int i, res;
/* the encoder has to be flushed before it can be closed */
if (ctx->nvencoder) {
NV_ENC_PIC_PARAMS params = { .version = NV_ENC_PIC_PARAMS_VER,
.encodePicFlags = NV_ENC_PIC_FLAG_EOS };
- cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
+ res = nvenc_push_context(avctx);
+ if (res < 0)
+ return res;
p_nvenc->nvEncEncodePicture(ctx->nvencoder, ¶ms);
}
av_fifo_freep(&ctx->output_surface_queue);
av_fifo_freep(&ctx->unused_surface_queue);
- if (ctx->surfaces && avctx->pix_fmt == AV_PIX_FMT_CUDA) {
+ if (ctx->surfaces && (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11)) {
for (i = 0; i < ctx->nb_surfaces; ++i) {
if (ctx->surfaces[i].input_surface) {
p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, ctx->surfaces[i].in_map.mappedResource);
if (ctx->surfaces) {
for (i = 0; i < ctx->nb_surfaces; ++i) {
- if (avctx->pix_fmt != AV_PIX_FMT_CUDA)
+ if (avctx->pix_fmt != AV_PIX_FMT_CUDA && avctx->pix_fmt != AV_PIX_FMT_D3D11)
p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[i].input_surface);
av_frame_free(&ctx->surfaces[i].in_ref);
p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->surfaces[i].output_surface);
if (ctx->nvencoder) {
p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
- cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
+ res = nvenc_pop_context(avctx);
+ if (res < 0)
+ return res;
}
ctx->nvencoder = NULL;
dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
ctx->cu_context = ctx->cu_context_internal = NULL;
+#if CONFIG_D3D11VA
+ if (ctx->d3d11_device) {
+ ID3D11Device_Release(ctx->d3d11_device);
+ ctx->d3d11_device = NULL;
+ }
+#endif
+
nvenc_free_functions(&dl_fn->nvenc_dl);
cuda_free_functions(&dl_fn->cuda_dl);
NvencContext *ctx = avctx->priv_data;
int ret;
- if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
+ if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
AVHWFramesContext *frames_ctx;
if (!avctx->hw_frames_ctx) {
av_log(avctx, AV_LOG_ERROR,
return AVERROR(EINVAL);
}
frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+ if (frames_ctx->format != avctx->pix_fmt) {
+ av_log(avctx, AV_LOG_ERROR,
+ "hw_frames_ctx must match the GPU frame type\n");
+ return AVERROR(EINVAL);
+ }
ctx->data_pix_fmt = frames_ctx->sw_format;
} else {
ctx->data_pix_fmt = avctx->pix_fmt;
int i, idx, ret;
for (i = 0; i < ctx->nb_registered_frames; i++) {
- if (ctx->registered_frames[i].ptr == (CUdeviceptr)frame->data[0])
+ if (avctx->pix_fmt == AV_PIX_FMT_CUDA && ctx->registered_frames[i].ptr == frame->data[0])
+ return i;
+ else if (avctx->pix_fmt == AV_PIX_FMT_D3D11 && ctx->registered_frames[i].ptr == frame->data[0] && ctx->registered_frames[i].ptr_index == (intptr_t)frame->data[1])
return i;
}
return idx;
reg.version = NV_ENC_REGISTER_RESOURCE_VER;
- reg.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
reg.width = frames_ctx->width;
reg.height = frames_ctx->height;
reg.pitch = frame->linesize[0];
reg.resourceToRegister = frame->data[0];
+ if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
+ reg.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
+ }
+ else if (avctx->pix_fmt == AV_PIX_FMT_D3D11) {
+ reg.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
+ reg.subResourceIndex = (intptr_t)frame->data[1];
+ }
+
reg.bufferFormat = nvenc_map_buffer_format(frames_ctx->sw_format);
if (reg.bufferFormat == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format: %s\n",
return AVERROR_UNKNOWN;
}
- ctx->registered_frames[idx].ptr = (CUdeviceptr)frame->data[0];
- ctx->registered_frames[idx].regptr = reg.registeredResource;
+ ctx->registered_frames[idx].ptr = frame->data[0];
+ ctx->registered_frames[idx].ptr_index = reg.subResourceIndex;
+ ctx->registered_frames[idx].regptr = reg.registeredResource;
return idx;
}
int res;
NVENCSTATUS nv_status;
- if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
+ if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
int reg_idx = nvenc_register_frame(avctx, frame);
if (reg_idx < 0) {
- av_log(avctx, AV_LOG_ERROR, "Could not register an input CUDA frame\n");
+ av_log(avctx, AV_LOG_ERROR, "Could not register an input HW frame\n");
return reg_idx;
}
}
slice_offsets = av_mallocz(slice_mode_data * sizeof(*slice_offsets));
- if (!slice_offsets)
+ if (!slice_offsets) {
+ res = AVERROR(ENOMEM);
goto error;
+ }
lock_params.version = NV_ENC_LOCK_BITSTREAM_VER;
nvenc_print_error(avctx, nv_status, "Failed unlocking bitstream buffer, expect the gates of mordor to open");
- if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
+ if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
p_nvenc->nvEncUnmapInputResource(ctx->nvencoder, tmpoutsurf->in_map.mappedResource);
av_frame_unref(tmpoutsurf->in_ref);
ctx->registered_frames[tmpoutsurf->reg_idx].mapped = 0;
int ff_nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
{
NVENCSTATUS nv_status;
- CUresult cu_res;
- CUcontext dummy;
NvencSurface *tmp_out_surf, *in_surf;
- int res;
+ int res, res2;
NvencContext *ctx = avctx->priv_data;
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
NV_ENC_PIC_PARAMS pic_params = { 0 };
pic_params.version = NV_ENC_PIC_PARAMS_VER;
- if (!ctx->cu_context || !ctx->nvencoder)
+ if ((!ctx->cu_context && !ctx->d3d11_device) || !ctx->nvencoder)
return AVERROR(EINVAL);
if (ctx->encoder_flushing)
if (!in_surf)
return AVERROR(EAGAIN);
- cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
+ res = nvenc_push_context(avctx);
+ if (res < 0)
+ return res;
res = nvenc_upload_frame(avctx, frame, in_surf);
- cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
+ res2 = nvenc_pop_context(avctx);
+ if (res2 < 0)
+ return res2;
if (res)
return res;
ctx->encoder_flushing = 1;
}
- cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
+ res = nvenc_push_context(avctx);
+ if (res < 0)
+ return res;
nv_status = p_nvenc->nvEncEncodePicture(ctx->nvencoder, &pic_params);
- cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
+ res = nvenc_pop_context(avctx);
+ if (res < 0)
+ return res;
if (nv_status != NV_ENC_SUCCESS &&
nv_status != NV_ENC_ERR_NEED_MORE_INPUT)
int ff_nvenc_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
{
- CUresult cu_res;
- CUcontext dummy;
NvencSurface *tmp_out_surf;
- int res;
+ int res, res2;
NvencContext *ctx = avctx->priv_data;
- NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
- if (!ctx->cu_context || !ctx->nvencoder)
+ if ((!ctx->cu_context && !ctx->d3d11_device) || !ctx->nvencoder)
return AVERROR(EINVAL);
if (output_ready(avctx, ctx->encoder_flushing)) {
av_fifo_generic_read(ctx->output_surface_ready_queue, &tmp_out_surf, sizeof(tmp_out_surf), NULL);
- cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
+ res = nvenc_push_context(avctx);
+ if (res < 0)
+ return res;
res = process_output_surface(avctx, pkt, tmp_out_surf);
- cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
- if (cu_res != CUDA_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
- return AVERROR_EXTERNAL;
- }
+ res2 = nvenc_pop_context(avctx);
+ if (res2 < 0)
+ return res2;
if (res)
return res;