* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "compat/cuda/dynlink_loader.h"
+
#include "libavutil/buffer.h"
#include "libavutil/mathematics.h"
#include "libavutil/hwcontext.h"
-#include "libavutil/hwcontext_cuda.h"
+#include "libavutil/hwcontext_cuda_internal.h"
#include "libavutil/fifo.h"
#include "libavutil/log.h"
#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
#include "avcodec.h"
#include "internal.h"
-#include "compat/cuda/nvcuvid.h"
-
-#define MAX_FRAME_COUNT 25
-
typedef struct CuvidContext
{
AVClass *avclass;
CUvideoparser cuparser;
char *cu_gpu;
+ int nb_surfaces;
AVBufferRef *hwdevice;
AVBufferRef *hwframe;
CUVIDPARSERPARAMS cuparseinfo;
CUVIDEOFORMATEX cuparse_ext;
+
+ CudaFunctions *cudl;
+ CuvidFunctions *cvdl;
} CuvidContext;
typedef struct CuvidParsedFrame
static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
{
+ CuvidContext *ctx = avctx->priv_data;
const char *err_name;
const char *err_string;
if (err == CUDA_SUCCESS)
return 0;
- cuGetErrorName(err, &err_name);
- cuGetErrorString(err, &err_string);
+ ctx->cudl->cuGetErrorName(err, &err_name);
+ ctx->cudl->cuGetErrorString(err, &err_string);
av_log(avctx, AV_LOG_ERROR, "%s failed", func);
if (err_name && err_string)
CuvidContext *ctx = avctx->priv_data;
AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
CUVIDDECODECREATEINFO cuinfo;
+ int surface_fmt;
+
+ enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
+ AV_PIX_FMT_NONE, // Will be updated below
+ AV_PIX_FMT_NONE };
av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);
ctx->internal_error = 0;
+ switch (format->bit_depth_luma_minus8) {
+ case 0: // 8-bit
+ pix_fmts[1] = AV_PIX_FMT_NV12;
+ break;
+ case 2: // 10-bit
+ pix_fmts[1] = AV_PIX_FMT_P010;
+ break;
+ case 4: // 12-bit
+ pix_fmts[1] = AV_PIX_FMT_P016;
+ break;
+ default:
+ av_log(avctx, AV_LOG_ERROR, "unsupported bit depth: %d\n",
+ format->bit_depth_luma_minus8 + 8);
+ ctx->internal_error = AVERROR(EINVAL);
+ return 0;
+ }
+ surface_fmt = ff_get_format(avctx, pix_fmts);
+ if (surface_fmt < 0) {
+ av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", surface_fmt);
+ ctx->internal_error = AVERROR(EINVAL);
+ return 0;
+ }
+
+ av_log(avctx, AV_LOG_VERBOSE, "Formats: Original: %s | HW: %s | SW: %s\n",
+ av_get_pix_fmt_name(avctx->pix_fmt),
+ av_get_pix_fmt_name(surface_fmt),
+ av_get_pix_fmt_name(avctx->sw_pix_fmt));
+
+ avctx->pix_fmt = surface_fmt;
+
avctx->width = format->display_area.right;
avctx->height = format->display_area.bottom;
if (ctx->cudecoder) {
av_log(avctx, AV_LOG_TRACE, "Re-initializing decoder\n");
- ctx->internal_error = CHECK_CU(cuvidDestroyDecoder(ctx->cudecoder));
+ ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder));
if (ctx->internal_error < 0)
return 0;
ctx->cudecoder = NULL;
hwframe_ctx->width < avctx->width ||
hwframe_ctx->height < avctx->height ||
hwframe_ctx->format != AV_PIX_FMT_CUDA ||
- hwframe_ctx->sw_format != AV_PIX_FMT_NV12)) {
+ hwframe_ctx->sw_format != avctx->sw_pix_fmt)) {
av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
ctx->internal_error = AVERROR(EINVAL);
return 0;
cuinfo.CodecType = ctx->codec_type = format->codec;
cuinfo.ChromaFormat = format->chroma_format;
- cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
+
+ switch (avctx->sw_pix_fmt) {
+ case AV_PIX_FMT_NV12:
+ cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
+ break;
+ case AV_PIX_FMT_P010:
+ case AV_PIX_FMT_P016:
+ cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
+ break;
+ default:
+ av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n");
+ ctx->internal_error = AVERROR(EINVAL);
+ return 0;
+ }
cuinfo.ulWidth = avctx->coded_width;
cuinfo.ulHeight = avctx->coded_height;
cuinfo.target_rect.right = cuinfo.ulWidth;
cuinfo.target_rect.bottom = cuinfo.ulHeight;
- cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
+ cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces;
cuinfo.ulNumOutputSurfaces = 1;
cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave)
avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});
- ctx->internal_error = CHECK_CU(cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
+ ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
if (ctx->internal_error < 0)
return 0;
if (!hwframe_ctx->pool) {
hwframe_ctx->format = AV_PIX_FMT_CUDA;
- hwframe_ctx->sw_format = AV_PIX_FMT_NV12;
+ hwframe_ctx->sw_format = avctx->sw_pix_fmt;
hwframe_ctx->width = avctx->width;
hwframe_ctx->height = avctx->height;
av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n");
- ctx->internal_error = CHECK_CU(cuvidDecodePicture(ctx->cudecoder, picparams));
+ ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams));
if (ctx->internal_error < 0)
return 0;
{
AVCodecContext *avctx = opaque;
CuvidContext *ctx = avctx->priv_data;
- CuvidParsedFrame parsed_frame = { *dispinfo, 0, 0 };
+ CuvidParsedFrame parsed_frame = { { 0 } };
+ parsed_frame.dispinfo = *dispinfo;
ctx->internal_error = 0;
if (ctx->deint_mode == cudaVideoDeinterlaceMode_Weave) {
if (is_flush && avpkt && avpkt->size)
return AVERROR_EOF;
- if (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame) > MAX_FRAME_COUNT - 2 && avpkt && avpkt->size)
+ if ((av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + 2 > ctx->nb_surfaces && avpkt && avpkt->size)
return AVERROR(EAGAIN);
if (ctx->bsf && avpkt && avpkt->size) {
avpkt = &filtered_packet;
}
- ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
+ ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
if (ret < 0) {
av_packet_unref(&filtered_packet);
return ret;
ctx->decoder_flushing = 1;
}
- ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &cupkt));
+ ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt));
av_packet_unref(&filtered_packet);
}
error:
- eret = CHECK_CU(cuCtxPopCurrent(&dummy));
+ eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
if (eret < 0)
return eret;
return ret;
}
- ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
+ ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
if (ret < 0)
return ret;
params.second_field = parsed_frame.second_field;
params.top_field_first = parsed_frame.dispinfo.top_field_first;
- ret = CHECK_CU(cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
+ ret = CHECK_CU(ctx->cvdl->cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
if (ret < 0)
goto error;
.Height = avctx->height >> (i ? 1 : 0),
};
- ret = CHECK_CU(cuMemcpy2D(&cpy));
+ ret = CHECK_CU(ctx->cudl->cuMemcpy2D(&cpy));
if (ret < 0)
goto error;
offset += avctx->coded_height;
}
- } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
+ } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
+ avctx->pix_fmt == AV_PIX_FMT_P010 ||
+ avctx->pix_fmt == AV_PIX_FMT_P016) {
AVFrame *tmp_frame = av_frame_alloc();
if (!tmp_frame) {
av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
av_frame_free(&tmp_frame);
goto error;
}
-
av_frame_free(&tmp_frame);
} else {
ret = AVERROR_BUG;
error:
if (mapped_frame)
- eret = CHECK_CU(cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame));
+ eret = CHECK_CU(ctx->cvdl->cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame));
- eret = CHECK_CU(cuCtxPopCurrent(&dummy));
+ eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
if (eret < 0)
return eret;
av_bsf_free(&ctx->bsf);
if (ctx->cuparser)
- cuvidDestroyVideoParser(ctx->cuparser);
+ ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
if (ctx->cudecoder)
- cuvidDestroyDecoder(ctx->cudecoder);
+ ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
+
+ ctx->cudl = NULL;
av_buffer_unref(&ctx->hwframe);
av_buffer_unref(&ctx->hwdevice);
+ cuvid_free_functions(&ctx->cvdl);
+
return 0;
}
-static int cuvid_test_dummy_decoder(AVCodecContext *avctx, CUVIDPARSERPARAMS *cuparseinfo)
+static int cuvid_test_dummy_decoder(AVCodecContext *avctx,
+ const CUVIDPARSERPARAMS *cuparseinfo,
+ int probed_width,
+ int probed_height)
{
+ CuvidContext *ctx = avctx->priv_data;
CUVIDDECODECREATEINFO cuinfo;
CUvideodecoder cudec = 0;
int ret = 0;
cuinfo.ChromaFormat = cudaVideoChromaFormat_420;
cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
- cuinfo.ulWidth = 1280;
- cuinfo.ulHeight = 720;
+ cuinfo.ulWidth = probed_width;
+ cuinfo.ulHeight = probed_height;
cuinfo.ulTargetWidth = cuinfo.ulWidth;
cuinfo.ulTargetHeight = cuinfo.ulHeight;
cuinfo.target_rect.right = cuinfo.ulWidth;
cuinfo.target_rect.bottom = cuinfo.ulHeight;
- cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
+ cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces;
cuinfo.ulNumOutputSurfaces = 1;
cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
cuinfo.bitDepthMinus8 = 0;
cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
- ret = CHECK_CU(cuvidCreateDecoder(&cudec, &cuinfo));
+ ret = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&cudec, &cuinfo));
if (ret < 0)
return ret;
- ret = CHECK_CU(cuvidDestroyDecoder(cudec));
+ ret = CHECK_CU(ctx->cvdl->cuvidDestroyDecoder(cudec));
if (ret < 0)
return ret;
AV_PIX_FMT_NV12,
AV_PIX_FMT_NONE };
+ int probed_width = avctx->coded_width ? avctx->coded_width : 1280;
+ int probed_height = avctx->coded_height ? avctx->coded_height : 720;
+
+ // Accelerated transcoding scenarios with 'ffmpeg' require that the
+ // pix_fmt be set to AV_PIX_FMT_CUDA early. The sw_pix_fmt, and the
+ // pix_fmt for non-accelerated transcoding, do not need to be correct
+ // but need to be set to something. We arbitrarily pick NV12.
ret = ff_get_format(avctx, pix_fmts);
if (ret < 0) {
av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
return ret;
}
+ avctx->pix_fmt = ret;
+
+ ret = cuvid_load_functions(&ctx->cvdl);
+ if (ret < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
+ goto error;
+ }
- ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame));
+ ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
if (!ctx->frame_queue) {
ret = AVERROR(ENOMEM);
goto error;
}
- avctx->pix_fmt = ret;
-
if (avctx->hw_frames_ctx) {
ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
if (!ctx->hwframe) {
device_ctx = hwframe_ctx->device_ctx;
device_hwctx = device_ctx->hwctx;
+
cuda_ctx = device_hwctx->cuda_ctx;
+ ctx->cudl = device_hwctx->internal->cuda_dl;
memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo));
memset(&ctx->cuparse_ext, 0, sizeof(ctx->cuparse_ext));
FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), avctx->extradata_size));
}
- ctx->cuparseinfo.ulMaxNumDecodeSurfaces = MAX_FRAME_COUNT;
+ ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces;
ctx->cuparseinfo.ulMaxDisplayDelay = 4;
ctx->cuparseinfo.pUserData = avctx;
ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
ctx->cuparseinfo.pfnDisplayPicture = cuvid_handle_picture_display;
- ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
+ ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
if (ret < 0)
goto error;
- ret = cuvid_test_dummy_decoder(avctx, &ctx->cuparseinfo);
+ ret = cuvid_test_dummy_decoder(avctx, &ctx->cuparseinfo,
+ probed_width,
+ probed_height);
if (ret < 0)
goto error;
- ret = CHECK_CU(cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
+ ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
if (ret < 0)
goto error;
seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
if (seq_pkt.payload && seq_pkt.payload_size) {
- ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &seq_pkt));
+ ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
if (ret < 0)
goto error;
}
- ret = CHECK_CU(cuCtxPopCurrent(&dummy));
+ ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
if (ret < 0)
goto error;
CUVIDSOURCEDATAPACKET seq_pkt = { 0 };
int ret;
- ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
+ ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
if (ret < 0)
goto error;
av_fifo_freep(&ctx->frame_queue);
- ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame));
+ ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
if (!ctx->frame_queue) {
av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on flush\n");
return;
}
if (ctx->cudecoder) {
- cuvidDestroyDecoder(ctx->cudecoder);
+ ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
ctx->cudecoder = NULL;
}
if (ctx->cuparser) {
- cuvidDestroyVideoParser(ctx->cuparser);
+ ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
ctx->cuparser = NULL;
}
- ret = CHECK_CU(cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
+ ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
if (ret < 0)
goto error;
seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
if (seq_pkt.payload && seq_pkt.payload_size) {
- ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &seq_pkt));
+ ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
if (ret < 0)
goto error;
}
- ret = CHECK_CU(cuCtxPopCurrent(&dummy));
+ ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
if (ret < 0)
goto error;
{ "bob", "Bob deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob }, 0, 0, VD, "deint" },
{ "adaptive", "Adaptive deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" },
{ "gpu", "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
+ { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD },
{ NULL }
};
.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \
.pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
AV_PIX_FMT_NV12, \
+ AV_PIX_FMT_P010, \
+ AV_PIX_FMT_P016, \
AV_PIX_FMT_NONE }, \
};