X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fcuvid.c;h=9b35476f2a51cb30b0a4a0c68a79c0fc07e6c066;hb=78d16eb45217f7ce811d1b05afe56427dd40021b;hp=eafce0ac19074cdc67275477b551592d629840e4;hpb=5a447edd475a7ae672f385ca0c76a5fc5a56ac1f;p=ffmpeg diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c index eafce0ac190..9b35476f2a5 100644 --- a/libavcodec/cuvid.c +++ b/libavcodec/cuvid.c @@ -19,21 +19,20 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "compat/cuda/dynlink_loader.h" + #include "libavutil/buffer.h" #include "libavutil/mathematics.h" #include "libavutil/hwcontext.h" -#include "libavutil/hwcontext_cuda.h" +#include "libavutil/hwcontext_cuda_internal.h" #include "libavutil/fifo.h" #include "libavutil/log.h" #include "libavutil/opt.h" +#include "libavutil/pixdesc.h" #include "avcodec.h" #include "internal.h" -#include "compat/cuda/nvcuvid.h" - -#define MAX_FRAME_COUNT 25 - typedef struct CuvidContext { AVClass *avclass; @@ -42,6 +41,7 @@ typedef struct CuvidContext CUvideoparser cuparser; char *cu_gpu; + int nb_surfaces; AVBufferRef *hwdevice; AVBufferRef *hwframe; @@ -61,6 +61,9 @@ typedef struct CuvidContext CUVIDPARSERPARAMS cuparseinfo; CUVIDEOFORMATEX cuparse_ext; + + CudaFunctions *cudl; + CuvidFunctions *cvdl; } CuvidContext; typedef struct CuvidParsedFrame @@ -72,6 +75,7 @@ typedef struct CuvidParsedFrame static int check_cu(AVCodecContext *avctx, CUresult err, const char *func) { + CuvidContext *ctx = avctx->priv_data; const char *err_name; const char *err_string; @@ -80,8 +84,8 @@ static int check_cu(AVCodecContext *avctx, CUresult err, const char *func) if (err == CUDA_SUCCESS) return 0; - cuGetErrorName(err, &err_name); - cuGetErrorString(err, &err_string); + ctx->cudl->cuGetErrorName(err, &err_name); + ctx->cudl->cuGetErrorString(err, &err_string); av_log(avctx, AV_LOG_ERROR, "%s failed", func); if (err_name && err_string) @@ -99,11 +103,46 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form CuvidContext *ctx = avctx->priv_data; AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data; CUVIDDECODECREATEINFO cuinfo; + int surface_fmt; + + enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA, + AV_PIX_FMT_NONE, // Will be updated below + AV_PIX_FMT_NONE }; av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence); ctx->internal_error = 0; + switch (format->bit_depth_luma_minus8) { + case 0: // 8-bit + pix_fmts[1] = AV_PIX_FMT_NV12; + break; + case 2: // 10-bit + pix_fmts[1] = AV_PIX_FMT_P010; + break; + case 4: // 12-bit + pix_fmts[1] = AV_PIX_FMT_P016; + break; + default: + av_log(avctx, AV_LOG_ERROR, "unsupported bit depth: %d\n", + format->bit_depth_luma_minus8 + 8); + ctx->internal_error = AVERROR(EINVAL); + return 0; + } + surface_fmt = ff_get_format(avctx, pix_fmts); + if (surface_fmt < 0) { + av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", surface_fmt); + ctx->internal_error = AVERROR(EINVAL); + return 0; + } + + av_log(avctx, AV_LOG_VERBOSE, "Formats: Original: %s | HW: %s | SW: %s\n", + av_get_pix_fmt_name(avctx->pix_fmt), + av_get_pix_fmt_name(surface_fmt), + av_get_pix_fmt_name(avctx->sw_pix_fmt)); + + avctx->pix_fmt = surface_fmt; + avctx->width = format->display_area.right; avctx->height = format->display_area.bottom; @@ -142,7 +181,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form if (ctx->cudecoder) { av_log(avctx, AV_LOG_TRACE, "Re-initializing decoder\n"); - ctx->internal_error = CHECK_CU(cuvidDestroyDecoder(ctx->cudecoder)); + ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder)); if (ctx->internal_error < 0) return 0; ctx->cudecoder = NULL; @@ -152,7 +191,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form hwframe_ctx->width < avctx->width || hwframe_ctx->height < avctx->height || hwframe_ctx->format != AV_PIX_FMT_CUDA || - hwframe_ctx->sw_format != AV_PIX_FMT_NV12)) { + hwframe_ctx->sw_format != avctx->sw_pix_fmt)) { av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n"); ctx->internal_error = AVERROR(EINVAL); return 0; @@ -173,7 +212,20 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form cuinfo.CodecType = ctx->codec_type = format->codec; cuinfo.ChromaFormat = format->chroma_format; - cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12; + + switch (avctx->sw_pix_fmt) { + case AV_PIX_FMT_NV12: + cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12; + break; + case AV_PIX_FMT_P010: + case AV_PIX_FMT_P016: + cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016; + break; + default: + av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n"); + ctx->internal_error = AVERROR(EINVAL); + return 0; + } cuinfo.ulWidth = avctx->coded_width; cuinfo.ulHeight = avctx->coded_height; @@ -185,7 +237,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form cuinfo.target_rect.right = cuinfo.ulWidth; cuinfo.target_rect.bottom = cuinfo.ulHeight; - cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT; + cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces; cuinfo.ulNumOutputSurfaces = 1; cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID; cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8; @@ -199,13 +251,13 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave) avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1}); - ctx->internal_error = CHECK_CU(cuvidCreateDecoder(&ctx->cudecoder, &cuinfo)); + ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo)); if (ctx->internal_error < 0) return 0; if (!hwframe_ctx->pool) { hwframe_ctx->format = AV_PIX_FMT_CUDA; - hwframe_ctx->sw_format = AV_PIX_FMT_NV12; + hwframe_ctx->sw_format = avctx->sw_pix_fmt; hwframe_ctx->width = avctx->width; hwframe_ctx->height = avctx->height; @@ -225,7 +277,7 @@ static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* pic av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n"); - ctx->internal_error = CHECK_CU(cuvidDecodePicture(ctx->cudecoder, picparams)); + ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams)); if (ctx->internal_error < 0) return 0; @@ -236,8 +288,9 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF { AVCodecContext *avctx = opaque; CuvidContext *ctx = avctx->priv_data; - CuvidParsedFrame parsed_frame = { *dispinfo, 0, 0 }; + CuvidParsedFrame parsed_frame = { { 0 } }; + parsed_frame.dispinfo = *dispinfo; ctx->internal_error = 0; if (ctx->deint_mode == cudaVideoDeinterlaceMode_Weave) { @@ -268,7 +321,7 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) if (is_flush && avpkt && avpkt->size) return AVERROR_EOF; - if (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame) > MAX_FRAME_COUNT - 2 && avpkt && avpkt->size) + if ((av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + 2 > ctx->nb_surfaces && avpkt && avpkt->size) return AVERROR(EAGAIN); if (ctx->bsf && avpkt && avpkt->size) { @@ -291,7 +344,7 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) avpkt = &filtered_packet; } - ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); + ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx)); if (ret < 0) { av_packet_unref(&filtered_packet); return ret; @@ -315,7 +368,7 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) ctx->decoder_flushing = 1; } - ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &cupkt)); + ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt)); av_packet_unref(&filtered_packet); @@ -330,7 +383,7 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) } error: - eret = CHECK_CU(cuCtxPopCurrent(&dummy)); + eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy)); if (eret < 0) return eret; @@ -359,7 +412,7 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) return ret; } - ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); + ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx)); if (ret < 0) return ret; @@ -377,7 +430,7 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) params.second_field = parsed_frame.second_field; params.top_field_first = parsed_frame.dispinfo.top_field_first; - ret = CHECK_CU(cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, ¶ms)); + ret = CHECK_CU(ctx->cvdl->cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, ¶ms)); if (ret < 0) goto error; @@ -407,13 +460,15 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) .Height = avctx->height >> (i ? 1 : 0), }; - ret = CHECK_CU(cuMemcpy2D(&cpy)); + ret = CHECK_CU(ctx->cudl->cuMemcpy2D(&cpy)); if (ret < 0) goto error; offset += avctx->coded_height; } - } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) { + } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 || + avctx->pix_fmt == AV_PIX_FMT_P010 || + avctx->pix_fmt == AV_PIX_FMT_P016) { AVFrame *tmp_frame = av_frame_alloc(); if (!tmp_frame) { av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n"); @@ -443,7 +498,6 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) av_frame_free(&tmp_frame); goto error; } - av_frame_free(&tmp_frame); } else { ret = AVERROR_BUG; @@ -492,9 +546,9 @@ FF_ENABLE_DEPRECATION_WARNINGS error: if (mapped_frame) - eret = CHECK_CU(cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame)); + eret = CHECK_CU(ctx->cvdl->cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame)); - eret = CHECK_CU(cuCtxPopCurrent(&dummy)); + eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy)); if (eret < 0) return eret; @@ -543,19 +597,27 @@ static av_cold int cuvid_decode_end(AVCodecContext *avctx) av_bsf_free(&ctx->bsf); if (ctx->cuparser) - cuvidDestroyVideoParser(ctx->cuparser); + ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser); if (ctx->cudecoder) - cuvidDestroyDecoder(ctx->cudecoder); + ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder); + + ctx->cudl = NULL; av_buffer_unref(&ctx->hwframe); av_buffer_unref(&ctx->hwdevice); + cuvid_free_functions(&ctx->cvdl); + return 0; } -static int cuvid_test_dummy_decoder(AVCodecContext *avctx, CUVIDPARSERPARAMS *cuparseinfo) +static int cuvid_test_dummy_decoder(AVCodecContext *avctx, + const CUVIDPARSERPARAMS *cuparseinfo, + int probed_width, + int probed_height) { + CuvidContext *ctx = avctx->priv_data; CUVIDDECODECREATEINFO cuinfo; CUvideodecoder cudec = 0; int ret = 0; @@ -566,8 +628,8 @@ static int cuvid_test_dummy_decoder(AVCodecContext *avctx, CUVIDPARSERPARAMS *cu cuinfo.ChromaFormat = cudaVideoChromaFormat_420; cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12; - cuinfo.ulWidth = 1280; - cuinfo.ulHeight = 720; + cuinfo.ulWidth = probed_width; + cuinfo.ulHeight = probed_height; cuinfo.ulTargetWidth = cuinfo.ulWidth; cuinfo.ulTargetHeight = cuinfo.ulHeight; @@ -576,18 +638,18 @@ static int cuvid_test_dummy_decoder(AVCodecContext *avctx, CUVIDPARSERPARAMS *cu cuinfo.target_rect.right = cuinfo.ulWidth; cuinfo.target_rect.bottom = cuinfo.ulHeight; - cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT; + cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces; cuinfo.ulNumOutputSurfaces = 1; cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID; cuinfo.bitDepthMinus8 = 0; cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave; - ret = CHECK_CU(cuvidCreateDecoder(&cudec, &cuinfo)); + ret = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&cudec, &cuinfo)); if (ret < 0) return ret; - ret = CHECK_CU(cuvidDestroyDecoder(cudec)); + ret = CHECK_CU(ctx->cvdl->cuvidDestroyDecoder(cudec)); if (ret < 0) return ret; @@ -610,20 +672,32 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx) AV_PIX_FMT_NV12, AV_PIX_FMT_NONE }; + int probed_width = avctx->coded_width ? avctx->coded_width : 1280; + int probed_height = avctx->coded_height ? avctx->coded_height : 720; + + // Accelerated transcoding scenarios with 'ffmpeg' require that the + // pix_fmt be set to AV_PIX_FMT_CUDA early. The sw_pix_fmt, and the + // pix_fmt for non-accelerated transcoding, do not need to be correct + // but need to be set to something. We arbitrarily pick NV12. ret = ff_get_format(avctx, pix_fmts); if (ret < 0) { av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret); return ret; } + avctx->pix_fmt = ret; + + ret = cuvid_load_functions(&ctx->cvdl); + if (ret < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n"); + goto error; + } - ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame)); + ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame)); if (!ctx->frame_queue) { ret = AVERROR(ENOMEM); goto error; } - avctx->pix_fmt = ret; - if (avctx->hw_frames_ctx) { ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx); if (!ctx->hwframe) { @@ -655,7 +729,9 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx) device_ctx = hwframe_ctx->device_ctx; device_hwctx = device_ctx->hwctx; + cuda_ctx = device_hwctx->cuda_ctx; + ctx->cudl = device_hwctx->internal->cuda_dl; memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo)); memset(&ctx->cuparse_ext, 0, sizeof(ctx->cuparse_ext)); @@ -743,22 +819,24 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx) FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), avctx->extradata_size)); } - ctx->cuparseinfo.ulMaxNumDecodeSurfaces = MAX_FRAME_COUNT; + ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces; ctx->cuparseinfo.ulMaxDisplayDelay = 4; ctx->cuparseinfo.pUserData = avctx; ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence; ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode; ctx->cuparseinfo.pfnDisplayPicture = cuvid_handle_picture_display; - ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); + ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx)); if (ret < 0) goto error; - ret = cuvid_test_dummy_decoder(avctx, &ctx->cuparseinfo); + ret = cuvid_test_dummy_decoder(avctx, &ctx->cuparseinfo, + probed_width, + probed_height); if (ret < 0) goto error; - ret = CHECK_CU(cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo)); + ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo)); if (ret < 0) goto error; @@ -766,12 +844,12 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx) seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length; if (seq_pkt.payload && seq_pkt.payload_size) { - ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &seq_pkt)); + ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt)); if (ret < 0) goto error; } - ret = CHECK_CU(cuCtxPopCurrent(&dummy)); + ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy)); if (ret < 0) goto error; @@ -796,29 +874,29 @@ static void cuvid_flush(AVCodecContext *avctx) CUVIDSOURCEDATAPACKET seq_pkt = { 0 }; int ret; - ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); + ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx)); if (ret < 0) goto error; av_fifo_freep(&ctx->frame_queue); - ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame)); + ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame)); if (!ctx->frame_queue) { av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on flush\n"); return; } if (ctx->cudecoder) { - cuvidDestroyDecoder(ctx->cudecoder); + ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder); ctx->cudecoder = NULL; } if (ctx->cuparser) { - cuvidDestroyVideoParser(ctx->cuparser); + ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser); ctx->cuparser = NULL; } - ret = CHECK_CU(cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo)); + ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo)); if (ret < 0) goto error; @@ -826,12 +904,12 @@ static void cuvid_flush(AVCodecContext *avctx) seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length; if (seq_pkt.payload && seq_pkt.payload_size) { - ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &seq_pkt)); + ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt)); if (ret < 0) goto error; } - ret = CHECK_CU(cuCtxPopCurrent(&dummy)); + ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy)); if (ret < 0) goto error; @@ -851,6 +929,7 @@ static const AVOption options[] = { { "bob", "Bob deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob }, 0, 0, VD, "deint" }, { "adaptive", "Adaptive deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" }, { "gpu", "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD }, + { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD }, { NULL } }; @@ -883,6 +962,8 @@ static const AVOption options[] = { .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \ .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \ AV_PIX_FMT_NV12, \ + AV_PIX_FMT_P010, \ + AV_PIX_FMT_P016, \ AV_PIX_FMT_NONE }, \ };