X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fcuviddec.c;h=bce584c9c60bb6cdb2c1f48ffd8980ef36e07120;hb=c5324d92c5f206dcdc2cf93ae237eaa7c1ad0a40;hp=4d3caf924ed3aea1951c810a15ca22d36d510421;hpb=ef71ef5f30ddf1cd61e46628a04608892caf76d2;p=ffmpeg diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c index 4d3caf924ed..bce584c9c60 100644 --- a/libavcodec/cuviddec.c +++ b/libavcodec/cuviddec.c @@ -25,6 +25,7 @@ #include "libavutil/mathematics.h" #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_cuda_internal.h" +#include "libavutil/cuda_check.h" #include "libavutil/fifo.h" #include "libavutil/log.h" #include "libavutil/opt.h" @@ -32,9 +33,15 @@ #include "avcodec.h" #include "decode.h" -#include "hwaccel.h" +#include "hwconfig.h" +#include "nvdec.h" #include "internal.h" +#if !NVDECAPI_CHECK_VERSION(9, 0) +#define cudaVideoSurfaceFormat_YUV444 2 +#define cudaVideoSurfaceFormat_YUV444_16Bit 3 +#endif + typedef struct CuvidContext { AVClass *avclass; @@ -63,13 +70,12 @@ typedef struct CuvidContext AVBufferRef *hwdevice; AVBufferRef *hwframe; - AVBSFContext *bsf; - AVFifoBuffer *frame_queue; int deint_mode; int deint_mode_current; int64_t prev_pts; + int progressive_sequence; int internal_error; int decoder_flushing; @@ -95,29 +101,7 @@ typedef struct CuvidParsedFrame int is_deinterlacing; } CuvidParsedFrame; -static int check_cu(AVCodecContext *avctx, CUresult err, const char *func) -{ - CuvidContext *ctx = avctx->priv_data; - const char *err_name; - const char *err_string; - - av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func); - - if (err == CUDA_SUCCESS) - return 0; - - ctx->cudl->cuGetErrorName(err, &err_name); - ctx->cudl->cuGetErrorString(err, &err_string); - - av_log(avctx, AV_LOG_ERROR, "%s failed", func); - if (err_name && err_string) - av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string); - av_log(avctx, AV_LOG_ERROR, "\n"); - - return AVERROR_EXTERNAL; -} - -#define CHECK_CU(x) check_cu(avctx, (x), #x) +#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x) static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format) { @@ -127,6 +111,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form CUVIDDECODECAPS *caps = NULL; CUVIDDECODECREATEINFO cuinfo; int surface_fmt; + int chroma_444; int old_width = avctx->width; int old_height = avctx->height; @@ -169,17 +154,19 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form cuinfo.target_rect.right = cuinfo.ulTargetWidth; cuinfo.target_rect.bottom = cuinfo.ulTargetHeight; + chroma_444 = format->chroma_format == cudaVideoChromaFormat_444; + switch (format->bit_depth_luma_minus8) { case 0: // 8-bit - pix_fmts[1] = AV_PIX_FMT_NV12; + pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12; caps = &ctx->caps8; break; case 2: // 10-bit - pix_fmts[1] = AV_PIX_FMT_P010; + pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010; caps = &ctx->caps10; break; case 4: // 12-bit - pix_fmts[1] = AV_PIX_FMT_P016; + pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016; caps = &ctx->caps12; break; default: @@ -228,6 +215,8 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form ? cudaVideoDeinterlaceMode_Weave : ctx->deint_mode; + ctx->progressive_sequence = format->progressive_sequence; + if (!format->progressive_sequence && ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT; else @@ -282,12 +271,6 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form return 0; } - if (format->chroma_format != cudaVideoChromaFormat_420) { - av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n"); - ctx->internal_error = AVERROR(EINVAL); - return 0; - } - ctx->chroma_format = format->chroma_format; cuinfo.CodecType = ctx->codec_type = format->codec; @@ -301,8 +284,15 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form case AV_PIX_FMT_P016: cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016; break; + case AV_PIX_FMT_YUV444P: + cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444; + break; + case AV_PIX_FMT_YUV444P16: + cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit; + break; default: - av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n"); + av_log(avctx, AV_LOG_ERROR, "Unsupported output format: %s\n", + av_get_pix_fmt_name(avctx->sw_pix_fmt)); ctx->internal_error = AVERROR(EINVAL); return 0; } @@ -360,6 +350,9 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF parsed_frame.dispinfo = *dispinfo; ctx->internal_error = 0; + // For some reason, dispinfo->progressive_frame is sometimes wrong. + parsed_frame.dispinfo.progressive_frame = ctx->progressive_sequence; + if (ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) { av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL); } else { @@ -378,7 +371,11 @@ static int cuvid_is_buffer_full(AVCodecContext *avctx) { CuvidContext *ctx = avctx->priv_data; - return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + 2 > ctx->nb_surfaces; + int delay = ctx->cuparseinfo.ulMaxDisplayDelay; + if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave && !ctx->drop_second_field) + delay *= 2; + + return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + delay >= ctx->nb_surfaces; } static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) @@ -388,8 +385,6 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) AVCUDADeviceContext *device_hwctx = device_ctx->hwctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; CUVIDSOURCEDATAPACKET cupkt; - AVPacket filter_packet = { 0 }; - AVPacket filtered_packet = { 0 }; int ret = 0, eret = 0, is_flush = ctx->decoder_flushing; av_log(avctx, AV_LOG_TRACE, "cuvid_decode_packet\n"); @@ -400,29 +395,8 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) if (cuvid_is_buffer_full(avctx) && avpkt && avpkt->size) return AVERROR(EAGAIN); - if (ctx->bsf && avpkt && avpkt->size) { - if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) { - av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n"); - return ret; - } - - if ((ret = av_bsf_send_packet(ctx->bsf, &filter_packet)) < 0) { - av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n"); - av_packet_unref(&filter_packet); - return ret; - } - - if ((ret = av_bsf_receive_packet(ctx->bsf, &filtered_packet)) < 0) { - av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n"); - return ret; - } - - avpkt = &filtered_packet; - } - ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx)); if (ret < 0) { - av_packet_unref(&filtered_packet); return ret; } @@ -446,8 +420,6 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt) ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt)); - av_packet_unref(&filtered_packet); - if (ret < 0) goto error; @@ -507,6 +479,7 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) return ret; if (av_fifo_size(ctx->frame_queue)) { + const AVPixFmtDescriptor *pixdesc; CuvidParsedFrame parsed_frame; CUVIDPROCPARAMS params; unsigned int pitch = 0; @@ -537,7 +510,10 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) goto error; } - for (i = 0; i < 2; i++) { + pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); + + for (i = 0; i < pixdesc->nb_components; i++) { + int height = avctx->height >> (i ? pixdesc->log2_chroma_h : 0); CUDA_MEMCPY2D cpy = { .srcMemoryType = CU_MEMORYTYPE_DEVICE, .dstMemoryType = CU_MEMORYTYPE_DEVICE, @@ -547,22 +523,21 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) .dstPitch = frame->linesize[i], .srcY = offset, .WidthInBytes = FFMIN(pitch, frame->linesize[i]), - .Height = avctx->height >> (i ? 1 : 0), + .Height = height, }; ret = CHECK_CU(ctx->cudl->cuMemcpy2DAsync(&cpy, device_hwctx->stream)); if (ret < 0) goto error; - offset += avctx->height; + offset += height; } - - ret = CHECK_CU(ctx->cudl->cuStreamSynchronize(device_hwctx->stream)); - if (ret < 0) - goto error; - } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 || - avctx->pix_fmt == AV_PIX_FMT_P010 || - avctx->pix_fmt == AV_PIX_FMT_P016) { + } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 || + avctx->pix_fmt == AV_PIX_FMT_P010 || + avctx->pix_fmt == AV_PIX_FMT_P016 || + avctx->pix_fmt == AV_PIX_FMT_YUV444P || + avctx->pix_fmt == AV_PIX_FMT_YUV444P16) { + unsigned int offset = 0; AVFrame *tmp_frame = av_frame_alloc(); if (!tmp_frame) { av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n"); @@ -570,15 +545,24 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) goto error; } + pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); + tmp_frame->format = AV_PIX_FMT_CUDA; tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe); - tmp_frame->data[0] = (uint8_t*)mapped_frame; - tmp_frame->linesize[0] = pitch; - tmp_frame->data[1] = (uint8_t*)(mapped_frame + avctx->height * pitch); - tmp_frame->linesize[1] = pitch; tmp_frame->width = avctx->width; tmp_frame->height = avctx->height; + /* + * Note that the following logic would not work for three plane + * YUV420 because the pitch value is different for the chroma + * planes. + */ + for (i = 0; i < pixdesc->nb_components; i++) { + tmp_frame->data[i] = (uint8_t*)mapped_frame + offset; + tmp_frame->linesize[i] = pitch; + offset += pitch * (avctx->height >> (i ? pixdesc->log2_chroma_h : 0)); + } + ret = ff_get_buffer(avctx, frame, 0); if (ret < 0) { av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n"); @@ -688,9 +672,6 @@ static av_cold int cuvid_decode_end(AVCodecContext *avctx) av_fifo_freep(&ctx->frame_queue); - if (ctx->bsf) - av_bsf_free(&ctx->bsf); - if (ctx->cuparser) ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser); @@ -794,6 +775,12 @@ static int cuvid_test_capabilities(AVCodecContext *avctx, return AVERROR(EINVAL); } + if ((probed_width * probed_height) / 256 > caps->nMaxMBCount) { + av_log(avctx, AV_LOG_ERROR, "Video macroblock count %d exceeds maximum of %d\n", + (int)(probed_width * probed_height) / 256, caps->nMaxMBCount); + return AVERROR(EINVAL); + } + return 0; } @@ -806,7 +793,6 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx) CUVIDSOURCEDATAPACKET seq_pkt; CUcontext cuda_ctx = NULL; CUcontext dummy; - const AVBitStreamFilter *bsf; int ret = 0; enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA, @@ -959,28 +945,12 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx) return AVERROR_BUG; } - if (avctx->codec->id == AV_CODEC_ID_H264 || avctx->codec->id == AV_CODEC_ID_HEVC) { - if (avctx->codec->id == AV_CODEC_ID_H264) - bsf = av_bsf_get_by_name("h264_mp4toannexb"); - else - bsf = av_bsf_get_by_name("hevc_mp4toannexb"); - - if (!bsf) { - ret = AVERROR_BSF_NOT_FOUND; - goto error; - } - if (ret = av_bsf_alloc(bsf, &ctx->bsf)) { - goto error; - } - if (((ret = avcodec_parameters_from_context(ctx->bsf->par_in, avctx)) < 0) || ((ret = av_bsf_init(ctx->bsf)) < 0)) { - av_bsf_free(&ctx->bsf); - goto error; - } - - ctx->cuparse_ext.format.seqhdr_data_length = ctx->bsf->par_out->extradata_size; + if (avctx->codec->bsfs) { + const AVCodecParameters *par = avctx->internal->bsf->par_out; + ctx->cuparse_ext.format.seqhdr_data_length = par->extradata_size; memcpy(ctx->cuparse_ext.raw_seqhdr_data, - ctx->bsf->par_out->extradata, - FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), ctx->bsf->par_out->extradata_size)); + par->extradata, + FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), par->extradata_size)); } else if (avctx->extradata_size > 0) { ctx->cuparse_ext.format.seqhdr_data_length = avctx->extradata_size; memcpy(ctx->cuparse_ext.raw_seqhdr_data, @@ -1125,7 +1095,7 @@ static const AVCodecHWConfigInternal *cuvid_hw_configs[] = { NULL }; -#define DEFINE_CUVID_CODEC(x, X) \ +#define DEFINE_CUVID_CODEC(x, X, bsf_name) \ static const AVClass x##_cuvid_class = { \ .class_name = #x "_cuvid", \ .item_name = av_default_item_name, \ @@ -1144,6 +1114,7 @@ static const AVCodecHWConfigInternal *cuvid_hw_configs[] = { .decode = cuvid_decode_frame, \ .receive_frame = cuvid_output_frame, \ .flush = cuvid_flush, \ + .bsfs = bsf_name, \ .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \ .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \ AV_PIX_FMT_NV12, \ @@ -1155,37 +1126,37 @@ static const AVCodecHWConfigInternal *cuvid_hw_configs[] = { }; #if CONFIG_HEVC_CUVID_DECODER -DEFINE_CUVID_CODEC(hevc, HEVC) +DEFINE_CUVID_CODEC(hevc, HEVC, "hevc_mp4toannexb") #endif #if CONFIG_H264_CUVID_DECODER -DEFINE_CUVID_CODEC(h264, H264) +DEFINE_CUVID_CODEC(h264, H264, "h264_mp4toannexb") #endif #if CONFIG_MJPEG_CUVID_DECODER -DEFINE_CUVID_CODEC(mjpeg, MJPEG) +DEFINE_CUVID_CODEC(mjpeg, MJPEG, NULL) #endif #if CONFIG_MPEG1_CUVID_DECODER -DEFINE_CUVID_CODEC(mpeg1, MPEG1VIDEO) +DEFINE_CUVID_CODEC(mpeg1, MPEG1VIDEO, NULL) #endif #if CONFIG_MPEG2_CUVID_DECODER -DEFINE_CUVID_CODEC(mpeg2, MPEG2VIDEO) +DEFINE_CUVID_CODEC(mpeg2, MPEG2VIDEO, NULL) #endif #if CONFIG_MPEG4_CUVID_DECODER -DEFINE_CUVID_CODEC(mpeg4, MPEG4) +DEFINE_CUVID_CODEC(mpeg4, MPEG4, NULL) #endif #if CONFIG_VP8_CUVID_DECODER -DEFINE_CUVID_CODEC(vp8, VP8) +DEFINE_CUVID_CODEC(vp8, VP8, NULL) #endif #if CONFIG_VP9_CUVID_DECODER -DEFINE_CUVID_CODEC(vp9, VP9) +DEFINE_CUVID_CODEC(vp9, VP9, NULL) #endif #if CONFIG_VC1_CUVID_DECODER -DEFINE_CUVID_CODEC(vc1, VC1) +DEFINE_CUVID_CODEC(vc1, VC1, NULL) #endif