X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fcuviddec.c;h=bce584c9c60bb6cdb2c1f48ffd8980ef36e07120;hb=c5324d92c5f206dcdc2cf93ae237eaa7c1ad0a40;hp=4d3caf924ed3aea1951c810a15ca22d36d510421;hpb=ef71ef5f30ddf1cd61e46628a04608892caf76d2;p=ffmpeg

diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
index 4d3caf924ed..bce584c9c60 100644
--- a/libavcodec/cuviddec.c
+++ b/libavcodec/cuviddec.c
@@ -25,6 +25,7 @@
 #include "libavutil/mathematics.h"
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_cuda_internal.h"
+#include "libavutil/cuda_check.h"
 #include "libavutil/fifo.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
@@ -32,9 +33,15 @@
 
 #include "avcodec.h"
 #include "decode.h"
-#include "hwaccel.h"
+#include "hwconfig.h"
+#include "nvdec.h"
 #include "internal.h"
 
+#if !NVDECAPI_CHECK_VERSION(9, 0)
+#define cudaVideoSurfaceFormat_YUV444 2
+#define cudaVideoSurfaceFormat_YUV444_16Bit 3
+#endif
+
 typedef struct CuvidContext
 {
     AVClass *avclass;
@@ -63,13 +70,12 @@ typedef struct CuvidContext
     AVBufferRef *hwdevice;
     AVBufferRef *hwframe;
 
-    AVBSFContext *bsf;
-
     AVFifoBuffer *frame_queue;
 
     int deint_mode;
     int deint_mode_current;
     int64_t prev_pts;
+    int progressive_sequence;
 
     int internal_error;
     int decoder_flushing;
@@ -95,29 +101,7 @@ typedef struct CuvidParsedFrame
     int is_deinterlacing;
 } CuvidParsedFrame;
 
-static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
-{
-    CuvidContext *ctx = avctx->priv_data;
-    const char *err_name;
-    const char *err_string;
-
-    av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);
-
-    if (err == CUDA_SUCCESS)
-        return 0;
-
-    ctx->cudl->cuGetErrorName(err, &err_name);
-    ctx->cudl->cuGetErrorString(err, &err_string);
-
-    av_log(avctx, AV_LOG_ERROR, "%s failed", func);
-    if (err_name && err_string)
-        av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
-    av_log(avctx, AV_LOG_ERROR, "\n");
-
-    return AVERROR_EXTERNAL;
-}
-
-#define CHECK_CU(x) check_cu(avctx, (x), #x)
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x)
 
 static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
 {
@@ -127,6 +111,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
     CUVIDDECODECAPS *caps = NULL;
     CUVIDDECODECREATEINFO cuinfo;
     int surface_fmt;
+    int chroma_444;
 
     int old_width = avctx->width;
     int old_height = avctx->height;
@@ -169,17 +154,19 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
     cuinfo.target_rect.right = cuinfo.ulTargetWidth;
     cuinfo.target_rect.bottom = cuinfo.ulTargetHeight;
 
+    chroma_444 = format->chroma_format == cudaVideoChromaFormat_444;
+
     switch (format->bit_depth_luma_minus8) {
     case 0: // 8-bit
-        pix_fmts[1] = AV_PIX_FMT_NV12;
+        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
         caps = &ctx->caps8;
         break;
     case 2: // 10-bit
-        pix_fmts[1] = AV_PIX_FMT_P010;
+        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
         caps = &ctx->caps10;
         break;
     case 4: // 12-bit
-        pix_fmts[1] = AV_PIX_FMT_P016;
+        pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
         caps = &ctx->caps12;
         break;
     default:
@@ -228,6 +215,8 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
                               ? cudaVideoDeinterlaceMode_Weave
                               : ctx->deint_mode;
 
+    ctx->progressive_sequence = format->progressive_sequence;
+
     if (!format->progressive_sequence && ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave)
         avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;
     else
@@ -282,12 +271,6 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
         return 0;
     }
 
-    if (format->chroma_format != cudaVideoChromaFormat_420) {
-        av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
-        ctx->internal_error = AVERROR(EINVAL);
-        return 0;
-    }
-
     ctx->chroma_format = format->chroma_format;
 
     cuinfo.CodecType = ctx->codec_type = format->codec;
@@ -301,8 +284,15 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
     case AV_PIX_FMT_P016:
         cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
         break;
+    case AV_PIX_FMT_YUV444P:
+        cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444;
+        break;
+    case AV_PIX_FMT_YUV444P16:
+        cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
+        break;
     default:
-        av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n");
+        av_log(avctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
+               av_get_pix_fmt_name(avctx->sw_pix_fmt));
         ctx->internal_error = AVERROR(EINVAL);
         return 0;
     }
@@ -360,6 +350,9 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF
     parsed_frame.dispinfo = *dispinfo;
     ctx->internal_error = 0;
 
+    // For some reason, dispinfo->progressive_frame is sometimes wrong.
+    parsed_frame.dispinfo.progressive_frame = ctx->progressive_sequence;
+
     if (ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) {
         av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
     } else {
@@ -378,7 +371,11 @@ static int cuvid_is_buffer_full(AVCodecContext *avctx)
 {
     CuvidContext *ctx = avctx->priv_data;
 
-    return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + 2 > ctx->nb_surfaces;
+    int delay = ctx->cuparseinfo.ulMaxDisplayDelay;
+    if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave && !ctx->drop_second_field)
+        delay *= 2;
+
+    return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + delay >= ctx->nb_surfaces;
 }
 
 static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
@@ -388,8 +385,6 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
     AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
     CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
     CUVIDSOURCEDATAPACKET cupkt;
-    AVPacket filter_packet = { 0 };
-    AVPacket filtered_packet = { 0 };
     int ret = 0, eret = 0, is_flush = ctx->decoder_flushing;
 
     av_log(avctx, AV_LOG_TRACE, "cuvid_decode_packet\n");
@@ -400,29 +395,8 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
     if (cuvid_is_buffer_full(avctx) && avpkt && avpkt->size)
         return AVERROR(EAGAIN);
 
-    if (ctx->bsf && avpkt && avpkt->size) {
-        if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n");
-            return ret;
-        }
-
-        if ((ret = av_bsf_send_packet(ctx->bsf, &filter_packet)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n");
-            av_packet_unref(&filter_packet);
-            return ret;
-        }
-
-        if ((ret = av_bsf_receive_packet(ctx->bsf, &filtered_packet)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n");
-            return ret;
-        }
-
-        avpkt = &filtered_packet;
-    }
-
     ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
     if (ret < 0) {
-        av_packet_unref(&filtered_packet);
         return ret;
     }
 
@@ -446,8 +420,6 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
 
     ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt));
 
-    av_packet_unref(&filtered_packet);
-
     if (ret < 0)
         goto error;
 
@@ -507,6 +479,7 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
         return ret;
 
     if (av_fifo_size(ctx->frame_queue)) {
+        const AVPixFmtDescriptor *pixdesc;
         CuvidParsedFrame parsed_frame;
         CUVIDPROCPARAMS params;
         unsigned int pitch = 0;
@@ -537,7 +510,10 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                 goto error;
             }
 
-            for (i = 0; i < 2; i++) {
+            pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+
+            for (i = 0; i < pixdesc->nb_components; i++) {
+                int height = avctx->height >> (i ? pixdesc->log2_chroma_h : 0);
                 CUDA_MEMCPY2D cpy = {
                     .srcMemoryType = CU_MEMORYTYPE_DEVICE,
                     .dstMemoryType = CU_MEMORYTYPE_DEVICE,
@@ -547,22 +523,21 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                     .dstPitch      = frame->linesize[i],
                     .srcY          = offset,
                     .WidthInBytes  = FFMIN(pitch, frame->linesize[i]),
-                    .Height        = avctx->height >> (i ? 1 : 0),
+                    .Height        = height,
                 };
 
                 ret = CHECK_CU(ctx->cudl->cuMemcpy2DAsync(&cpy, device_hwctx->stream));
                 if (ret < 0)
                     goto error;
 
-                offset += avctx->height;
+                offset += height;
             }
-
-            ret = CHECK_CU(ctx->cudl->cuStreamSynchronize(device_hwctx->stream));
-            if (ret < 0)
-                goto error;
-        } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
-                   avctx->pix_fmt == AV_PIX_FMT_P010 ||
-                   avctx->pix_fmt == AV_PIX_FMT_P016) {
+        } else if (avctx->pix_fmt == AV_PIX_FMT_NV12      ||
+                   avctx->pix_fmt == AV_PIX_FMT_P010      ||
+                   avctx->pix_fmt == AV_PIX_FMT_P016      ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV444P   ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV444P16) {
+            unsigned int offset = 0;
             AVFrame *tmp_frame = av_frame_alloc();
             if (!tmp_frame) {
                 av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
@@ -570,15 +545,24 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                 goto error;
             }
 
+            pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+
             tmp_frame->format        = AV_PIX_FMT_CUDA;
             tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
-            tmp_frame->data[0]       = (uint8_t*)mapped_frame;
-            tmp_frame->linesize[0]   = pitch;
-            tmp_frame->data[1]       = (uint8_t*)(mapped_frame + avctx->height * pitch);
-            tmp_frame->linesize[1]   = pitch;
             tmp_frame->width         = avctx->width;
             tmp_frame->height        = avctx->height;
 
+            /*
+             * Note that the following logic would not work for three plane
+             * YUV420 because the pitch value is different for the chroma
+             * planes.
+             */
+            for (i = 0; i < pixdesc->nb_components; i++) {
+                tmp_frame->data[i]     = (uint8_t*)mapped_frame + offset;
+                tmp_frame->linesize[i] = pitch;
+                offset += pitch * (avctx->height >> (i ? pixdesc->log2_chroma_h : 0));
+            }
+
             ret = ff_get_buffer(avctx, frame, 0);
             if (ret < 0) {
                 av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");
@@ -688,9 +672,6 @@ static av_cold int cuvid_decode_end(AVCodecContext *avctx)
 
     av_fifo_freep(&ctx->frame_queue);
 
-    if (ctx->bsf)
-        av_bsf_free(&ctx->bsf);
-
     if (ctx->cuparser)
         ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
 
@@ -794,6 +775,12 @@ static int cuvid_test_capabilities(AVCodecContext *avctx,
         return AVERROR(EINVAL);
     }
 
+    if ((probed_width * probed_height) / 256 > caps->nMaxMBCount) {
+        av_log(avctx, AV_LOG_ERROR, "Video macroblock count %d exceeds maximum of %d\n",
+               (int)(probed_width * probed_height) / 256, caps->nMaxMBCount);
+        return AVERROR(EINVAL);
+    }
+
     return 0;
 }
 
@@ -806,7 +793,6 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
     CUVIDSOURCEDATAPACKET seq_pkt;
     CUcontext cuda_ctx = NULL;
     CUcontext dummy;
-    const AVBitStreamFilter *bsf;
     int ret = 0;
 
     enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
@@ -959,28 +945,12 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
         return AVERROR_BUG;
     }
 
-    if (avctx->codec->id == AV_CODEC_ID_H264 || avctx->codec->id == AV_CODEC_ID_HEVC) {
-        if (avctx->codec->id == AV_CODEC_ID_H264)
-            bsf = av_bsf_get_by_name("h264_mp4toannexb");
-        else
-            bsf = av_bsf_get_by_name("hevc_mp4toannexb");
-
-        if (!bsf) {
-            ret = AVERROR_BSF_NOT_FOUND;
-            goto error;
-        }
-        if (ret = av_bsf_alloc(bsf, &ctx->bsf)) {
-            goto error;
-        }
-        if (((ret = avcodec_parameters_from_context(ctx->bsf->par_in, avctx)) < 0) || ((ret = av_bsf_init(ctx->bsf)) < 0)) {
-            av_bsf_free(&ctx->bsf);
-            goto error;
-        }
-
-        ctx->cuparse_ext.format.seqhdr_data_length = ctx->bsf->par_out->extradata_size;
+    if (avctx->codec->bsfs) {
+        const AVCodecParameters *par = avctx->internal->bsf->par_out;
+        ctx->cuparse_ext.format.seqhdr_data_length = par->extradata_size;
         memcpy(ctx->cuparse_ext.raw_seqhdr_data,
-               ctx->bsf->par_out->extradata,
-               FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), ctx->bsf->par_out->extradata_size));
+               par->extradata,
+               FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), par->extradata_size));
     } else if (avctx->extradata_size > 0) {
         ctx->cuparse_ext.format.seqhdr_data_length = avctx->extradata_size;
         memcpy(ctx->cuparse_ext.raw_seqhdr_data,
@@ -1125,7 +1095,7 @@ static const AVCodecHWConfigInternal *cuvid_hw_configs[] = {
     NULL
 };
 
-#define DEFINE_CUVID_CODEC(x, X) \
+#define DEFINE_CUVID_CODEC(x, X, bsf_name) \
     static const AVClass x##_cuvid_class = { \
         .class_name = #x "_cuvid", \
         .item_name = av_default_item_name, \
@@ -1144,6 +1114,7 @@ static const AVCodecHWConfigInternal *cuvid_hw_configs[] = {
         .decode         = cuvid_decode_frame, \
         .receive_frame  = cuvid_output_frame, \
         .flush          = cuvid_flush, \
+        .bsfs           = bsf_name, \
         .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \
         .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
                                                         AV_PIX_FMT_NV12, \
@@ -1155,37 +1126,37 @@ static const AVCodecHWConfigInternal *cuvid_hw_configs[] = {
     };
 
 #if CONFIG_HEVC_CUVID_DECODER
-DEFINE_CUVID_CODEC(hevc, HEVC)
+DEFINE_CUVID_CODEC(hevc, HEVC, "hevc_mp4toannexb")
 #endif
 
 #if CONFIG_H264_CUVID_DECODER
-DEFINE_CUVID_CODEC(h264, H264)
+DEFINE_CUVID_CODEC(h264, H264, "h264_mp4toannexb")
 #endif
 
 #if CONFIG_MJPEG_CUVID_DECODER
-DEFINE_CUVID_CODEC(mjpeg, MJPEG)
+DEFINE_CUVID_CODEC(mjpeg, MJPEG, NULL)
 #endif
 
 #if CONFIG_MPEG1_CUVID_DECODER
-DEFINE_CUVID_CODEC(mpeg1, MPEG1VIDEO)
+DEFINE_CUVID_CODEC(mpeg1, MPEG1VIDEO, NULL)
 #endif
 
 #if CONFIG_MPEG2_CUVID_DECODER
-DEFINE_CUVID_CODEC(mpeg2, MPEG2VIDEO)
+DEFINE_CUVID_CODEC(mpeg2, MPEG2VIDEO, NULL)
 #endif
 
 #if CONFIG_MPEG4_CUVID_DECODER
-DEFINE_CUVID_CODEC(mpeg4, MPEG4)
+DEFINE_CUVID_CODEC(mpeg4, MPEG4, NULL)
 #endif
 
 #if CONFIG_VP8_CUVID_DECODER
-DEFINE_CUVID_CODEC(vp8, VP8)
+DEFINE_CUVID_CODEC(vp8, VP8, NULL)
 #endif
 
 #if CONFIG_VP9_CUVID_DECODER
-DEFINE_CUVID_CODEC(vp9, VP9)
+DEFINE_CUVID_CODEC(vp9, VP9, NULL)
 #endif
 
 #if CONFIG_VC1_CUVID_DECODER
-DEFINE_CUVID_CODEC(vc1, VC1)
+DEFINE_CUVID_CODEC(vc1, VC1, NULL)
 #endif