#include "libavutil/mathematics.h"
#include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_cuda_internal.h"
+#include "libavutil/cuda_check.h"
#include "libavutil/fifo.h"
#include "libavutil/log.h"
#include "libavutil/opt.h"
#include "avcodec.h"
#include "decode.h"
-#include "hwaccel.h"
+#include "hwconfig.h"
+#include "nvdec.h"
#include "internal.h"
+#if !NVDECAPI_CHECK_VERSION(9, 0)
+#define cudaVideoSurfaceFormat_YUV444 2
+#define cudaVideoSurfaceFormat_YUV444_16Bit 3
+#endif
+
typedef struct CuvidContext
{
AVClass *avclass;
AVBufferRef *hwdevice;
AVBufferRef *hwframe;
- AVBSFContext *bsf;
-
AVFifoBuffer *frame_queue;
int deint_mode;
int deint_mode_current;
int64_t prev_pts;
+ int progressive_sequence;
int internal_error;
int decoder_flushing;
int is_deinterlacing;
} CuvidParsedFrame;
-static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
-{
- CuvidContext *ctx = avctx->priv_data;
- const char *err_name;
- const char *err_string;
-
- av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);
-
- if (err == CUDA_SUCCESS)
- return 0;
-
- ctx->cudl->cuGetErrorName(err, &err_name);
- ctx->cudl->cuGetErrorString(err, &err_string);
-
- av_log(avctx, AV_LOG_ERROR, "%s failed", func);
- if (err_name && err_string)
- av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
- av_log(avctx, AV_LOG_ERROR, "\n");
-
- return AVERROR_EXTERNAL;
-}
-
-#define CHECK_CU(x) check_cu(avctx, (x), #x)
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x)
static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
{
CUVIDDECODECAPS *caps = NULL;
CUVIDDECODECREATEINFO cuinfo;
int surface_fmt;
+ int chroma_444;
int old_width = avctx->width;
int old_height = avctx->height;
cuinfo.target_rect.right = cuinfo.ulTargetWidth;
cuinfo.target_rect.bottom = cuinfo.ulTargetHeight;
+ chroma_444 = format->chroma_format == cudaVideoChromaFormat_444;
+
switch (format->bit_depth_luma_minus8) {
case 0: // 8-bit
- pix_fmts[1] = AV_PIX_FMT_NV12;
+ pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
caps = &ctx->caps8;
break;
case 2: // 10-bit
- pix_fmts[1] = AV_PIX_FMT_P010;
+ pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
caps = &ctx->caps10;
break;
case 4: // 12-bit
- pix_fmts[1] = AV_PIX_FMT_P016;
+ pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
caps = &ctx->caps12;
break;
default:
? cudaVideoDeinterlaceMode_Weave
: ctx->deint_mode;
+ ctx->progressive_sequence = format->progressive_sequence;
+
if (!format->progressive_sequence && ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave)
avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;
else
return 0;
}
- if (format->chroma_format != cudaVideoChromaFormat_420) {
- av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
- ctx->internal_error = AVERROR(EINVAL);
- return 0;
- }
-
ctx->chroma_format = format->chroma_format;
cuinfo.CodecType = ctx->codec_type = format->codec;
case AV_PIX_FMT_P016:
cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
break;
+ case AV_PIX_FMT_YUV444P:
+ cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444;
+ break;
+ case AV_PIX_FMT_YUV444P16:
+ cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
+ break;
default:
- av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n");
+ av_log(avctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
+ av_get_pix_fmt_name(avctx->sw_pix_fmt));
ctx->internal_error = AVERROR(EINVAL);
return 0;
}
parsed_frame.dispinfo = *dispinfo;
ctx->internal_error = 0;
+ // dispinfo->progressive_frame is sometimes wrong, so use the sequence-level progressive_sequence flag instead.
+ parsed_frame.dispinfo.progressive_frame = ctx->progressive_sequence;
+
if (ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) {
av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
} else {
{
CuvidContext *ctx = avctx->priv_data;
- return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + 2 > ctx->nb_surfaces;
+ int delay = ctx->cuparseinfo.ulMaxDisplayDelay;
+ if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave && !ctx->drop_second_field)
+ delay *= 2;
+
+ return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + delay >= ctx->nb_surfaces;
}
static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
CUVIDSOURCEDATAPACKET cupkt;
- AVPacket filter_packet = { 0 };
- AVPacket filtered_packet = { 0 };
int ret = 0, eret = 0, is_flush = ctx->decoder_flushing;
av_log(avctx, AV_LOG_TRACE, "cuvid_decode_packet\n");
if (cuvid_is_buffer_full(avctx) && avpkt && avpkt->size)
return AVERROR(EAGAIN);
- if (ctx->bsf && avpkt && avpkt->size) {
- if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) {
- av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n");
- return ret;
- }
-
- if ((ret = av_bsf_send_packet(ctx->bsf, &filter_packet)) < 0) {
- av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n");
- av_packet_unref(&filter_packet);
- return ret;
- }
-
- if ((ret = av_bsf_receive_packet(ctx->bsf, &filtered_packet)) < 0) {
- av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n");
- return ret;
- }
-
- avpkt = &filtered_packet;
- }
-
ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
if (ret < 0) {
- av_packet_unref(&filtered_packet);
return ret;
}
ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt));
- av_packet_unref(&filtered_packet);
-
if (ret < 0)
goto error;
return ret;
if (av_fifo_size(ctx->frame_queue)) {
+ const AVPixFmtDescriptor *pixdesc;
CuvidParsedFrame parsed_frame;
CUVIDPROCPARAMS params;
unsigned int pitch = 0;
goto error;
}
- for (i = 0; i < 2; i++) {
+ pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+
+ for (i = 0; i < pixdesc->nb_components; i++) {
+ int height = avctx->height >> (i ? pixdesc->log2_chroma_h : 0);
CUDA_MEMCPY2D cpy = {
.srcMemoryType = CU_MEMORYTYPE_DEVICE,
.dstMemoryType = CU_MEMORYTYPE_DEVICE,
.dstPitch = frame->linesize[i],
.srcY = offset,
.WidthInBytes = FFMIN(pitch, frame->linesize[i]),
- .Height = avctx->height >> (i ? 1 : 0),
+ .Height = height,
};
ret = CHECK_CU(ctx->cudl->cuMemcpy2DAsync(&cpy, device_hwctx->stream));
if (ret < 0)
goto error;
- offset += avctx->height;
+ offset += height;
}
-
- ret = CHECK_CU(ctx->cudl->cuStreamSynchronize(device_hwctx->stream));
- if (ret < 0)
- goto error;
- } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
- avctx->pix_fmt == AV_PIX_FMT_P010 ||
- avctx->pix_fmt == AV_PIX_FMT_P016) {
+ } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
+ avctx->pix_fmt == AV_PIX_FMT_P010 ||
+ avctx->pix_fmt == AV_PIX_FMT_P016 ||
+ avctx->pix_fmt == AV_PIX_FMT_YUV444P ||
+ avctx->pix_fmt == AV_PIX_FMT_YUV444P16) {
+ unsigned int offset = 0;
AVFrame *tmp_frame = av_frame_alloc();
if (!tmp_frame) {
av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
goto error;
}
+ pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+
tmp_frame->format = AV_PIX_FMT_CUDA;
tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
- tmp_frame->data[0] = (uint8_t*)mapped_frame;
- tmp_frame->linesize[0] = pitch;
- tmp_frame->data[1] = (uint8_t*)(mapped_frame + avctx->height * pitch);
- tmp_frame->linesize[1] = pitch;
tmp_frame->width = avctx->width;
tmp_frame->height = avctx->height;
+ /*
+ * Note that the following logic would not work for three plane
+ * YUV420 because the pitch value is different for the chroma
+ * planes.
+ */
+ for (i = 0; i < pixdesc->nb_components; i++) {
+ tmp_frame->data[i] = (uint8_t*)mapped_frame + offset;
+ tmp_frame->linesize[i] = pitch;
+ offset += pitch * (avctx->height >> (i ? pixdesc->log2_chroma_h : 0));
+ }
+
ret = ff_get_buffer(avctx, frame, 0);
if (ret < 0) {
av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");
av_fifo_freep(&ctx->frame_queue);
- if (ctx->bsf)
- av_bsf_free(&ctx->bsf);
-
if (ctx->cuparser)
ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
return AVERROR(EINVAL);
}
+ if ((probed_width * probed_height) / 256 > caps->nMaxMBCount) {
+ av_log(avctx, AV_LOG_ERROR, "Video macroblock count %d exceeds maximum of %d\n",
+ (int)(probed_width * probed_height) / 256, caps->nMaxMBCount);
+ return AVERROR(EINVAL);
+ }
+
return 0;
}
CUVIDSOURCEDATAPACKET seq_pkt;
CUcontext cuda_ctx = NULL;
CUcontext dummy;
- const AVBitStreamFilter *bsf;
int ret = 0;
enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
return AVERROR_BUG;
}
- if (avctx->codec->id == AV_CODEC_ID_H264 || avctx->codec->id == AV_CODEC_ID_HEVC) {
- if (avctx->codec->id == AV_CODEC_ID_H264)
- bsf = av_bsf_get_by_name("h264_mp4toannexb");
- else
- bsf = av_bsf_get_by_name("hevc_mp4toannexb");
-
- if (!bsf) {
- ret = AVERROR_BSF_NOT_FOUND;
- goto error;
- }
- if (ret = av_bsf_alloc(bsf, &ctx->bsf)) {
- goto error;
- }
- if (((ret = avcodec_parameters_from_context(ctx->bsf->par_in, avctx)) < 0) || ((ret = av_bsf_init(ctx->bsf)) < 0)) {
- av_bsf_free(&ctx->bsf);
- goto error;
- }
-
- ctx->cuparse_ext.format.seqhdr_data_length = ctx->bsf->par_out->extradata_size;
+ if (avctx->codec->bsfs) {
+ const AVCodecParameters *par = avctx->internal->bsf->par_out;
+ ctx->cuparse_ext.format.seqhdr_data_length = par->extradata_size;
memcpy(ctx->cuparse_ext.raw_seqhdr_data,
- ctx->bsf->par_out->extradata,
- FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), ctx->bsf->par_out->extradata_size));
+ par->extradata,
+ FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), par->extradata_size));
} else if (avctx->extradata_size > 0) {
ctx->cuparse_ext.format.seqhdr_data_length = avctx->extradata_size;
memcpy(ctx->cuparse_ext.raw_seqhdr_data,
NULL
};
-#define DEFINE_CUVID_CODEC(x, X) \
+#define DEFINE_CUVID_CODEC(x, X, bsf_name) \
static const AVClass x##_cuvid_class = { \
.class_name = #x "_cuvid", \
.item_name = av_default_item_name, \
.decode = cuvid_decode_frame, \
.receive_frame = cuvid_output_frame, \
.flush = cuvid_flush, \
+ .bsfs = bsf_name, \
.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \
.pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
AV_PIX_FMT_NV12, \
};
#if CONFIG_HEVC_CUVID_DECODER
-DEFINE_CUVID_CODEC(hevc, HEVC)
+DEFINE_CUVID_CODEC(hevc, HEVC, "hevc_mp4toannexb")
#endif
#if CONFIG_H264_CUVID_DECODER
-DEFINE_CUVID_CODEC(h264, H264)
+DEFINE_CUVID_CODEC(h264, H264, "h264_mp4toannexb")
#endif
#if CONFIG_MJPEG_CUVID_DECODER
-DEFINE_CUVID_CODEC(mjpeg, MJPEG)
+DEFINE_CUVID_CODEC(mjpeg, MJPEG, NULL)
#endif
#if CONFIG_MPEG1_CUVID_DECODER
-DEFINE_CUVID_CODEC(mpeg1, MPEG1VIDEO)
+DEFINE_CUVID_CODEC(mpeg1, MPEG1VIDEO, NULL)
#endif
#if CONFIG_MPEG2_CUVID_DECODER
-DEFINE_CUVID_CODEC(mpeg2, MPEG2VIDEO)
+DEFINE_CUVID_CODEC(mpeg2, MPEG2VIDEO, NULL)
#endif
#if CONFIG_MPEG4_CUVID_DECODER
-DEFINE_CUVID_CODEC(mpeg4, MPEG4)
+DEFINE_CUVID_CODEC(mpeg4, MPEG4, NULL)
#endif
#if CONFIG_VP8_CUVID_DECODER
-DEFINE_CUVID_CODEC(vp8, VP8)
+DEFINE_CUVID_CODEC(vp8, VP8, NULL)
#endif
#if CONFIG_VP9_CUVID_DECODER
-DEFINE_CUVID_CODEC(vp9, VP9)
+DEFINE_CUVID_CODEC(vp9, VP9, NULL)
#endif
#if CONFIG_VC1_CUVID_DECODER
-DEFINE_CUVID_CODEC(vc1, VC1)
+DEFINE_CUVID_CODEC(vc1, VC1, NULL)
#endif