#include "libavutil/hwcontext_cuda.h"
#include "libavutil/fifo.h"
#include "libavutil/log.h"
+#include "libavutil/opt.h"
#include "avcodec.h"
#include "internal.h"
-#include <nvcuvid.h>
+#include "compat/cuda/nvcuvid.h"
-#define MAX_FRAME_COUNT 20
+#define MAX_FRAME_COUNT 25
typedef struct CuvidContext
{
+ AVClass *avclass; // class for AVOptions (see the options[] table / priv_class)
+
CUvideodecoder cudecoder;
CUvideoparser cuparser;
+ char *cu_gpu; // "gpu" option: device string passed to av_hwdevice_ctx_create()
+
AVBufferRef *hwdevice;
AVBufferRef *hwframe;
AVFifoBuffer *frame_queue;
+ int deint_mode; // "deint" option: a cudaVideoDeinterlaceMode_* value
+ int64_t prev_pts; // pts of the previous first field; INT64_MIN until a pair was seen
+
int internal_error;
- int ever_flushed;
+ int decoder_flushing; // set once CUVID_PKT_ENDOFSTREAM was queued; cleared by cuvid_flush()
cudaVideoCodec codec_type;
cudaVideoChromaFormat chroma_format;
CUVIDEOFORMATEX cuparse_ext;
} CuvidContext;
+typedef struct CuvidParsedFrame
+{
+ CUVIDPARSERDISPINFO dispinfo; // display info delivered by the parser display callback
+ int second_field; // nonzero for the second entry of a deinterlaced field pair
+ int is_deinterlacing; // nonzero when this frame was queued twice for rate-doubling deinterlace
+} CuvidParsedFrame;
+
static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
{
const char *err_name;
AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
CUVIDDECODECREATEINFO cuinfo;
- av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback\n");
+ av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);
ctx->internal_error = 0;
(AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y },
(AVRational){ avctx->width, avctx->height }));
- if (!format->progressive_sequence)
+ if (!format->progressive_sequence && ctx->deint_mode == cudaVideoDeinterlaceMode_Weave)
avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;
else
avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT;
return 0;
}
- if (hwframe_ctx->pool && !ctx->ever_flushed) {
- av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized\n");
+ /* A caller-supplied frames pool is usable only if it can hold the new stream. */
+ if (hwframe_ctx->pool && (
+ hwframe_ctx->width < avctx->width ||
+ hwframe_ctx->height < avctx->height ||
+ hwframe_ctx->format != AV_PIX_FMT_CUDA ||
+ hwframe_ctx->sw_format != AV_PIX_FMT_NV12)) {
+ av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
+ ctx->internal_error = AVERROR(EINVAL);
+ return 0;
+ }
+
+ if (format->chroma_format != cudaVideoChromaFormat_420) {
+ av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
ctx->internal_error = AVERROR(EINVAL);
return 0;
}
cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
cuinfo.ulNumOutputSurfaces = 1;
cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
+ cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
- cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
+ /* Progressive content never needs deinterlacing; otherwise honour the "deint" option. */
+ if (format->progressive_sequence) {
+ ctx->deint_mode = cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
+ } else {
+ cuinfo.DeinterlaceMode = ctx->deint_mode;
+ }
+
+ /* Bob/adaptive deinterlacing emits one frame per field, doubling the output rate. */
+ if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave)
+ avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});
ctx->internal_error = CHECK_CU(cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
if (ctx->internal_error < 0)
if (!hwframe_ctx->pool) {
hwframe_ctx->format = AV_PIX_FMT_CUDA;
hwframe_ctx->sw_format = AV_PIX_FMT_NV12;
- hwframe_ctx->width = FFALIGN(avctx->coded_width, 32);
- hwframe_ctx->height = FFALIGN(avctx->coded_height, 32);
+ /* Pool frames now use display dimensions rather than the 32-aligned coded size. */
+ hwframe_ctx->width = avctx->width;
+ hwframe_ctx->height = avctx->height;
if ((ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe)) < 0) {
av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n");
{
AVCodecContext *avctx = opaque;
CuvidContext *ctx = avctx->priv_data;
-
- av_log(avctx, AV_LOG_TRACE, "pfnDisplayPicture\n");
+ CuvidParsedFrame parsed_frame = { *dispinfo, 0, 0 };
ctx->internal_error = 0;
- av_fifo_generic_write(ctx->frame_queue, dispinfo, sizeof(CUVIDPARSERDISPINFO), NULL);
+ if (ctx->deint_mode == cudaVideoDeinterlaceMode_Weave) {
+ av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
+ } else {
+ parsed_frame.is_deinterlacing = 1;
+ av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
+ parsed_frame.second_field = 1;
+ av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
+ }
return 1;
}
-static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
+/* Queue one packet into the cuvid parser; a NULL/empty packet begins flushing.
+ * Decoded frames come out via cuvid_output_frame(). */
+static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
{
CuvidContext *ctx = avctx->priv_data;
AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
- AVFrame *frame = data;
CUVIDSOURCEDATAPACKET cupkt;
AVPacket filter_packet = { 0 };
AVPacket filtered_packet = { 0 };
- CUdeviceptr mapped_frame = 0;
- int ret = 0, eret = 0;
+ int ret = 0, eret = 0, is_flush = ctx->decoder_flushing;
- if (ctx->bsf && avpkt->size) {
+ av_log(avctx, AV_LOG_TRACE, "cuvid_decode_packet\n");
+
+ /* No new data may be submitted once end-of-stream was signalled. */
+ if (is_flush && avpkt && avpkt->size)
+ return AVERROR_EOF;
+
+ /* Back-pressure: keep headroom in the frame queue so the parser display
+ * callback can never overflow it. */
+ if (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame) > MAX_FRAME_COUNT - 2 && avpkt && avpkt->size)
+ return AVERROR(EAGAIN);
+
+ if (ctx->bsf && avpkt && avpkt->size) {
if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) {
av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n");
return ret;
memset(&cupkt, 0, sizeof(cupkt));
- if (avpkt->size) {
+ if (avpkt && avpkt->size) {
cupkt.payload_size = avpkt->size;
cupkt.payload = avpkt->data;
if (avpkt->pts != AV_NOPTS_VALUE) {
cupkt.flags = CUVID_PKT_TIMESTAMP;
- cupkt.timestamp = av_rescale_q(avpkt->pts, avctx->time_base, (AVRational){1, 10000000});
+ /* cuvid timestamps use a fixed 100ns (1/10000000) timebase. */
+ if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
+ cupkt.timestamp = av_rescale_q(avpkt->pts, avctx->pkt_timebase, (AVRational){1, 10000000});
+ else
+ cupkt.timestamp = avpkt->pts;
}
} else {
cupkt.flags = CUVID_PKT_ENDOFSTREAM;
+ ctx->decoder_flushing = 1;
}
ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &cupkt));
av_packet_unref(&filtered_packet);
- if (ret < 0) {
- if (ctx->internal_error)
- ret = ctx->internal_error;
+ if (ret < 0)
+ goto error;
+
+ // cuvidParseVideoData doesn't return an error just because stuff failed...
+ if (ctx->internal_error) {
+ av_log(avctx, AV_LOG_ERROR, "cuvid decode callback error\n");
+ ret = ctx->internal_error;
goto error;
}
+error:
+ eret = CHECK_CU(cuCtxPopCurrent(&dummy));
+
+ if (eret < 0)
+ return eret;
+ else if (ret < 0)
+ return ret;
+ else if (is_flush)
+ return AVERROR_EOF;
+ else
+ return 0;
+}
+
+/* Dequeue one decoded frame into |frame|: AVERROR(EAGAIN) when the queue is
+ * empty, AVERROR_EOF once flushing has drained it. */
+static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
+{
+ CuvidContext *ctx = avctx->priv_data;
+ AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
+ AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
+ CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
+ CUdeviceptr mapped_frame = 0;
+ int ret = 0, eret = 0;
+
+ av_log(avctx, AV_LOG_TRACE, "cuvid_output_frame\n");
+
+ /* While flushing, keep pumping the parser so queued frames drain out. */
+ if (ctx->decoder_flushing) {
+ ret = cuvid_decode_packet(avctx, NULL);
+ if (ret < 0 && ret != AVERROR_EOF)
+ return ret;
+ }
+
+ ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
+ if (ret < 0)
+ return ret;
+
if (av_fifo_size(ctx->frame_queue)) {
- CUVIDPARSERDISPINFO dispinfo;
+ CuvidParsedFrame parsed_frame;
CUVIDPROCPARAMS params;
unsigned int pitch = 0;
int offset = 0;
int i;
- av_fifo_generic_read(ctx->frame_queue, &dispinfo, sizeof(CUVIDPARSERDISPINFO), NULL);
+ av_fifo_generic_read(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
memset(&params, 0, sizeof(params));
- params.progressive_frame = dispinfo.progressive_frame;
- params.second_field = 0;
- params.top_field_first = dispinfo.top_field_first;
+ params.progressive_frame = parsed_frame.dispinfo.progressive_frame;
+ params.second_field = parsed_frame.second_field;
+ params.top_field_first = parsed_frame.dispinfo.top_field_first;
- ret = CHECK_CU(cuvidMapVideoFrame(ctx->cudecoder, dispinfo.picture_index, &mapped_frame, &pitch, &params));
+ ret = CHECK_CU(cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
if (ret < 0)
goto error;
.dstPitch = frame->linesize[i],
.srcY = offset,
.WidthInBytes = FFMIN(pitch, frame->linesize[i]),
- .Height = avctx->coded_height >> (i ? 1 : 0),
+ .Height = avctx->height >> (i ? 1 : 0),
};
ret = CHECK_CU(cuMemcpy2D(&cpy));
frame->width = avctx->width;
frame->height = avctx->height;
- frame->pts = av_rescale_q(dispinfo.timestamp, (AVRational){1, 10000000}, avctx->time_base);
+ if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
+ frame->pts = av_rescale_q(parsed_frame.dispinfo.timestamp, (AVRational){1, 10000000}, avctx->pkt_timebase);
+ else
+ frame->pts = parsed_frame.dispinfo.timestamp;
+
+ /* Both queued entries of a deinterlaced pair carry the same parser
+ * timestamp; shift the second field halfway towards the next frame.
+ * NOTE(review): the first-pair fallback divides by pkt_timebase.num and
+ * framerate.num -- assumes both are nonzero here, confirm. */
+ if (parsed_frame.second_field) {
+ if (ctx->prev_pts == INT64_MIN) {
+ ctx->prev_pts = frame->pts;
+ frame->pts += (avctx->pkt_timebase.den * avctx->framerate.den) / (avctx->pkt_timebase.num * avctx->framerate.num);
+ } else {
+ int pts_diff = (frame->pts - ctx->prev_pts) / 2;
+ ctx->prev_pts = frame->pts;
+ frame->pts += pts_diff;
+ }
+ }
/* CUVIDs opaque reordering breaks the internal pkt logic.
* So set pkt_pts and clear all the other pkt_ fields.
*/
+#if FF_API_PKT_PTS
+FF_DISABLE_DEPRECATION_WARNINGS
frame->pkt_pts = frame->pts;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
av_frame_set_pkt_pos(frame, -1);
av_frame_set_pkt_duration(frame, 0);
av_frame_set_pkt_size(frame, -1);
- frame->interlaced_frame = !dispinfo.progressive_frame;
-
- if (!dispinfo.progressive_frame)
- frame->top_field_first = dispinfo.top_field_first;
+ /* Deinterlaced output is progressive even if the source was interlaced. */
+ frame->interlaced_frame = !parsed_frame.is_deinterlacing && !parsed_frame.dispinfo.progressive_frame;
- *got_frame = 1;
+ if (frame->interlaced_frame)
+ frame->top_field_first = parsed_frame.dispinfo.top_field_first;
+ } else if (ctx->decoder_flushing) {
+ ret = AVERROR_EOF;
} else {
- *got_frame = 0;
+ ret = AVERROR(EAGAIN);
}
error:
return ret;
}
+/* Legacy decode() entry point: submit the packet, then try to retrieve one frame. */
+static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
+{
+ CuvidContext *ctx = avctx->priv_data;
+ AVFrame *frame = data;
+ int ret = 0;
+
+ av_log(avctx, AV_LOG_TRACE, "cuvid_decode_frame\n");
+
+ /* Rate-doubling deinterlace queues two frames per decoded picture, which the
+ * one-in/one-out decode() API cannot express. */
+ if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave) {
+ av_log(avctx, AV_LOG_ERROR, "Deinterlacing is not supported via the old API\n");
+ return AVERROR(EINVAL);
+ }
+
+ if (!ctx->decoder_flushing) {
+ ret = cuvid_decode_packet(avctx, avpkt);
+ if (ret < 0)
+ return ret;
+ }
+
+ /* EAGAIN/EOF simply mean no frame this call; any other error is fatal. */
+ ret = cuvid_output_frame(avctx, frame);
+ if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
+ *got_frame = 0;
+ } else if (ret < 0) {
+ return ret;
+ } else {
+ *got_frame = 1;
+ }
+
+ return 0;
+}
+
static av_cold int cuvid_decode_end(AVCodecContext *avctx)
{
CuvidContext *ctx = avctx->priv_data;
return 0;
}
-static void cuvid_ctx_free(AVHWDeviceContext *ctx)
-{
- AVCUDADeviceContext *hwctx = ctx->hwctx;
- cuCtxDestroy(hwctx->cuda_ctx);
-}
-
static int cuvid_test_dummy_decoder(AVCodecContext *avctx, CUVIDPARSERPARAMS *cuparseinfo)
{
CUVIDDECODECREATEINFO cuinfo;
cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
cuinfo.ulNumOutputSurfaces = 1;
cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
+ cuinfo.bitDepthMinus8 = 0; // probe with 8-bit; the real depth is set in the sequence callback
cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
AVHWDeviceContext *device_ctx;
AVHWFramesContext *hwframe_ctx;
CUVIDSOURCEDATAPACKET seq_pkt;
- CUdevice device;
CUcontext cuda_ctx = NULL;
CUcontext dummy;
const AVBitStreamFilter *bsf;
return ret;
}
- ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CUVIDPARSERDISPINFO));
+ ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame));
if (!ctx->frame_queue) {
ret = AVERROR(ENOMEM);
goto error;
ret = AVERROR(ENOMEM);
goto error;
}
-
- device_ctx = hwframe_ctx->device_ctx;
- device_hwctx = device_ctx->hwctx;
- cuda_ctx = device_hwctx->cuda_ctx;
} else {
- ctx->hwdevice = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA);
- if (!ctx->hwdevice) {
- av_log(avctx, AV_LOG_ERROR, "Error allocating hwdevice\n");
- ret = AVERROR(ENOMEM);
- goto error;
- }
-
- ret = CHECK_CU(cuInit(0));
+ /* av_hwdevice_ctx_create() replaces the manual cuInit/cuDeviceGet/cuCtxCreate
+ * sequence below and honours the "gpu" option for device selection. */
+ ret = av_hwdevice_ctx_create(&ctx->hwdevice, AV_HWDEVICE_TYPE_CUDA, ctx->cu_gpu, NULL, 0);
if (ret < 0)
goto error;
- ret = CHECK_CU(cuDeviceGet(&device, 0));
- if (ret < 0)
- goto error;
-
- ret = CHECK_CU(cuCtxCreate(&cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, device));
- if (ret < 0)
- goto error;
-
- device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
- device_ctx->free = cuvid_ctx_free;
-
- device_hwctx = device_ctx->hwctx;
- device_hwctx->cuda_ctx = cuda_ctx;
-
- ret = CHECK_CU(cuCtxPopCurrent(&dummy));
- if (ret < 0)
- goto error;
-
- ret = av_hwdevice_ctx_init(ctx->hwdevice);
- if (ret < 0) {
- av_log(avctx, AV_LOG_ERROR, "av_hwdevice_ctx_init failed\n");
- goto error;
- }
-
ctx->hwframe = av_hwframe_ctx_alloc(ctx->hwdevice);
if (!ctx->hwframe) {
av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_alloc failed\n");
ret = AVERROR(ENOMEM);
goto error;
}
+
+ hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
}
+ /* Shared by both branches now that hwframe_ctx is set either way. */
+ device_ctx = hwframe_ctx->device_ctx;
+ device_hwctx = device_ctx->hwctx;
+ cuda_ctx = device_hwctx->cuda_ctx;
+
memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo));
memset(&ctx->cuparse_ext, 0, sizeof(ctx->cuparse_ext));
memset(&seq_pkt, 0, sizeof(seq_pkt));
if (ret < 0)
goto error;
- ctx->ever_flushed = 0;
+ ctx->prev_pts = INT64_MIN; // no deinterlaced field pair timed yet
+
+ if (!avctx->pkt_timebase.num || !avctx->pkt_timebase.den)
+ av_log(avctx, AV_LOG_WARNING, "Invalid pkt_timebase, passing timestamps as-is.\n");
return 0;
AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
+ CUVIDSOURCEDATAPACKET seq_pkt = { 0 };
int ret;
- ctx->ever_flushed = 1;
-
ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
if (ret < 0)
goto error;
av_fifo_freep(&ctx->frame_queue);
- ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CUVIDPARSERDISPINFO));
+ ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame));
if (!ctx->frame_queue) {
av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on flush\n");
return;
if (ret < 0)
goto error;
+ /* Re-feed the stored sequence header so decoding can resume after the flush
+ * without waiting for a new in-band header. */
+ seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
+ seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
+
+ if (seq_pkt.payload && seq_pkt.payload_size) {
+ ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &seq_pkt));
+ if (ret < 0)
+ goto error;
+ }
+
ret = CHECK_CU(cuCtxPopCurrent(&dummy));
if (ret < 0)
goto error;
+ ctx->prev_pts = INT64_MIN; // restart field-pair pts tracking
+ ctx->decoder_flushing = 0; // flush complete; the decoder accepts input again
+
return;
error:
av_log(avctx, AV_LOG_ERROR, "CUDA reinit on flush failed\n");
}
+/* Decoder private options, attached via CuvidContext.avclass / priv_class. */
+#define OFFSET(x) offsetof(CuvidContext, x)
+#define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
+static const AVOption options[] = {
+ { "deint", "Set deinterlacing mode", OFFSET(deint_mode), AV_OPT_TYPE_INT, { .i64 = cudaVideoDeinterlaceMode_Weave }, cudaVideoDeinterlaceMode_Weave, cudaVideoDeinterlaceMode_Adaptive, VD, "deint" },
+ { "weave", "Weave deinterlacing (do nothing)", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Weave }, 0, 0, VD, "deint" },
+ { "bob", "Bob deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob }, 0, 0, VD, "deint" },
+ { "adaptive", "Adaptive deinterlacing", 0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" },
+ { "gpu", "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
+ { NULL }
+};
+
#define DEFINE_CUVID_CODEC(x, X) \
+ static const AVClass x##_cuvid_class = { \
+ .class_name = #x "_cuvid", \
+ .item_name = av_default_item_name, \
+ .option = options, \
+ .version = LIBAVUTIL_VERSION_INT, \
+ }; \
AVHWAccel ff_##x##_cuvid_hwaccel = { \
.name = #x "_cuvid", \
.type = AVMEDIA_TYPE_VIDEO, \
.type = AVMEDIA_TYPE_VIDEO, \
.id = AV_CODEC_ID_##X, \
.priv_data_size = sizeof(CuvidContext), \
+ .priv_class = &x##_cuvid_class, \
.init = cuvid_decode_init, \
.close = cuvid_decode_end, \
.decode = cuvid_decode_frame, \
+ .send_packet = cuvid_decode_packet, \
+ .receive_frame = cuvid_output_frame, \
.flush = cuvid_flush, \
- .capabilities = AV_CODEC_CAP_DELAY, \
+ .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \
.pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
AV_PIX_FMT_NV12, \
AV_PIX_FMT_NONE }, \