3 * Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/buffer.h"
23 #include "libavutil/mathematics.h"
24 #include "libavutil/hwcontext.h"
25 #include "libavutil/hwcontext_cuda.h"
26 #include "libavutil/fifo.h"
27 #include "libavutil/log.h"
/*
 * Shared sizing constant. In this file it sets the capacity (in
 * CUVIDPARSERDISPINFO entries) of the display-info FIFO, the number of decode
 * surfaces requested from cuvidCreateDecoder(), and the parser's
 * ulMaxNumDecodeSurfaces.
 */
34 #define MAX_FRAME_COUNT 20
/*
 * Private decoder state, stored in AVCodecContext.priv_data.
 *
 * NOTE(review): the code below also accesses ctx->hwframe, ctx->bsf and
 * ctx->internal_error; their declarations (and the struct's braces) are not
 * visible in this view.
 */
36 typedef struct CuvidContext
/* Decoder handle; created in the sequence callback, destroyed in cuvid_decode_end(). */
38 CUvideodecoder cudecoder;
/* Bitstream parser handle; created in cuvid_decode_init(). */
39 CUvideoparser cuparser;
/* Reference to the hardware device context that supplies the CUDA context. */
41 AVBufferRef *hwdevice;
/* FIFO of CUVIDPARSERDISPINFO entries: written by the display callback, read by cuvid_decode_frame(). */
46 AVFifoBuffer *frame_queue;
/* Codec and chroma format of the current decoder; compared against later sequence headers. */
50 cudaVideoCodec codec_type;
51 cudaVideoChromaFormat chroma_format;
/*
 * Convert the CUresult of a CUDA/CUVID driver call into an FFmpeg error code,
 * logging the outcome.
 *
 * avctx: logging context handed to av_log().
 * err:   result code of the call being checked.
 * func:  text identifying the call; printed in the log messages.
 *
 * Returns AVERROR_EXTERNAL after logging a failure. The statement guarded by
 * the CUDA_SUCCESS test is not visible in this view; callers in this file
 * treat a negative result as failure.
 */
54 static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
/* NOTE(review): err_name is used below but its declaration is not visible here. */
57 const char *err_string;
/* Every checked call is announced at trace level, successful or not. */
59 av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);
61 if (err == CUDA_SUCCESS)
/* Ask the driver for the symbolic name and description of the error code. */
64 cuGetErrorName(err, &err_name);
65 cuGetErrorString(err, &err_string);
/* The error line is assembled from up to three av_log() calls; the detail part is skipped when either lookup yielded NULL. */
67 av_log(avctx, AV_LOG_ERROR, "%s failed", func);
68 if (err_name && err_string)
69 av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
70 av_log(avctx, AV_LOG_ERROR, "\n");
72 return AVERROR_EXTERNAL;
/*
 * Evaluate driver call x and pass its result to check_cu(), using the
 * stringified expression as the name shown in the log. Requires a variable
 * named avctx in the calling scope.
 */
75 #define CHECK_CU(x) check_cu(avctx, (x), #x)
/*
 * Parser sequence callback (registered as pfnSequenceCallback in
 * cuvid_decode_init()). Copies stream properties from the reported
 * CUVIDEOFORMAT into the codec context, creates the CUVID decoder and
 * initializes the hardware frames context.
 *
 * opaque: the AVCodecContext registered as the parser's pUserData.
 * format: video format reported by the parser.
 *
 * Failures are recorded in ctx->internal_error. The callback's own return
 * statements are not visible in this view.
 */
77 static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
79 AVCodecContext *avctx = opaque;
80 CuvidContext *ctx = avctx->priv_data;
81 AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
82 CUVIDDECODECREATEINFO cuinfo;
84 av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback\n");
/* Start from a clean error state for this callback. */
86 ctx->internal_error = 0;
/*
 * Display size is taken from the right/bottom edges of the display area.
 * NOTE(review): display_area.left/top are not subtracted — confirm the area
 * always starts at (0, 0).
 */
88 avctx->width = format->display_area.right;
89 avctx->height = format->display_area.bottom;
/* Sample aspect ratio = display aspect ratio divided by width/height. */
91 ff_set_sar(avctx, av_div_q(
92 (AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y },
93 (AVRational){ avctx->width, avctx->height }));
/*
 * Mirror the stream's progressive/interlaced state in
 * AV_CODEC_FLAG_INTERLACED_DCT (the keyword separating the two assignments is
 * not visible in this view).
 */
95 if (!format->progressive_sequence)
96 avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;
98 avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT;
/* Full-range flag selects JPEG range, otherwise MPEG range. */
100 if (format->video_signal_description.video_full_range_flag)
101 avctx->color_range = AVCOL_RANGE_JPEG;
103 avctx->color_range = AVCOL_RANGE_MPEG;
/* Colour description values are copied over without translation. */
105 avctx->color_primaries = format->video_signal_description.color_primaries;
106 avctx->color_trc = format->video_signal_description.transfer_characteristics;
107 avctx->colorspace = format->video_signal_description.matrix_coefficients;
110 avctx->bit_rate = format->bitrate;
/* Only take the frame rate when both numerator and denominator are non-zero. */
112 if (format->frame_rate.numerator && format->frame_rate.denominator) {
113 avctx->framerate.num = format->frame_rate.numerator;
114 avctx->framerate.den = format->frame_rate.denominator;
/*
 * Tail of a condition whose first line is not visible: compares the stored
 * coded size, chroma format and codec type with the new format. Presumably
 * nothing has to be re-created when everything matches — TODO confirm.
 */
118 && avctx->coded_width == format->coded_width
119 && avctx->coded_height == format->coded_height
120 && ctx->chroma_format == format->chroma_format
121 && ctx->codec_type == format->codec)
/* A decoder that already exists cannot be rebuilt for a new format. */
124 if (ctx->cudecoder) {
125 av_log(avctx, AV_LOG_ERROR, "re-initializing decoder is not supported\n");
126 ctx->internal_error = AVERROR(EINVAL);
/* The frames context must not have a pool yet, since it is initialized further down. */
130 if (hwframe_ctx->pool) {
131 av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized\n");
132 ctx->internal_error = AVERROR(EINVAL);
/* Remember the format the decoder is about to be created for. */
136 avctx->coded_width = format->coded_width;
137 avctx->coded_height = format->coded_height;
139 ctx->chroma_format = format->chroma_format;
/* Decoder creation parameters: NV12 output at the coded size, no scaling or cropping. */
141 memset(&cuinfo, 0, sizeof(cuinfo));
143 cuinfo.CodecType = ctx->codec_type = format->codec;
144 cuinfo.ChromaFormat = format->chroma_format;
145 cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
147 cuinfo.ulWidth = avctx->coded_width;
148 cuinfo.ulHeight = avctx->coded_height;
149 cuinfo.ulTargetWidth = cuinfo.ulWidth;
150 cuinfo.ulTargetHeight = cuinfo.ulHeight;
152 cuinfo.target_rect.left = 0;
153 cuinfo.target_rect.top = 0;
154 cuinfo.target_rect.right = cuinfo.ulWidth;
155 cuinfo.target_rect.bottom = cuinfo.ulHeight;
157 cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
158 cuinfo.ulNumOutputSurfaces = 1;
159 cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
161 cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
/* Create the decoder; the translated result becomes the callback's error state. */
163 ctx->internal_error = CHECK_CU(cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
164 if (ctx->internal_error < 0)
/* Frames context: CUDA frames backed by NV12, with both dimensions rounded up to a multiple of 32. */
167 hwframe_ctx->format = AV_PIX_FMT_CUDA;
168 hwframe_ctx->sw_format = AV_PIX_FMT_NV12;
169 hwframe_ctx->width = FFALIGN(avctx->coded_width, 32);
170 hwframe_ctx->height = FFALIGN(avctx->coded_height, 32);
172 if ((ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe)) < 0) {
173 av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n");
/*
 * Parser decode callback (registered as pfnDecodePicture). Submits the picture
 * parameters to the decoder and records the translated result in
 * ctx->internal_error.
 *
 * opaque:    the AVCodecContext registered as the parser's pUserData.
 * picparams: picture parameters supplied by the parser.
 *
 * The return statements are not visible in this view.
 */
180 static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* picparams)
182 AVCodecContext *avctx = opaque;
183 CuvidContext *ctx = avctx->priv_data;
185 av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n");
187 ctx->internal_error = CHECK_CU(cuvidDecodePicture(ctx->cudecoder, picparams));
188 if (ctx->internal_error < 0)
/*
 * Parser display callback (registered as pfnDisplayPicture). Queues a copy of
 * the display info so cuvid_decode_frame() can map and output the picture
 * later.
 *
 * opaque:   the AVCodecContext registered as the parser's pUserData.
 * dispinfo: display information for a picture that is ready for output.
 *
 * The return statement is not visible in this view.
 */
194 static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINFO* dispinfo)
196 AVCodecContext *avctx = opaque;
197 CuvidContext *ctx = avctx->priv_data;
199 av_log(avctx, AV_LOG_TRACE, "pfnDisplayPicture\n");
201 ctx->internal_error = 0;
/*
 * NOTE(review): the write result is not checked and no free-space test is
 * visible; the FIFO holds MAX_FRAME_COUNT entries — confirm it cannot be
 * overrun when several pictures are displayed before one is read.
 */
203 av_fifo_generic_write(ctx->frame_queue, dispinfo, sizeof(CUVIDPARSERDISPINFO), NULL);
/*
 * Decode entry point: feeds one packet to the CUVID parser and, when a decoded
 * picture is waiting in the queue, returns it through data.
 *
 * avctx:     codec context; priv_data is the CuvidContext.
 * data:      the AVFrame to fill.
 * got_frame: output flag (the lines that set it are not visible in this view).
 * avpkt:     input packet; run through ctx->bsf first when a bitstream filter
 *            is configured and the packet is non-empty.
 *
 * Many guard, goto/label and return lines of this function are not visible
 * here, so the exact error paths cannot be read from this view.
 */
208 static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
210 CuvidContext *ctx = avctx->priv_data;
211 AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
212 AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
213 CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
214 AVFrame *frame = data;
215 CUVIDSOURCEDATAPACKET cupkt;
216 AVPacket filter_packet = { 0 };
217 AVPacket filtered_packet = { 0 };
218 CUdeviceptr mapped_frame = 0;
219 int ret = 0, eret = 0;
/* Optional bitstream filtering: reference the packet, send it through the filter, and continue with the filter's output. */
221 if (ctx->bsf && avpkt->size) {
222 if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) {
223 av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n");
227 if ((ret = av_bsf_send_packet(ctx->bsf, &filter_packet)) < 0) {
228 av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n");
229 av_packet_unref(&filter_packet);
233 if ((ret = av_bsf_receive_packet(ctx->bsf, &filtered_packet)) < 0) {
234 av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n");
238 avpkt = &filtered_packet;
/* Make the decoder's CUDA context current; it is popped again at the end of the function. */
241 ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
243 av_packet_unref(&filtered_packet);
/* Describe the packet for the parser. */
247 memset(&cupkt, 0, sizeof(cupkt));
250 cupkt.payload_size = avpkt->size;
251 cupkt.payload = avpkt->data;
/* A valid pts is handed to the parser rescaled from avctx->time_base to units of 1/10000000. */
253 if (avpkt->pts != AV_NOPTS_VALUE) {
254 cupkt.flags = CUVID_PKT_TIMESTAMP;
255 cupkt.timestamp = av_rescale_q(avpkt->pts, avctx->time_base, (AVRational){1, 10000000});
/*
 * The condition selecting the end-of-stream flag is not visible; presumably
 * this is the empty-packet (flush) case — TODO confirm.
 */
258 cupkt.flags = CUVID_PKT_ENDOFSTREAM;
/* Parse the packet; the callbacks registered with the parser record their failures in ctx->internal_error. */
261 ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &cupkt));
263 av_packet_unref(&filtered_packet);
/* An error recorded by a callback replaces the parser's own result. */
266 if (ctx->internal_error)
267 ret = ctx->internal_error;
/* Output stage: runs only when the display callback has queued a picture. */
271 if (av_fifo_size(ctx->frame_queue)) {
272 CUVIDPARSERDISPINFO dispinfo;
273 CUVIDPROCPARAMS params;
274 unsigned int pitch = 0;
278 av_fifo_generic_read(ctx->frame_queue, &dispinfo, sizeof(CUVIDPARSERDISPINFO), NULL);
/*
 * NOTE(review): "¶ms" on the memset and cuvidMapVideoFrame lines looks like a
 * mis-encoded "&params" (params is declared just above) — confirm against
 * the pristine file.
 */
280 memset(¶ms, 0, sizeof(params));
281 params.progressive_frame = dispinfo.progressive_frame;
282 params.second_field = 0;
283 params.top_field_first = dispinfo.top_field_first;
/* Map the decoded picture; yields its device address and row pitch. */
285 ret = CHECK_CU(cuvidMapVideoFrame(ctx->cudecoder, dispinfo.picture_index, &mapped_frame, &pitch, ¶ms));
/* Hardware output: copy the mapped picture into a frame taken from the frames context. */
289 if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
290 ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0);
292 av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer failed\n");
296 ret = ff_decode_frame_props(avctx, frame);
298 av_log(avctx, AV_LOG_ERROR, "ff_decode_frame_props failed\n");
/*
 * Two device-to-device 2D copies, one per plane; the second plane has half
 * the coded height. The declarations of i and offset and some initializer
 * fields are not visible in this view.
 */
302 for (i = 0; i < 2; i++) {
303 CUDA_MEMCPY2D cpy = {
304 .srcMemoryType = CU_MEMORYTYPE_DEVICE,
305 .dstMemoryType = CU_MEMORYTYPE_DEVICE,
306 .srcDevice = mapped_frame,
307 .dstDevice = (CUdeviceptr)frame->data[i],
309 .dstPitch = frame->linesize[i],
/* Copy only as many bytes per row as both source and destination rows hold. */
311 .WidthInBytes = FFMIN(pitch, frame->linesize[i]),
312 .Height = avctx->coded_height >> (i ? 1 : 0),
315 ret = CHECK_CU(cuMemcpy2D(&cpy));
/* Advance by the coded height so the next iteration addresses the plane after the first. */
319 offset += avctx->coded_height;
/* Software NV12 output: wrap the mapped picture in a temporary CUDA frame and download it. */
321 } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
322 AVFrame *tmp_frame = av_frame_alloc();
324 av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
325 ret = AVERROR(ENOMEM);
/*
 * Both planes share the mapped pitch; the second plane starts coded_height
 * rows after the first.
 * NOTE(review): the av_buffer_ref() result is not checked in the visible
 * lines.
 */
329 tmp_frame->format = AV_PIX_FMT_CUDA;
330 tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
331 tmp_frame->data[0] = (uint8_t*)mapped_frame;
332 tmp_frame->linesize[0] = pitch;
333 tmp_frame->data[1] = (uint8_t*)(mapped_frame + avctx->coded_height * pitch);
334 tmp_frame->linesize[1] = pitch;
335 tmp_frame->width = avctx->width;
336 tmp_frame->height = avctx->height;
338 ret = ff_get_buffer(avctx, frame, 0);
340 av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");
341 av_frame_free(&tmp_frame);
345 ret = av_hwframe_transfer_data(frame, tmp_frame, 0);
347 av_log(avctx, AV_LOG_ERROR, "av_hwframe_transfer_data failed\n");
348 av_frame_free(&tmp_frame);
/* The temporary wrapper is released on the success path as well. */
352 av_frame_free(&tmp_frame);
/* Output frame properties; the timestamp is converted back from 1/10000000 units to avctx->time_base. */
358 frame->width = avctx->width;
359 frame->height = avctx->height;
360 frame->pts = av_rescale_q(dispinfo.timestamp, (AVRational){1, 10000000}, avctx->time_base);
362 /* CUVIDs opaque reordering breaks the internal pkt logic.
363 * So set pkt_pts and clear all the other pkt_ fields.
365 frame->pkt_pts = frame->pts;
366 av_frame_set_pkt_pos(frame, -1);
367 av_frame_set_pkt_duration(frame, 0);
368 av_frame_set_pkt_size(frame, -1);
370 frame->interlaced_frame = !dispinfo.progressive_frame;
/* Field order is only set for interlaced pictures. */
372 if (!dispinfo.progressive_frame)
373 frame->top_field_first = dispinfo.top_field_first;
/* Cleanup: unmap the picture and pop the CUDA context. Results go to eret, separate from ret. */
382 eret = CHECK_CU(cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame));
384 eret = CHECK_CU(cuCtxPopCurrent(&dummy));
/*
 * Codec close callback; also called from cuvid_decode_init(). Releases
 * everything owned by the CuvidContext: the display FIFO, the bitstream
 * filter, the parser, the decoder and the references to the hardware frames
 * and device contexts.
 *
 * NOTE(review): any guards around the destroy calls and the return statement
 * are not visible in this view.
 */
392 static av_cold int cuvid_decode_end(AVCodecContext *avctx)
394 CuvidContext *ctx = avctx->priv_data;
396 av_fifo_freep(&ctx->frame_queue);
399 av_bsf_free(&ctx->bsf);
402 cuvidDestroyVideoParser(ctx->cuparser);
405 cuvidDestroyDecoder(ctx->cudecoder);
/* The frames context reference is dropped before the device context reference. */
407 av_buffer_unref(&ctx->hwframe);
408 av_buffer_unref(&ctx->hwdevice);
/*
 * Free callback installed on the device context that cuvid_decode_init()
 * allocates itself; destroys the CUDA context stored in that device context.
 */
413 static void cuvid_ctx_free(AVHWDeviceContext *ctx)
415 AVCUDADeviceContext *hwctx = ctx->hwctx;
416 cuCtxDestroy(hwctx->cuda_ctx);
/*
 * Probe for codec support: create a throw-away 1280x720 4:2:0 NV12 decoder for
 * the codec type in cuparseinfo and destroy it again.
 *
 * avctx:       logging context (used by CHECK_CU).
 * cuparseinfo: parser parameters; only CodecType is read.
 *
 * The declaration of ret, the checks between the two calls and the return
 * statement are not visible in this view.
 */
419 static int cuvid_test_dummy_decoder(AVCodecContext *avctx, CUVIDPARSERPARAMS *cuparseinfo)
421 CUVIDDECODECREATEINFO cuinfo;
422 CUvideodecoder cudec = 0;
425 memset(&cuinfo, 0, sizeof(cuinfo));
427 cuinfo.CodecType = cuparseinfo->CodecType;
428 cuinfo.ChromaFormat = cudaVideoChromaFormat_420;
429 cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
/* Fixed test resolution; target size and rectangle match it, so no scaling is requested. */
431 cuinfo.ulWidth = 1280;
432 cuinfo.ulHeight = 720;
433 cuinfo.ulTargetWidth = cuinfo.ulWidth;
434 cuinfo.ulTargetHeight = cuinfo.ulHeight;
436 cuinfo.target_rect.left = 0;
437 cuinfo.target_rect.top = 0;
438 cuinfo.target_rect.right = cuinfo.ulWidth;
439 cuinfo.target_rect.bottom = cuinfo.ulHeight;
/* Same surface counts, creation flags and deinterlace mode as the real decoder set up in the sequence callback. */
441 cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
442 cuinfo.ulNumOutputSurfaces = 1;
443 cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
445 cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
447 ret = CHECK_CU(cuvidCreateDecoder(&cudec, &cuinfo));
451 ret = CHECK_CU(cuvidDestroyDecoder(cudec));
/*
 * Codec init callback. Negotiates the output pixel format, obtains or creates
 * the CUDA device and frames contexts, sets up the optional mp4toannexb
 * bitstream filter, probes decoder support, creates the CUVID parser and feeds
 * it the sequence header.
 *
 * avctx: codec context; priv_data is the CuvidContext being initialized.
 *
 * Many guard, goto/label and return lines are not visible in this view; the
 * final cuvid_decode_end() call is presumably the shared error path —
 * TODO confirm.
 */
458 static av_cold int cuvid_decode_init(AVCodecContext *avctx)
460 CuvidContext *ctx = avctx->priv_data;
461 AVCUDADeviceContext *device_hwctx;
462 AVHWDeviceContext *device_ctx;
463 AVHWFramesContext *hwframe_ctx;
464 CUVIDPARSERPARAMS cuparseinfo;
465 CUVIDEOFORMATEX cuparse_ext;
466 CUVIDSOURCEDATAPACKET seq_pkt;
468 CUcontext cuda_ctx = NULL;
470 const AVBitStreamFilter *bsf;
/* Candidate output formats, CUDA first (the rest of the initializer is not visible here). */
473 enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
477 ret = ff_get_format(avctx, pix_fmts);
479 av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
/* Queue for display-info entries produced by the parser's display callback. */
483 ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CUVIDPARSERDISPINFO));
484 if (!ctx->frame_queue) {
485 ret = AVERROR(ENOMEM);
/* ret still holds the format chosen by ff_get_format() at this point. */
489 avctx->pix_fmt = ret;
/* Caller supplied a frames context: reference it and its device, and reuse the device's CUDA context. */
491 if (avctx->hw_frames_ctx) {
492 ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
494 ret = AVERROR(ENOMEM);
498 hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
500 ctx->hwdevice = av_buffer_ref(hwframe_ctx->device_ref);
501 if (!ctx->hwdevice) {
502 ret = AVERROR(ENOMEM);
506 device_ctx = hwframe_ctx->device_ctx;
507 device_hwctx = device_ctx->hwctx;
508 cuda_ctx = device_hwctx->cuda_ctx;
/*
 * Self-managed path (the branch keyword is not visible; presumably the else
 * of the test above — TODO confirm): allocate a CUDA device context, create
 * a CUDA context on device 0 and build a frames context on top of it.
 */
510 ctx->hwdevice = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA);
511 if (!ctx->hwdevice) {
512 av_log(avctx, AV_LOG_ERROR, "Error allocating hwdevice\n");
513 ret = AVERROR(ENOMEM);
517 ret = CHECK_CU(cuInit(0));
521 ret = CHECK_CU(cuDeviceGet(&device, 0));
525 ret = CHECK_CU(cuCtxCreate(&cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, device));
/* Hand the new CUDA context to the device context and install cuvid_ctx_free() as its free callback. */
529 device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
530 device_ctx->free = cuvid_ctx_free;
532 device_hwctx = device_ctx->hwctx;
533 device_hwctx->cuda_ctx = cuda_ctx;
/* Pop the context again; it is pushed explicitly before the parser is created. */
535 ret = CHECK_CU(cuCtxPopCurrent(&dummy));
539 ret = av_hwdevice_ctx_init(ctx->hwdevice);
541 av_log(avctx, AV_LOG_ERROR, "av_hwdevice_ctx_init failed\n");
545 ctx->hwframe = av_hwframe_ctx_alloc(ctx->hwdevice);
547 av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_alloc failed\n");
548 ret = AVERROR(ENOMEM);
/* Parser setup starts from zeroed parameter structures. */
553 memset(&cuparseinfo, 0, sizeof(cuparseinfo));
554 memset(&cuparse_ext, 0, sizeof(cuparse_ext));
555 memset(&seq_pkt, 0, sizeof(seq_pkt));
557 cuparseinfo.pExtVideoInfo = &cuparse_ext;
/* Map the FFmpeg codec id to the CUVID codec type; only codecs enabled at build time are listed. */
559 switch (avctx->codec->id) {
560 #if CONFIG_H264_CUVID_DECODER
561 case AV_CODEC_ID_H264:
562 cuparseinfo.CodecType = cudaVideoCodec_H264;
565 #if CONFIG_HEVC_CUVID_DECODER
566 case AV_CODEC_ID_HEVC:
567 cuparseinfo.CodecType = cudaVideoCodec_HEVC;
570 #if CONFIG_VP8_CUVID_DECODER
571 case AV_CODEC_ID_VP8:
572 cuparseinfo.CodecType = cudaVideoCodec_VP8;
575 #if CONFIG_VP9_CUVID_DECODER
576 case AV_CODEC_ID_VP9:
577 cuparseinfo.CodecType = cudaVideoCodec_VP9;
580 #if CONFIG_VC1_CUVID_DECODER
581 case AV_CODEC_ID_VC1:
582 cuparseinfo.CodecType = cudaVideoCodec_VC1;
586 av_log(avctx, AV_LOG_ERROR, "Invalid CUVID codec!\n");
/* H.264 and HEVC go through the matching mp4toannexb bitstream filter; its output extradata becomes the sequence header. */
590 if (avctx->codec->id == AV_CODEC_ID_H264 || avctx->codec->id == AV_CODEC_ID_HEVC) {
591 if (avctx->codec->id == AV_CODEC_ID_H264)
592 bsf = av_bsf_get_by_name("h264_mp4toannexb");
594 bsf = av_bsf_get_by_name("hevc_mp4toannexb");
597 ret = AVERROR_BSF_NOT_FOUND;
/* The assignment inside the condition is intentional: any non-zero result from av_bsf_alloc() enters the branch. */
600 if (ret = av_bsf_alloc(bsf, &ctx->bsf)) {
603 if (((ret = avcodec_parameters_from_context(ctx->bsf->par_in, avctx)) < 0) || ((ret = av_bsf_init(ctx->bsf)) < 0)) {
604 av_bsf_free(&ctx->bsf);
/*
 * NOTE(review): seqhdr_data_length is set to the full extradata size while
 * the copy is clamped to sizeof(raw_seqhdr_data). If the extradata is larger
 * than that buffer, the recorded length (later used as seq_pkt.payload_size)
 * exceeds what was copied — confirm whether that can happen. The same
 * pattern is used for avctx->extradata below.
 */
608 cuparse_ext.format.seqhdr_data_length = ctx->bsf->par_out->extradata_size;
609 memcpy(cuparse_ext.raw_seqhdr_data,
610 ctx->bsf->par_out->extradata,
611 FFMIN(sizeof(cuparse_ext.raw_seqhdr_data), ctx->bsf->par_out->extradata_size));
/* Other codecs use the codec context's extradata, if present, as the sequence header. */
612 } else if (avctx->extradata_size > 0) {
613 cuparse_ext.format.seqhdr_data_length = avctx->extradata_size;
614 memcpy(cuparse_ext.raw_seqhdr_data,
616 FFMIN(sizeof(cuparse_ext.raw_seqhdr_data), avctx->extradata_size));
/* Parser configuration: surface limit, display delay and the three callbacks, with avctx as their opaque pointer. */
619 cuparseinfo.ulMaxNumDecodeSurfaces = MAX_FRAME_COUNT;
620 cuparseinfo.ulMaxDisplayDelay = 4;
621 cuparseinfo.pUserData = avctx;
622 cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
623 cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
624 cuparseinfo.pfnDisplayPicture = cuvid_handle_picture_display;
/* The remaining CUVID calls run with the decoder's CUDA context pushed. */
626 ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
/* Check that a decoder for this codec can be created before creating the parser. */
630 ret = cuvid_test_dummy_decoder(avctx, &cuparseinfo);
634 ret = CHECK_CU(cuvidCreateVideoParser(&ctx->cuparser, &cuparseinfo));
/* Feed the stored sequence header to the parser as a first packet, when there is one. */
638 seq_pkt.payload = cuparse_ext.raw_seqhdr_data;
639 seq_pkt.payload_size = cuparse_ext.format.seqhdr_data_length;
641 if (seq_pkt.payload && seq_pkt.payload_size) {
642 ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &seq_pkt));
647 ret = CHECK_CU(cuCtxPopCurrent(&dummy));
/* Releases everything acquired so far (label and surrounding returns are not visible in this view). */
654 cuvid_decode_end(avctx);
/*
 * Declare the hwaccel and decoder descriptors for one codec.
 *
 * x: lower-case codec name, used in the symbol names
 *    (ff_<x>_cuvid_hwaccel, ff_<x>_cuvid_decoder) and in the "<x>_cuvid" name.
 * X: upper-case codec name, pasted into AV_CODEC_ID_<X> and the long name.
 *
 * The decoder uses cuvid_decode_init / cuvid_decode_end / cuvid_decode_frame,
 * sets AV_CODEC_CAP_DELAY, and lists AV_PIX_FMT_CUDA first among its pixel
 * formats (the remainder of the list and the closing lines are not visible in
 * this view).
 */
658 #define DEFINE_CUVID_CODEC(x, X) \
659 AVHWAccel ff_##x##_cuvid_hwaccel = { \
660 .name = #x "_cuvid", \
661 .type = AVMEDIA_TYPE_VIDEO, \
662 .id = AV_CODEC_ID_##X, \
663 .pix_fmt = AV_PIX_FMT_CUDA, \
665 AVCodec ff_##x##_cuvid_decoder = { \
666 .name = #x "_cuvid", \
667 .long_name = NULL_IF_CONFIG_SMALL("Nvidia CUVID " #X " decoder"), \
668 .type = AVMEDIA_TYPE_VIDEO, \
669 .id = AV_CODEC_ID_##X, \
670 .priv_data_size = sizeof(CuvidContext), \
671 .init = cuvid_decode_init, \
672 .close = cuvid_decode_end, \
673 .decode = cuvid_decode_frame, \
674 .capabilities = AV_CODEC_CAP_DELAY, \
675 .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
/*
 * Instantiate the hwaccel/decoder pair for each CUVID codec enabled in the
 * build configuration (the directives closing each #if are not visible in
 * this view).
 */
680 #if CONFIG_HEVC_CUVID_DECODER
681 DEFINE_CUVID_CODEC(hevc, HEVC)
684 #if CONFIG_H264_CUVID_DECODER
685 DEFINE_CUVID_CODEC(h264, H264)
688 #if CONFIG_VP8_CUVID_DECODER
689 DEFINE_CUVID_CODEC(vp8, VP8)
692 #if CONFIG_VP9_CUVID_DECODER
693 DEFINE_CUVID_CODEC(vp9, VP9)
696 #if CONFIG_VC1_CUVID_DECODER
697 DEFINE_CUVID_CODEC(vc1, VC1)