git.sesse.net Git - ffmpeg/blob - libavcodec/cuviddec.c

   1 /*
   2  * Nvidia CUVID decoder
   3  * Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "compat/cuda/dynlink_loader.h"
  23
  24 #include "libavutil/buffer.h"
  25 #include "libavutil/mathematics.h"
  26 #include "libavutil/hwcontext.h"
  27 #include "libavutil/hwcontext_cuda_internal.h"
  28 #include "libavutil/cuda_check.h"
  29 #include "libavutil/fifo.h"
  30 #include "libavutil/log.h"
  31 #include "libavutil/opt.h"
  32 #include "libavutil/pixdesc.h"
  33
  34 #include "avcodec.h"
  35 #include "decode.h"
  36 #include "hwconfig.h"
  37 #include "nvdec.h"
  38 #include "internal.h"
  39
  40 #if !NVDECAPI_CHECK_VERSION(9, 0)
  41 #define cudaVideoSurfaceFormat_YUV444 2
  42 #define cudaVideoSurfaceFormat_YUV444_16Bit 3
  43 #endif
  44
  45 #if NVDECAPI_CHECK_VERSION(11, 0)
  46 #define CUVID_HAS_AV1_SUPPORT
  47 #endif
  48
  49 typedef struct CuvidContext
  50 {
  51     AVClass *avclass;
  52
  53     CUvideodecoder cudecoder;
  54     CUvideoparser cuparser;
  55
  56     char *cu_gpu;
  57     int nb_surfaces;
  58     int drop_second_field;
  59     char *crop_expr;
  60     char *resize_expr;
  61
  62     struct {
  63         int left;
  64         int top;
  65         int right;
  66         int bottom;
  67     } crop;
  68
  69     struct {
  70         int width;
  71         int height;
  72     } resize;
  73
  74     AVBufferRef *hwdevice;
  75     AVBufferRef *hwframe;
  76
  77     AVFifoBuffer *frame_queue;
  78
  79     int deint_mode;
  80     int deint_mode_current;
  81     int64_t prev_pts;
  82     int progressive_sequence;
  83
  84     int internal_error;
  85     int decoder_flushing;
  86
  87     int *key_frame;
  88
  89     cudaVideoCodec codec_type;
  90     cudaVideoChromaFormat chroma_format;
  91
  92     CUVIDDECODECAPS caps8, caps10, caps12;
  93
  94     CUVIDPARSERPARAMS cuparseinfo;
  95     CUVIDEOFORMATEX *cuparse_ext;
  96
  97     CudaFunctions *cudl;
  98     CuvidFunctions *cvdl;
  99 } CuvidContext;
 100
 101 typedef struct CuvidParsedFrame
 102 {
 103     CUVIDPARSERDISPINFO dispinfo;
 104     int second_field;
 105     int is_deinterlacing;
 106 } CuvidParsedFrame;
 107
 108 #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x)
 109
 110 static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
 111 {
 112     AVCodecContext *avctx = opaque;
 113     CuvidContext *ctx = avctx->priv_data;
 114     AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
 115     CUVIDDECODECAPS *caps = NULL;
 116     CUVIDDECODECREATEINFO cuinfo;
 117     int surface_fmt;
 118     int chroma_444;
 119
 120     int old_width = avctx->width;
 121     int old_height = avctx->height;
 122
 123     enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
 124                                        AV_PIX_FMT_NONE,  // Will be updated below
 125                                        AV_PIX_FMT_NONE };
 126
 127     av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);
 128
 129     memset(&cuinfo, 0, sizeof(cuinfo));
 130
 131     ctx->internal_error = 0;
 132
 133     avctx->coded_width = cuinfo.ulWidth = format->coded_width;
 134     avctx->coded_height = cuinfo.ulHeight = format->coded_height;
 135
 136     // apply cropping
 137     cuinfo.display_area.left = format->display_area.left + ctx->crop.left;
 138     cuinfo.display_area.top = format->display_area.top + ctx->crop.top;
 139     cuinfo.display_area.right = format->display_area.right - ctx->crop.right;
 140     cuinfo.display_area.bottom = format->display_area.bottom - ctx->crop.bottom;
 141
 142     // width and height need to be set before calling ff_get_format
 143     if (ctx->resize_expr) {
 144         avctx->width = ctx->resize.width;
 145         avctx->height = ctx->resize.height;
 146     } else {
 147         avctx->width = cuinfo.display_area.right - cuinfo.display_area.left;
 148         avctx->height = cuinfo.display_area.bottom - cuinfo.display_area.top;
 149     }
 150
 151     // target width/height need to be multiples of two
 152     cuinfo.ulTargetWidth = avctx->width = (avctx->width + 1) & ~1;
 153     cuinfo.ulTargetHeight = avctx->height = (avctx->height + 1) & ~1;
 154
 155     // aspect ratio conversion, 1:1, depends on scaled resolution
 156     cuinfo.target_rect.left = 0;
 157     cuinfo.target_rect.top = 0;
 158     cuinfo.target_rect.right = cuinfo.ulTargetWidth;
 159     cuinfo.target_rect.bottom = cuinfo.ulTargetHeight;
 160
 161     chroma_444 = format->chroma_format == cudaVideoChromaFormat_444;
 162
 163     switch (format->bit_depth_luma_minus8) {
 164     case 0: // 8-bit
 165         pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
 166         caps = &ctx->caps8;
 167         break;
 168     case 2: // 10-bit
 169         pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
 170         caps = &ctx->caps10;
 171         break;
 172     case 4: // 12-bit
 173         pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
 174         caps = &ctx->caps12;
 175         break;
 176     default:
 177         break;
 178     }
 179
 180     if (!caps || !caps->bIsSupported) {
 181         av_log(avctx, AV_LOG_ERROR, "unsupported bit depth: %d\n",
 182                format->bit_depth_luma_minus8 + 8);
 183         ctx->internal_error = AVERROR(EINVAL);
 184         return 0;
 185     }
 186
 187     surface_fmt = ff_get_format(avctx, pix_fmts);
 188     if (surface_fmt < 0) {
 189         av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", surface_fmt);
 190         ctx->internal_error = AVERROR(EINVAL);
 191         return 0;
 192     }
 193
 194     av_log(avctx, AV_LOG_VERBOSE, "Formats: Original: %s | HW: %s | SW: %s\n",
 195            av_get_pix_fmt_name(avctx->pix_fmt),
 196            av_get_pix_fmt_name(surface_fmt),
 197            av_get_pix_fmt_name(avctx->sw_pix_fmt));
 198
 199     avctx->pix_fmt = surface_fmt;
 200
 201     // Update our hwframe ctx, as the get_format callback might have refreshed it!
 202     if (avctx->hw_frames_ctx) {
 203         av_buffer_unref(&ctx->hwframe);
 204
 205         ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
 206         if (!ctx->hwframe) {
 207             ctx->internal_error = AVERROR(ENOMEM);
 208             return 0;
 209         }
 210
 211         hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
 212     }
 213
 214     ff_set_sar(avctx, av_div_q(
 215         (AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y },
 216         (AVRational){ avctx->width, avctx->height }));
 217
 218     ctx->deint_mode_current = format->progressive_sequence
 219                               ? cudaVideoDeinterlaceMode_Weave
 220                               : ctx->deint_mode;
 221
 222     ctx->progressive_sequence = format->progressive_sequence;
 223
 224     if (!format->progressive_sequence && ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave)
 225         avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;
 226     else
 227         avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT;
 228
 229     if (format->video_signal_description.video_full_range_flag)
 230         avctx->color_range = AVCOL_RANGE_JPEG;
 231     else
 232         avctx->color_range = AVCOL_RANGE_MPEG;
 233
 234     avctx->color_primaries = format->video_signal_description.color_primaries;
 235     avctx->color_trc = format->video_signal_description.transfer_characteristics;
 236     avctx->colorspace = format->video_signal_description.matrix_coefficients;
 237
 238     if (format->bitrate)
 239         avctx->bit_rate = format->bitrate;
 240
 241     if (format->frame_rate.numerator && format->frame_rate.denominator) {
 242         avctx->framerate.num = format->frame_rate.numerator;
 243         avctx->framerate.den = format->frame_rate.denominator;
 244     }
 245
 246     if (ctx->cudecoder
 247             && avctx->coded_width == format->coded_width
 248             && avctx->coded_height == format->coded_height
 249             && avctx->width == old_width
 250             && avctx->height == old_height
 251             && ctx->chroma_format == format->chroma_format
 252             && ctx->codec_type == format->codec)
 253         return 1;
 254
 255     if (ctx->cudecoder) {
 256         av_log(avctx, AV_LOG_TRACE, "Re-initializing decoder\n");
 257         ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder));
 258         if (ctx->internal_error < 0)
 259             return 0;
 260         ctx->cudecoder = NULL;
 261     }
 262
 263     if (hwframe_ctx->pool && (
 264             hwframe_ctx->width < avctx->width ||
 265             hwframe_ctx->height < avctx->height ||
 266             hwframe_ctx->format != AV_PIX_FMT_CUDA ||
 267             hwframe_ctx->sw_format != avctx->sw_pix_fmt)) {
 268         av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
 269         av_log(avctx, AV_LOG_DEBUG, "width: %d <-> %d\n", hwframe_ctx->width, avctx->width);
 270         av_log(avctx, AV_LOG_DEBUG, "height: %d <-> %d\n", hwframe_ctx->height, avctx->height);
 271         av_log(avctx, AV_LOG_DEBUG, "format: %s <-> cuda\n", av_get_pix_fmt_name(hwframe_ctx->format));
 272         av_log(avctx, AV_LOG_DEBUG, "sw_format: %s <-> %s\n",
 273                av_get_pix_fmt_name(hwframe_ctx->sw_format), av_get_pix_fmt_name(avctx->sw_pix_fmt));
 274         ctx->internal_error = AVERROR(EINVAL);
 275         return 0;
 276     }
 277
 278     ctx->chroma_format = format->chroma_format;
 279
 280     cuinfo.CodecType = ctx->codec_type = format->codec;
 281     cuinfo.ChromaFormat = format->chroma_format;
 282
 283     switch (avctx->sw_pix_fmt) {
 284     case AV_PIX_FMT_NV12:
 285         cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
 286         break;
 287     case AV_PIX_FMT_P010:
 288     case AV_PIX_FMT_P016:
 289         cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
 290         break;
 291     case AV_PIX_FMT_YUV444P:
 292         cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444;
 293         break;
 294     case AV_PIX_FMT_YUV444P16:
 295         cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
 296         break;
 297     default:
 298         av_log(avctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
 299                av_get_pix_fmt_name(avctx->sw_pix_fmt));
 300         ctx->internal_error = AVERROR(EINVAL);
 301         return 0;
 302     }
 303
 304     cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces;
 305     cuinfo.ulNumOutputSurfaces = 1;
 306     cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
 307     cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
 308     cuinfo.DeinterlaceMode = ctx->deint_mode_current;
 309
 310     if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave && !ctx->drop_second_field)
 311         avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});
 312
 313     ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
 314     if (ctx->internal_error < 0)
 315         return 0;
 316
 317     if (!hwframe_ctx->pool) {
 318         hwframe_ctx->format = AV_PIX_FMT_CUDA;
 319         hwframe_ctx->sw_format = avctx->sw_pix_fmt;
 320         hwframe_ctx->width = avctx->width;
 321         hwframe_ctx->height = avctx->height;
 322
 323         if ((ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe)) < 0) {
 324             av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n");
 325             return 0;
 326         }
 327     }
 328
 329     return 1;
 330 }
 331
 332 static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* picparams)
 333 {
 334     AVCodecContext *avctx = opaque;
 335     CuvidContext *ctx = avctx->priv_data;
 336
 337     av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n");
 338
 339     ctx->key_frame[picparams->CurrPicIdx] = picparams->intra_pic_flag;
 340
 341     ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams));
 342     if (ctx->internal_error < 0)
 343         return 0;
 344
 345     return 1;
 346 }
 347
 348 static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINFO* dispinfo)
 349 {
 350     AVCodecContext *avctx = opaque;
 351     CuvidContext *ctx = avctx->priv_data;
 352     CuvidParsedFrame parsed_frame = { { 0 } };
 353
 354     parsed_frame.dispinfo = *dispinfo;
 355     ctx->internal_error = 0;
 356
 357     // For some reason, dispinfo->progressive_frame is sometimes wrong.
 358     parsed_frame.dispinfo.progressive_frame = ctx->progressive_sequence;
 359
 360     if (ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) {
 361         av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
 362     } else {
 363         parsed_frame.is_deinterlacing = 1;
 364         av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
 365         if (!ctx->drop_second_field) {
 366             parsed_frame.second_field = 1;
 367             av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
 368         }
 369     }
 370
 371     return 1;
 372 }
 373
 374 static int cuvid_is_buffer_full(AVCodecContext *avctx)
 375 {
 376     CuvidContext *ctx = avctx->priv_data;
 377
 378     int delay = ctx->cuparseinfo.ulMaxDisplayDelay;
 379     if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave && !ctx->drop_second_field)
 380         delay *= 2;
 381
 382     return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + delay >= ctx->nb_surfaces;
 383 }
 384
 385 static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
 386 {
 387     CuvidContext *ctx = avctx->priv_data;
 388     AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
 389     AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
 390     CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
 391     CUVIDSOURCEDATAPACKET cupkt;
 392     int ret = 0, eret = 0, is_flush = ctx->decoder_flushing;
 393
 394     av_log(avctx, AV_LOG_TRACE, "cuvid_decode_packet\n");
 395
 396     if (is_flush && avpkt && avpkt->size)
 397         return AVERROR_EOF;
 398
 399     if (cuvid_is_buffer_full(avctx) && avpkt && avpkt->size)
 400         return AVERROR(EAGAIN);
 401
 402     ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
 403     if (ret < 0) {
 404         return ret;
 405     }
 406
 407     memset(&cupkt, 0, sizeof(cupkt));
 408
 409     if (avpkt && avpkt->size) {
 410         cupkt.payload_size = avpkt->size;
 411         cupkt.payload = avpkt->data;
 412
 413         if (avpkt->pts != AV_NOPTS_VALUE) {
 414             cupkt.flags = CUVID_PKT_TIMESTAMP;
 415             if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
 416                 cupkt.timestamp = av_rescale_q(avpkt->pts, avctx->pkt_timebase, (AVRational){1, 10000000});
 417             else
 418                 cupkt.timestamp = avpkt->pts;
 419         }
 420     } else {
 421         cupkt.flags = CUVID_PKT_ENDOFSTREAM;
 422         ctx->decoder_flushing = 1;
 423     }
 424
 425     ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt));
 426
 427     if (ret < 0)
 428         goto error;
 429
 430     // cuvidParseVideoData doesn't return an error just because stuff failed...
 431     if (ctx->internal_error) {
 432         av_log(avctx, AV_LOG_ERROR, "cuvid decode callback error\n");
 433         ret = ctx->internal_error;
 434         goto error;
 435     }
 436
 437 error:
 438     eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
 439
 440     if (eret < 0)
 441         return eret;
 442     else if (ret < 0)
 443         return ret;
 444     else if (is_flush)
 445         return AVERROR_EOF;
 446     else
 447         return 0;
 448 }
 449
 450 static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
 451 {
 452     CuvidContext *ctx = avctx->priv_data;
 453     AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
 454     AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
 455     CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
 456     CUdeviceptr mapped_frame = 0;
 457     int ret = 0, eret = 0;
 458
 459     av_log(avctx, AV_LOG_TRACE, "cuvid_output_frame\n");
 460
 461     if (ctx->decoder_flushing) {
 462         ret = cuvid_decode_packet(avctx, NULL);
 463         if (ret < 0 && ret != AVERROR_EOF)
 464             return ret;
 465     }
 466
 467     if (!cuvid_is_buffer_full(avctx)) {
 468         AVPacket pkt = {0};
 469         ret = ff_decode_get_packet(avctx, &pkt);
 470         if (ret < 0 && ret != AVERROR_EOF)
 471             return ret;
 472         ret = cuvid_decode_packet(avctx, &pkt);
 473         av_packet_unref(&pkt);
 474         // cuvid_is_buffer_full() should avoid this.
 475         if (ret == AVERROR(EAGAIN))
 476             ret = AVERROR_EXTERNAL;
 477         if (ret < 0 && ret != AVERROR_EOF)
 478             return ret;
 479     }
 480
 481     ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
 482     if (ret < 0)
 483         return ret;
 484
 485     if (av_fifo_size(ctx->frame_queue)) {
 486         const AVPixFmtDescriptor *pixdesc;
 487         CuvidParsedFrame parsed_frame;
 488         CUVIDPROCPARAMS params;
 489         unsigned int pitch = 0;
 490         int offset = 0;
 491         int i;
 492
 493         av_fifo_generic_read(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
 494
 495         memset(&params, 0, sizeof(params));
 496         params.progressive_frame = parsed_frame.dispinfo.progressive_frame;
 497         params.second_field = parsed_frame.second_field;
 498         params.top_field_first = parsed_frame.dispinfo.top_field_first;
 499
 500         ret = CHECK_CU(ctx->cvdl->cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
 501         if (ret < 0)
 502             goto error;
 503
 504         if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
 505             ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0);
 506             if (ret < 0) {
 507                 av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer failed\n");
 508                 goto error;
 509             }
 510
 511             ret = ff_decode_frame_props(avctx, frame);
 512             if (ret < 0) {
 513                 av_log(avctx, AV_LOG_ERROR, "ff_decode_frame_props failed\n");
 514                 goto error;
 515             }
 516
 517             pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
 518
 519             for (i = 0; i < pixdesc->nb_components; i++) {
 520                 int height = avctx->height >> (i ? pixdesc->log2_chroma_h : 0);
 521                 CUDA_MEMCPY2D cpy = {
 522                     .srcMemoryType = CU_MEMORYTYPE_DEVICE,
 523                     .dstMemoryType = CU_MEMORYTYPE_DEVICE,
 524                     .srcDevice     = mapped_frame,
 525                     .dstDevice     = (CUdeviceptr)frame->data[i],
 526                     .srcPitch      = pitch,
 527                     .dstPitch      = frame->linesize[i],
 528                     .srcY          = offset,
 529                     .WidthInBytes  = FFMIN(pitch, frame->linesize[i]),
 530                     .Height        = height,
 531                 };
 532
 533                 ret = CHECK_CU(ctx->cudl->cuMemcpy2DAsync(&cpy, device_hwctx->stream));
 534                 if (ret < 0)
 535                     goto error;
 536
 537                 offset += height;
 538             }
 539         } else if (avctx->pix_fmt == AV_PIX_FMT_NV12      ||
 540                    avctx->pix_fmt == AV_PIX_FMT_P010      ||
 541                    avctx->pix_fmt == AV_PIX_FMT_P016      ||
 542                    avctx->pix_fmt == AV_PIX_FMT_YUV444P   ||
 543                    avctx->pix_fmt == AV_PIX_FMT_YUV444P16) {
 544             unsigned int offset = 0;
 545             AVFrame *tmp_frame = av_frame_alloc();
 546             if (!tmp_frame) {
 547                 av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
 548                 ret = AVERROR(ENOMEM);
 549                 goto error;
 550             }
 551
 552             pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
 553
 554             tmp_frame->format        = AV_PIX_FMT_CUDA;
 555             tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
 556             tmp_frame->width         = avctx->width;
 557             tmp_frame->height        = avctx->height;
 558
 559             /*
 560              * Note that the following logic would not work for three plane
 561              * YUV420 because the pitch value is different for the chroma
 562              * planes.
 563              */
 564             for (i = 0; i < pixdesc->nb_components; i++) {
 565                 tmp_frame->data[i]     = (uint8_t*)mapped_frame + offset;
 566                 tmp_frame->linesize[i] = pitch;
 567                 offset += pitch * (avctx->height >> (i ? pixdesc->log2_chroma_h : 0));
 568             }
 569
 570             ret = ff_get_buffer(avctx, frame, 0);
 571             if (ret < 0) {
 572                 av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");
 573                 av_frame_free(&tmp_frame);
 574                 goto error;
 575             }
 576
 577             ret = av_hwframe_transfer_data(frame, tmp_frame, 0);
 578             if (ret) {
 579                 av_log(avctx, AV_LOG_ERROR, "av_hwframe_transfer_data failed\n");
 580                 av_frame_free(&tmp_frame);
 581                 goto error;
 582             }
 583             av_frame_free(&tmp_frame);
 584         } else {
 585             ret = AVERROR_BUG;
 586             goto error;
 587         }
 588
 589         frame->key_frame = ctx->key_frame[parsed_frame.dispinfo.picture_index];
 590         frame->width = avctx->width;
 591         frame->height = avctx->height;
 592         if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
 593             frame->pts = av_rescale_q(parsed_frame.dispinfo.timestamp, (AVRational){1, 10000000}, avctx->pkt_timebase);
 594         else
 595             frame->pts = parsed_frame.dispinfo.timestamp;
 596
 597         if (parsed_frame.second_field) {
 598             if (ctx->prev_pts == INT64_MIN) {
 599                 ctx->prev_pts = frame->pts;
 600                 frame->pts += (avctx->pkt_timebase.den * avctx->framerate.den) / (avctx->pkt_timebase.num * avctx->framerate.num);
 601             } else {
 602                 int pts_diff = (frame->pts - ctx->prev_pts) / 2;
 603                 ctx->prev_pts = frame->pts;
 604                 frame->pts += pts_diff;
 605             }
 606         }
 607
 608         /* CUVIDs opaque reordering breaks the internal pkt logic.
 609          * So set pkt_pts and clear all the other pkt_ fields.
 610          */
 611 #if FF_API_PKT_PTS
 612 FF_DISABLE_DEPRECATION_WARNINGS
 613         frame->pkt_pts = frame->pts;
 614 FF_ENABLE_DEPRECATION_WARNINGS
 615 #endif
 616         frame->pkt_pos = -1;
 617         frame->pkt_duration = 0;
 618         frame->pkt_size = -1;
 619
 620         frame->interlaced_frame = !parsed_frame.is_deinterlacing && !parsed_frame.dispinfo.progressive_frame;
 621
 622         if (frame->interlaced_frame)
 623             frame->top_field_first = parsed_frame.dispinfo.top_field_first;
 624     } else if (ctx->decoder_flushing) {
 625         ret = AVERROR_EOF;
 626     } else {
 627         ret = AVERROR(EAGAIN);
 628     }
 629
 630 error:
 631     if (mapped_frame)
 632         eret = CHECK_CU(ctx->cvdl->cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame));
 633
 634     eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
 635
 636     if (eret < 0)
 637         return eret;
 638     else
 639         return ret;
 640 }
 641
 642 static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
 643 {
 644     CuvidContext *ctx = avctx->priv_data;
 645     AVFrame *frame = data;
 646     int ret = 0;
 647
 648     av_log(avctx, AV_LOG_TRACE, "cuvid_decode_frame\n");
 649
 650     if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave) {
 651         av_log(avctx, AV_LOG_ERROR, "Deinterlacing is not supported via the old API\n");
 652         return AVERROR(EINVAL);
 653     }
 654
 655     if (!ctx->decoder_flushing) {
 656         ret = cuvid_decode_packet(avctx, avpkt);
 657         if (ret < 0)
 658             return ret;
 659     }
 660
 661     ret = cuvid_output_frame(avctx, frame);
 662     if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
 663         *got_frame = 0;
 664     } else if (ret < 0) {
 665         return ret;
 666     } else {
 667         *got_frame = 1;
 668     }
 669
 670     return 0;
 671 }
 672
 673 static av_cold int cuvid_decode_end(AVCodecContext *avctx)
 674 {
 675     CuvidContext *ctx = avctx->priv_data;
 676
 677     av_fifo_freep(&ctx->frame_queue);
 678
 679     if (ctx->cuparser)
 680         ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
 681
 682     if (ctx->cudecoder)
 683         ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
 684
 685     ctx->cudl = NULL;
 686
 687     av_buffer_unref(&ctx->hwframe);
 688     av_buffer_unref(&ctx->hwdevice);
 689
 690     av_freep(&ctx->key_frame);
 691     av_freep(&ctx->cuparse_ext);
 692
 693     cuvid_free_functions(&ctx->cvdl);
 694
 695     return 0;
 696 }
 697
 698 static int cuvid_test_capabilities(AVCodecContext *avctx,
 699                                    const CUVIDPARSERPARAMS *cuparseinfo,
 700                                    int probed_width,
 701                                    int probed_height,
 702                                    int bit_depth)
 703 {
 704     CuvidContext *ctx = avctx->priv_data;
 705     CUVIDDECODECAPS *caps;
 706     int res8 = 0, res10 = 0, res12 = 0;
 707
 708     if (!ctx->cvdl->cuvidGetDecoderCaps) {
 709         av_log(avctx, AV_LOG_WARNING, "Used Nvidia driver is too old to perform a capability check.\n");
 710         av_log(avctx, AV_LOG_WARNING, "The minimum required version is "
 711 #if defined(_WIN32) || defined(__CYGWIN__)
 712             "378.66"
 713 #else
 714             "378.13"
 715 #endif
 716             ". Continuing blind.\n");
 717         ctx->caps8.bIsSupported = ctx->caps10.bIsSupported = 1;
 718         // 12 bit was not supported before the capability check was introduced, so disable it.
 719         ctx->caps12.bIsSupported = 0;
 720         return 0;
 721     }
 722
 723     ctx->caps8.eCodecType = ctx->caps10.eCodecType = ctx->caps12.eCodecType
 724         = cuparseinfo->CodecType;
 725     ctx->caps8.eChromaFormat = ctx->caps10.eChromaFormat = ctx->caps12.eChromaFormat
 726         = cudaVideoChromaFormat_420;
 727
 728     ctx->caps8.nBitDepthMinus8 = 0;
 729     ctx->caps10.nBitDepthMinus8 = 2;
 730     ctx->caps12.nBitDepthMinus8 = 4;
 731
 732     res8 = CHECK_CU(ctx->cvdl->cuvidGetDecoderCaps(&ctx->caps8));
 733     res10 = CHECK_CU(ctx->cvdl->cuvidGetDecoderCaps(&ctx->caps10));
 734     res12 = CHECK_CU(ctx->cvdl->cuvidGetDecoderCaps(&ctx->caps12));
 735
 736     av_log(avctx, AV_LOG_VERBOSE, "CUVID capabilities for %s:\n", avctx->codec->name);
 737     av_log(avctx, AV_LOG_VERBOSE, "8 bit: supported: %d, min_width: %d, max_width: %d, min_height: %d, max_height: %d\n",
 738            ctx->caps8.bIsSupported, ctx->caps8.nMinWidth, ctx->caps8.nMaxWidth, ctx->caps8.nMinHeight, ctx->caps8.nMaxHeight);
 739     av_log(avctx, AV_LOG_VERBOSE, "10 bit: supported: %d, min_width: %d, max_width: %d, min_height: %d, max_height: %d\n",
 740            ctx->caps10.bIsSupported, ctx->caps10.nMinWidth, ctx->caps10.nMaxWidth, ctx->caps10.nMinHeight, ctx->caps10.nMaxHeight);
 741     av_log(avctx, AV_LOG_VERBOSE, "12 bit: supported: %d, min_width: %d, max_width: %d, min_height: %d, max_height: %d\n",
 742            ctx->caps12.bIsSupported, ctx->caps12.nMinWidth, ctx->caps12.nMaxWidth, ctx->caps12.nMinHeight, ctx->caps12.nMaxHeight);
 743
 744     switch (bit_depth) {
 745     case 10:
 746         caps = &ctx->caps10;
 747         if (res10 < 0)
 748             return res10;
 749         break;
 750     case 12:
 751         caps = &ctx->caps12;
 752         if (res12 < 0)
 753             return res12;
 754         break;
 755     default:
 756         caps = &ctx->caps8;
 757         if (res8 < 0)
 758             return res8;
 759     }
 760
 761     if (!ctx->caps8.bIsSupported) {
 762         av_log(avctx, AV_LOG_ERROR, "Codec %s is not supported.\n", avctx->codec->name);
 763         return AVERROR(EINVAL);
 764     }
 765
 766     if (!caps->bIsSupported) {
 767         av_log(avctx, AV_LOG_ERROR, "Bit depth %d is not supported.\n", bit_depth);
 768         return AVERROR(EINVAL);
 769     }
 770
 771     if (probed_width > caps->nMaxWidth || probed_width < caps->nMinWidth) {
 772         av_log(avctx, AV_LOG_ERROR, "Video width %d not within range from %d to %d\n",
 773                probed_width, caps->nMinWidth, caps->nMaxWidth);
 774         return AVERROR(EINVAL);
 775     }
 776
 777     if (probed_height > caps->nMaxHeight || probed_height < caps->nMinHeight) {
 778         av_log(avctx, AV_LOG_ERROR, "Video height %d not within range from %d to %d\n",
 779                probed_height, caps->nMinHeight, caps->nMaxHeight);
 780         return AVERROR(EINVAL);
 781     }
 782
 783     if ((probed_width * probed_height) / 256 > caps->nMaxMBCount) {
 784         av_log(avctx, AV_LOG_ERROR, "Video macroblock count %d exceeds maximum of %d\n",
 785                (int)(probed_width * probed_height) / 256, caps->nMaxMBCount);
 786         return AVERROR(EINVAL);
 787     }
 788
 789     return 0;
 790 }
 791
 792 static av_cold int cuvid_decode_init(AVCodecContext *avctx)
 793 {
 794     CuvidContext *ctx = avctx->priv_data;
 795     AVCUDADeviceContext *device_hwctx;
 796     AVHWDeviceContext *device_ctx;
 797     AVHWFramesContext *hwframe_ctx;
 798     CUVIDSOURCEDATAPACKET seq_pkt;
 799     CUcontext cuda_ctx = NULL;
 800     CUcontext dummy;
 801     uint8_t *extradata;
 802     int extradata_size;
 803     int ret = 0;
 804
 805     enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
 806                                        AV_PIX_FMT_NV12,
 807                                        AV_PIX_FMT_NONE };
 808
 809     int probed_width = avctx->coded_width ? avctx->coded_width : 1280;
 810     int probed_height = avctx->coded_height ? avctx->coded_height : 720;
 811     int probed_bit_depth = 8;
 812
 813     const AVPixFmtDescriptor *probe_desc = av_pix_fmt_desc_get(avctx->pix_fmt);
 814     if (probe_desc && probe_desc->nb_components)
 815         probed_bit_depth = probe_desc->comp[0].depth;
 816
 817     // Accelerated transcoding scenarios with 'ffmpeg' require that the
 818     // pix_fmt be set to AV_PIX_FMT_CUDA early. The sw_pix_fmt, and the
 819     // pix_fmt for non-accelerated transcoding, do not need to be correct
 820     // but need to be set to something. We arbitrarily pick NV12.
 821     ret = ff_get_format(avctx, pix_fmts);
 822     if (ret < 0) {
 823         av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
 824         return ret;
 825     }
 826     avctx->pix_fmt = ret;
 827
 828     if (ctx->resize_expr && sscanf(ctx->resize_expr, "%dx%d",
 829                                    &ctx->resize.width, &ctx->resize.height) != 2) {
 830         av_log(avctx, AV_LOG_ERROR, "Invalid resize expressions\n");
 831         ret = AVERROR(EINVAL);
 832         goto error;
 833     }
 834
 835     if (ctx->crop_expr && sscanf(ctx->crop_expr, "%dx%dx%dx%d",
 836                                  &ctx->crop.top, &ctx->crop.bottom,
 837                                  &ctx->crop.left, &ctx->crop.right) != 4) {
 838         av_log(avctx, AV_LOG_ERROR, "Invalid cropping expressions\n");
 839         ret = AVERROR(EINVAL);
 840         goto error;
 841     }
 842
 843     ret = cuvid_load_functions(&ctx->cvdl, avctx);
 844     if (ret < 0) {
 845         av_log(avctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
 846         goto error;
 847     }
 848
 849     ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
 850     if (!ctx->frame_queue) {
 851         ret = AVERROR(ENOMEM);
 852         goto error;
 853     }
 854
 855     if (avctx->hw_frames_ctx) {
 856         ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
 857         if (!ctx->hwframe) {
 858             ret = AVERROR(ENOMEM);
 859             goto error;
 860         }
 861
 862         hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
 863
 864         ctx->hwdevice = av_buffer_ref(hwframe_ctx->device_ref);
 865         if (!ctx->hwdevice) {
 866             ret = AVERROR(ENOMEM);
 867             goto error;
 868         }
 869     } else {
 870         if (avctx->hw_device_ctx) {
 871             ctx->hwdevice = av_buffer_ref(avctx->hw_device_ctx);
 872             if (!ctx->hwdevice) {
 873                 ret = AVERROR(ENOMEM);
 874                 goto error;
 875             }
 876         } else {
 877             ret = av_hwdevice_ctx_create(&ctx->hwdevice, AV_HWDEVICE_TYPE_CUDA, ctx->cu_gpu, NULL, 0);
 878             if (ret < 0)
 879                 goto error;
 880         }
 881
 882         ctx->hwframe = av_hwframe_ctx_alloc(ctx->hwdevice);
 883         if (!ctx->hwframe) {
 884             av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_alloc failed\n");
 885             ret = AVERROR(ENOMEM);
 886             goto error;
 887         }
 888
 889         hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
 890     }
 891
 892     device_ctx = hwframe_ctx->device_ctx;
 893     device_hwctx = device_ctx->hwctx;
 894
 895     cuda_ctx = device_hwctx->cuda_ctx;
 896     ctx->cudl = device_hwctx->internal->cuda_dl;
 897
 898     memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo));
 899     memset(&seq_pkt, 0, sizeof(seq_pkt));
 900
 901     switch (avctx->codec->id) {
 902 #if CONFIG_H264_CUVID_DECODER
 903     case AV_CODEC_ID_H264:
 904         ctx->cuparseinfo.CodecType = cudaVideoCodec_H264;
 905         break;
 906 #endif
 907 #if CONFIG_HEVC_CUVID_DECODER
 908     case AV_CODEC_ID_HEVC:
 909         ctx->cuparseinfo.CodecType = cudaVideoCodec_HEVC;
 910         break;
 911 #endif
 912 #if CONFIG_MJPEG_CUVID_DECODER
 913     case AV_CODEC_ID_MJPEG:
 914         ctx->cuparseinfo.CodecType = cudaVideoCodec_JPEG;
 915         break;
 916 #endif
 917 #if CONFIG_MPEG1_CUVID_DECODER
 918     case AV_CODEC_ID_MPEG1VIDEO:
 919         ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG1;
 920         break;
 921 #endif
 922 #if CONFIG_MPEG2_CUVID_DECODER
 923     case AV_CODEC_ID_MPEG2VIDEO:
 924         ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG2;
 925         break;
 926 #endif
 927 #if CONFIG_MPEG4_CUVID_DECODER
 928     case AV_CODEC_ID_MPEG4:
 929         ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG4;
 930         break;
 931 #endif
 932 #if CONFIG_VP8_CUVID_DECODER
 933     case AV_CODEC_ID_VP8:
 934         ctx->cuparseinfo.CodecType = cudaVideoCodec_VP8;
 935         break;
 936 #endif
 937 #if CONFIG_VP9_CUVID_DECODER
 938     case AV_CODEC_ID_VP9:
 939         ctx->cuparseinfo.CodecType = cudaVideoCodec_VP9;
 940         break;
 941 #endif
 942 #if CONFIG_VC1_CUVID_DECODER
 943     case AV_CODEC_ID_VC1:
 944         ctx->cuparseinfo.CodecType = cudaVideoCodec_VC1;
 945         break;
 946 #endif
 947 #if CONFIG_AV1_CUVID_DECODER && defined(CUVID_HAS_AV1_SUPPORT)
 948     case AV_CODEC_ID_AV1:
 949         ctx->cuparseinfo.CodecType = cudaVideoCodec_AV1;
 950         break;
 951 #endif
 952     default:
 953         av_log(avctx, AV_LOG_ERROR, "Invalid CUVID codec!\n");
 954         return AVERROR_BUG;
 955     }
 956
 957     if (avctx->codec->bsfs) {
 958         const AVCodecParameters *par = avctx->internal->bsf->par_out;
 959         extradata = par->extradata;
 960         extradata_size = par->extradata_size;
 961     } else {
 962         extradata = avctx->extradata;
 963         extradata_size = avctx->extradata_size;
 964     }
 965
 966     ctx->cuparse_ext = av_mallocz(sizeof(*ctx->cuparse_ext)
 967             + FFMAX(extradata_size - (int)sizeof(ctx->cuparse_ext->raw_seqhdr_data), 0));
 968     if (!ctx->cuparse_ext) {
 969         ret = AVERROR(ENOMEM);
 970         goto error;
 971     }
 972
 973     if (extradata_size > 0)
 974         memcpy(ctx->cuparse_ext->raw_seqhdr_data, extradata, extradata_size);
 975     ctx->cuparse_ext->format.seqhdr_data_length = extradata_size;
 976
 977     ctx->cuparseinfo.pExtVideoInfo = ctx->cuparse_ext;
 978
 979     ctx->key_frame = av_mallocz(ctx->nb_surfaces * sizeof(int));
 980     if (!ctx->key_frame) {
 981         ret = AVERROR(ENOMEM);
 982         goto error;
 983     }
 984
 985     ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces;
 986     ctx->cuparseinfo.ulMaxDisplayDelay = 4;
 987     ctx->cuparseinfo.pUserData = avctx;
 988     ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
 989     ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
 990     ctx->cuparseinfo.pfnDisplayPicture = cuvid_handle_picture_display;
 991
 992     ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
 993     if (ret < 0)
 994         goto error;
 995
 996     ret = cuvid_test_capabilities(avctx, &ctx->cuparseinfo,
 997                                   probed_width,
 998                                   probed_height,
 999                                   probed_bit_depth);
1000     if (ret < 0)
1001         goto error;
1002
1003     ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
1004     if (ret < 0)
1005         goto error;
1006
1007     seq_pkt.payload = ctx->cuparse_ext->raw_seqhdr_data;
1008     seq_pkt.payload_size = ctx->cuparse_ext->format.seqhdr_data_length;
1009
1010     if (seq_pkt.payload && seq_pkt.payload_size) {
1011         ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
1012         if (ret < 0)
1013             goto error;
1014     }
1015
1016     ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
1017     if (ret < 0)
1018         goto error;
1019
1020     ctx->prev_pts = INT64_MIN;
1021
1022     if (!avctx->pkt_timebase.num || !avctx->pkt_timebase.den)
1023         av_log(avctx, AV_LOG_WARNING, "Invalid pkt_timebase, passing timestamps as-is.\n");
1024
1025     return 0;
1026
1027 error:
1028     cuvid_decode_end(avctx);
1029     return ret;
1030 }
1031
1032 static void cuvid_flush(AVCodecContext *avctx)
1033 {
1034     CuvidContext *ctx = avctx->priv_data;
1035     AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
1036     AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
1037     CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
1038     CUVIDSOURCEDATAPACKET seq_pkt = { 0 };
1039     int ret;
1040
1041     ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
1042     if (ret < 0)
1043         goto error;
1044
1045     av_fifo_freep(&ctx->frame_queue);
1046
1047     ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
1048     if (!ctx->frame_queue) {
1049         av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on flush\n");
1050         return;
1051     }
1052
1053     if (ctx->cudecoder) {
1054         ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
1055         ctx->cudecoder = NULL;
1056     }
1057
1058     if (ctx->cuparser) {
1059         ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
1060         ctx->cuparser = NULL;
1061     }
1062
1063     ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
1064     if (ret < 0)
1065         goto error;
1066
1067     seq_pkt.payload = ctx->cuparse_ext->raw_seqhdr_data;
1068     seq_pkt.payload_size = ctx->cuparse_ext->format.seqhdr_data_length;
1069
1070     if (seq_pkt.payload && seq_pkt.payload_size) {
1071         ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
1072         if (ret < 0)
1073             goto error;
1074     }
1075
1076     ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
1077     if (ret < 0)
1078         goto error;
1079
1080     ctx->prev_pts = INT64_MIN;
1081     ctx->decoder_flushing = 0;
1082
1083     return;
1084  error:
1085     av_log(avctx, AV_LOG_ERROR, "CUDA reinit on flush failed\n");
1086 }
1087
1088 #define OFFSET(x) offsetof(CuvidContext, x)
1089 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
1090 static const AVOption options[] = {
1091     { "deint",    "Set deinterlacing mode", OFFSET(deint_mode), AV_OPT_TYPE_INT,   { .i64 = cudaVideoDeinterlaceMode_Weave    }, cudaVideoDeinterlaceMode_Weave, cudaVideoDeinterlaceMode_Adaptive, VD, "deint" },
1092     { "weave",    "Weave deinterlacing (do nothing)",        0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Weave    }, 0, 0, VD, "deint" },
1093     { "bob",      "Bob deinterlacing",                       0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob      }, 0, 0, VD, "deint" },
1094     { "adaptive", "Adaptive deinterlacing",                  0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" },
1095     { "gpu",      "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
1096     { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD },
1097     { "drop_second_field", "Drop second field when deinterlacing", OFFSET(drop_second_field), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
1098     { "crop",     "Crop (top)x(bottom)x(left)x(right)", OFFSET(crop_expr), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
1099     { "resize",   "Resize (width)x(height)", OFFSET(resize_expr), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
1100     { NULL }
1101 };
1102
1103 static const AVCodecHWConfigInternal *const cuvid_hw_configs[] = {
1104     &(const AVCodecHWConfigInternal) {
1105         .public = {
1106             .pix_fmt     = AV_PIX_FMT_CUDA,
1107             .methods     = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX |
1108                            AV_CODEC_HW_CONFIG_METHOD_INTERNAL,
1109             .device_type = AV_HWDEVICE_TYPE_CUDA
1110         },
1111         .hwaccel = NULL,
1112     },
1113     NULL
1114 };
1115
/*
 * Define the AVClass and the AVCodec for one CUVID decoder.
 *
 *   x        - lower-case codec name; used to build the identifiers
 *              x##_cuvid_class / ff_##x##_cuvid_decoder and the
 *              "<x>_cuvid" name strings
 *   X        - AV_CODEC_ID_ suffix; also spelled out in the long name
 *   bsf_name - value stored in .bsfs (a string, or NULL)
 *
 * All decoders share the same callbacks, option table, pixel format list
 * and hardware configuration list.
 */
#define DEFINE_CUVID_CODEC(x, X, bsf_name) \
    static const AVClass x##_cuvid_class = { \
        .class_name = #x "_cuvid", \
        .item_name  = av_default_item_name, \
        .option     = options, \
        .version    = LIBAVUTIL_VERSION_INT, \
    }; \
    AVCodec ff_##x##_cuvid_decoder = { \
        .name           = #x "_cuvid", \
        .long_name      = NULL_IF_CONFIG_SMALL("Nvidia CUVID " #X " decoder"), \
        .wrapper_name   = "cuvid", \
        .type           = AVMEDIA_TYPE_VIDEO, \
        .id             = AV_CODEC_ID_##X, \
        .priv_class     = &x##_cuvid_class, \
        .priv_data_size = sizeof(CuvidContext), \
        .init           = cuvid_decode_init, \
        .decode         = cuvid_decode_frame, \
        .receive_frame  = cuvid_output_frame, \
        .flush          = cuvid_flush, \
        .close          = cuvid_decode_end, \
        .bsfs           = bsf_name, \
        .capabilities   = AV_CODEC_CAP_DELAY | \
                          AV_CODEC_CAP_AVOID_PROBING | \
                          AV_CODEC_CAP_HARDWARE, \
        .pix_fmts       = (const enum AVPixelFormat[]){ \
                              AV_PIX_FMT_CUDA, \
                              AV_PIX_FMT_NV12, \
                              AV_PIX_FMT_P010, \
                              AV_PIX_FMT_P016, \
                              AV_PIX_FMT_NONE \
                          }, \
        .hw_configs     = cuvid_hw_configs, \
    };
1145
/*
 * One decoder definition per codec enabled at configure time, sorted by
 * codec name. Only H.264 and HEVC pass a bitstream filter name; all other
 * codecs pass NULL. AV1 additionally requires CUVID_HAS_AV1_SUPPORT.
 */
#if CONFIG_AV1_CUVID_DECODER && defined(CUVID_HAS_AV1_SUPPORT)
DEFINE_CUVID_CODEC(av1, AV1, NULL)
#endif /* CONFIG_AV1_CUVID_DECODER && CUVID_HAS_AV1_SUPPORT */

#if CONFIG_H264_CUVID_DECODER
DEFINE_CUVID_CODEC(h264, H264, "h264_mp4toannexb")
#endif /* CONFIG_H264_CUVID_DECODER */

#if CONFIG_HEVC_CUVID_DECODER
DEFINE_CUVID_CODEC(hevc, HEVC, "hevc_mp4toannexb")
#endif /* CONFIG_HEVC_CUVID_DECODER */

#if CONFIG_MJPEG_CUVID_DECODER
DEFINE_CUVID_CODEC(mjpeg, MJPEG, NULL)
#endif /* CONFIG_MJPEG_CUVID_DECODER */

#if CONFIG_MPEG1_CUVID_DECODER
DEFINE_CUVID_CODEC(mpeg1, MPEG1VIDEO, NULL)
#endif /* CONFIG_MPEG1_CUVID_DECODER */

#if CONFIG_MPEG2_CUVID_DECODER
DEFINE_CUVID_CODEC(mpeg2, MPEG2VIDEO, NULL)
#endif /* CONFIG_MPEG2_CUVID_DECODER */

#if CONFIG_MPEG4_CUVID_DECODER
DEFINE_CUVID_CODEC(mpeg4, MPEG4, NULL)
#endif /* CONFIG_MPEG4_CUVID_DECODER */

#if CONFIG_VC1_CUVID_DECODER
DEFINE_CUVID_CODEC(vc1, VC1, NULL)
#endif /* CONFIG_VC1_CUVID_DECODER */

#if CONFIG_VP8_CUVID_DECODER
DEFINE_CUVID_CODEC(vp8, VP8, NULL)
#endif /* CONFIG_VP8_CUVID_DECODER */

#if CONFIG_VP9_CUVID_DECODER
DEFINE_CUVID_CODEC(vp9, VP9, NULL)
#endif /* CONFIG_VP9_CUVID_DECODER */