/*
 * Nvidia CUVID decoder
 * Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
  21
  22 #include "compat/cuda/dynlink_loader.h"
  23
  24 #include "libavutil/buffer.h"
  25 #include "libavutil/mathematics.h"
  26 #include "libavutil/hwcontext.h"
  27 #include "libavutil/hwcontext_cuda_internal.h"
  28 #include "libavutil/cuda_check.h"
  29 #include "libavutil/fifo.h"
  30 #include "libavutil/log.h"
  31 #include "libavutil/opt.h"
  32 #include "libavutil/pixdesc.h"
  33
  34 #include "avcodec.h"
  35 #include "decode.h"
  36 #include "hwaccel.h"
  37 #include "nvdec.h"
  38 #include "internal.h"
  39
  40 #if !NVDECAPI_CHECK_VERSION(9, 0)
  41 #define cudaVideoSurfaceFormat_YUV444 2
  42 #define cudaVideoSurfaceFormat_YUV444_16Bit 3
  43 #endif
  44
  45 typedef struct CuvidContext
  46 {
  47     AVClass *avclass;
  48
  49     CUvideodecoder cudecoder;
  50     CUvideoparser cuparser;
  51
  52     char *cu_gpu;
  53     int nb_surfaces;
  54     int drop_second_field;
  55     char *crop_expr;
  56     char *resize_expr;
  57
  58     struct {
  59         int left;
  60         int top;
  61         int right;
  62         int bottom;
  63     } crop;
  64
  65     struct {
  66         int width;
  67         int height;
  68     } resize;
  69
  70     AVBufferRef *hwdevice;
  71     AVBufferRef *hwframe;
  72
  73     AVBSFContext *bsf;
  74
  75     AVFifoBuffer *frame_queue;
  76
  77     int deint_mode;
  78     int deint_mode_current;
  79     int64_t prev_pts;
  80     int progressive_sequence;
  81
  82     int internal_error;
  83     int decoder_flushing;
  84
  85     int *key_frame;
  86
  87     cudaVideoCodec codec_type;
  88     cudaVideoChromaFormat chroma_format;
  89
  90     CUVIDDECODECAPS caps8, caps10, caps12;
  91
  92     CUVIDPARSERPARAMS cuparseinfo;
  93     CUVIDEOFORMATEX cuparse_ext;
  94
  95     CudaFunctions *cudl;
  96     CuvidFunctions *cvdl;
  97 } CuvidContext;
  98
  99 typedef struct CuvidParsedFrame
 100 {
 101     CUVIDPARSERDISPINFO dispinfo;
 102     int second_field;
 103     int is_deinterlacing;
 104 } CuvidParsedFrame;
 105
 106 #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x)
 107
 108 static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
 109 {
 110     AVCodecContext *avctx = opaque;
 111     CuvidContext *ctx = avctx->priv_data;
 112     AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
 113     CUVIDDECODECAPS *caps = NULL;
 114     CUVIDDECODECREATEINFO cuinfo;
 115     int surface_fmt;
 116     int chroma_444;
 117
 118     int old_width = avctx->width;
 119     int old_height = avctx->height;
 120
 121     enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
 122                                        AV_PIX_FMT_NONE,  // Will be updated below
 123                                        AV_PIX_FMT_NONE };
 124
 125     av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);
 126
 127     memset(&cuinfo, 0, sizeof(cuinfo));
 128
 129     ctx->internal_error = 0;
 130
 131     avctx->coded_width = cuinfo.ulWidth = format->coded_width;
 132     avctx->coded_height = cuinfo.ulHeight = format->coded_height;
 133
 134     // apply cropping
 135     cuinfo.display_area.left = format->display_area.left + ctx->crop.left;
 136     cuinfo.display_area.top = format->display_area.top + ctx->crop.top;
 137     cuinfo.display_area.right = format->display_area.right - ctx->crop.right;
 138     cuinfo.display_area.bottom = format->display_area.bottom - ctx->crop.bottom;
 139
 140     // width and height need to be set before calling ff_get_format
 141     if (ctx->resize_expr) {
 142         avctx->width = ctx->resize.width;
 143         avctx->height = ctx->resize.height;
 144     } else {
 145         avctx->width = cuinfo.display_area.right - cuinfo.display_area.left;
 146         avctx->height = cuinfo.display_area.bottom - cuinfo.display_area.top;
 147     }
 148
 149     // target width/height need to be multiples of two
 150     cuinfo.ulTargetWidth = avctx->width = (avctx->width + 1) & ~1;
 151     cuinfo.ulTargetHeight = avctx->height = (avctx->height + 1) & ~1;
 152
 153     // aspect ratio conversion, 1:1, depends on scaled resolution
 154     cuinfo.target_rect.left = 0;
 155     cuinfo.target_rect.top = 0;
 156     cuinfo.target_rect.right = cuinfo.ulTargetWidth;
 157     cuinfo.target_rect.bottom = cuinfo.ulTargetHeight;
 158
 159     chroma_444 = format->chroma_format == cudaVideoChromaFormat_444;
 160
 161     switch (format->bit_depth_luma_minus8) {
 162     case 0: // 8-bit
 163         pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
 164         caps = &ctx->caps8;
 165         break;
 166     case 2: // 10-bit
 167         pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
 168         caps = &ctx->caps10;
 169         break;
 170     case 4: // 12-bit
 171         pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
 172         caps = &ctx->caps12;
 173         break;
 174     default:
 175         break;
 176     }
 177
 178     if (!caps || !caps->bIsSupported) {
 179         av_log(avctx, AV_LOG_ERROR, "unsupported bit depth: %d\n",
 180                format->bit_depth_luma_minus8 + 8);
 181         ctx->internal_error = AVERROR(EINVAL);
 182         return 0;
 183     }
 184
 185     surface_fmt = ff_get_format(avctx, pix_fmts);
 186     if (surface_fmt < 0) {
 187         av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", surface_fmt);
 188         ctx->internal_error = AVERROR(EINVAL);
 189         return 0;
 190     }
 191
 192     av_log(avctx, AV_LOG_VERBOSE, "Formats: Original: %s | HW: %s | SW: %s\n",
 193            av_get_pix_fmt_name(avctx->pix_fmt),
 194            av_get_pix_fmt_name(surface_fmt),
 195            av_get_pix_fmt_name(avctx->sw_pix_fmt));
 196
 197     avctx->pix_fmt = surface_fmt;
 198
 199     // Update our hwframe ctx, as the get_format callback might have refreshed it!
 200     if (avctx->hw_frames_ctx) {
 201         av_buffer_unref(&ctx->hwframe);
 202
 203         ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
 204         if (!ctx->hwframe) {
 205             ctx->internal_error = AVERROR(ENOMEM);
 206             return 0;
 207         }
 208
 209         hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
 210     }
 211
 212     ff_set_sar(avctx, av_div_q(
 213         (AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y },
 214         (AVRational){ avctx->width, avctx->height }));
 215
 216     ctx->deint_mode_current = format->progressive_sequence
 217                               ? cudaVideoDeinterlaceMode_Weave
 218                               : ctx->deint_mode;
 219
 220     ctx->progressive_sequence = format->progressive_sequence;
 221
 222     if (!format->progressive_sequence && ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave)
 223         avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;
 224     else
 225         avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT;
 226
 227     if (format->video_signal_description.video_full_range_flag)
 228         avctx->color_range = AVCOL_RANGE_JPEG;
 229     else
 230         avctx->color_range = AVCOL_RANGE_MPEG;
 231
 232     avctx->color_primaries = format->video_signal_description.color_primaries;
 233     avctx->color_trc = format->video_signal_description.transfer_characteristics;
 234     avctx->colorspace = format->video_signal_description.matrix_coefficients;
 235
 236     if (format->bitrate)
 237         avctx->bit_rate = format->bitrate;
 238
 239     if (format->frame_rate.numerator && format->frame_rate.denominator) {
 240         avctx->framerate.num = format->frame_rate.numerator;
 241         avctx->framerate.den = format->frame_rate.denominator;
 242     }
 243
 244     if (ctx->cudecoder
 245             && avctx->coded_width == format->coded_width
 246             && avctx->coded_height == format->coded_height
 247             && avctx->width == old_width
 248             && avctx->height == old_height
 249             && ctx->chroma_format == format->chroma_format
 250             && ctx->codec_type == format->codec)
 251         return 1;
 252
 253     if (ctx->cudecoder) {
 254         av_log(avctx, AV_LOG_TRACE, "Re-initializing decoder\n");
 255         ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder));
 256         if (ctx->internal_error < 0)
 257             return 0;
 258         ctx->cudecoder = NULL;
 259     }
 260
 261     if (hwframe_ctx->pool && (
 262             hwframe_ctx->width < avctx->width ||
 263             hwframe_ctx->height < avctx->height ||
 264             hwframe_ctx->format != AV_PIX_FMT_CUDA ||
 265             hwframe_ctx->sw_format != avctx->sw_pix_fmt)) {
 266         av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
 267         av_log(avctx, AV_LOG_DEBUG, "width: %d <-> %d\n", hwframe_ctx->width, avctx->width);
 268         av_log(avctx, AV_LOG_DEBUG, "height: %d <-> %d\n", hwframe_ctx->height, avctx->height);
 269         av_log(avctx, AV_LOG_DEBUG, "format: %s <-> cuda\n", av_get_pix_fmt_name(hwframe_ctx->format));
 270         av_log(avctx, AV_LOG_DEBUG, "sw_format: %s <-> %s\n",
 271                av_get_pix_fmt_name(hwframe_ctx->sw_format), av_get_pix_fmt_name(avctx->sw_pix_fmt));
 272         ctx->internal_error = AVERROR(EINVAL);
 273         return 0;
 274     }
 275
 276     ctx->chroma_format = format->chroma_format;
 277
 278     cuinfo.CodecType = ctx->codec_type = format->codec;
 279     cuinfo.ChromaFormat = format->chroma_format;
 280
 281     switch (avctx->sw_pix_fmt) {
 282     case AV_PIX_FMT_NV12:
 283         cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
 284         break;
 285     case AV_PIX_FMT_P010:
 286     case AV_PIX_FMT_P016:
 287         cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
 288         break;
 289     case AV_PIX_FMT_YUV444P:
 290         cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444;
 291         break;
 292     case AV_PIX_FMT_YUV444P16:
 293         cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
 294         break;
 295     default:
 296         av_log(avctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
 297                av_get_pix_fmt_name(avctx->sw_pix_fmt));
 298         ctx->internal_error = AVERROR(EINVAL);
 299         return 0;
 300     }
 301
 302     cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces;
 303     cuinfo.ulNumOutputSurfaces = 1;
 304     cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
 305     cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
 306     cuinfo.DeinterlaceMode = ctx->deint_mode_current;
 307
 308     if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave && !ctx->drop_second_field)
 309         avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});
 310
 311     ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
 312     if (ctx->internal_error < 0)
 313         return 0;
 314
 315     if (!hwframe_ctx->pool) {
 316         hwframe_ctx->format = AV_PIX_FMT_CUDA;
 317         hwframe_ctx->sw_format = avctx->sw_pix_fmt;
 318         hwframe_ctx->width = avctx->width;
 319         hwframe_ctx->height = avctx->height;
 320
 321         if ((ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe)) < 0) {
 322             av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n");
 323             return 0;
 324         }
 325     }
 326
 327     return 1;
 328 }
 329
 330 static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* picparams)
 331 {
 332     AVCodecContext *avctx = opaque;
 333     CuvidContext *ctx = avctx->priv_data;
 334
 335     av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n");
 336
 337     ctx->key_frame[picparams->CurrPicIdx] = picparams->intra_pic_flag;
 338
 339     ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams));
 340     if (ctx->internal_error < 0)
 341         return 0;
 342
 343     return 1;
 344 }
 345
 346 static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINFO* dispinfo)
 347 {
 348     AVCodecContext *avctx = opaque;
 349     CuvidContext *ctx = avctx->priv_data;
 350     CuvidParsedFrame parsed_frame = { { 0 } };
 351
 352     parsed_frame.dispinfo = *dispinfo;
 353     ctx->internal_error = 0;
 354
 355     // For some reason, dispinfo->progressive_frame is sometimes wrong.
 356     parsed_frame.dispinfo.progressive_frame = ctx->progressive_sequence;
 357
 358     if (ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) {
 359         av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
 360     } else {
 361         parsed_frame.is_deinterlacing = 1;
 362         av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
 363         if (!ctx->drop_second_field) {
 364             parsed_frame.second_field = 1;
 365             av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
 366         }
 367     }
 368
 369     return 1;
 370 }
 371
 372 static int cuvid_is_buffer_full(AVCodecContext *avctx)
 373 {
 374     CuvidContext *ctx = avctx->priv_data;
 375
 376     int delay = ctx->cuparseinfo.ulMaxDisplayDelay;
 377     if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave && !ctx->drop_second_field)
 378         delay *= 2;
 379
 380     return (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + delay >= ctx->nb_surfaces;
 381 }
 382
 383 static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
 384 {
 385     CuvidContext *ctx = avctx->priv_data;
 386     AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
 387     AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
 388     CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
 389     CUVIDSOURCEDATAPACKET cupkt;
 390     AVPacket filter_packet = { 0 };
 391     AVPacket filtered_packet = { 0 };
 392     int ret = 0, eret = 0, is_flush = ctx->decoder_flushing;
 393
 394     av_log(avctx, AV_LOG_TRACE, "cuvid_decode_packet\n");
 395
 396     if (is_flush && avpkt && avpkt->size)
 397         return AVERROR_EOF;
 398
 399     if (cuvid_is_buffer_full(avctx) && avpkt && avpkt->size)
 400         return AVERROR(EAGAIN);
 401
 402     if (ctx->bsf && avpkt && avpkt->size) {
 403         if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) {
 404             av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n");
 405             return ret;
 406         }
 407
 408         if ((ret = av_bsf_send_packet(ctx->bsf, &filter_packet)) < 0) {
 409             av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n");
 410             av_packet_unref(&filter_packet);
 411             return ret;
 412         }
 413
 414         if ((ret = av_bsf_receive_packet(ctx->bsf, &filtered_packet)) < 0) {
 415             av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n");
 416             return ret;
 417         }
 418
 419         avpkt = &filtered_packet;
 420     }
 421
 422     ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
 423     if (ret < 0) {
 424         av_packet_unref(&filtered_packet);
 425         return ret;
 426     }
 427
 428     memset(&cupkt, 0, sizeof(cupkt));
 429
 430     if (avpkt && avpkt->size) {
 431         cupkt.payload_size = avpkt->size;
 432         cupkt.payload = avpkt->data;
 433
 434         if (avpkt->pts != AV_NOPTS_VALUE) {
 435             cupkt.flags = CUVID_PKT_TIMESTAMP;
 436             if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
 437                 cupkt.timestamp = av_rescale_q(avpkt->pts, avctx->pkt_timebase, (AVRational){1, 10000000});
 438             else
 439                 cupkt.timestamp = avpkt->pts;
 440         }
 441     } else {
 442         cupkt.flags = CUVID_PKT_ENDOFSTREAM;
 443         ctx->decoder_flushing = 1;
 444     }
 445
 446     ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt));
 447
 448     av_packet_unref(&filtered_packet);
 449
 450     if (ret < 0)
 451         goto error;
 452
 453     // cuvidParseVideoData doesn't return an error just because stuff failed...
 454     if (ctx->internal_error) {
 455         av_log(avctx, AV_LOG_ERROR, "cuvid decode callback error\n");
 456         ret = ctx->internal_error;
 457         goto error;
 458     }
 459
 460 error:
 461     eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
 462
 463     if (eret < 0)
 464         return eret;
 465     else if (ret < 0)
 466         return ret;
 467     else if (is_flush)
 468         return AVERROR_EOF;
 469     else
 470         return 0;
 471 }
 472
 473 static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
 474 {
 475     CuvidContext *ctx = avctx->priv_data;
 476     AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
 477     AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
 478     CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
 479     CUdeviceptr mapped_frame = 0;
 480     int ret = 0, eret = 0;
 481
 482     av_log(avctx, AV_LOG_TRACE, "cuvid_output_frame\n");
 483
 484     if (ctx->decoder_flushing) {
 485         ret = cuvid_decode_packet(avctx, NULL);
 486         if (ret < 0 && ret != AVERROR_EOF)
 487             return ret;
 488     }
 489
 490     if (!cuvid_is_buffer_full(avctx)) {
 491         AVPacket pkt = {0};
 492         ret = ff_decode_get_packet(avctx, &pkt);
 493         if (ret < 0 && ret != AVERROR_EOF)
 494             return ret;
 495         ret = cuvid_decode_packet(avctx, &pkt);
 496         av_packet_unref(&pkt);
 497         // cuvid_is_buffer_full() should avoid this.
 498         if (ret == AVERROR(EAGAIN))
 499             ret = AVERROR_EXTERNAL;
 500         if (ret < 0 && ret != AVERROR_EOF)
 501             return ret;
 502     }
 503
 504     ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
 505     if (ret < 0)
 506         return ret;
 507
 508     if (av_fifo_size(ctx->frame_queue)) {
 509         const AVPixFmtDescriptor *pixdesc;
 510         CuvidParsedFrame parsed_frame;
 511         CUVIDPROCPARAMS params;
 512         unsigned int pitch = 0;
 513         int offset = 0;
 514         int i;
 515
 516         av_fifo_generic_read(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
 517
 518         memset(&params, 0, sizeof(params));
 519         params.progressive_frame = parsed_frame.dispinfo.progressive_frame;
 520         params.second_field = parsed_frame.second_field;
 521         params.top_field_first = parsed_frame.dispinfo.top_field_first;
 522
 523         ret = CHECK_CU(ctx->cvdl->cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
 524         if (ret < 0)
 525             goto error;
 526
 527         if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
 528             ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0);
 529             if (ret < 0) {
 530                 av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer failed\n");
 531                 goto error;
 532             }
 533
 534             ret = ff_decode_frame_props(avctx, frame);
 535             if (ret < 0) {
 536                 av_log(avctx, AV_LOG_ERROR, "ff_decode_frame_props failed\n");
 537                 goto error;
 538             }
 539
 540             pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
 541
 542             for (i = 0; i < pixdesc->nb_components; i++) {
 543                 int height = avctx->height >> (i ? pixdesc->log2_chroma_h : 0);
 544                 CUDA_MEMCPY2D cpy = {
 545                     .srcMemoryType = CU_MEMORYTYPE_DEVICE,
 546                     .dstMemoryType = CU_MEMORYTYPE_DEVICE,
 547                     .srcDevice     = mapped_frame,
 548                     .dstDevice     = (CUdeviceptr)frame->data[i],
 549                     .srcPitch      = pitch,
 550                     .dstPitch      = frame->linesize[i],
 551                     .srcY          = offset,
 552                     .WidthInBytes  = FFMIN(pitch, frame->linesize[i]),
 553                     .Height        = height,
 554                 };
 555
 556                 ret = CHECK_CU(ctx->cudl->cuMemcpy2DAsync(&cpy, device_hwctx->stream));
 557                 if (ret < 0)
 558                     goto error;
 559
 560                 offset += height;
 561             }
 562         } else if (avctx->pix_fmt == AV_PIX_FMT_NV12      ||
 563                    avctx->pix_fmt == AV_PIX_FMT_P010      ||
 564                    avctx->pix_fmt == AV_PIX_FMT_P016      ||
 565                    avctx->pix_fmt == AV_PIX_FMT_YUV444P   ||
 566                    avctx->pix_fmt == AV_PIX_FMT_YUV444P16) {
 567             unsigned int offset = 0;
 568             AVFrame *tmp_frame = av_frame_alloc();
 569             if (!tmp_frame) {
 570                 av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
 571                 ret = AVERROR(ENOMEM);
 572                 goto error;
 573             }
 574
 575             pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
 576
 577             tmp_frame->format        = AV_PIX_FMT_CUDA;
 578             tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
 579             tmp_frame->width         = avctx->width;
 580             tmp_frame->height        = avctx->height;
 581
 582             /*
 583              * Note that the following logic would not work for three plane
 584              * YUV420 because the pitch value is different for the chroma
 585              * planes.
 586              */
 587             for (i = 0; i < pixdesc->nb_components; i++) {
 588                 tmp_frame->data[i]     = (uint8_t*)mapped_frame + offset;
 589                 tmp_frame->linesize[i] = pitch;
 590                 offset += pitch * (avctx->height >> (i ? pixdesc->log2_chroma_h : 0));
 591             }
 592
 593             ret = ff_get_buffer(avctx, frame, 0);
 594             if (ret < 0) {
 595                 av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");
 596                 av_frame_free(&tmp_frame);
 597                 goto error;
 598             }
 599
 600             ret = av_hwframe_transfer_data(frame, tmp_frame, 0);
 601             if (ret) {
 602                 av_log(avctx, AV_LOG_ERROR, "av_hwframe_transfer_data failed\n");
 603                 av_frame_free(&tmp_frame);
 604                 goto error;
 605             }
 606             av_frame_free(&tmp_frame);
 607         } else {
 608             ret = AVERROR_BUG;
 609             goto error;
 610         }
 611
 612         frame->key_frame = ctx->key_frame[parsed_frame.dispinfo.picture_index];
 613         frame->width = avctx->width;
 614         frame->height = avctx->height;
 615         if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
 616             frame->pts = av_rescale_q(parsed_frame.dispinfo.timestamp, (AVRational){1, 10000000}, avctx->pkt_timebase);
 617         else
 618             frame->pts = parsed_frame.dispinfo.timestamp;
 619
 620         if (parsed_frame.second_field) {
 621             if (ctx->prev_pts == INT64_MIN) {
 622                 ctx->prev_pts = frame->pts;
 623                 frame->pts += (avctx->pkt_timebase.den * avctx->framerate.den) / (avctx->pkt_timebase.num * avctx->framerate.num);
 624             } else {
 625                 int pts_diff = (frame->pts - ctx->prev_pts) / 2;
 626                 ctx->prev_pts = frame->pts;
 627                 frame->pts += pts_diff;
 628             }
 629         }
 630
 631         /* CUVIDs opaque reordering breaks the internal pkt logic.
 632          * So set pkt_pts and clear all the other pkt_ fields.
 633          */
 634 #if FF_API_PKT_PTS
 635 FF_DISABLE_DEPRECATION_WARNINGS
 636         frame->pkt_pts = frame->pts;
 637 FF_ENABLE_DEPRECATION_WARNINGS
 638 #endif
 639         frame->pkt_pos = -1;
 640         frame->pkt_duration = 0;
 641         frame->pkt_size = -1;
 642
 643         frame->interlaced_frame = !parsed_frame.is_deinterlacing && !parsed_frame.dispinfo.progressive_frame;
 644
 645         if (frame->interlaced_frame)
 646             frame->top_field_first = parsed_frame.dispinfo.top_field_first;
 647     } else if (ctx->decoder_flushing) {
 648         ret = AVERROR_EOF;
 649     } else {
 650         ret = AVERROR(EAGAIN);
 651     }
 652
 653 error:
 654     if (mapped_frame)
 655         eret = CHECK_CU(ctx->cvdl->cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame));
 656
 657     eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
 658
 659     if (eret < 0)
 660         return eret;
 661     else
 662         return ret;
 663 }
 664
 665 static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
 666 {
 667     CuvidContext *ctx = avctx->priv_data;
 668     AVFrame *frame = data;
 669     int ret = 0;
 670
 671     av_log(avctx, AV_LOG_TRACE, "cuvid_decode_frame\n");
 672
 673     if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave) {
 674         av_log(avctx, AV_LOG_ERROR, "Deinterlacing is not supported via the old API\n");
 675         return AVERROR(EINVAL);
 676     }
 677
 678     if (!ctx->decoder_flushing) {
 679         ret = cuvid_decode_packet(avctx, avpkt);
 680         if (ret < 0)
 681             return ret;
 682     }
 683
 684     ret = cuvid_output_frame(avctx, frame);
 685     if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
 686         *got_frame = 0;
 687     } else if (ret < 0) {
 688         return ret;
 689     } else {
 690         *got_frame = 1;
 691     }
 692
 693     return 0;
 694 }
 695
 696 static av_cold int cuvid_decode_end(AVCodecContext *avctx)
 697 {
 698     CuvidContext *ctx = avctx->priv_data;
 699
 700     av_fifo_freep(&ctx->frame_queue);
 701
 702     if (ctx->bsf)
 703         av_bsf_free(&ctx->bsf);
 704
 705     if (ctx->cuparser)
 706         ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
 707
 708     if (ctx->cudecoder)
 709         ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
 710
 711     ctx->cudl = NULL;
 712
 713     av_buffer_unref(&ctx->hwframe);
 714     av_buffer_unref(&ctx->hwdevice);
 715
 716     av_freep(&ctx->key_frame);
 717
 718     cuvid_free_functions(&ctx->cvdl);
 719
 720     return 0;
 721 }
 722
 723 static int cuvid_test_capabilities(AVCodecContext *avctx,
 724                                    const CUVIDPARSERPARAMS *cuparseinfo,
 725                                    int probed_width,
 726                                    int probed_height,
 727                                    int bit_depth)
 728 {
 729     CuvidContext *ctx = avctx->priv_data;
 730     CUVIDDECODECAPS *caps;
 731     int res8 = 0, res10 = 0, res12 = 0;
 732
 733     if (!ctx->cvdl->cuvidGetDecoderCaps) {
 734         av_log(avctx, AV_LOG_WARNING, "Used Nvidia driver is too old to perform a capability check.\n");
 735         av_log(avctx, AV_LOG_WARNING, "The minimum required version is "
 736 #if defined(_WIN32) || defined(__CYGWIN__)
 737             "378.66"
 738 #else
 739             "378.13"
 740 #endif
 741             ". Continuing blind.\n");
 742         ctx->caps8.bIsSupported = ctx->caps10.bIsSupported = 1;
 743         // 12 bit was not supported before the capability check was introduced, so disable it.
 744         ctx->caps12.bIsSupported = 0;
 745         return 0;
 746     }
 747
 748     ctx->caps8.eCodecType = ctx->caps10.eCodecType = ctx->caps12.eCodecType
 749         = cuparseinfo->CodecType;
 750     ctx->caps8.eChromaFormat = ctx->caps10.eChromaFormat = ctx->caps12.eChromaFormat
 751         = cudaVideoChromaFormat_420;
 752
 753     ctx->caps8.nBitDepthMinus8 = 0;
 754     ctx->caps10.nBitDepthMinus8 = 2;
 755     ctx->caps12.nBitDepthMinus8 = 4;
 756
 757     res8 = CHECK_CU(ctx->cvdl->cuvidGetDecoderCaps(&ctx->caps8));
 758     res10 = CHECK_CU(ctx->cvdl->cuvidGetDecoderCaps(&ctx->caps10));
 759     res12 = CHECK_CU(ctx->cvdl->cuvidGetDecoderCaps(&ctx->caps12));
 760
 761     av_log(avctx, AV_LOG_VERBOSE, "CUVID capabilities for %s:\n", avctx->codec->name);
 762     av_log(avctx, AV_LOG_VERBOSE, "8 bit: supported: %d, min_width: %d, max_width: %d, min_height: %d, max_height: %d\n",
 763            ctx->caps8.bIsSupported, ctx->caps8.nMinWidth, ctx->caps8.nMaxWidth, ctx->caps8.nMinHeight, ctx->caps8.nMaxHeight);
 764     av_log(avctx, AV_LOG_VERBOSE, "10 bit: supported: %d, min_width: %d, max_width: %d, min_height: %d, max_height: %d\n",
 765            ctx->caps10.bIsSupported, ctx->caps10.nMinWidth, ctx->caps10.nMaxWidth, ctx->caps10.nMinHeight, ctx->caps10.nMaxHeight);
 766     av_log(avctx, AV_LOG_VERBOSE, "12 bit: supported: %d, min_width: %d, max_width: %d, min_height: %d, max_height: %d\n",
 767            ctx->caps12.bIsSupported, ctx->caps12.nMinWidth, ctx->caps12.nMaxWidth, ctx->caps12.nMinHeight, ctx->caps12.nMaxHeight);
 768
 769     switch (bit_depth) {
 770     case 10:
 771         caps = &ctx->caps10;
 772         if (res10 < 0)
 773             return res10;
 774         break;
 775     case 12:
 776         caps = &ctx->caps12;
 777         if (res12 < 0)
 778             return res12;
 779         break;
 780     default:
 781         caps = &ctx->caps8;
 782         if (res8 < 0)
 783             return res8;
 784     }
 785
 786     if (!ctx->caps8.bIsSupported) {
 787         av_log(avctx, AV_LOG_ERROR, "Codec %s is not supported.\n", avctx->codec->name);
 788         return AVERROR(EINVAL);
 789     }
 790
 791     if (!caps->bIsSupported) {
 792         av_log(avctx, AV_LOG_ERROR, "Bit depth %d is not supported.\n", bit_depth);
 793         return AVERROR(EINVAL);
 794     }
 795
 796     if (probed_width > caps->nMaxWidth || probed_width < caps->nMinWidth) {
 797         av_log(avctx, AV_LOG_ERROR, "Video width %d not within range from %d to %d\n",
 798                probed_width, caps->nMinWidth, caps->nMaxWidth);
 799         return AVERROR(EINVAL);
 800     }
 801
 802     if (probed_height > caps->nMaxHeight || probed_height < caps->nMinHeight) {
 803         av_log(avctx, AV_LOG_ERROR, "Video height %d not within range from %d to %d\n",
 804                probed_height, caps->nMinHeight, caps->nMaxHeight);
 805         return AVERROR(EINVAL);
 806     }
 807
 808     return 0;
 809 }
 810
 811 static av_cold int cuvid_decode_init(AVCodecContext *avctx)
 812 {
 813     CuvidContext *ctx = avctx->priv_data;
 814     AVCUDADeviceContext *device_hwctx;
 815     AVHWDeviceContext *device_ctx;
 816     AVHWFramesContext *hwframe_ctx;
 817     CUVIDSOURCEDATAPACKET seq_pkt;
 818     CUcontext cuda_ctx = NULL;
 819     CUcontext dummy;
 820     const AVBitStreamFilter *bsf;
 821     int ret = 0;
 822
 823     enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
 824                                        AV_PIX_FMT_NV12,
 825                                        AV_PIX_FMT_NONE };
 826
 827     int probed_width = avctx->coded_width ? avctx->coded_width : 1280;
 828     int probed_height = avctx->coded_height ? avctx->coded_height : 720;
 829     int probed_bit_depth = 8;
 830
 831     const AVPixFmtDescriptor *probe_desc = av_pix_fmt_desc_get(avctx->pix_fmt);
 832     if (probe_desc && probe_desc->nb_components)
 833         probed_bit_depth = probe_desc->comp[0].depth;
 834
 835     // Accelerated transcoding scenarios with 'ffmpeg' require that the
 836     // pix_fmt be set to AV_PIX_FMT_CUDA early. The sw_pix_fmt, and the
 837     // pix_fmt for non-accelerated transcoding, do not need to be correct
 838     // but need to be set to something. We arbitrarily pick NV12.
 839     ret = ff_get_format(avctx, pix_fmts);
 840     if (ret < 0) {
 841         av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
 842         return ret;
 843     }
 844     avctx->pix_fmt = ret;
 845
 846     if (ctx->resize_expr && sscanf(ctx->resize_expr, "%dx%d",
 847                                    &ctx->resize.width, &ctx->resize.height) != 2) {
 848         av_log(avctx, AV_LOG_ERROR, "Invalid resize expressions\n");
 849         ret = AVERROR(EINVAL);
 850         goto error;
 851     }
 852
 853     if (ctx->crop_expr && sscanf(ctx->crop_expr, "%dx%dx%dx%d",
 854                                  &ctx->crop.top, &ctx->crop.bottom,
 855                                  &ctx->crop.left, &ctx->crop.right) != 4) {
 856         av_log(avctx, AV_LOG_ERROR, "Invalid cropping expressions\n");
 857         ret = AVERROR(EINVAL);
 858         goto error;
 859     }
 860
 861     ret = cuvid_load_functions(&ctx->cvdl, avctx);
 862     if (ret < 0) {
 863         av_log(avctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
 864         goto error;
 865     }
 866
 867     ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
 868     if (!ctx->frame_queue) {
 869         ret = AVERROR(ENOMEM);
 870         goto error;
 871     }
 872
 873     if (avctx->hw_frames_ctx) {
 874         ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
 875         if (!ctx->hwframe) {
 876             ret = AVERROR(ENOMEM);
 877             goto error;
 878         }
 879
 880         hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
 881
 882         ctx->hwdevice = av_buffer_ref(hwframe_ctx->device_ref);
 883         if (!ctx->hwdevice) {
 884             ret = AVERROR(ENOMEM);
 885             goto error;
 886         }
 887     } else {
 888         if (avctx->hw_device_ctx) {
 889             ctx->hwdevice = av_buffer_ref(avctx->hw_device_ctx);
 890             if (!ctx->hwdevice) {
 891                 ret = AVERROR(ENOMEM);
 892                 goto error;
 893             }
 894         } else {
 895             ret = av_hwdevice_ctx_create(&ctx->hwdevice, AV_HWDEVICE_TYPE_CUDA, ctx->cu_gpu, NULL, 0);
 896             if (ret < 0)
 897                 goto error;
 898         }
 899
 900         ctx->hwframe = av_hwframe_ctx_alloc(ctx->hwdevice);
 901         if (!ctx->hwframe) {
 902             av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_alloc failed\n");
 903             ret = AVERROR(ENOMEM);
 904             goto error;
 905         }
 906
 907         hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
 908     }
 909
 910     device_ctx = hwframe_ctx->device_ctx;
 911     device_hwctx = device_ctx->hwctx;
 912
 913     cuda_ctx = device_hwctx->cuda_ctx;
 914     ctx->cudl = device_hwctx->internal->cuda_dl;
 915
 916     memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo));
 917     memset(&ctx->cuparse_ext, 0, sizeof(ctx->cuparse_ext));
 918     memset(&seq_pkt, 0, sizeof(seq_pkt));
 919
 920     ctx->cuparseinfo.pExtVideoInfo = &ctx->cuparse_ext;
 921
 922     switch (avctx->codec->id) {
 923 #if CONFIG_H264_CUVID_DECODER
 924     case AV_CODEC_ID_H264:
 925         ctx->cuparseinfo.CodecType = cudaVideoCodec_H264;
 926         break;
 927 #endif
 928 #if CONFIG_HEVC_CUVID_DECODER
 929     case AV_CODEC_ID_HEVC:
 930         ctx->cuparseinfo.CodecType = cudaVideoCodec_HEVC;
 931         break;
 932 #endif
 933 #if CONFIG_MJPEG_CUVID_DECODER
 934     case AV_CODEC_ID_MJPEG:
 935         ctx->cuparseinfo.CodecType = cudaVideoCodec_JPEG;
 936         break;
 937 #endif
 938 #if CONFIG_MPEG1_CUVID_DECODER
 939     case AV_CODEC_ID_MPEG1VIDEO:
 940         ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG1;
 941         break;
 942 #endif
 943 #if CONFIG_MPEG2_CUVID_DECODER
 944     case AV_CODEC_ID_MPEG2VIDEO:
 945         ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG2;
 946         break;
 947 #endif
 948 #if CONFIG_MPEG4_CUVID_DECODER
 949     case AV_CODEC_ID_MPEG4:
 950         ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG4;
 951         break;
 952 #endif
 953 #if CONFIG_VP8_CUVID_DECODER
 954     case AV_CODEC_ID_VP8:
 955         ctx->cuparseinfo.CodecType = cudaVideoCodec_VP8;
 956         break;
 957 #endif
 958 #if CONFIG_VP9_CUVID_DECODER
 959     case AV_CODEC_ID_VP9:
 960         ctx->cuparseinfo.CodecType = cudaVideoCodec_VP9;
 961         break;
 962 #endif
 963 #if CONFIG_VC1_CUVID_DECODER
 964     case AV_CODEC_ID_VC1:
 965         ctx->cuparseinfo.CodecType = cudaVideoCodec_VC1;
 966         break;
 967 #endif
 968     default:
 969         av_log(avctx, AV_LOG_ERROR, "Invalid CUVID codec!\n");
 970         return AVERROR_BUG;
 971     }
 972
 973     if (avctx->codec->id == AV_CODEC_ID_H264 || avctx->codec->id == AV_CODEC_ID_HEVC) {
 974         if (avctx->codec->id == AV_CODEC_ID_H264)
 975             bsf = av_bsf_get_by_name("h264_mp4toannexb");
 976         else
 977             bsf = av_bsf_get_by_name("hevc_mp4toannexb");
 978
 979         if (!bsf) {
 980             ret = AVERROR_BSF_NOT_FOUND;
 981             goto error;
 982         }
 983         if (ret = av_bsf_alloc(bsf, &ctx->bsf)) {
 984             goto error;
 985         }
 986         if (((ret = avcodec_parameters_from_context(ctx->bsf->par_in, avctx)) < 0) || ((ret = av_bsf_init(ctx->bsf)) < 0)) {
 987             av_bsf_free(&ctx->bsf);
 988             goto error;
 989         }
 990
 991         ctx->cuparse_ext.format.seqhdr_data_length = ctx->bsf->par_out->extradata_size;
 992         memcpy(ctx->cuparse_ext.raw_seqhdr_data,
 993                ctx->bsf->par_out->extradata,
 994                FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), ctx->bsf->par_out->extradata_size));
 995     } else if (avctx->extradata_size > 0) {
 996         ctx->cuparse_ext.format.seqhdr_data_length = avctx->extradata_size;
 997         memcpy(ctx->cuparse_ext.raw_seqhdr_data,
 998                avctx->extradata,
 999                FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), avctx->extradata_size));
1000     }
1001
1002     ctx->key_frame = av_mallocz(ctx->nb_surfaces * sizeof(int));
1003     if (!ctx->key_frame) {
1004         ret = AVERROR(ENOMEM);
1005         goto error;
1006     }
1007
1008     ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces;
1009     ctx->cuparseinfo.ulMaxDisplayDelay = 4;
1010     ctx->cuparseinfo.pUserData = avctx;
1011     ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
1012     ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
1013     ctx->cuparseinfo.pfnDisplayPicture = cuvid_handle_picture_display;
1014
1015     ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
1016     if (ret < 0)
1017         goto error;
1018
1019     ret = cuvid_test_capabilities(avctx, &ctx->cuparseinfo,
1020                                   probed_width,
1021                                   probed_height,
1022                                   probed_bit_depth);
1023     if (ret < 0)
1024         goto error;
1025
1026     ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
1027     if (ret < 0)
1028         goto error;
1029
1030     seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
1031     seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
1032
1033     if (seq_pkt.payload && seq_pkt.payload_size) {
1034         ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
1035         if (ret < 0)
1036             goto error;
1037     }
1038
1039     ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
1040     if (ret < 0)
1041         goto error;
1042
1043     ctx->prev_pts = INT64_MIN;
1044
1045     if (!avctx->pkt_timebase.num || !avctx->pkt_timebase.den)
1046         av_log(avctx, AV_LOG_WARNING, "Invalid pkt_timebase, passing timestamps as-is.\n");
1047
1048     return 0;
1049
1050 error:
1051     cuvid_decode_end(avctx);
1052     return ret;
1053 }
1054
1055 static void cuvid_flush(AVCodecContext *avctx)
1056 {
1057     CuvidContext *ctx = avctx->priv_data;
1058     AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
1059     AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
1060     CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
1061     CUVIDSOURCEDATAPACKET seq_pkt = { 0 };
1062     int ret;
1063
1064     ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
1065     if (ret < 0)
1066         goto error;
1067
1068     av_fifo_freep(&ctx->frame_queue);
1069
1070     ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
1071     if (!ctx->frame_queue) {
1072         av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on flush\n");
1073         return;
1074     }
1075
1076     if (ctx->cudecoder) {
1077         ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
1078         ctx->cudecoder = NULL;
1079     }
1080
1081     if (ctx->cuparser) {
1082         ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
1083         ctx->cuparser = NULL;
1084     }
1085
1086     ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
1087     if (ret < 0)
1088         goto error;
1089
1090     seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
1091     seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
1092
1093     if (seq_pkt.payload && seq_pkt.payload_size) {
1094         ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
1095         if (ret < 0)
1096             goto error;
1097     }
1098
1099     ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
1100     if (ret < 0)
1101         goto error;
1102
1103     ctx->prev_pts = INT64_MIN;
1104     ctx->decoder_flushing = 0;
1105
1106     return;
1107  error:
1108     av_log(avctx, AV_LOG_ERROR, "CUDA reinit on flush failed\n");
1109 }
1110
1111 #define OFFSET(x) offsetof(CuvidContext, x)
1112 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
1113 static const AVOption options[] = {
1114     { "deint",    "Set deinterlacing mode", OFFSET(deint_mode), AV_OPT_TYPE_INT,   { .i64 = cudaVideoDeinterlaceMode_Weave    }, cudaVideoDeinterlaceMode_Weave, cudaVideoDeinterlaceMode_Adaptive, VD, "deint" },
1115     { "weave",    "Weave deinterlacing (do nothing)",        0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Weave    }, 0, 0, VD, "deint" },
1116     { "bob",      "Bob deinterlacing",                       0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob      }, 0, 0, VD, "deint" },
1117     { "adaptive", "Adaptive deinterlacing",                  0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" },
1118     { "gpu",      "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
1119     { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD },
1120     { "drop_second_field", "Drop second field when deinterlacing", OFFSET(drop_second_field), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
1121     { "crop",     "Crop (top)x(bottom)x(left)x(right)", OFFSET(crop_expr), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
1122     { "resize",   "Resize (width)x(height)", OFFSET(resize_expr), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
1123     { NULL }
1124 };
1125
1126 static const AVCodecHWConfigInternal *cuvid_hw_configs[] = {
1127     &(const AVCodecHWConfigInternal) {
1128         .public = {
1129             .pix_fmt     = AV_PIX_FMT_CUDA,
1130             .methods     = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX |
1131                            AV_CODEC_HW_CONFIG_METHOD_INTERNAL,
1132             .device_type = AV_HWDEVICE_TYPE_CUDA
1133         },
1134         .hwaccel = NULL,
1135     },
1136     NULL
1137 };
1138
/*
 * Define one CUVID decoder wrapper.
 *
 * x is the lower-case prefix used for the generated identifiers and the
 * codec name ("<x>_cuvid"); X is the AV_CODEC_ID_ suffix and is also used in
 * the long name. The macro emits an AVClass carrying the shared option table
 * and the AVCodec ff_<x>_cuvid_decoder wired to the callbacks of this file.
 */
#define DEFINE_CUVID_CODEC(x, X)                                              \
    static const AVClass x##_cuvid_class = {                                  \
        .class_name = #x "_cuvid",                                            \
        .item_name  = av_default_item_name,                                   \
        .option     = options,                                                \
        .version    = LIBAVUTIL_VERSION_INT,                                  \
    };                                                                        \
    AVCodec ff_##x##_cuvid_decoder = {                                        \
        .name           = #x "_cuvid",                                        \
        .long_name      = NULL_IF_CONFIG_SMALL("Nvidia CUVID " #X " decoder"), \
        .wrapper_name   = "cuvid",                                            \
        .type           = AVMEDIA_TYPE_VIDEO,                                 \
        .id             = AV_CODEC_ID_##X,                                    \
        .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \
        .priv_data_size = sizeof(CuvidContext),                               \
        .priv_class     = &x##_cuvid_class,                                   \
        .init           = cuvid_decode_init,                                  \
        .close          = cuvid_decode_end,                                   \
        .decode         = cuvid_decode_frame,                                 \
        .receive_frame  = cuvid_output_frame,                                 \
        .flush          = cuvid_flush,                                        \
        .pix_fmts       = (const enum AVPixelFormat[]){                       \
                              AV_PIX_FMT_CUDA,                                \
                              AV_PIX_FMT_NV12,                                \
                              AV_PIX_FMT_P010,                                \
                              AV_PIX_FMT_P016,                                \
                              AV_PIX_FMT_NONE                                 \
                          },                                                  \
        .hw_configs     = cuvid_hw_configs,                                   \
    };
1167
/*
 * One decoder definition per codec, each emitted only when its
 * CONFIG_*_CUVID_DECODER build switch is enabled.
 */
#if CONFIG_HEVC_CUVID_DECODER
DEFINE_CUVID_CODEC(hevc, HEVC)
#endif /* CONFIG_HEVC_CUVID_DECODER */

#if CONFIG_H264_CUVID_DECODER
DEFINE_CUVID_CODEC(h264, H264)
#endif /* CONFIG_H264_CUVID_DECODER */

#if CONFIG_MJPEG_CUVID_DECODER
DEFINE_CUVID_CODEC(mjpeg, MJPEG)
#endif /* CONFIG_MJPEG_CUVID_DECODER */

#if CONFIG_MPEG1_CUVID_DECODER
DEFINE_CUVID_CODEC(mpeg1, MPEG1VIDEO)
#endif /* CONFIG_MPEG1_CUVID_DECODER */

#if CONFIG_MPEG2_CUVID_DECODER
DEFINE_CUVID_CODEC(mpeg2, MPEG2VIDEO)
#endif /* CONFIG_MPEG2_CUVID_DECODER */

#if CONFIG_MPEG4_CUVID_DECODER
DEFINE_CUVID_CODEC(mpeg4, MPEG4)
#endif /* CONFIG_MPEG4_CUVID_DECODER */

#if CONFIG_VP8_CUVID_DECODER
DEFINE_CUVID_CODEC(vp8, VP8)
#endif /* CONFIG_VP8_CUVID_DECODER */

#if CONFIG_VP9_CUVID_DECODER
DEFINE_CUVID_CODEC(vp9, VP9)
#endif /* CONFIG_VP9_CUVID_DECODER */

#if CONFIG_VC1_CUVID_DECODER
DEFINE_CUVID_CODEC(vc1, VC1)
#endif /* CONFIG_VC1_CUVID_DECODER */