git.sesse.net Git - ffmpeg/blob - libavcodec/cuvid.c

   1 /*
   2  * Nvidia CUVID decoder
   3  * Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "compat/cuda/dynlink_loader.h"
  23
  24 #include "libavutil/buffer.h"
  25 #include "libavutil/mathematics.h"
  26 #include "libavutil/hwcontext.h"
  27 #include "libavutil/hwcontext_cuda_internal.h"
  28 #include "libavutil/fifo.h"
  29 #include "libavutil/log.h"
  30 #include "libavutil/opt.h"
  31 #include "libavutil/pixdesc.h"
  32
  33 #include "avcodec.h"
  34 #include "internal.h"
  35
  36 typedef struct CuvidContext
  37 {
  38     AVClass *avclass;
  39
  40     CUvideodecoder cudecoder;
  41     CUvideoparser cuparser;
  42
  43     char *cu_gpu;
  44     int nb_surfaces;
  45
  46     AVBufferRef *hwdevice;
  47     AVBufferRef *hwframe;
  48
  49     AVBSFContext *bsf;
  50
  51     AVFifoBuffer *frame_queue;
  52
  53     int deint_mode;
  54     int64_t prev_pts;
  55
  56     int internal_error;
  57     int decoder_flushing;
  58
  59     cudaVideoCodec codec_type;
  60     cudaVideoChromaFormat chroma_format;
  61
  62     CUVIDPARSERPARAMS cuparseinfo;
  63     CUVIDEOFORMATEX cuparse_ext;
  64
  65     CudaFunctions *cudl;
  66     CuvidFunctions *cvdl;
  67 } CuvidContext;
  68
  69 typedef struct CuvidParsedFrame
  70 {
  71     CUVIDPARSERDISPINFO dispinfo;
  72     int second_field;
  73     int is_deinterlacing;
  74 } CuvidParsedFrame;
  75
  76 static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
  77 {
  78     CuvidContext *ctx = avctx->priv_data;
  79     const char *err_name;
  80     const char *err_string;
  81
  82     av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);
  83
  84     if (err == CUDA_SUCCESS)
  85         return 0;
  86
  87     ctx->cudl->cuGetErrorName(err, &err_name);
  88     ctx->cudl->cuGetErrorString(err, &err_string);
  89
  90     av_log(avctx, AV_LOG_ERROR, "%s failed", func);
  91     if (err_name && err_string)
  92         av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
  93     av_log(avctx, AV_LOG_ERROR, "\n");
  94
  95     return AVERROR_EXTERNAL;
  96 }
  97
  98 #define CHECK_CU(x) check_cu(avctx, (x), #x)
  99
 100 static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* format)
 101 {
 102     AVCodecContext *avctx = opaque;
 103     CuvidContext *ctx = avctx->priv_data;
 104     AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
 105     CUVIDDECODECREATEINFO cuinfo;
 106     int surface_fmt;
 107
 108     enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
 109                                        AV_PIX_FMT_NONE,  // Will be updated below
 110                                        AV_PIX_FMT_NONE };
 111
 112     av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);
 113
 114     ctx->internal_error = 0;
 115
 116     switch (format->bit_depth_luma_minus8) {
 117     case 0: // 8-bit
 118         pix_fmts[1] = AV_PIX_FMT_NV12;
 119         break;
 120     case 2: // 10-bit
 121         pix_fmts[1] = AV_PIX_FMT_P010;
 122         break;
 123     case 4: // 12-bit
 124         pix_fmts[1] = AV_PIX_FMT_P016;
 125         break;
 126     default:
 127         av_log(avctx, AV_LOG_ERROR, "unsupported bit depth: %d\n",
 128                format->bit_depth_luma_minus8 + 8);
 129         ctx->internal_error = AVERROR(EINVAL);
 130         return 0;
 131     }
 132     surface_fmt = ff_get_format(avctx, pix_fmts);
 133     if (surface_fmt < 0) {
 134         av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", surface_fmt);
 135         ctx->internal_error = AVERROR(EINVAL);
 136         return 0;
 137     }
 138
 139     av_log(avctx, AV_LOG_VERBOSE, "Formats: Original: %s | HW: %s | SW: %s\n",
 140            av_get_pix_fmt_name(avctx->pix_fmt),
 141            av_get_pix_fmt_name(surface_fmt),
 142            av_get_pix_fmt_name(avctx->sw_pix_fmt));
 143
 144     avctx->pix_fmt = surface_fmt;
 145
 146     avctx->width = format->display_area.right;
 147     avctx->height = format->display_area.bottom;
 148
 149     ff_set_sar(avctx, av_div_q(
 150         (AVRational){ format->display_aspect_ratio.x, format->display_aspect_ratio.y },
 151         (AVRational){ avctx->width, avctx->height }));
 152
 153     if (!format->progressive_sequence && ctx->deint_mode == cudaVideoDeinterlaceMode_Weave)
 154         avctx->flags |= AV_CODEC_FLAG_INTERLACED_DCT;
 155     else
 156         avctx->flags &= ~AV_CODEC_FLAG_INTERLACED_DCT;
 157
 158     if (format->video_signal_description.video_full_range_flag)
 159         avctx->color_range = AVCOL_RANGE_JPEG;
 160     else
 161         avctx->color_range = AVCOL_RANGE_MPEG;
 162
 163     avctx->color_primaries = format->video_signal_description.color_primaries;
 164     avctx->color_trc = format->video_signal_description.transfer_characteristics;
 165     avctx->colorspace = format->video_signal_description.matrix_coefficients;
 166
 167     if (format->bitrate)
 168         avctx->bit_rate = format->bitrate;
 169
 170     if (format->frame_rate.numerator && format->frame_rate.denominator) {
 171         avctx->framerate.num = format->frame_rate.numerator;
 172         avctx->framerate.den = format->frame_rate.denominator;
 173     }
 174
 175     if (ctx->cudecoder
 176             && avctx->coded_width == format->coded_width
 177             && avctx->coded_height == format->coded_height
 178             && ctx->chroma_format == format->chroma_format
 179             && ctx->codec_type == format->codec)
 180         return 1;
 181
 182     if (ctx->cudecoder) {
 183         av_log(avctx, AV_LOG_TRACE, "Re-initializing decoder\n");
 184         ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder));
 185         if (ctx->internal_error < 0)
 186             return 0;
 187         ctx->cudecoder = NULL;
 188     }
 189
 190     if (hwframe_ctx->pool && (
 191             hwframe_ctx->width < avctx->width ||
 192             hwframe_ctx->height < avctx->height ||
 193             hwframe_ctx->format != AV_PIX_FMT_CUDA ||
 194             hwframe_ctx->sw_format != avctx->sw_pix_fmt)) {
 195         av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
 196         ctx->internal_error = AVERROR(EINVAL);
 197         return 0;
 198     }
 199
 200     if (format->chroma_format != cudaVideoChromaFormat_420) {
 201         av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n");
 202         ctx->internal_error = AVERROR(EINVAL);
 203         return 0;
 204     }
 205
 206     avctx->coded_width = format->coded_width;
 207     avctx->coded_height = format->coded_height;
 208
 209     ctx->chroma_format = format->chroma_format;
 210
 211     memset(&cuinfo, 0, sizeof(cuinfo));
 212
 213     cuinfo.CodecType = ctx->codec_type = format->codec;
 214     cuinfo.ChromaFormat = format->chroma_format;
 215
 216     switch (avctx->sw_pix_fmt) {
 217     case AV_PIX_FMT_NV12:
 218         cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
 219         break;
 220     case AV_PIX_FMT_P010:
 221     case AV_PIX_FMT_P016:
 222         cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
 223         break;
 224     default:
 225         av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n");
 226         ctx->internal_error = AVERROR(EINVAL);
 227         return 0;
 228     }
 229
 230     cuinfo.ulWidth = avctx->coded_width;
 231     cuinfo.ulHeight = avctx->coded_height;
 232     cuinfo.ulTargetWidth = cuinfo.ulWidth;
 233     cuinfo.ulTargetHeight = cuinfo.ulHeight;
 234
 235     cuinfo.target_rect.left = 0;
 236     cuinfo.target_rect.top = 0;
 237     cuinfo.target_rect.right = cuinfo.ulWidth;
 238     cuinfo.target_rect.bottom = cuinfo.ulHeight;
 239
 240     cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces;
 241     cuinfo.ulNumOutputSurfaces = 1;
 242     cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
 243     cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
 244
 245     if (format->progressive_sequence) {
 246         ctx->deint_mode = cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
 247     } else {
 248         cuinfo.DeinterlaceMode = ctx->deint_mode;
 249     }
 250
 251     if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave)
 252         avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});
 253
 254     ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
 255     if (ctx->internal_error < 0)
 256         return 0;
 257
 258     if (!hwframe_ctx->pool) {
 259         hwframe_ctx->format = AV_PIX_FMT_CUDA;
 260         hwframe_ctx->sw_format = avctx->sw_pix_fmt;
 261         hwframe_ctx->width = avctx->width;
 262         hwframe_ctx->height = avctx->height;
 263
 264         if ((ctx->internal_error = av_hwframe_ctx_init(ctx->hwframe)) < 0) {
 265             av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_init failed\n");
 266             return 0;
 267         }
 268     }
 269
 270     return 1;
 271 }
 272
 273 static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* picparams)
 274 {
 275     AVCodecContext *avctx = opaque;
 276     CuvidContext *ctx = avctx->priv_data;
 277
 278     av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n");
 279
 280     ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams));
 281     if (ctx->internal_error < 0)
 282         return 0;
 283
 284     return 1;
 285 }
 286
 287 static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINFO* dispinfo)
 288 {
 289     AVCodecContext *avctx = opaque;
 290     CuvidContext *ctx = avctx->priv_data;
 291     CuvidParsedFrame parsed_frame = { { 0 } };
 292
 293     parsed_frame.dispinfo = *dispinfo;
 294     ctx->internal_error = 0;
 295
 296     if (ctx->deint_mode == cudaVideoDeinterlaceMode_Weave) {
 297         av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
 298     } else {
 299         parsed_frame.is_deinterlacing = 1;
 300         av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
 301         parsed_frame.second_field = 1;
 302         av_fifo_generic_write(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
 303     }
 304
 305     return 1;
 306 }
 307
 308 static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
 309 {
 310     CuvidContext *ctx = avctx->priv_data;
 311     AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
 312     AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
 313     CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
 314     CUVIDSOURCEDATAPACKET cupkt;
 315     AVPacket filter_packet = { 0 };
 316     AVPacket filtered_packet = { 0 };
 317     int ret = 0, eret = 0, is_flush = ctx->decoder_flushing;
 318
 319     av_log(avctx, AV_LOG_TRACE, "cuvid_decode_packet\n");
 320
 321     if (is_flush && avpkt && avpkt->size)
 322         return AVERROR_EOF;
 323
 324     if ((av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + 2 > ctx->nb_surfaces && avpkt && avpkt->size)
 325         return AVERROR(EAGAIN);
 326
 327     if (ctx->bsf && avpkt && avpkt->size) {
 328         if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) {
 329             av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n");
 330             return ret;
 331         }
 332
 333         if ((ret = av_bsf_send_packet(ctx->bsf, &filter_packet)) < 0) {
 334             av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n");
 335             av_packet_unref(&filter_packet);
 336             return ret;
 337         }
 338
 339         if ((ret = av_bsf_receive_packet(ctx->bsf, &filtered_packet)) < 0) {
 340             av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n");
 341             return ret;
 342         }
 343
 344         avpkt = &filtered_packet;
 345     }
 346
 347     ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
 348     if (ret < 0) {
 349         av_packet_unref(&filtered_packet);
 350         return ret;
 351     }
 352
 353     memset(&cupkt, 0, sizeof(cupkt));
 354
 355     if (avpkt && avpkt->size) {
 356         cupkt.payload_size = avpkt->size;
 357         cupkt.payload = avpkt->data;
 358
 359         if (avpkt->pts != AV_NOPTS_VALUE) {
 360             cupkt.flags = CUVID_PKT_TIMESTAMP;
 361             if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
 362                 cupkt.timestamp = av_rescale_q(avpkt->pts, avctx->pkt_timebase, (AVRational){1, 10000000});
 363             else
 364                 cupkt.timestamp = avpkt->pts;
 365         }
 366     } else {
 367         cupkt.flags = CUVID_PKT_ENDOFSTREAM;
 368         ctx->decoder_flushing = 1;
 369     }
 370
 371     ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt));
 372
 373     av_packet_unref(&filtered_packet);
 374
 375     if (ret < 0)
 376         goto error;
 377
 378     // cuvidParseVideoData doesn't return an error just because stuff failed...
 379     if (ctx->internal_error) {
 380         av_log(avctx, AV_LOG_ERROR, "cuvid decode callback error\n");
 381         ret = ctx->internal_error;
 382         goto error;
 383     }
 384
 385 error:
 386     eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
 387
 388     if (eret < 0)
 389         return eret;
 390     else if (ret < 0)
 391         return ret;
 392     else if (is_flush)
 393         return AVERROR_EOF;
 394     else
 395         return 0;
 396 }
 397
 398 static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
 399 {
 400     CuvidContext *ctx = avctx->priv_data;
 401     AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
 402     AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
 403     CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
 404     CUdeviceptr mapped_frame = 0;
 405     int ret = 0, eret = 0;
 406
 407     av_log(avctx, AV_LOG_TRACE, "cuvid_output_frame\n");
 408
 409     if (ctx->decoder_flushing) {
 410         ret = cuvid_decode_packet(avctx, NULL);
 411         if (ret < 0 && ret != AVERROR_EOF)
 412             return ret;
 413     }
 414
 415     ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
 416     if (ret < 0)
 417         return ret;
 418
 419     if (av_fifo_size(ctx->frame_queue)) {
 420         CuvidParsedFrame parsed_frame;
 421         CUVIDPROCPARAMS params;
 422         unsigned int pitch = 0;
 423         int offset = 0;
 424         int i;
 425
 426         av_fifo_generic_read(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL);
 427
 428         memset(&params, 0, sizeof(params));
 429         params.progressive_frame = parsed_frame.dispinfo.progressive_frame;
 430         params.second_field = parsed_frame.second_field;
 431         params.top_field_first = parsed_frame.dispinfo.top_field_first;
 432
 433         ret = CHECK_CU(ctx->cvdl->cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
 434         if (ret < 0)
 435             goto error;
 436
 437         if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
 438             ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0);
 439             if (ret < 0) {
 440                 av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer failed\n");
 441                 goto error;
 442             }
 443
 444             ret = ff_decode_frame_props(avctx, frame);
 445             if (ret < 0) {
 446                 av_log(avctx, AV_LOG_ERROR, "ff_decode_frame_props failed\n");
 447                 goto error;
 448             }
 449
 450             for (i = 0; i < 2; i++) {
 451                 CUDA_MEMCPY2D cpy = {
 452                     .srcMemoryType = CU_MEMORYTYPE_DEVICE,
 453                     .dstMemoryType = CU_MEMORYTYPE_DEVICE,
 454                     .srcDevice     = mapped_frame,
 455                     .dstDevice     = (CUdeviceptr)frame->data[i],
 456                     .srcPitch      = pitch,
 457                     .dstPitch      = frame->linesize[i],
 458                     .srcY          = offset,
 459                     .WidthInBytes  = FFMIN(pitch, frame->linesize[i]),
 460                     .Height        = avctx->height >> (i ? 1 : 0),
 461                 };
 462
 463                 ret = CHECK_CU(ctx->cudl->cuMemcpy2D(&cpy));
 464                 if (ret < 0)
 465                     goto error;
 466
 467                 offset += avctx->coded_height;
 468             }
 469         } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
 470                    avctx->pix_fmt == AV_PIX_FMT_P010 ||
 471                    avctx->pix_fmt == AV_PIX_FMT_P016) {
 472             AVFrame *tmp_frame = av_frame_alloc();
 473             if (!tmp_frame) {
 474                 av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
 475                 ret = AVERROR(ENOMEM);
 476                 goto error;
 477             }
 478
 479             tmp_frame->format        = AV_PIX_FMT_CUDA;
 480             tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe);
 481             tmp_frame->data[0]       = (uint8_t*)mapped_frame;
 482             tmp_frame->linesize[0]   = pitch;
 483             tmp_frame->data[1]       = (uint8_t*)(mapped_frame + avctx->coded_height * pitch);
 484             tmp_frame->linesize[1]   = pitch;
 485             tmp_frame->width         = avctx->width;
 486             tmp_frame->height        = avctx->height;
 487
 488             ret = ff_get_buffer(avctx, frame, 0);
 489             if (ret < 0) {
 490                 av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");
 491                 av_frame_free(&tmp_frame);
 492                 goto error;
 493             }
 494
 495             ret = av_hwframe_transfer_data(frame, tmp_frame, 0);
 496             if (ret) {
 497                 av_log(avctx, AV_LOG_ERROR, "av_hwframe_transfer_data failed\n");
 498                 av_frame_free(&tmp_frame);
 499                 goto error;
 500             }
 501             av_frame_free(&tmp_frame);
 502         } else {
 503             ret = AVERROR_BUG;
 504             goto error;
 505         }
 506
 507         frame->width = avctx->width;
 508         frame->height = avctx->height;
 509         if (avctx->pkt_timebase.num && avctx->pkt_timebase.den)
 510             frame->pts = av_rescale_q(parsed_frame.dispinfo.timestamp, (AVRational){1, 10000000}, avctx->pkt_timebase);
 511         else
 512             frame->pts = parsed_frame.dispinfo.timestamp;
 513
 514         if (parsed_frame.second_field) {
 515             if (ctx->prev_pts == INT64_MIN) {
 516                 ctx->prev_pts = frame->pts;
 517                 frame->pts += (avctx->pkt_timebase.den * avctx->framerate.den) / (avctx->pkt_timebase.num * avctx->framerate.num);
 518             } else {
 519                 int pts_diff = (frame->pts - ctx->prev_pts) / 2;
 520                 ctx->prev_pts = frame->pts;
 521                 frame->pts += pts_diff;
 522             }
 523         }
 524
 525         /* CUVIDs opaque reordering breaks the internal pkt logic.
 526          * So set pkt_pts and clear all the other pkt_ fields.
 527          */
 528 #if FF_API_PKT_PTS
 529 FF_DISABLE_DEPRECATION_WARNINGS
 530         frame->pkt_pts = frame->pts;
 531 FF_ENABLE_DEPRECATION_WARNINGS
 532 #endif
 533         av_frame_set_pkt_pos(frame, -1);
 534         av_frame_set_pkt_duration(frame, 0);
 535         av_frame_set_pkt_size(frame, -1);
 536
 537         frame->interlaced_frame = !parsed_frame.is_deinterlacing && !parsed_frame.dispinfo.progressive_frame;
 538
 539         if (frame->interlaced_frame)
 540             frame->top_field_first = parsed_frame.dispinfo.top_field_first;
 541     } else if (ctx->decoder_flushing) {
 542         ret = AVERROR_EOF;
 543     } else {
 544         ret = AVERROR(EAGAIN);
 545     }
 546
 547 error:
 548     if (mapped_frame)
 549         eret = CHECK_CU(ctx->cvdl->cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame));
 550
 551     eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
 552
 553     if (eret < 0)
 554         return eret;
 555     else
 556         return ret;
 557 }
 558
 559 static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
 560 {
 561     CuvidContext *ctx = avctx->priv_data;
 562     AVFrame *frame = data;
 563     int ret = 0;
 564
 565     av_log(avctx, AV_LOG_TRACE, "cuvid_decode_frame\n");
 566
 567     if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave) {
 568         av_log(avctx, AV_LOG_ERROR, "Deinterlacing is not supported via the old API\n");
 569         return AVERROR(EINVAL);
 570     }
 571
 572     if (!ctx->decoder_flushing) {
 573         ret = cuvid_decode_packet(avctx, avpkt);
 574         if (ret < 0)
 575             return ret;
 576     }
 577
 578     ret = cuvid_output_frame(avctx, frame);
 579     if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
 580         *got_frame = 0;
 581     } else if (ret < 0) {
 582         return ret;
 583     } else {
 584         *got_frame = 1;
 585     }
 586
 587     return 0;
 588 }
 589
 590 static av_cold int cuvid_decode_end(AVCodecContext *avctx)
 591 {
 592     CuvidContext *ctx = avctx->priv_data;
 593
 594     av_fifo_freep(&ctx->frame_queue);
 595
 596     if (ctx->bsf)
 597         av_bsf_free(&ctx->bsf);
 598
 599     if (ctx->cuparser)
 600         ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
 601
 602     if (ctx->cudecoder)
 603         ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
 604
 605     ctx->cudl = NULL;
 606
 607     av_buffer_unref(&ctx->hwframe);
 608     av_buffer_unref(&ctx->hwdevice);
 609
 610     cuvid_free_functions(&ctx->cvdl);
 611
 612     return 0;
 613 }
 614
 615 static int cuvid_test_dummy_decoder(AVCodecContext *avctx, CUVIDPARSERPARAMS *cuparseinfo)
 616 {
 617     CuvidContext *ctx = avctx->priv_data;
 618     CUVIDDECODECREATEINFO cuinfo;
 619     CUvideodecoder cudec = 0;
 620     int ret = 0;
 621
 622     memset(&cuinfo, 0, sizeof(cuinfo));
 623
 624     cuinfo.CodecType = cuparseinfo->CodecType;
 625     cuinfo.ChromaFormat = cudaVideoChromaFormat_420;
 626     cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
 627
 628     cuinfo.ulWidth = 1280;
 629     cuinfo.ulHeight = 720;
 630     cuinfo.ulTargetWidth = cuinfo.ulWidth;
 631     cuinfo.ulTargetHeight = cuinfo.ulHeight;
 632
 633     cuinfo.target_rect.left = 0;
 634     cuinfo.target_rect.top = 0;
 635     cuinfo.target_rect.right = cuinfo.ulWidth;
 636     cuinfo.target_rect.bottom = cuinfo.ulHeight;
 637
 638     cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces;
 639     cuinfo.ulNumOutputSurfaces = 1;
 640     cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
 641     cuinfo.bitDepthMinus8 = 0;
 642
 643     cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
 644
 645     ret = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&cudec, &cuinfo));
 646     if (ret < 0)
 647         return ret;
 648
 649     ret = CHECK_CU(ctx->cvdl->cuvidDestroyDecoder(cudec));
 650     if (ret < 0)
 651         return ret;
 652
 653     return 0;
 654 }
 655
 656 static av_cold int cuvid_decode_init(AVCodecContext *avctx)
 657 {
 658     CuvidContext *ctx = avctx->priv_data;
 659     AVCUDADeviceContext *device_hwctx;
 660     AVHWDeviceContext *device_ctx;
 661     AVHWFramesContext *hwframe_ctx;
 662     CUVIDSOURCEDATAPACKET seq_pkt;
 663     CUcontext cuda_ctx = NULL;
 664     CUcontext dummy;
 665     const AVBitStreamFilter *bsf;
 666     int ret = 0;
 667
 668     enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
 669                                        AV_PIX_FMT_NV12,
 670                                        AV_PIX_FMT_NONE };
 671
 672     // Accelerated transcoding scenarios with 'ffmpeg' require that the
 673     // pix_fmt be set to AV_PIX_FMT_CUDA early. The sw_pix_fmt, and the
 674     // pix_fmt for non-accelerated transcoding, do not need to be correct
 675     // but need to be set to something. We arbitrarily pick NV12.
 676     ret = ff_get_format(avctx, pix_fmts);
 677     if (ret < 0) {
 678         av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
 679         return ret;
 680     }
 681     avctx->pix_fmt = ret;
 682
 683     ret = cuvid_load_functions(&ctx->cvdl);
 684     if (ret < 0) {
 685         av_log(avctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
 686         goto error;
 687     }
 688
 689     ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
 690     if (!ctx->frame_queue) {
 691         ret = AVERROR(ENOMEM);
 692         goto error;
 693     }
 694
 695     if (avctx->hw_frames_ctx) {
 696         ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
 697         if (!ctx->hwframe) {
 698             ret = AVERROR(ENOMEM);
 699             goto error;
 700         }
 701
 702         hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
 703
 704         ctx->hwdevice = av_buffer_ref(hwframe_ctx->device_ref);
 705         if (!ctx->hwdevice) {
 706             ret = AVERROR(ENOMEM);
 707             goto error;
 708         }
 709     } else {
 710         ret = av_hwdevice_ctx_create(&ctx->hwdevice, AV_HWDEVICE_TYPE_CUDA, ctx->cu_gpu, NULL, 0);
 711         if (ret < 0)
 712             goto error;
 713
 714         ctx->hwframe = av_hwframe_ctx_alloc(ctx->hwdevice);
 715         if (!ctx->hwframe) {
 716             av_log(avctx, AV_LOG_ERROR, "av_hwframe_ctx_alloc failed\n");
 717             ret = AVERROR(ENOMEM);
 718             goto error;
 719         }
 720
 721         hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
 722     }
 723
 724     device_ctx = hwframe_ctx->device_ctx;
 725     device_hwctx = device_ctx->hwctx;
 726
 727     cuda_ctx = device_hwctx->cuda_ctx;
 728     ctx->cudl = device_hwctx->internal->cuda_dl;
 729
 730     memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo));
 731     memset(&ctx->cuparse_ext, 0, sizeof(ctx->cuparse_ext));
 732     memset(&seq_pkt, 0, sizeof(seq_pkt));
 733
 734     ctx->cuparseinfo.pExtVideoInfo = &ctx->cuparse_ext;
 735
 736     switch (avctx->codec->id) {
 737 #if CONFIG_H264_CUVID_DECODER
 738     case AV_CODEC_ID_H264:
 739         ctx->cuparseinfo.CodecType = cudaVideoCodec_H264;
 740         break;
 741 #endif
 742 #if CONFIG_HEVC_CUVID_DECODER
 743     case AV_CODEC_ID_HEVC:
 744         ctx->cuparseinfo.CodecType = cudaVideoCodec_HEVC;
 745         break;
 746 #endif
 747 #if CONFIG_MJPEG_CUVID_DECODER
 748     case AV_CODEC_ID_MJPEG:
 749         ctx->cuparseinfo.CodecType = cudaVideoCodec_JPEG;
 750         break;
 751 #endif
 752 #if CONFIG_MPEG1_CUVID_DECODER
 753     case AV_CODEC_ID_MPEG1VIDEO:
 754         ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG1;
 755         break;
 756 #endif
 757 #if CONFIG_MPEG2_CUVID_DECODER
 758     case AV_CODEC_ID_MPEG2VIDEO:
 759         ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG2;
 760         break;
 761 #endif
 762 #if CONFIG_MPEG4_CUVID_DECODER
 763     case AV_CODEC_ID_MPEG4:
 764         ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG4;
 765         break;
 766 #endif
 767 #if CONFIG_VP8_CUVID_DECODER
 768     case AV_CODEC_ID_VP8:
 769         ctx->cuparseinfo.CodecType = cudaVideoCodec_VP8;
 770         break;
 771 #endif
 772 #if CONFIG_VP9_CUVID_DECODER
 773     case AV_CODEC_ID_VP9:
 774         ctx->cuparseinfo.CodecType = cudaVideoCodec_VP9;
 775         break;
 776 #endif
 777 #if CONFIG_VC1_CUVID_DECODER
 778     case AV_CODEC_ID_VC1:
 779         ctx->cuparseinfo.CodecType = cudaVideoCodec_VC1;
 780         break;
 781 #endif
 782     default:
 783         av_log(avctx, AV_LOG_ERROR, "Invalid CUVID codec!\n");
 784         return AVERROR_BUG;
 785     }
 786
 787     if (avctx->codec->id == AV_CODEC_ID_H264 || avctx->codec->id == AV_CODEC_ID_HEVC) {
 788         if (avctx->codec->id == AV_CODEC_ID_H264)
 789             bsf = av_bsf_get_by_name("h264_mp4toannexb");
 790         else
 791             bsf = av_bsf_get_by_name("hevc_mp4toannexb");
 792
 793         if (!bsf) {
 794             ret = AVERROR_BSF_NOT_FOUND;
 795             goto error;
 796         }
 797         if (ret = av_bsf_alloc(bsf, &ctx->bsf)) {
 798             goto error;
 799         }
 800         if (((ret = avcodec_parameters_from_context(ctx->bsf->par_in, avctx)) < 0) || ((ret = av_bsf_init(ctx->bsf)) < 0)) {
 801             av_bsf_free(&ctx->bsf);
 802             goto error;
 803         }
 804
 805         ctx->cuparse_ext.format.seqhdr_data_length = ctx->bsf->par_out->extradata_size;
 806         memcpy(ctx->cuparse_ext.raw_seqhdr_data,
 807                ctx->bsf->par_out->extradata,
 808                FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), ctx->bsf->par_out->extradata_size));
 809     } else if (avctx->extradata_size > 0) {
 810         ctx->cuparse_ext.format.seqhdr_data_length = avctx->extradata_size;
 811         memcpy(ctx->cuparse_ext.raw_seqhdr_data,
 812                avctx->extradata,
 813                FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), avctx->extradata_size));
 814     }
 815
 816     ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces;
 817     ctx->cuparseinfo.ulMaxDisplayDelay = 4;
 818     ctx->cuparseinfo.pUserData = avctx;
 819     ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
 820     ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
 821     ctx->cuparseinfo.pfnDisplayPicture = cuvid_handle_picture_display;
 822
 823     ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
 824     if (ret < 0)
 825         goto error;
 826
 827     ret = cuvid_test_dummy_decoder(avctx, &ctx->cuparseinfo);
 828     if (ret < 0)
 829         goto error;
 830
 831     ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
 832     if (ret < 0)
 833         goto error;
 834
 835     seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
 836     seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
 837
 838     if (seq_pkt.payload && seq_pkt.payload_size) {
 839         ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
 840         if (ret < 0)
 841             goto error;
 842     }
 843
 844     ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
 845     if (ret < 0)
 846         goto error;
 847
 848     ctx->prev_pts = INT64_MIN;
 849
 850     if (!avctx->pkt_timebase.num || !avctx->pkt_timebase.den)
 851         av_log(avctx, AV_LOG_WARNING, "Invalid pkt_timebase, passing timestamps as-is.\n");
 852
 853     return 0;
 854
 855 error:
 856     cuvid_decode_end(avctx);
 857     return ret;
 858 }
 859
 860 static void cuvid_flush(AVCodecContext *avctx)
 861 {
 862     CuvidContext *ctx = avctx->priv_data;
 863     AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data;
 864     AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
 865     CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
 866     CUVIDSOURCEDATAPACKET seq_pkt = { 0 };
 867     int ret;
 868
 869     ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
 870     if (ret < 0)
 871         goto error;
 872
 873     av_fifo_freep(&ctx->frame_queue);
 874
 875     ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
 876     if (!ctx->frame_queue) {
 877         av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on flush\n");
 878         return;
 879     }
 880
 881     if (ctx->cudecoder) {
 882         ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
 883         ctx->cudecoder = NULL;
 884     }
 885
 886     if (ctx->cuparser) {
 887         ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
 888         ctx->cuparser = NULL;
 889     }
 890
 891     ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
 892     if (ret < 0)
 893         goto error;
 894
 895     seq_pkt.payload = ctx->cuparse_ext.raw_seqhdr_data;
 896     seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
 897
 898     if (seq_pkt.payload && seq_pkt.payload_size) {
 899         ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
 900         if (ret < 0)
 901             goto error;
 902     }
 903
 904     ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
 905     if (ret < 0)
 906         goto error;
 907
 908     ctx->prev_pts = INT64_MIN;
 909     ctx->decoder_flushing = 0;
 910
 911     return;
 912  error:
 913     av_log(avctx, AV_LOG_ERROR, "CUDA reinit on flush failed\n");
 914 }
 915
 /*
  * AVOption table referenced by the AVClass of every decoder generated with
  * DEFINE_CUVID_CODEC.
  *
  * OFFSET(x) is the byte offset of field x inside CuvidContext.
  * VD is the flag set (video parameter | decoding parameter) used by all entries.
  *
  * "deint"    integer stored in deint_mode; default is
  *            cudaVideoDeinterlaceMode_Weave and the accepted range is
  *            cudaVideoDeinterlaceMode_Weave..cudaVideoDeinterlaceMode_Adaptive.
  *            "weave", "bob" and "adaptive" are named constants of the
  *            "deint" unit.
  * "gpu"      string stored in cu_gpu; default is NULL.
  * "surfaces" integer stored in nb_surfaces; default is 25 and the accepted
  *            range is 0..INT_MAX.
  */
 916 #define OFFSET(x) offsetof(CuvidContext, x)
 917 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 918 static const AVOption options[] = {
 919     { "deint",    "Set deinterlacing mode", OFFSET(deint_mode), AV_OPT_TYPE_INT,   { .i64 = cudaVideoDeinterlaceMode_Weave    }, cudaVideoDeinterlaceMode_Weave, cudaVideoDeinterlaceMode_Adaptive, VD, "deint" },
 920     { "weave",    "Weave deinterlacing (do nothing)",        0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Weave    }, 0, 0, VD, "deint" },
 921     { "bob",      "Bob deinterlacing",                       0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob      }, 0, 0, VD, "deint" },
 922     { "adaptive", "Adaptive deinterlacing",                  0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" },
 923     { "gpu",      "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
 924     { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD },
 925     { NULL }
 926 };
 927
 /*
  * DEFINE_CUVID_CODEC(x, X) emits the three objects that make up one CUVID
  * decoder wrapper:
  *
  *   x_cuvid_class          static AVClass named "<x>_cuvid" that exposes the
  *                          options[] table.
  *   ff_<x>_cuvid_hwaccel   AVHWAccel for AV_CODEC_ID_<X> with pixel format
  *                          AV_PIX_FMT_CUDA.
  *   ff_<x>_cuvid_decoder   AVCodec for AV_CODEC_ID_<X> whose private data is a
  *                          CuvidContext and whose init/close/decode/
  *                          send_packet/receive_frame/flush callbacks are the
  *                          cuvid_* functions of this file.
  *
  * x is the name fragment used in identifiers and in the codec name string;
  * X is the suffix of the AV_CODEC_ID_ enumerator and is also inserted into
  * the long name. The advertised pixel formats are CUDA, NV12, P010 and P016.
  */
 928 #define DEFINE_CUVID_CODEC(x, X) \
 929     static const AVClass x##_cuvid_class = { \
 930         .class_name = #x "_cuvid", \
 931         .item_name = av_default_item_name, \
 932         .option = options, \
 933         .version = LIBAVUTIL_VERSION_INT, \
 934     }; \
 935     AVHWAccel ff_##x##_cuvid_hwaccel = { \
 936         .name           = #x "_cuvid", \
 937         .type           = AVMEDIA_TYPE_VIDEO, \
 938         .id             = AV_CODEC_ID_##X, \
 939         .pix_fmt        = AV_PIX_FMT_CUDA, \
 940     }; \
 941     AVCodec ff_##x##_cuvid_decoder = { \
 942         .name           = #x "_cuvid", \
 943         .long_name      = NULL_IF_CONFIG_SMALL("Nvidia CUVID " #X " decoder"), \
 944         .type           = AVMEDIA_TYPE_VIDEO, \
 945         .id             = AV_CODEC_ID_##X, \
 946         .priv_data_size = sizeof(CuvidContext), \
 947         .priv_class     = &x##_cuvid_class, \
 948         .init           = cuvid_decode_init, \
 949         .close          = cuvid_decode_end, \
 950         .decode         = cuvid_decode_frame, \
 951         .send_packet    = cuvid_decode_packet, \
 952         .receive_frame  = cuvid_output_frame, \
 953         .flush          = cuvid_flush, \
 954         .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \
 955         .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
 956                                                         AV_PIX_FMT_NV12, \
 957                                                         AV_PIX_FMT_P010, \
 958                                                         AV_PIX_FMT_P016, \
 959                                                         AV_PIX_FMT_NONE }, \
 960     };
 961
 /*
  * One DEFINE_CUVID_CODEC instantiation per supported codec. Each one is
  * compiled only when its CONFIG_<NAME>_CUVID_DECODER preprocessor switch is
  * non-zero. The first argument is the name fragment for the generated
  * symbols, the second is the AV_CODEC_ID_ suffix.
  */
 962 #if CONFIG_HEVC_CUVID_DECODER
 963 DEFINE_CUVID_CODEC(hevc, HEVC)
 964 #endif
 965
 966 #if CONFIG_H264_CUVID_DECODER
 967 DEFINE_CUVID_CODEC(h264, H264)
 968 #endif
 969
 970 #if CONFIG_MJPEG_CUVID_DECODER
 971 DEFINE_CUVID_CODEC(mjpeg, MJPEG)
 972 #endif
 973
 974 #if CONFIG_MPEG1_CUVID_DECODER
 975 DEFINE_CUVID_CODEC(mpeg1, MPEG1VIDEO)
 976 #endif
 977
 978 #if CONFIG_MPEG2_CUVID_DECODER
 979 DEFINE_CUVID_CODEC(mpeg2, MPEG2VIDEO)
 980 #endif
 981
 982 #if CONFIG_MPEG4_CUVID_DECODER
 983 DEFINE_CUVID_CODEC(mpeg4, MPEG4)
 984 #endif
 985
 986 #if CONFIG_VP8_CUVID_DECODER
 987 DEFINE_CUVID_CODEC(vp8, VP8)
 988 #endif
 989
 990 #if CONFIG_VP9_CUVID_DECODER
 991 DEFINE_CUVID_CODEC(vp9, VP9)
 992 #endif
 993
 994 #if CONFIG_VC1_CUVID_DECODER
 995 DEFINE_CUVID_CODEC(vc1, VC1)
 996 #endif