Merge commit 'facdfe40805559963b5875931af9406ed5ddcd5c'

[ffmpeg] / libavcodec / cuvid.c
diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c

index eafce0ac19074cdc67275477b551592d629840e4..9b35476f2a51cb30b0a4a0c68a79c0fc07e6c066 100644 (file)
--- a/libavcodec/cuvid.c
+++ b/libavcodec/cuvid.c
@@ -19,21 +19,20 @@
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
+#include "compat/cuda/dynlink_loader.h"
+
  #include "libavutil/buffer.h"
  #include "libavutil/mathematics.h"
  #include "libavutil/hwcontext.h"
-#include "libavutil/hwcontext_cuda.h"
+#include "libavutil/hwcontext_cuda_internal.h"
  #include "libavutil/fifo.h"
  #include "libavutil/log.h"
  #include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
  
  #include "avcodec.h"
  #include "internal.h"
  
-#include "compat/cuda/nvcuvid.h"
-
-#define MAX_FRAME_COUNT 25
-
  typedef struct CuvidContext
  {
      AVClass *avclass;
@@ -42,6 +41,7 @@ typedef struct CuvidContext
      CUvideoparser cuparser;
  
      char *cu_gpu;
+    int nb_surfaces;
  
      AVBufferRef *hwdevice;
      AVBufferRef *hwframe;
@@ -61,6 +61,9 @@ typedef struct CuvidContext
  
      CUVIDPARSERPARAMS cuparseinfo;
      CUVIDEOFORMATEX cuparse_ext;
+
+    CudaFunctions *cudl;
+    CuvidFunctions *cvdl;
  } CuvidContext;
  
  typedef struct CuvidParsedFrame
@@ -72,6 +75,7 @@ typedef struct CuvidParsedFrame
  
  static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
  {
+    CuvidContext *ctx = avctx->priv_data;
      const char *err_name;
      const char *err_string;
  
@@ -80,8 +84,8 @@ static int check_cu(AVCodecContext *avctx, CUresult err, const char *func)
      if (err == CUDA_SUCCESS)
          return 0;
  
-    cuGetErrorName(err, &err_name);
-    cuGetErrorString(err, &err_string);
+    ctx->cudl->cuGetErrorName(err, &err_name);
+    ctx->cudl->cuGetErrorString(err, &err_string);
  
      av_log(avctx, AV_LOG_ERROR, "%s failed", func);
      if (err_name && err_string)
@@ -99,11 +103,46 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
      CuvidContext *ctx = avctx->priv_data;
      AVHWFramesContext *hwframe_ctx = (AVHWFramesContext*)ctx->hwframe->data;
      CUVIDDECODECREATEINFO cuinfo;
+    int surface_fmt;
+
+    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_CUDA,
+                                       AV_PIX_FMT_NONE,  // Will be updated below
+                                       AV_PIX_FMT_NONE };
  
      av_log(avctx, AV_LOG_TRACE, "pfnSequenceCallback, progressive_sequence=%d\n", format->progressive_sequence);
  
      ctx->internal_error = 0;
  
+    switch (format->bit_depth_luma_minus8) {
+    case 0: // 8-bit
+        pix_fmts[1] = AV_PIX_FMT_NV12;
+        break;
+    case 2: // 10-bit
+        pix_fmts[1] = AV_PIX_FMT_P010;
+        break;
+    case 4: // 12-bit
+        pix_fmts[1] = AV_PIX_FMT_P016;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "unsupported bit depth: %d\n",
+               format->bit_depth_luma_minus8 + 8);
+        ctx->internal_error = AVERROR(EINVAL);
+        return 0;
+    }
+    surface_fmt = ff_get_format(avctx, pix_fmts);
+    if (surface_fmt < 0) {
+        av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", surface_fmt);
+        ctx->internal_error = AVERROR(EINVAL);
+        return 0;
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "Formats: Original: %s | HW: %s | SW: %s\n",
+           av_get_pix_fmt_name(avctx->pix_fmt),
+           av_get_pix_fmt_name(surface_fmt),
+           av_get_pix_fmt_name(avctx->sw_pix_fmt));
+
+    avctx->pix_fmt = surface_fmt;
+
      avctx->width = format->display_area.right;
      avctx->height = format->display_area.bottom;
  
@@ -142,7 +181,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
  
      if (ctx->cudecoder) {
          av_log(avctx, AV_LOG_TRACE, "Re-initializing decoder\n");
-        ctx->internal_error = CHECK_CU(cuvidDestroyDecoder(ctx->cudecoder));
+        ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder));
          if (ctx->internal_error < 0)
              return 0;
          ctx->cudecoder = NULL;
@@ -152,7 +191,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
              hwframe_ctx->width < avctx->width ||
              hwframe_ctx->height < avctx->height ||
              hwframe_ctx->format != AV_PIX_FMT_CUDA ||
-            hwframe_ctx->sw_format != AV_PIX_FMT_NV12)) {
+            hwframe_ctx->sw_format != avctx->sw_pix_fmt)) {
          av_log(avctx, AV_LOG_ERROR, "AVHWFramesContext is already initialized with incompatible parameters\n");
          ctx->internal_error = AVERROR(EINVAL);
          return 0;
@@ -173,7 +212,20 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
  
      cuinfo.CodecType = ctx->codec_type = format->codec;
      cuinfo.ChromaFormat = format->chroma_format;
-    cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
+
+    switch (avctx->sw_pix_fmt) {
+    case AV_PIX_FMT_NV12:
+        cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
+        break;
+    case AV_PIX_FMT_P010:
+    case AV_PIX_FMT_P016:
+        cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n");
+        ctx->internal_error = AVERROR(EINVAL);
+        return 0;
+    }
  
      cuinfo.ulWidth = avctx->coded_width;
      cuinfo.ulHeight = avctx->coded_height;
@@ -185,7 +237,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
      cuinfo.target_rect.right = cuinfo.ulWidth;
      cuinfo.target_rect.bottom = cuinfo.ulHeight;
  
-    cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
+    cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces;
      cuinfo.ulNumOutputSurfaces = 1;
      cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
      cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
@@ -199,13 +251,13 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
      if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave)
          avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});
  
-    ctx->internal_error = CHECK_CU(cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
+    ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
      if (ctx->internal_error < 0)
          return 0;
  
      if (!hwframe_ctx->pool) {
          hwframe_ctx->format = AV_PIX_FMT_CUDA;
-        hwframe_ctx->sw_format = AV_PIX_FMT_NV12;
+        hwframe_ctx->sw_format = avctx->sw_pix_fmt;
          hwframe_ctx->width = avctx->width;
          hwframe_ctx->height = avctx->height;
  
@@ -225,7 +277,7 @@ static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* pic
  
      av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n");
  
-    ctx->internal_error = CHECK_CU(cuvidDecodePicture(ctx->cudecoder, picparams));
+    ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams));
      if (ctx->internal_error < 0)
          return 0;
  
@@ -236,8 +288,9 @@ static int CUDAAPI cuvid_handle_picture_display(void *opaque, CUVIDPARSERDISPINF
  {
      AVCodecContext *avctx = opaque;
      CuvidContext *ctx = avctx->priv_data;
-    CuvidParsedFrame parsed_frame = { *dispinfo, 0, 0 };
+    CuvidParsedFrame parsed_frame = { { 0 } };
  
+    parsed_frame.dispinfo = *dispinfo;
      ctx->internal_error = 0;
  
      if (ctx->deint_mode == cudaVideoDeinterlaceMode_Weave) {
@@ -268,7 +321,7 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
      if (is_flush && avpkt && avpkt->size)
          return AVERROR_EOF;
  
-    if (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame) > MAX_FRAME_COUNT - 2 && avpkt && avpkt->size)
+    if ((av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + 2 > ctx->nb_surfaces && avpkt && avpkt->size)
          return AVERROR(EAGAIN);
  
      if (ctx->bsf && avpkt && avpkt->size) {
@@ -291,7 +344,7 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
          avpkt = &filtered_packet;
      }
  
-    ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
+    ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
      if (ret < 0) {
          av_packet_unref(&filtered_packet);
          return ret;
@@ -315,7 +368,7 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
          ctx->decoder_flushing = 1;
      }
  
-    ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &cupkt));
+    ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &cupkt));
  
      av_packet_unref(&filtered_packet);
  
@@ -330,7 +383,7 @@ static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
      }
  
  error:
-    eret = CHECK_CU(cuCtxPopCurrent(&dummy));
+    eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
  
      if (eret < 0)
          return eret;
@@ -359,7 +412,7 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
              return ret;
      }
  
-    ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
+    ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
      if (ret < 0)
          return ret;
  
@@ -377,7 +430,7 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
          params.second_field = parsed_frame.second_field;
          params.top_field_first = parsed_frame.dispinfo.top_field_first;
  
-        ret = CHECK_CU(cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
+        ret = CHECK_CU(ctx->cvdl->cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
          if (ret < 0)
              goto error;
  
@@ -407,13 +460,15 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                      .Height        = avctx->height >> (i ? 1 : 0),
                  };
  
-                ret = CHECK_CU(cuMemcpy2D(&cpy));
+                ret = CHECK_CU(ctx->cudl->cuMemcpy2D(&cpy));
                  if (ret < 0)
                      goto error;
  
                  offset += avctx->coded_height;
              }
-        } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
+        } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 ||
+                   avctx->pix_fmt == AV_PIX_FMT_P010 ||
+                   avctx->pix_fmt == AV_PIX_FMT_P016) {
              AVFrame *tmp_frame = av_frame_alloc();
              if (!tmp_frame) {
                  av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
@@ -443,7 +498,6 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
                  av_frame_free(&tmp_frame);
                  goto error;
              }
-
              av_frame_free(&tmp_frame);
          } else {
              ret = AVERROR_BUG;
@@ -492,9 +546,9 @@ FF_ENABLE_DEPRECATION_WARNINGS
  
  error:
      if (mapped_frame)
-        eret = CHECK_CU(cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame));
+        eret = CHECK_CU(ctx->cvdl->cuvidUnmapVideoFrame(ctx->cudecoder, mapped_frame));
  
-    eret = CHECK_CU(cuCtxPopCurrent(&dummy));
+    eret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
  
      if (eret < 0)
          return eret;
@@ -543,19 +597,27 @@ static av_cold int cuvid_decode_end(AVCodecContext *avctx)
          av_bsf_free(&ctx->bsf);
  
      if (ctx->cuparser)
-        cuvidDestroyVideoParser(ctx->cuparser);
+        ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
  
      if (ctx->cudecoder)
-        cuvidDestroyDecoder(ctx->cudecoder);
+        ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
+
+    ctx->cudl = NULL;
  
      av_buffer_unref(&ctx->hwframe);
      av_buffer_unref(&ctx->hwdevice);
  
+    cuvid_free_functions(&ctx->cvdl);
+
      return 0;
  }
  
-static int cuvid_test_dummy_decoder(AVCodecContext *avctx, CUVIDPARSERPARAMS *cuparseinfo)
+static int cuvid_test_dummy_decoder(AVCodecContext *avctx,
+                                    const CUVIDPARSERPARAMS *cuparseinfo,
+                                    int probed_width,
+                                    int probed_height)
  {
+    CuvidContext *ctx = avctx->priv_data;
      CUVIDDECODECREATEINFO cuinfo;
      CUvideodecoder cudec = 0;
      int ret = 0;
@@ -566,8 +628,8 @@ static int cuvid_test_dummy_decoder(AVCodecContext *avctx, CUVIDPARSERPARAMS *cu
      cuinfo.ChromaFormat = cudaVideoChromaFormat_420;
      cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
  
-    cuinfo.ulWidth = 1280;
-    cuinfo.ulHeight = 720;
+    cuinfo.ulWidth = probed_width;
+    cuinfo.ulHeight = probed_height;
      cuinfo.ulTargetWidth = cuinfo.ulWidth;
      cuinfo.ulTargetHeight = cuinfo.ulHeight;
  
@@ -576,18 +638,18 @@ static int cuvid_test_dummy_decoder(AVCodecContext *avctx, CUVIDPARSERPARAMS *cu
      cuinfo.target_rect.right = cuinfo.ulWidth;
      cuinfo.target_rect.bottom = cuinfo.ulHeight;
  
-    cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
+    cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces;
      cuinfo.ulNumOutputSurfaces = 1;
      cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
      cuinfo.bitDepthMinus8 = 0;
  
      cuinfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
  
-    ret = CHECK_CU(cuvidCreateDecoder(&cudec, &cuinfo));
+    ret = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&cudec, &cuinfo));
      if (ret < 0)
          return ret;
  
-    ret = CHECK_CU(cuvidDestroyDecoder(cudec));
+    ret = CHECK_CU(ctx->cvdl->cuvidDestroyDecoder(cudec));
      if (ret < 0)
          return ret;
  
@@ -610,20 +672,32 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
                                         AV_PIX_FMT_NV12,
                                         AV_PIX_FMT_NONE };
  
+    int probed_width = avctx->coded_width ? avctx->coded_width : 1280;
+    int probed_height = avctx->coded_height ? avctx->coded_height : 720;
+
+    // Accelerated transcoding scenarios with 'ffmpeg' require that the
+    // pix_fmt be set to AV_PIX_FMT_CUDA early. The sw_pix_fmt, and the
+    // pix_fmt for non-accelerated transcoding, do not need to be correct
+    // but need to be set to something. We arbitrarily pick NV12.
      ret = ff_get_format(avctx, pix_fmts);
      if (ret < 0) {
          av_log(avctx, AV_LOG_ERROR, "ff_get_format failed: %d\n", ret);
          return ret;
      }
+    avctx->pix_fmt = ret;
+
+    ret = cuvid_load_functions(&ctx->cvdl);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
+        goto error;
+    }
  
-    ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame));
+    ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
      if (!ctx->frame_queue) {
          ret = AVERROR(ENOMEM);
          goto error;
      }
  
-    avctx->pix_fmt = ret;
-
      if (avctx->hw_frames_ctx) {
          ctx->hwframe = av_buffer_ref(avctx->hw_frames_ctx);
          if (!ctx->hwframe) {
@@ -655,7 +729,9 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
  
      device_ctx = hwframe_ctx->device_ctx;
      device_hwctx = device_ctx->hwctx;
+
      cuda_ctx = device_hwctx->cuda_ctx;
+    ctx->cudl = device_hwctx->internal->cuda_dl;
  
      memset(&ctx->cuparseinfo, 0, sizeof(ctx->cuparseinfo));
      memset(&ctx->cuparse_ext, 0, sizeof(ctx->cuparse_ext));
@@ -743,22 +819,24 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
                 FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), avctx->extradata_size));
      }
  
-    ctx->cuparseinfo.ulMaxNumDecodeSurfaces = MAX_FRAME_COUNT;
+    ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces;
      ctx->cuparseinfo.ulMaxDisplayDelay = 4;
      ctx->cuparseinfo.pUserData = avctx;
      ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
      ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
      ctx->cuparseinfo.pfnDisplayPicture = cuvid_handle_picture_display;
  
-    ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
+    ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
      if (ret < 0)
          goto error;
  
-    ret = cuvid_test_dummy_decoder(avctx, &ctx->cuparseinfo);
+    ret = cuvid_test_dummy_decoder(avctx, &ctx->cuparseinfo,
+                                   probed_width,
+                                   probed_height);
      if (ret < 0)
          goto error;
  
-    ret = CHECK_CU(cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
+    ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
      if (ret < 0)
          goto error;
  
@@ -766,12 +844,12 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
      seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
  
      if (seq_pkt.payload && seq_pkt.payload_size) {
-        ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &seq_pkt));
+        ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
          if (ret < 0)
              goto error;
      }
  
-    ret = CHECK_CU(cuCtxPopCurrent(&dummy));
+    ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
      if (ret < 0)
          goto error;
  
@@ -796,29 +874,29 @@ static void cuvid_flush(AVCodecContext *avctx)
      CUVIDSOURCEDATAPACKET seq_pkt = { 0 };
      int ret;
  
-    ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
+    ret = CHECK_CU(ctx->cudl->cuCtxPushCurrent(cuda_ctx));
      if (ret < 0)
          goto error;
  
      av_fifo_freep(&ctx->frame_queue);
  
-    ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame));
+    ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
      if (!ctx->frame_queue) {
          av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on flush\n");
          return;
      }
  
      if (ctx->cudecoder) {
-        cuvidDestroyDecoder(ctx->cudecoder);
+        ctx->cvdl->cuvidDestroyDecoder(ctx->cudecoder);
          ctx->cudecoder = NULL;
      }
  
      if (ctx->cuparser) {
-        cuvidDestroyVideoParser(ctx->cuparser);
+        ctx->cvdl->cuvidDestroyVideoParser(ctx->cuparser);
          ctx->cuparser = NULL;
      }
  
-    ret = CHECK_CU(cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
+    ret = CHECK_CU(ctx->cvdl->cuvidCreateVideoParser(&ctx->cuparser, &ctx->cuparseinfo));
      if (ret < 0)
          goto error;
  
@@ -826,12 +904,12 @@ static void cuvid_flush(AVCodecContext *avctx)
      seq_pkt.payload_size = ctx->cuparse_ext.format.seqhdr_data_length;
  
      if (seq_pkt.payload && seq_pkt.payload_size) {
-        ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &seq_pkt));
+        ret = CHECK_CU(ctx->cvdl->cuvidParseVideoData(ctx->cuparser, &seq_pkt));
          if (ret < 0)
              goto error;
      }
  
-    ret = CHECK_CU(cuCtxPopCurrent(&dummy));
+    ret = CHECK_CU(ctx->cudl->cuCtxPopCurrent(&dummy));
      if (ret < 0)
          goto error;
  
@@ -851,6 +929,7 @@ static const AVOption options[] = {
      { "bob",      "Bob deinterlacing",                       0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob      }, 0, 0, VD, "deint" },
      { "adaptive", "Adaptive deinterlacing",                  0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" },
      { "gpu",      "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
+    { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD },
      { NULL }
  };
  
@@ -883,6 +962,8 @@ static const AVOption options[] = {
          .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \
          .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, \
                                                          AV_PIX_FMT_NV12, \
+                                                        AV_PIX_FMT_P010, \
+                                                        AV_PIX_FMT_P016, \
                                                          AV_PIX_FMT_NONE }, \
      };