avcodec/dpx: add support for 10bit gray

[ffmpeg] / libavcodec / nvenc.c
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c

index f3193f1cc74555ccbabe6171b2dc1afd51b62e86..97497be0bcf78f45c52a5e5b762a40675aabeeb0 100644 (file)
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -25,12 +25,15 @@
  
  #include "libavutil/hwcontext_cuda.h"
  #include "libavutil/hwcontext.h"
+#include "libavutil/cuda_check.h"
  #include "libavutil/imgutils.h"
  #include "libavutil/avassert.h"
  #include "libavutil/mem.h"
  #include "libavutil/pixdesc.h"
  #include "internal.h"
  
+#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, dl_fn->cuda_dl, x)
+
  #define NVENC_CAP 0x30
  #define IS_CBR(rc) (rc == NV_ENC_PARAMS_RC_CBR ||             \
                      rc == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ || \
@@ -183,37 +186,23 @@ static int nvenc_push_context(AVCodecContext *avctx)
  {
      NvencContext *ctx            = avctx->priv_data;
      NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
-    CUresult cu_res;
  
      if (ctx->d3d11_device)
          return 0;
  
-    cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
-        return AVERROR_EXTERNAL;
-    }
-
-    return 0;
+    return CHECK_CU(dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context));
  }
  
  static int nvenc_pop_context(AVCodecContext *avctx)
  {
      NvencContext *ctx            = avctx->priv_data;
      NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
-    CUresult cu_res;
      CUcontext dummy;
  
      if (ctx->d3d11_device)
          return 0;
  
-    cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
-        return AVERROR_EXTERNAL;
-    }
-
-    return 0;
+    return CHECK_CU(dl_fn->cuda_dl->cuCtxPopCurrent(&dummy));
  }
  
  static av_cold int nvenc_open_session(AVCodecContext *avctx)
@@ -406,32 +395,23 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
      NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
      char name[128] = { 0};
      int major, minor, ret;
-    CUresult cu_res;
      CUdevice cu_device;
      int loglevel = AV_LOG_VERBOSE;
  
      if (ctx->device == LIST_DEVICES)
          loglevel = AV_LOG_INFO;
  
-    cu_res = dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Cannot access the CUDA device %d\n",
-               idx);
-        return -1;
-    }
+    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx));
+    if (ret < 0)
+        return ret;
  
-    cu_res = dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuDeviceGetName failed on device %d\n", idx);
-        return -1;
-    }
+    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device));
+    if (ret < 0)
+        return ret;
  
-    cu_res = dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "cuDeviceComputeCapability failed on device %d\n", idx);
-        return -1;
-    }
+    ret = CHECK_CU(dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device));
+    if (ret < 0)
+        return ret;
  
      av_log(avctx, loglevel, "[ GPU #%d - < %s > has Compute SM %d.%d ]\n", idx, name, major, minor);
      if (((major << 4) | minor) < NVENC_CAP) {
@@ -442,11 +422,9 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
      if (ctx->device != idx && ctx->device != ANY_DEVICE)
          return -1;
  
-    cu_res = dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device);
-    if (cu_res != CUDA_SUCCESS) {
-        av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res);
+    ret = CHECK_CU(dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device));
+    if (ret < 0)
          goto fail;
-    }
  
      ctx->cu_context = ctx->cu_context_internal;
  
@@ -477,7 +455,7 @@ fail3:
          return ret;
  
  fail2:
-    dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
+    CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
      ctx->cu_context_internal = NULL;
  
  fail:
@@ -555,17 +533,11 @@ static av_cold int nvenc_setup_device(AVCodecContext *avctx)
      } else {
          int i, nb_devices = 0;
  
-        if ((dl_fn->cuda_dl->cuInit(0)) != CUDA_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Cannot init CUDA\n");
+        if (CHECK_CU(dl_fn->cuda_dl->cuInit(0)) < 0)
              return AVERROR_UNKNOWN;
-        }
  
-        if ((dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) != CUDA_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Cannot enumerate the CUDA devices\n");
+        if (CHECK_CU(dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) < 0)
              return AVERROR_UNKNOWN;
-        }
  
          if (!nb_devices) {
              av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
@@ -1460,7 +1432,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
      ctx->nvencoder = NULL;
  
      if (ctx->cu_context_internal)
-        dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
+        CHECK_CU(dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal));
      ctx->cu_context = ctx->cu_context_internal = NULL;
  
  #if CONFIG_D3D11VA
@@ -1486,21 +1458,19 @@ av_cold int ff_nvenc_encode_init(AVCodecContext *avctx)
      int ret;
  
      if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
-        if (avctx->hw_frames_ctx) {
-            AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
-            if (frames_ctx->format != avctx->pix_fmt) {
-                av_log(avctx, AV_LOG_ERROR,
-                       "hw_frames_ctx must match the GPU frame type\n");
-                return AVERROR(EINVAL);
-            }
-            ctx->data_pix_fmt = frames_ctx->sw_format;
-        } else if (avctx->sw_pix_fmt && avctx->sw_pix_fmt != AV_PIX_FMT_NONE) {
-            ctx->data_pix_fmt = avctx->sw_pix_fmt;
-        } else {
+        AVHWFramesContext *frames_ctx;
+        if (!avctx->hw_frames_ctx) {
              av_log(avctx, AV_LOG_ERROR,
-                   "either hw_frames_ctx or sw_pix_fmt is required for hw frame input\n");
+                   "hw_frames_ctx must be set when using GPU frames as input\n");
              return AVERROR(EINVAL);
          }
+        frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+        if (frames_ctx->format != avctx->pix_fmt) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "hw_frames_ctx must match the GPU frame type\n");
+            return AVERROR(EINVAL);
+        }
+        ctx->data_pix_fmt = frames_ctx->sw_format;
      } else {
          ctx->data_pix_fmt = avctx->pix_fmt;
      }
@@ -1603,15 +1573,10 @@ static int nvenc_register_frame(AVCodecContext *avctx, const AVFrame *frame)
      NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
      NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
  
-    enum AVPixelFormat sw_format = ctx->data_pix_fmt;
+    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)frame->hw_frames_ctx->data;
      NV_ENC_REGISTER_RESOURCE reg;
      int i, idx, ret;
  
-    if (frame->hw_frames_ctx) {
-        AVHWFramesContext *frames_ctx = (AVHWFramesContext*)frame->hw_frames_ctx->data;
-        sw_format = frames_ctx->sw_format;
-    }
-
      for (i = 0; i < ctx->nb_registered_frames; i++) {
          if (avctx->pix_fmt == AV_PIX_FMT_CUDA && ctx->registered_frames[i].ptr == frame->data[0])
              return i;
@@ -1624,8 +1589,8 @@ static int nvenc_register_frame(AVCodecContext *avctx, const AVFrame *frame)
          return idx;
  
      reg.version            = NV_ENC_REGISTER_RESOURCE_VER;
-    reg.width              = frame->width;
-    reg.height             = frame->height;
+    reg.width              = frames_ctx->width;
+    reg.height             = frames_ctx->height;
      reg.pitch              = frame->linesize[0];
      reg.resourceToRegister = frame->data[0];
  
@@ -1637,10 +1602,10 @@ static int nvenc_register_frame(AVCodecContext *avctx, const AVFrame *frame)
          reg.subResourceIndex = (intptr_t)frame->data[1];
      }
  
-    reg.bufferFormat       = nvenc_map_buffer_format(sw_format);
+    reg.bufferFormat       = nvenc_map_buffer_format(frames_ctx->sw_format);
      if (reg.bufferFormat == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
          av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format: %s\n",
-               av_get_pix_fmt_name(sw_format));
+               av_get_pix_fmt_name(frames_ctx->sw_format));
          return AVERROR(EINVAL);
      }
  
@@ -1948,7 +1913,7 @@ static int output_ready(AVCodecContext *avctx, int flush)
      return (nb_ready > 0) && (nb_ready + nb_pending >= ctx->async_depth);
  }
  
-static int reconfig_encoder(AVCodecContext *avctx, const AVFrame *frame)
+static void reconfig_encoder(AVCodecContext *avctx, const AVFrame *frame)
  {
      NvencContext *ctx = avctx->priv_data;
      NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
@@ -2038,8 +2003,6 @@ static int reconfig_encoder(AVCodecContext *avctx, const AVFrame *frame)
  
          }
      }
-
-    return 0;
  }
  
  int ff_nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
@@ -2060,12 +2023,18 @@ int ff_nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
      if ((!ctx->cu_context && !ctx->d3d11_device) || !ctx->nvencoder)
          return AVERROR(EINVAL);
  
-    if (ctx->encoder_flushing)
-        return AVERROR_EOF;
+    if (ctx->encoder_flushing) {
+        if (avctx->internal->draining)
+            return AVERROR_EOF;
  
-    if (frame) {
-        reconfig_encoder(avctx, frame);
+        ctx->encoder_flushing = 0;
+        ctx->first_packet_output = 0;
+        ctx->initial_pts[0] = AV_NOPTS_VALUE;
+        ctx->initial_pts[1] = AV_NOPTS_VALUE;
+        av_fifo_reset(ctx->timestamp_list);
+    }
  
+    if (frame) {
          in_surf = get_free_frame(ctx);
          if (!in_surf)
              return AVERROR(EAGAIN);
@@ -2074,6 +2043,8 @@ int ff_nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
          if (res < 0)
              return res;
  
+        reconfig_encoder(avctx, frame);
+
          res = nvenc_upload_frame(avctx, frame, in_surf);
  
          res2 = nvenc_pop_context(avctx);
@@ -2108,7 +2079,7 @@ int ff_nvenc_send_frame(AVCodecContext *avctx, const AVFrame *frame)
  
          pic_params.inputTimeStamp = frame->pts;
  
-        if (av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC)) {
+        if (ctx->a53_cc && av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC)) {
              if (ff_alloc_a53_sei(frame, sizeof(NV_ENC_SEI_PAYLOAD), (void**)&sei_data, &sei_size) < 0) {
                  av_log(ctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
              }