avutil/buffer: Switch AVBuffer API to size_t

[ffmpeg] / libavutil / hwcontext_cuda.c
diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c

index 58d128a280c5eb68a765ac3a5f3af63dcd8e0c4b..cfdf2d7fd42961559f6ee58eb12686304e8b13db 100644 (file)
--- a/libavutil/hwcontext_cuda.c
+++ b/libavutil/hwcontext_cuda.c
@@ -30,10 +30,9 @@
  #include "pixfmt.h"
  #include "imgutils.h"
  
-#define CUDA_FRAME_ALIGNMENT 256
-
  typedef struct CUDAFramesContext {
      int shift_width, shift_height;
+    int tex_alignment;
  } CUDAFramesContext;
  
  static const enum AVPixelFormat supported_formats[] = {
@@ -94,7 +93,7 @@ static void cuda_buffer_free(void *opaque, uint8_t *data)
      CHECK_CU(cu->cuCtxPopCurrent(&dummy));
  }
  
-static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
+static AVBufferRef *cuda_pool_alloc(void *opaque, size_t size)
  {
      AVHWFramesContext        *ctx = opaque;
      AVHWDeviceContext *device_ctx = ctx->device_ctx;
@@ -127,8 +126,11 @@ fail:
  
  static int cuda_frames_init(AVHWFramesContext *ctx)
  {
-    CUDAFramesContext *priv = ctx->internal->priv;
-    int i;
+    AVHWDeviceContext *device_ctx = ctx->device_ctx;
+    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
+    CUDAFramesContext       *priv = ctx->internal->priv;
+    CudaFunctions             *cu = hwctx->internal->cuda_dl;
+    int err, i;
  
      for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
          if (ctx->sw_format == supported_formats[i])
@@ -140,10 +142,24 @@ static int cuda_frames_init(AVHWFramesContext *ctx)
          return AVERROR(ENOSYS);
      }
  
+    err = CHECK_CU(cu->cuDeviceGetAttribute(&priv->tex_alignment,
+                                            14 /* CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT */,
+                                            hwctx->internal->cuda_device));
+    if (err < 0)
+        return err;
+
+    av_log(ctx, AV_LOG_DEBUG, "CUDA texture alignment: %d\n", priv->tex_alignment);
+
+    // YUV420P is a special case.
+    // Since nvenc expects the U/V planes to have half the linesize of the Y plane
+    // alignment has to be doubled to ensure the U/V planes still end up aligned.
+    if (ctx->sw_format == AV_PIX_FMT_YUV420P)
+        priv->tex_alignment *= 2;
+
      av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
  
      if (!ctx->pool) {
-        int size = av_image_get_buffer_size(ctx->sw_format, ctx->width, ctx->height, CUDA_FRAME_ALIGNMENT);
+        int size = av_image_get_buffer_size(ctx->sw_format, ctx->width, ctx->height, priv->tex_alignment);
          if (size < 0)
              return size;
  
@@ -157,6 +173,7 @@ static int cuda_frames_init(AVHWFramesContext *ctx)
  
  static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
  {
+    CUDAFramesContext *priv = ctx->internal->priv;
      int res;
  
      frame->buf[0] = av_buffer_pool_get(ctx->pool);
@@ -164,7 +181,7 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
          return AVERROR(ENOMEM);
  
      res = av_image_fill_arrays(frame->data, frame->linesize, frame->buf[0]->data,
-                               ctx->sw_format, ctx->width, ctx->height, CUDA_FRAME_ALIGNMENT);
+                               ctx->sw_format, ctx->width, ctx->height, priv->tex_alignment);
      if (res < 0)
          return res;
  
@@ -173,7 +190,7 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
      if (ctx->sw_format == AV_PIX_FMT_YUV420P) {
          frame->linesize[1] = frame->linesize[2] = frame->linesize[0] / 2;
          frame->data[2]     = frame->data[1];
-        frame->data[1]     = frame->data[2] + frame->linesize[2] * ctx->height / 2;
+        frame->data[1]     = frame->data[2] + frame->linesize[2] * (ctx->height / 2);
      }
  
      frame->format = AV_PIX_FMT_CUDA;
@@ -395,7 +412,7 @@ error:
  }
  
  static int cuda_device_derive(AVHWDeviceContext *device_ctx,
-                              AVHWDeviceContext *src_ctx,
+                              AVHWDeviceContext *src_ctx, AVDictionary *opts,
                                int flags) {
      AVCUDADeviceContext *hwctx = device_ctx->hwctx;
      CudaFunctions *cu;