]> git.sesse.net Git - ffmpeg/blobdiff - libavfilter/vf_scale_cuda.c
avfilter: Constify all AVFilters
[ffmpeg] / libavfilter / vf_scale_cuda.c
index 5405e6a4ed565d19b6a115d433a878c9882780d6..d97c7df273a07ad82ed9a2decd755b891618ae55 100644 (file)
@@ -48,6 +48,8 @@ static const enum AVPixelFormat supported_formats[] = {
     AV_PIX_FMT_P010,
     AV_PIX_FMT_P016,
     AV_PIX_FMT_YUV444P16,
+    AV_PIX_FMT_0RGB32,
+    AV_PIX_FMT_0BGR32,
 };
 
 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
@@ -419,10 +421,14 @@ static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channel
         .res.pitch2D.numChannels = channels,
         .res.pitch2D.width = src_width,
         .res.pitch2D.height = src_height,
-        .res.pitch2D.pitchInBytes = src_pitch * pixel_size,
+        .res.pitch2D.pitchInBytes = src_pitch,
         .res.pitch2D.devPtr = (CUdeviceptr)src_dptr,
     };
 
+    // Channels are handled via vector types in CUDA, so their size is implicitly part of the pitch.
+    // The same holds for pixel_size, which is represented via the data types used on the CUDA side.
+    dst_pitch /= channels * pixel_size;
+
     ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL));
     if (ret < 0)
         goto exit;
@@ -475,16 +481,16 @@ static int scalecuda_resize(AVFilterContext *ctx,
         break;
     case AV_PIX_FMT_YUV444P16:
         call_resize_kernel(ctx, s->cu_func_ushort, 1,
-                           in->data[0], in->width, in->height, in->linesize[0] / 2,
-                           out->data[0], out->width, out->height, out->linesize[0] / 2,
+                           in->data[0], in->width, in->height, in->linesize[0],
+                           out->data[0], out->width, out->height, out->linesize[0],
                            2, 16);
         call_resize_kernel(ctx, s->cu_func_ushort, 1,
-                           in->data[1], in->width, in->height, in->linesize[1] / 2,
-                           out->data[1], out->width, out->height, out->linesize[1] / 2,
+                           in->data[1], in->width, in->height, in->linesize[1],
+                           out->data[1], out->width, out->height, out->linesize[1],
                            2, 16);
         call_resize_kernel(ctx, s->cu_func_ushort, 1,
-                           in->data[2], in->width, in->height, in->linesize[2] / 2,
-                           out->data[2], out->width, out->height, out->linesize[2] / 2,
+                           in->data[2], in->width, in->height, in->linesize[2],
+                           out->data[2], out->width, out->height, out->linesize[2],
                            2, 16);
         break;
     case AV_PIX_FMT_NV12:
@@ -494,29 +500,36 @@ static int scalecuda_resize(AVFilterContext *ctx,
                            1, 8);
         call_resize_kernel(ctx, s->cu_func_uchar2, 2,
                            in->data[1], in->width / 2, in->height / 2, in->linesize[1],
-                           out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 2,
+                           out->data[1], out->width / 2, out->height / 2, out->linesize[1],
                            1, 8);
         break;
     case AV_PIX_FMT_P010LE:
         call_resize_kernel(ctx, s->cu_func_ushort, 1,
-                           in->data[0], in->width, in->height, in->linesize[0] / 2,
-                           out->data[0], out->width, out->height, out->linesize[0] / 2,
+                           in->data[0], in->width, in->height, in->linesize[0],
+                           out->data[0], out->width, out->height, out->linesize[0],
                            2, 10);
         call_resize_kernel(ctx, s->cu_func_ushort2, 2,
-                           in->data[1], in->width / 2, in->height / 2, in->linesize[1] / 2,
-                           out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 4,
+                           in->data[1], in->width / 2, in->height / 2, in->linesize[1],
+                           out->data[1], out->width / 2, out->height / 2, out->linesize[1],
                            2, 10);
         break;
     case AV_PIX_FMT_P016LE:
         call_resize_kernel(ctx, s->cu_func_ushort, 1,
-                           in->data[0], in->width, in->height, in->linesize[0] / 2,
-                           out->data[0], out->width, out->height, out->linesize[0] / 2,
+                           in->data[0], in->width, in->height, in->linesize[0],
+                           out->data[0], out->width, out->height, out->linesize[0],
                            2, 16);
         call_resize_kernel(ctx, s->cu_func_ushort2, 2,
-                           in->data[1], in->width / 2, in->height / 2, in->linesize[1] / 2,
-                           out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 4,
+                           in->data[1], in->width / 2, in->height / 2, in->linesize[1],
+                           out->data[1], out->width / 2, out->height / 2, out->linesize[1],
                            2, 16);
         break;
+    case AV_PIX_FMT_0RGB32:
+    case AV_PIX_FMT_0BGR32:
+        call_resize_kernel(ctx, s->cu_func_uchar4, 4,
+                           in->data[0], in->width, in->height, in->linesize[0],
+                           out->data[0], out->width, out->height, out->linesize[0],
+                           1, 8);
+        break;
     default:
         return AVERROR_BUG;
     }
@@ -651,7 +664,7 @@ static const AVFilterPad cudascale_outputs[] = {
     { NULL }
 };
 
-AVFilter ff_vf_scale_cuda = {
+const AVFilter ff_vf_scale_cuda = {
     .name      = "scale_cuda",
     .description = NULL_IF_CONFIG_SMALL("GPU accelerated video resizer"),