git.sesse.net Git - ffmpeg/blobdiff - libavfilter/vf_scale_cuda.c
avcodec/packet_internal: move the next pointer in PacketList to the top of the struct
[ffmpeg] / libavfilter / vf_scale_cuda.c
index fb585e5edc53539b89bd19b47526f6cd1dd76e7b..d97c7df273a07ad82ed9a2decd755b891618ae55 100644 (file)
@@ -421,10 +421,14 @@ static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channel
         .res.pitch2D.numChannels = channels,
         .res.pitch2D.width = src_width,
         .res.pitch2D.height = src_height,
-        .res.pitch2D.pitchInBytes = src_pitch * pixel_size,
+        .res.pitch2D.pitchInBytes = src_pitch,
         .res.pitch2D.devPtr = (CUdeviceptr)src_dptr,
     };
 
+    // Channels are handled via vector types on the CUDA side, so their size is implicitly part of the pitch.
+    // The same applies to pixel_size, which is represented by the element datatype there; so rescale dst_pitch to element units.
+    dst_pitch /= channels * pixel_size;
+
     ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL));
     if (ret < 0)
         goto exit;
@@ -477,16 +481,16 @@ static int scalecuda_resize(AVFilterContext *ctx,
         break;
     case AV_PIX_FMT_YUV444P16:
         call_resize_kernel(ctx, s->cu_func_ushort, 1,
-                           in->data[0], in->width, in->height, in->linesize[0] / 2,
-                           out->data[0], out->width, out->height, out->linesize[0] / 2,
+                           in->data[0], in->width, in->height, in->linesize[0],
+                           out->data[0], out->width, out->height, out->linesize[0],
                            2, 16);
         call_resize_kernel(ctx, s->cu_func_ushort, 1,
-                           in->data[1], in->width, in->height, in->linesize[1] / 2,
-                           out->data[1], out->width, out->height, out->linesize[1] / 2,
+                           in->data[1], in->width, in->height, in->linesize[1],
+                           out->data[1], out->width, out->height, out->linesize[1],
                            2, 16);
         call_resize_kernel(ctx, s->cu_func_ushort, 1,
-                           in->data[2], in->width, in->height, in->linesize[2] / 2,
-                           out->data[2], out->width, out->height, out->linesize[2] / 2,
+                           in->data[2], in->width, in->height, in->linesize[2],
+                           out->data[2], out->width, out->height, out->linesize[2],
                            2, 16);
         break;
     case AV_PIX_FMT_NV12:
@@ -496,34 +500,34 @@ static int scalecuda_resize(AVFilterContext *ctx,
                            1, 8);
         call_resize_kernel(ctx, s->cu_func_uchar2, 2,
                            in->data[1], in->width / 2, in->height / 2, in->linesize[1],
-                           out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 2,
+                           out->data[1], out->width / 2, out->height / 2, out->linesize[1],
                            1, 8);
         break;
     case AV_PIX_FMT_P010LE:
         call_resize_kernel(ctx, s->cu_func_ushort, 1,
-                           in->data[0], in->width, in->height, in->linesize[0] / 2,
-                           out->data[0], out->width, out->height, out->linesize[0] / 2,
+                           in->data[0], in->width, in->height, in->linesize[0],
+                           out->data[0], out->width, out->height, out->linesize[0],
                            2, 10);
         call_resize_kernel(ctx, s->cu_func_ushort2, 2,
-                           in->data[1], in->width / 2, in->height / 2, in->linesize[1] / 2,
-                           out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 4,
+                           in->data[1], in->width / 2, in->height / 2, in->linesize[1],
+                           out->data[1], out->width / 2, out->height / 2, out->linesize[1],
                            2, 10);
         break;
     case AV_PIX_FMT_P016LE:
         call_resize_kernel(ctx, s->cu_func_ushort, 1,
-                           in->data[0], in->width, in->height, in->linesize[0] / 2,
-                           out->data[0], out->width, out->height, out->linesize[0] / 2,
+                           in->data[0], in->width, in->height, in->linesize[0],
+                           out->data[0], out->width, out->height, out->linesize[0],
                            2, 16);
         call_resize_kernel(ctx, s->cu_func_ushort2, 2,
-                           in->data[1], in->width / 2, in->height / 2, in->linesize[1] / 2,
-                           out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 4,
+                           in->data[1], in->width / 2, in->height / 2, in->linesize[1],
+                           out->data[1], out->width / 2, out->height / 2, out->linesize[1],
                            2, 16);
         break;
     case AV_PIX_FMT_0RGB32:
     case AV_PIX_FMT_0BGR32:
         call_resize_kernel(ctx, s->cu_func_uchar4, 4,
                            in->data[0], in->width, in->height, in->linesize[0],
-                           out->data[0], out->width, out->height, out->linesize[0] / 4,
+                           out->data[0], out->width, out->height, out->linesize[0],
                            1, 8);
         break;
     default:
@@ -660,7 +664,7 @@ static const AVFilterPad cudascale_outputs[] = {
     { NULL }
 };
 
-AVFilter ff_vf_scale_cuda = {
+const AVFilter ff_vf_scale_cuda = {
     .name      = "scale_cuda",
     .description = NULL_IF_CONFIG_SMALL("GPU accelerated video resizer"),