X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavfilter%2Fvf_scale_cuda.c;h=0a73ea142287c280bf92f7c297173cbc1c77812b;hb=9691e2a4264b9859061efaaf818b528add45656f;hp=c97a802ddc6d9380768c9e28c92fb2b4d964af75;hpb=5c363d3e595a9e5b7c42897b7aab91b91b154ac1;p=ffmpeg

diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
index c97a802ddc6..0a73ea14228 100644
--- a/libavfilter/vf_scale_cuda.c
+++ b/libavfilter/vf_scale_cuda.c
@@ -43,7 +43,8 @@ static const enum AVPixelFormat supported_formats[] = {
     AV_PIX_FMT_NV12,
     AV_PIX_FMT_YUV444P,
     AV_PIX_FMT_P010,
-    AV_PIX_FMT_P016
+    AV_PIX_FMT_P016,
+    AV_PIX_FMT_YUV444P16,
 };
 
 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
@@ -357,7 +358,7 @@ static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, int channel
         .res.pitch2D.numChannels = channels,
         .res.pitch2D.width = src_width,
         .res.pitch2D.height = src_height,
-        .res.pitch2D.pitchInBytes = src_pitch,
+        .res.pitch2D.pitchInBytes = src_pitch * pixel_size,
         .res.pitch2D.devPtr = (CUdeviceptr)src_dptr,
     };
 
@@ -389,12 +390,12 @@ static int scalecuda_resize(AVFilterContext *ctx,
                            out->data[0], out->width, out->height, out->linesize[0],
                            1);
         call_resize_kernel(ctx, s->cu_func_uchar, 1,
-                           in->data[0]+in->linesize[0]*in->height, in->width/2, in->height/2, in->linesize[0]/2,
-                           out->data[0]+out->linesize[0]*out->height, out->width/2, out->height/2, out->linesize[0]/2,
+                           in->data[1], in->width/2, in->height/2, in->linesize[0]/2,
+                           out->data[1], out->width/2, out->height/2, out->linesize[0]/2,
                            1);
         call_resize_kernel(ctx, s->cu_func_uchar, 1,
-                           in->data[0]+ ALIGN_UP((in->linesize[0]*in->height*5)/4, s->tex_alignment), in->width/2, in->height/2, in->linesize[0]/2,
-                           out->data[0]+(out->linesize[0]*out->height*5)/4, out->width/2, out->height/2, out->linesize[0]/2,
+                           in->data[2], in->width/2, in->height/2, in->linesize[0]/2,
+                           out->data[2], out->width/2, out->height/2, out->linesize[0]/2,
                            1);
         break;
     case AV_PIX_FMT_YUV444P:
@@ -403,14 +404,28 @@ static int scalecuda_resize(AVFilterContext *ctx,
                            out->data[0], out->width, out->height, out->linesize[0],
                            1);
         call_resize_kernel(ctx, s->cu_func_uchar, 1,
-                           in->data[0]+in->linesize[0]*in->height, in->width, in->height, in->linesize[0],
-                           out->data[0]+out->linesize[0]*out->height, out->width, out->height, out->linesize[0],
+                           in->data[1], in->width, in->height, in->linesize[0],
+                           out->data[1], out->width, out->height, out->linesize[0],
                            1);
         call_resize_kernel(ctx, s->cu_func_uchar, 1,
-                           in->data[0]+in->linesize[0]*in->height*2, in->width, in->height, in->linesize[0],
-                           out->data[0]+out->linesize[0]*out->height*2, out->width, out->height, out->linesize[0],
+                           in->data[2], in->width, in->height, in->linesize[0],
+                           out->data[2], out->width, out->height, out->linesize[0],
                            1);
         break;
+    case AV_PIX_FMT_YUV444P16:
+        call_resize_kernel(ctx, s->cu_func_ushort, 1,
+                           in->data[0], in->width, in->height, in->linesize[0] / 2,
+                           out->data[0], out->width, out->height, out->linesize[0] / 2,
+                           2);
+        call_resize_kernel(ctx, s->cu_func_ushort, 1,
+                           in->data[1], in->width, in->height, in->linesize[1] / 2,
+                           out->data[1], out->width, out->height, out->linesize[1] / 2,
+                           2);
+        call_resize_kernel(ctx, s->cu_func_ushort, 1,
+                           in->data[2], in->width, in->height, in->linesize[2] / 2,
+                           out->data[2], out->width, out->height, out->linesize[2] / 2,
+                           2);
+        break;
     case AV_PIX_FMT_NV12:
         call_resize_kernel(ctx, s->cu_func_uchar, 1,
                            in->data[0], in->width, in->height, in->linesize[0],
@@ -418,7 +433,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
                            1);
         call_resize_kernel(ctx, s->cu_func_uchar2, 2,
                            in->data[1], in->width/2, in->height/2, in->linesize[1],
-                           out->data[0] + out->linesize[0] * ((out->height + 31) & ~0x1f), out->width/2, out->height/2, out->linesize[1]/2,
+                           out->data[1], out->width/2, out->height/2, out->linesize[1]/2,
                            1);
         break;
     case AV_PIX_FMT_P010LE:
@@ -428,7 +443,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
                            2);
         call_resize_kernel(ctx, s->cu_func_ushort2, 2,
                            in->data[1], in->width / 2, in->height / 2, in->linesize[1]/2,
-                           out->data[0] + out->linesize[0] * ((out->height + 31) & ~0x1f), out->width / 2, out->height / 2, out->linesize[1] / 4,
+                           out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 4,
                            2);
         break;
     case AV_PIX_FMT_P016LE:
@@ -438,7 +453,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
                            2);
         call_resize_kernel(ctx, s->cu_func_ushort2, 2,
                            in->data[1], in->width / 2, in->height / 2, in->linesize[1] / 2,
-                           out->data[0] + out->linesize[0] * ((out->height + 31) & ~0x1f), out->width / 2, out->height / 2, out->linesize[1] / 4,
+                           out->data[1], out->width / 2, out->height / 2, out->linesize[1] / 4,
                            2);
         break;
     default:
@@ -466,6 +481,9 @@ static int cudascale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
     av_frame_move_ref(out, s->frame);
     av_frame_move_ref(s->frame, s->tmp_frame);
 
+    s->frame->width  = s->planes_out[0].width;
+    s->frame->height = s->planes_out[0].height;
+
     ret = av_frame_copy_props(out, in);
     if (ret < 0)
         return ret;