From: Timo Rothenpieler <timo@rothenpieler.org>
Date: Tue, 3 Nov 2020 18:28:06 +0000 (+0100)
Subject: avfilter/scale_cuda: add nearest neighbour algorithm
X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=4ad7af085cd3db473bf035394d7d934800461bdf;hp=15c0e038ce90c3c1e13e80ea4fcf56c327b686f4;p=ffmpeg

avfilter/scale_cuda: add nearest neighbour algorithm
---

diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
index dc565cda89f..dfa638dbf7e 100644
--- a/libavfilter/vf_scale_cuda.c
+++ b/libavfilter/vf_scale_cuda.c
@@ -56,6 +56,7 @@ static const enum AVPixelFormat supported_formats[] = {
 enum {
     INTERP_ALGO_DEFAULT,
 
+    INTERP_ALGO_NEAREST,
     INTERP_ALGO_BILINEAR,
     INTERP_ALGO_BICUBIC,
 
@@ -273,6 +274,12 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink)
     extern char vf_scale_cuda_bicubic_ptx[];
 
     switch(s->interp_algo) {
+    case INTERP_ALGO_NEAREST:
+        scaler_ptx = vf_scale_cuda_ptx;
+        function_infix = "_Nearest";
+        s->interp_use_linear = 0;
+        s->interp_as_integer = 1;
+        break;
     case INTERP_ALGO_BILINEAR:
         scaler_ptx = vf_scale_cuda_ptx;
         function_infix = "_Bilinear";
@@ -591,6 +598,7 @@ static const AVOption options[] = {
     { "w",      "Output video width",  OFFSET(w_expr),     AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS },
     { "h",      "Output video height", OFFSET(h_expr),     AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS },
     { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = INTERP_ALGO_DEFAULT }, 0, INTERP_ALGO_COUNT - 1, FLAGS, "interp_algo" },
+        { "nearest",  "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_NEAREST }, 0, 0, FLAGS, "interp_algo" },
         { "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BILINEAR }, 0, 0, FLAGS, "interp_algo" },
         { "bicubic",  "bicubic",  0, AV_OPT_TYPE_CONST, { .i64 = INTERP_ALGO_BICUBIC  }, 0, 0, FLAGS, "interp_algo" },
     { "passthrough", "Do not process frames at all if parameters match", OFFSET(passthrough), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu
index 24b11512158..44eef535fda 100644
--- a/libavfilter/vf_scale_cuda.cu
+++ b/libavfilter/vf_scale_cuda.cu
@@ -22,6 +22,27 @@
 
 #include "cuda/vector_helpers.cuh"
 
+template<typename T> // T is the pixel element type fetched from tex and stored into dst
+__device__ inline void Subsample_Nearest(cudaTextureObject_t tex, // texture object the source sample is fetched from
+                                         T *dst, // output buffer; one T is written per in-range thread
+                                         int dst_width, int dst_height, int dst_pitch, // dst_pitch is used below as a row stride counted in T elements
+                                         int src_width, int src_height, // source dimensions, used only to derive the scale factors
+                                         int bit_depth) // not read in this function
+{
+    int xo = blockIdx.x * blockDim.x + threadIdx.x; // output column handled by this thread
+    int yo = blockIdx.y * blockDim.y + threadIdx.y; // output row handled by this thread
+
+    if (yo < dst_height && xo < dst_width) // threads that fall outside the output image do nothing
+    {
+        float hscale = (float)src_width / (float)dst_width; // horizontal source-to-destination size ratio
+        float vscale = (float)src_height / (float)dst_height; // vertical source-to-destination size ratio
+        float xi = (xo + 0.5f) * hscale; // centre of the output pixel scaled by the horizontal ratio
+        float yi = (yo + 0.5f) * vscale; // centre of the output pixel scaled by the vertical ratio
+
+        dst[yo*dst_pitch+xo] = tex2D<T>(tex, xi, yi); // single fetch at (xi, yi); NOTE(review): presumably relies on the host creating the texture with point filtering and unnormalized coordinates - confirm
+    }
+}
+
 template<typename T>
 __device__ inline void Subsample_Bilinear(cudaTextureObject_t tex,
                                           T *dst,
@@ -57,6 +78,27 @@ __device__ inline void Subsample_Bilinear(cudaTextureObject_t tex,
 
 extern "C" {
 
+#define NEAREST_KERNEL(T) /* defines __global__ kernel Subsample_Nearest_<T>, which forwards every argument unchanged to Subsample_Nearest<T> */ \
+    __global__ void Subsample_Nearest_ ## T(cudaTextureObject_t src_tex,                  \
+                                            T *dst,                                       \
+                                            int dst_width, int dst_height, int dst_pitch, \
+                                            int src_width, int src_height,                \
+                                            int bit_depth)                                \
+    {                                                                                     \
+        Subsample_Nearest<T>(src_tex, dst,                                                \
+                              dst_width, dst_height, dst_pitch,                           \
+                              src_width, src_height,                                      \
+                              bit_depth);                                                 \
+    }
+
+NEAREST_KERNEL(uchar) // emits Subsample_Nearest_uchar; inside the enclosing extern "C" block, so the symbol has C linkage
+NEAREST_KERNEL(uchar2) // emits Subsample_Nearest_uchar2
+NEAREST_KERNEL(uchar4) // emits Subsample_Nearest_uchar4
+
+NEAREST_KERNEL(ushort) // emits Subsample_Nearest_ushort
+NEAREST_KERNEL(ushort2) // emits Subsample_Nearest_ushort2
+NEAREST_KERNEL(ushort4) // emits Subsample_Nearest_ushort4; NOTE(review): presumably the host looks these up by name using the "_Nearest" infix - confirm against vf_scale_cuda.c
+
 #define BILINEAR_KERNEL(T) \
     __global__ void Subsample_Bilinear_ ## T(cudaTextureObject_t src_tex,                  \
                                              T *dst,                                       \