git.sesse.net Git - ffmpeg/blob - libavfilter/vf_scale_cuda.cu

   1 /*
   2  * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20  * DEALINGS IN THE SOFTWARE.
  21  */
  22
  23 #include "cuda/vector_helpers.cuh"
  24
  25 template<typename T>
  26 __device__ inline void Subsample_Bilinear(cudaTextureObject_t tex,
  27                                           T *dst,
  28                                           int dst_width, int dst_height, int dst_pitch,
  29                                           int src_width, int src_height,
  30                                           int bit_depth)
  31 {
  32     int xo = blockIdx.x * blockDim.x + threadIdx.x;
  33     int yo = blockIdx.y * blockDim.y + threadIdx.y;
  34
  35     if (yo < dst_height && xo < dst_width)
  36     {
  37         float hscale = (float)src_width / (float)dst_width;
  38         float vscale = (float)src_height / (float)dst_height;
  39         float xi = (xo + 0.5f) * hscale;
  40         float yi = (yo + 0.5f) * vscale;
  41         // 3-tap filter weights are {wh,1.0,wh} and {wv,1.0,wv}
  42         float wh = min(max(0.5f * (hscale - 1.0f), 0.0f), 1.0f);
  43         float wv = min(max(0.5f * (vscale - 1.0f), 0.0f), 1.0f);
  44         // Convert weights to two bilinear weights -> {wh,1.0,wh} -> {wh,0.5,0} + {0,0.5,wh}
  45         float dx = wh / (0.5f + wh);
  46         float dy = wv / (0.5f + wv);
  47
  48         intT r = { 0 };
  49         vec_set_scalar(r, 2);
  50         r += tex2D<T>(tex, xi - dx, yi - dy);
  51         r += tex2D<T>(tex, xi + dx, yi - dy);
  52         r += tex2D<T>(tex, xi - dx, yi + dy);
  53         r += tex2D<T>(tex, xi + dx, yi + dy);
  54         vec_set(dst[yo*dst_pitch+xo], r >> 2);
  55     }
  56 }
  57
  58 extern "C" {
  59
  60 #define BILINEAR_KERNEL(T) \
  61     __global__ void Subsample_Bilinear_ ## T(cudaTextureObject_t src_tex,                  \
  62                                              T *dst,                                       \
  63                                              int dst_width, int dst_height, int dst_pitch, \
  64                                              int src_width, int src_height,                \
  65                                              int bit_depth)                                \
  66     {                                                                                      \
  67         Subsample_Bilinear<T>(src_tex, dst,                                                \
  68                               dst_width, dst_height, dst_pitch,                            \
  69                               src_width, src_height,                                       \
  70                               bit_depth);                                                  \
  71     }
  72
  73 BILINEAR_KERNEL(uchar)
  74 BILINEAR_KERNEL(uchar2)
  75 BILINEAR_KERNEL(uchar4)
  76
  77 BILINEAR_KERNEL(ushort)
  78 BILINEAR_KERNEL(ushort2)
  79 BILINEAR_KERNEL(ushort4)
  80
  81 }