]> git.sesse.net Git - ffmpeg/blob - libavfilter/vf_scale_cuda.cu
avfilter/scale_cuda: code cleanup
[ffmpeg] / libavfilter / vf_scale_cuda.cu
1 /*
2  * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22
23 #include "cuda/vector_helpers.cuh"
24
/*
 * Produce one destination pixel per thread by averaging four texture
 * fetches placed symmetrically around the source position that maps to
 * the output pixel.
 *
 *   tex                    source image, read through tex2D<T>
 *   dst                    destination plane, indexed as dst[y * dst_pitch + x],
 *                          i.e. dst_pitch is counted in elements of T
 *   dst_width, dst_height  output size; threads outside it write nothing
 *   src_width, src_height  input size, used only to derive the scale factors
 *   bit_depth              not used by this function
 *
 * NOTE(review): intT, vec_set_scalar and vec_set are not defined in this
 * file; presumably they come from cuda/vector_helpers.cuh, with intT being
 * an integer accumulator type derived from T -- confirm against that header.
 * The template parameter is deliberately kept named T in case intT depends
 * on that name.
 */
template<typename T>
__device__ inline void Subsample_Bilinear(cudaTextureObject_t tex,
                                          T *dst,
                                          int dst_width, int dst_height, int dst_pitch,
                                          int src_width, int src_height,
                                          int bit_depth)
{
    const int out_x = blockIdx.x * blockDim.x + threadIdx.x;
    const int out_y = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads that fall outside the destination image have nothing to do.
    if (out_y >= dst_height || out_x >= dst_width)
        return;

    // Horizontal axis: source/destination ratio and the source coordinate
    // of this output pixel's centre.
    const float scale_x = (float)src_width / (float)dst_width;
    const float src_x   = (out_x + 0.5f) * scale_x;
    // Side weight of the 3-tap filter {w, 1.0, w}, clamped to [0, 1].
    const float side_x  = min(max(0.5f * (scale_x - 1.0f), 0.0f), 1.0f);
    // The 3-tap filter is split into two 2-tap halves,
    // {w, 1.0, w} -> {w, 0.5, 0} + {0, 0.5, w}; each half is sampled with a
    // single fetch displaced by this offset from the centre.
    // NOTE(review): this relies on the texture being set up for linear
    // filtering -- confirm at the call site.
    const float off_x   = side_x / (0.5f + side_x);

    // Vertical axis: same derivation as above.
    const float scale_y = (float)src_height / (float)dst_height;
    const float src_y   = (out_y + 0.5f) * scale_y;
    const float side_y  = min(max(0.5f * (scale_y - 1.0f), 0.0f), 1.0f);
    const float off_y   = side_y / (0.5f + side_y);

    // Coordinates of the four taps.
    const float left   = src_x - off_x;
    const float right  = src_x + off_x;
    const float top    = src_y - off_y;
    const float bottom = src_y + off_y;

    // Start the accumulator at 2 so that the final ">> 2" (divide by four)
    // rounds instead of truncating -- assuming vec_set_scalar assigns the
    // scalar to every component of the accumulator.
    intT acc = { 0 };
    vec_set_scalar(acc, 2);

    acc += tex2D<T>(tex, left,  top);
    acc += tex2D<T>(tex, right, top);
    acc += tex2D<T>(tex, left,  bottom);
    acc += tex2D<T>(tex, right, bottom);

    vec_set(dst[out_y*dst_pitch+out_x], acc >> 2);
}
57
extern "C" {

/*
 * BILINEAR_KERNEL(PIXEL_T) defines a __global__ entry point named
 * Subsample_Bilinear_<PIXEL_T> that passes every argument through,
 * unchanged, to Subsample_Bilinear<PIXEL_T>.
 *
 * The definitions sit inside extern "C" so the generated kernel symbols
 * keep their plain, unmangled names.
 */
#define BILINEAR_KERNEL(PIXEL_T)                                      \
    __global__ void Subsample_Bilinear_ ## PIXEL_T(                   \
        cudaTextureObject_t src_tex,                                  \
        PIXEL_T *dst,                                                 \
        int dst_width, int dst_height, int dst_pitch,                 \
        int src_width, int src_height,                                \
        int bit_depth)                                                \
    {                                                                 \
        Subsample_Bilinear<PIXEL_T>(src_tex, dst,                     \
                                    dst_width, dst_height, dst_pitch, \
                                    src_width, src_height,            \
                                    bit_depth);                       \
    }

/* Kernels for the uchar-based pixel types. */
BILINEAR_KERNEL(uchar)
BILINEAR_KERNEL(uchar2)
BILINEAR_KERNEL(uchar4)

/* Kernels for the ushort-based pixel types. */
BILINEAR_KERNEL(ushort)
BILINEAR_KERNEL(ushort2)
BILINEAR_KERNEL(ushort4)

}