]> git.sesse.net Git - ffmpeg/blob - libavfilter/vf_scale_cuda.cu
avutil/hwcontext_vulkan: fix format specifiers for some printed variables
[ffmpeg] / libavfilter / vf_scale_cuda.cu
1 /*
2  * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22
23 #include "cuda/vector_helpers.cuh"
24
/**
 * Write one destination pixel per thread using a single texture fetch.
 *
 * The destination coordinate is taken from the 2D block/thread indices. It is
 * offset by 0.5, scaled by the source/destination size ratio and used as the
 * tex2D coordinate; the fetched value is stored unmodified.
 * NOTE(review): the result is only "nearest neighbour" if the host created the
 * texture object with point filtering -- not visible from here, confirm.
 *
 * @param tex         source texture object
 * @param dst         destination buffer, addressed as dst[y * dst_pitch + x]
 * @param dst_width   destination width; threads with x >= dst_width do nothing
 * @param dst_height  destination height; threads with y >= dst_height do nothing
 * @param dst_pitch   destination row stride, in elements of T
 * @param src_width   source width used for the horizontal scale factor
 * @param src_height  source height used for the vertical scale factor
 * @param bit_depth   not used by this function
 */
template<typename T>
__device__ inline void Subsample_Nearest(cudaTextureObject_t tex,
                                         T *dst,
                                         int dst_width, int dst_height, int dst_pitch,
                                         int src_width, int src_height,
                                         int bit_depth)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads that fall outside the destination rectangle have nothing to do.
    if (row >= dst_height || col >= dst_width)
        return;

    // Source-to-destination size ratios.
    const float scale_x = (float)src_width  / (float)dst_width;
    const float scale_y = (float)src_height / (float)dst_height;

    // Destination index shifted by half a pixel, expressed in source coordinates.
    const float src_x = (col + 0.5f) * scale_x;
    const float src_y = (row + 0.5f) * scale_y;

    dst[row * dst_pitch + col] = tex2D<T>(tex, src_x, src_y);
}
45
/**
 * Write one destination pixel per thread as the rounded average of four
 * texture fetches placed symmetrically around the mapped source position.
 *
 * The fetch offsets (dx, dy) grow with the downscale ratio: they are 0 when
 * the ratio is <= 1 and are capped once the ratio reaches 3 (weight clamped
 * to [0, 1]).
 * NOTE(review): the "two bilinear taps replace a 3-tap filter" reasoning below
 * presumes the host created the texture object with linear filtering -- not
 * visible from here, confirm.
 *
 * @param tex         source texture object
 * @param dst         destination buffer, addressed as dst[y * dst_pitch + x]
 * @param dst_width   destination width; threads with x >= dst_width do nothing
 * @param dst_height  destination height; threads with y >= dst_height do nothing
 * @param dst_pitch   destination row stride, in elements of T
 * @param src_width   source width used for the horizontal scale factor
 * @param src_height  source height used for the vertical scale factor
 * @param bit_depth   not used by this function
 */
template<typename T>
__device__ inline void Subsample_Bilinear(cudaTextureObject_t tex,
                                          T *dst,
                                          int dst_width, int dst_height, int dst_pitch,
                                          int src_width, int src_height,
                                          int bit_depth)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads that fall outside the destination rectangle have nothing to do.
    if (row >= dst_height || col >= dst_width)
        return;

    // Source-to-destination size ratios.
    const float scale_x = (float)src_width  / (float)dst_width;
    const float scale_y = (float)src_height / (float)dst_height;

    // Destination index shifted by half a pixel, expressed in source coordinates.
    const float src_x = (col + 0.5f) * scale_x;
    const float src_y = (row + 0.5f) * scale_y;

    // Outer weights of the 3-tap filters {w,1.0,w}, clamped to [0, 1].
    const float weight_x = min(max(0.5f * (scale_x - 1.0f), 0.0f), 1.0f);
    const float weight_y = min(max(0.5f * (scale_y - 1.0f), 0.0f), 1.0f);

    // Split each 3-tap filter {w,1.0,w} into two taps {w,0.5,0} + {0,0.5,w};
    // the resulting fetch offset from the centre is w / (0.5 + w).
    const float dx = weight_x / (0.5f + weight_x);
    const float dy = weight_y / (0.5f + weight_y);

    const float x_lo = src_x - dx;
    const float x_hi = src_x + dx;
    const float y_lo = src_y - dy;
    const float y_hi = src_y + dy;

    // Integer accumulator, seeded with 2 so that the final >> 2 rounds the
    // four-sample sum to nearest instead of truncating.
    intT sum = { 0 };
    vec_set_scalar(sum, 2);
    sum += tex2D<T>(tex, x_lo, y_lo);
    sum += tex2D<T>(tex, x_hi, y_lo);
    sum += tex2D<T>(tex, x_lo, y_hi);
    sum += tex2D<T>(tex, x_hi, y_hi);

    vec_set(dst[row * dst_pitch + col], sum >> 2);
}
78
// Kernel entry points. extern "C" keeps the symbol names unmangled, i.e.
// exactly Subsample_<Filter>_<type>.
// NOTE(review): presumably these are looked up by name from the host side --
// confirm against the caller before renaming anything here.
extern "C" {

// Parameter list shared by every exported subsampling kernel.
#define SUBSAMPLE_KERNEL_PARAMS(T)                 \
    cudaTextureObject_t src_tex,                   \
    T *dst,                                        \
    int dst_width, int dst_height, int dst_pitch,  \
    int src_width, int src_height,                 \
    int bit_depth

// The same parameters, in the same order, forwarded to the device template.
#define SUBSAMPLE_KERNEL_ARGS                      \
    src_tex, dst,                                  \
    dst_width, dst_height, dst_pitch,              \
    src_width, src_height,                         \
    bit_depth

// Defines the kernel Subsample_Nearest_<T>, a thin wrapper around
// Subsample_Nearest<T>.
#define NEAREST_KERNEL(T)                                                \
    __global__ void Subsample_Nearest_ ## T(SUBSAMPLE_KERNEL_PARAMS(T))  \
    {                                                                    \
        Subsample_Nearest<T>(SUBSAMPLE_KERNEL_ARGS);                     \
    }

// 8-bit element types: 1, 2 and 4 components.
NEAREST_KERNEL(uchar)
NEAREST_KERNEL(uchar2)
NEAREST_KERNEL(uchar4)

// 16-bit element types: 1, 2 and 4 components.
NEAREST_KERNEL(ushort)
NEAREST_KERNEL(ushort2)
NEAREST_KERNEL(ushort4)

// Defines the kernel Subsample_Bilinear_<T>, a thin wrapper around
// Subsample_Bilinear<T>.
#define BILINEAR_KERNEL(T)                                                \
    __global__ void Subsample_Bilinear_ ## T(SUBSAMPLE_KERNEL_PARAMS(T))  \
    {                                                                     \
        Subsample_Bilinear<T>(SUBSAMPLE_KERNEL_ARGS);                     \
    }

// 8-bit element types: 1, 2 and 4 components.
BILINEAR_KERNEL(uchar)
BILINEAR_KERNEL(uchar2)
BILINEAR_KERNEL(uchar4)

// 16-bit element types: 1, 2 and 4 components.
BILINEAR_KERNEL(ushort)
BILINEAR_KERNEL(ushort2)
BILINEAR_KERNEL(ushort4)

}
123 }