git.sesse.net Git - ffmpeg/blob - libavfilter/deshake_opencl_kernel.h

   1 /*
   2  * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
   3  * Copyright (C) 2013 Lenny Wang
   4  *
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #ifndef AVFILTER_DESHAKE_OPENCL_KERNEL_H
  24 #define AVFILTER_DESHAKE_OPENCL_KERNEL_H
  25
  26 #include "libavutil/opencl.h"
  27
  28 const char *ff_kernel_deshake_opencl = AV_OPENCL_KERNEL(
  29 inline unsigned char pixel(global const unsigned char *src, int x, int y,
  30                            int w, int h,int stride, unsigned char def)
  31 {
  32     return (x < 0 || y < 0 || x >= w || y >= h) ? def : src[x + y * stride];
  33 }
  34
  35 unsigned char interpolate_nearest(float x, float y, global const unsigned char *src,
  36                                   int width, int height, int stride, unsigned char def)
  37 {
  38     return pixel(src, (int)(x + 0.5f), (int)(y + 0.5f), width, height, stride, def);
  39 }
  40
  41 unsigned char interpolate_bilinear(float x, float y, global const unsigned char *src,
  42                                    int width, int height, int stride, unsigned char def)
  43 {
  44     int x_c, x_f, y_c, y_f;
  45     int v1, v2, v3, v4;
  46     x_f = (int)x;
  47     y_f = (int)y;
  48     x_c = x_f + 1;
  49     y_c = y_f + 1;
  50
  51     if (x_f < -1 || x_f > width || y_f < -1 || y_f > height) {
  52         return def;
  53     } else {
  54         v4 = pixel(src, x_f, y_f, width, height, stride, def);
  55         v2 = pixel(src, x_c, y_f, width, height, stride, def);
  56         v3 = pixel(src, x_f, y_c, width, height, stride, def);
  57         v1 = pixel(src, x_c, y_c, width, height, stride, def);
  58         return (v1*(x - x_f)*(y - y_f) + v2*((x - x_f)*(y_c - y)) +
  59                 v3*(x_c - x)*(y - y_f) + v4*((x_c - x)*(y_c - y)));
  60     }
  61 }
  62
  63 unsigned char interpolate_biquadratic(float x, float y, global const unsigned char *src,
  64                                       int width, int height, int stride, unsigned char def)
  65 {
  66     int     x_c, x_f, y_c, y_f;
  67     unsigned char v1,  v2,  v3,  v4;
  68     float   f1,  f2,  f3,  f4;
  69     x_f = (int)x;
  70     y_f = (int)y;
  71     x_c = x_f + 1;
  72     y_c = y_f + 1;
  73
  74     if (x_f < - 1 || x_f > width || y_f < -1 || y_f > height)
  75         return def;
  76     else {
  77         v4 = pixel(src, x_f, y_f, width, height, stride, def);
  78         v2 = pixel(src, x_c, y_f, width, height, stride, def);
  79         v3 = pixel(src, x_f, y_c, width, height, stride, def);
  80         v1 = pixel(src, x_c, y_c, width, height, stride, def);
  81
  82         f1 = 1 - sqrt((x_c - x) * (y_c - y));
  83         f2 = 1 - sqrt((x_c - x) * (y - y_f));
  84         f3 = 1 - sqrt((x - x_f) * (y_c - y));
  85         f4 = 1 - sqrt((x - x_f) * (y - y_f));
  86         return (v1 * f1 + v2 * f2 + v3 * f3 + v4 * f4) / (f1 + f2 + f3 + f4);
  87     }
  88 }
  89
  90 inline const float clipf(float a, float amin, float amax)
  91 {
  92     if      (a < amin) return amin;
  93     else if (a > amax) return amax;
  94     else               return a;
  95 }
  96
  97 inline int mirror(int v, int m)
  98 {
  99     while ((unsigned)v > (unsigned)m) {
 100         v = -v;
 101         if (v < 0)
 102             v += 2 * m;
 103     }
 104     return v;
 105 }
 106
 107 kernel void avfilter_transform_luma(global unsigned char *src,
 108                                     global unsigned char *dst,
 109                                     float4 matrix,
 110                                     int interpolate,
 111                                     int fill,
 112                                     int src_stride_lu,
 113                                     int dst_stride_lu,
 114                                     int height,
 115                                     int width)
 116 {
 117     int x = get_global_id(0);
 118     int y = get_global_id(1);
 119     int idx_dst = y * dst_stride_lu + x;
 120     unsigned char def = 0;
 121     float x_s = x * matrix.x + y * matrix.y + matrix.z;
 122     float y_s = x * (-matrix.y) + y * matrix.x + matrix.w;
 123
 124     if (x < width && y < height) {
 125         switch (fill) {
 126             case 0: //FILL_BLANK
 127                 def = 0;
 128                 break;
 129             case 1: //FILL_ORIGINAL
 130                 def = src[y*src_stride_lu + x];
 131                 break;
 132             case 2: //FILL_CLAMP
 133                 y_s = clipf(y_s, 0, height - 1);
 134                 x_s = clipf(x_s, 0, width - 1);
 135                 def = src[(int)y_s * src_stride_lu + (int)x_s];
 136                 break;
 137             case 3: //FILL_MIRROR
 138                 y_s = mirror(y_s, height - 1);
 139                 x_s = mirror(x_s, width - 1);
 140                 def = src[(int)y_s * src_stride_lu + (int)x_s];
 141                 break;
 142         }
 143         switch (interpolate) {
 144             case 0: //INTERPOLATE_NEAREST
 145                 dst[idx_dst] = interpolate_nearest(x_s, y_s, src, width, height, src_stride_lu, def);
 146                 break;
 147             case 1: //INTERPOLATE_BILINEAR
 148                 dst[idx_dst] = interpolate_bilinear(x_s, y_s, src, width, height, src_stride_lu, def);
 149                 break;
 150             case 2: //INTERPOLATE_BIQUADRATIC
 151                 dst[idx_dst] = interpolate_biquadratic(x_s, y_s, src, width, height, src_stride_lu, def);
 152                 break;
 153             default:
 154                 return;
 155         }
 156     }
 157 }
 158
 159 kernel void avfilter_transform_chroma(global unsigned char *src,
 160                                       global unsigned char *dst,
 161                                       float4 matrix,
 162                                       int interpolate,
 163                                       int fill,
 164                                       int src_stride_lu,
 165                                       int dst_stride_lu,
 166                                       int src_stride_ch,
 167                                       int dst_stride_ch,
 168                                       int height,
 169                                       int width,
 170                                       int ch,
 171                                       int cw)
 172 {
 173
 174     int x = get_global_id(0);
 175     int y = get_global_id(1);
 176     int pad_ch = get_global_size(1)>>1;
 177     global unsigned char *dst_u = dst + height * dst_stride_lu;
 178     global unsigned char *src_u = src + height * src_stride_lu;
 179     global unsigned char *dst_v = dst_u + ch * dst_stride_ch;
 180     global unsigned char *src_v = src_u + ch * src_stride_ch;
 181     src = y < pad_ch ? src_u : src_v;
 182     dst = y < pad_ch ? dst_u : dst_v;
 183     y = select(y - pad_ch, y, y < pad_ch);
 184     float x_s = x * matrix.x + y * matrix.y + matrix.z;
 185     float y_s = x * (-matrix.y) + y * matrix.x + matrix.w;
 186     int idx_dst = y * dst_stride_ch + x;
 187     unsigned char def;
 188
 189     if (x < cw && y < ch) {
 190         switch (fill) {
 191             case 0: //FILL_BLANK
 192                 def = 0;
 193                 break;
 194             case 1: //FILL_ORIGINAL
 195                 def = src[y*src_stride_ch + x];
 196                 break;
 197             case 2: //FILL_CLAMP
 198                 y_s = clipf(y_s, 0, ch - 1);
 199                 x_s = clipf(x_s, 0, cw - 1);
 200                 def = src[(int)y_s * src_stride_ch + (int)x_s];
 201                 break;
 202             case 3: //FILL_MIRROR
 203                 y_s = mirror(y_s, ch - 1);
 204                 x_s = mirror(x_s, cw - 1);
 205                 def = src[(int)y_s * src_stride_ch + (int)x_s];
 206                 break;
 207         }
 208         switch (interpolate) {
 209             case 0: //INTERPOLATE_NEAREST
 210                 dst[idx_dst] = interpolate_nearest(x_s, y_s, src, cw, ch, src_stride_ch, def);
 211                 break;
 212             case 1: //INTERPOLATE_BILINEAR
 213                 dst[idx_dst] = interpolate_bilinear(x_s, y_s, src, cw, ch, src_stride_ch, def);
 214                 break;
 215             case 2: //INTERPOLATE_BIQUADRATIC
 216                 dst[idx_dst] = interpolate_biquadratic(x_s, y_s, src, cw, ch, src_stride_ch, def);
 217                 break;
 218             default:
 219                 return;
 220         }
 221     }
 222 }
 223 );
 224
 225 #endif /* AVFILTER_DESHAKE_OPENCL_KERNEL_H */