git.sesse.net Git - ffmpeg/blob - libavfilter/opencl/convolution.cl

   1 /*
   2  * Copyright (c) 2018 Danil Iashchenko
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 __kernel void convolution_global(__write_only image2d_t dst,
  22                                  __read_only  image2d_t src,
  23                                  int coef_matrix_dim,
  24                                  __constant float *coef_matrix,
  25                                  float div,
  26                                  float bias)
  27 {
  28     const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
  29                                CLK_ADDRESS_CLAMP_TO_EDGE   |
  30                                CLK_FILTER_NEAREST);
  31
  32     const int half_matrix_dim = (coef_matrix_dim / 2);
  33     int2 loc = (int2)(get_global_id(0), get_global_id(1));
  34     float4 convPix = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
  35
  36     for (int conv_i = -half_matrix_dim; conv_i <= half_matrix_dim; conv_i++) {
  37         for (int conv_j = -half_matrix_dim; conv_j <= half_matrix_dim; conv_j++) {
  38             float4 px = read_imagef(src, sampler, loc + (int2)(conv_j, conv_i));
  39             convPix += px * coef_matrix[(conv_i + half_matrix_dim) * coef_matrix_dim +
  40                                         (conv_j + half_matrix_dim)];
  41         }
  42      }
  43      float4 dstPix = convPix * div + bias;
  44      write_imagef(dst, loc, dstPix);
  45 }
  46
  47
  48 __kernel void sobel_global(__write_only image2d_t dst,
  49                            __read_only  image2d_t src,
  50                              float div,
  51                              float bias)
  52 {
  53     const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
  54                                CLK_ADDRESS_CLAMP_TO_EDGE   |
  55                                CLK_FILTER_NEAREST);
  56
  57     int2 loc = (int2)(get_global_id(0), get_global_id(1));
  58
  59     float4 sum1 = read_imagef(src, sampler, loc + (int2)(-1,-1)) * -1 +
  60                   read_imagef(src, sampler, loc + (int2)( 0,-1)) * -2 +
  61                   read_imagef(src, sampler, loc + (int2)( 1,-1)) * -1 +
  62                   read_imagef(src, sampler, loc + (int2)(-1, 1)) *  1 +
  63                   read_imagef(src, sampler, loc + (int2)( 0, 1)) *  2 +
  64                   read_imagef(src, sampler, loc + (int2)( 1, 1)) *  1;
  65
  66     float4 sum2 = read_imagef(src, sampler, loc + (int2)(-1,-1)) * -1 +
  67                   read_imagef(src, sampler, loc + (int2)(-1, 0)) * -2 +
  68                   read_imagef(src, sampler, loc + (int2)(-1, 1)) * -1 +
  69                   read_imagef(src, sampler, loc + (int2)( 1,-1)) *  1 +
  70                   read_imagef(src, sampler, loc + (int2)( 1, 0)) *  2 +
  71                   read_imagef(src, sampler, loc + (int2)( 1, 1)) *  1;
  72
  73     float4 dstPix = hypot(sum1, sum2) * div + bias;
  74     write_imagef(dst, loc, dstPix);
  75 }
  76
  77 __kernel void prewitt_global(__write_only image2d_t dst,
  78                              __read_only  image2d_t src,
  79                              float div,
  80                              float bias)
  81 {
  82     const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
  83                                CLK_ADDRESS_CLAMP_TO_EDGE   |
  84                                CLK_FILTER_NEAREST);
  85
  86     int2 loc = (int2)(get_global_id(0), get_global_id(1));
  87
  88     float4 sum1 = read_imagef(src, sampler, loc + (int2)(-1,-1)) *  1 +
  89                   read_imagef(src, sampler, loc + (int2)( 0,-1)) *  1 +
  90                   read_imagef(src, sampler, loc + (int2)( 1,-1)) *  1 +
  91                   read_imagef(src, sampler, loc + (int2)(-1, 1)) * -1 +
  92                   read_imagef(src, sampler, loc + (int2)( 0, 1)) * -1 +
  93                   read_imagef(src, sampler, loc + (int2)( 1, 1)) * -1;
  94
  95     float4 sum2 = read_imagef(src, sampler, loc + (int2)(-1,-1)) *  1 +
  96                   read_imagef(src, sampler, loc + (int2)(-1, 0)) *  1 +
  97                   read_imagef(src, sampler, loc + (int2)(-1, 1)) *  1 +
  98                   read_imagef(src, sampler, loc + (int2)( 1,-1)) * -1 +
  99                   read_imagef(src, sampler, loc + (int2)( 1, 0)) * -1 +
 100                   read_imagef(src, sampler, loc + (int2)( 1, 1)) * -1;
 101
 102     float4 dstPix = hypot(sum1, sum2) * div + bias;
 103     write_imagef(dst, loc, dstPix);
 104 }
 105
 106 __kernel void roberts_global(__write_only image2d_t dst,
 107                              __read_only  image2d_t src,
 108                              float div,
 109                              float bias)
 110 {
 111     const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
 112                                CLK_ADDRESS_CLAMP_TO_EDGE   |
 113                                CLK_FILTER_NEAREST);
 114
 115     int2 loc = (int2)(get_global_id(0), get_global_id(1));
 116
 117     float4 sum1 = read_imagef(src, sampler, loc + (int2)(-1,-1)) *  1 +
 118                   read_imagef(src, sampler, loc + (int2)( 0,-1)) * -1;
 119
 120
 121     float4 sum2 = read_imagef(src, sampler, loc + (int2)(-1, 0)) * -1 +
 122                   read_imagef(src, sampler, loc + (int2)( 0, 0)) *  1;
 123
 124
 125     float4 dstPix = hypot(sum1, sum2) * div + bias;
 126     write_imagef(dst, loc, dstPix);
 127 }