git.sesse.net Git - ffmpeg/blob - libavfilter/opencl/unsharp.cl

   1 /*
   2  * This file is part of FFmpeg.
   3  *
   4  * FFmpeg is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * FFmpeg is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with FFmpeg; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17  */
  18
  19 __kernel void unsharp_global(__write_only image2d_t dst,
  20                              __read_only  image2d_t src,
  21                              int size_x,
  22                              int size_y,
  23                              float amount,
  24                              __constant float *coef_matrix)
  25 {
  26     const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
  27                                CLK_FILTER_NEAREST);
  28     int2 loc    = (int2)(get_global_id(0), get_global_id(1));
  29     int2 centre = (int2)(size_x / 2, size_y / 2);
  30
  31     float4 val = read_imagef(src, sampler, loc);
  32     float4 sum = 0.0f;
  33     int x, y;
  34
  35     for (y = 0; y < size_y; y++) {
  36         for (x = 0; x < size_x; x++) {
  37             int2 pos = loc + (int2)(x, y) - centre;
  38             sum += coef_matrix[y * size_x + x] *
  39                 read_imagef(src, sampler, pos);
  40         }
  41     }
  42
  43     write_imagef(dst, loc, val + (val - sum) * amount);
  44 }
  45
  46 __kernel void unsharp_local(__write_only image2d_t dst,
  47                             __read_only  image2d_t src,
  48                             int size_x,
  49                             int size_y,
  50                             float amount,
  51                             __constant float *coef_x,
  52                             __constant float *coef_y)
  53 {
  54     const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
  55                                CLK_ADDRESS_CLAMP_TO_EDGE |
  56                                CLK_FILTER_NEAREST);
  57     int2 block = (int2)(get_group_id(0), get_group_id(1)) * 16;
  58     int2 pos   = (int2)(get_local_id(0), get_local_id(1));
  59
  60     __local float4 tmp[32][32];
  61
  62     int rad_x = size_x / 2;
  63     int rad_y = size_y / 2;
  64     int x, y;
  65
  66     for (y = 0; y <= 1; y++) {
  67         for (x = 0; x <= 1; x++) {
  68             tmp[pos.y + 16 * y][pos.x + 16 * x] =
  69                 read_imagef(src, sampler, block + pos + (int2)(16 * x - 8, 16 * y - 8));
  70         }
  71     }
  72
  73     barrier(CLK_LOCAL_MEM_FENCE);
  74
  75     float4 val = tmp[pos.y + 8][pos.x + 8];
  76
  77     float4 horiz[2];
  78     for (y = 0; y <= 1; y++) {
  79         horiz[y] = 0.0f;
  80         for (x = 0; x < size_x; x++)
  81             horiz[y] += coef_x[x] * tmp[pos.y + y * 16][pos.x + 8 + x - rad_x];
  82     }
  83
  84     barrier(CLK_LOCAL_MEM_FENCE);
  85
  86     for (y = 0; y <= 1; y++) {
  87         tmp[pos.y + y * 16][pos.x + 8] = horiz[y];
  88     }
  89
  90     barrier(CLK_LOCAL_MEM_FENCE);
  91
  92     float4 sum = 0.0f;
  93     for (y = 0; y < size_y; y++)
  94         sum += coef_y[y] * tmp[pos.y + 8 + y - rad_y][pos.x + 8];
  95
  96     if (block.x + pos.x < get_image_width(dst) &&
  97         block.y + pos.y < get_image_height(dst))
  98         write_imagef(dst, block + pos, val + (val - sum) * amount);
  99 }