]> git.sesse.net Git - ffmpeg/blob - libavfilter/unsharp_opencl.c
Merge commit '545a0b807cf45b2bbc4c9087a297b741ce00f508'
[ffmpeg] / libavfilter / unsharp_opencl.c
1 /*
2  * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 /**
22  * @file
23  * unsharp input video
24  */
25
26 #include "unsharp_opencl.h"
27 #include "libavutil/common.h"
28 #include "libavutil/opencl_internal.h"
29
30 #define PLANE_NUM 3
31
32 static inline void add_mask_counter(uint32_t *dst, uint32_t *counter1, uint32_t *counter2, int len)
33 {
34     int i;
35     for (i = 0; i < len; i++) {
36         dst[i] = counter1[i] + counter2[i];
37     }
38 }
39
40 static int compute_mask(int step, uint32_t *mask)
41 {
42     int i, z, ret = 0;
43     int counter_size = sizeof(uint32_t) * (2 * step + 1);
44     uint32_t *temp1_counter, *temp2_counter, **counter;
45     temp1_counter = av_mallocz(counter_size);
46     if (!temp1_counter) {
47         ret = AVERROR(ENOMEM);
48         goto end;
49     }
50     temp2_counter = av_mallocz(counter_size);
51     if (!temp2_counter) {
52         ret = AVERROR(ENOMEM);
53         goto end;
54     }
55     counter = av_mallocz(sizeof(uint32_t *) * (2 * step + 1));
56     if (!counter) {
57         ret = AVERROR(ENOMEM);
58         goto end;
59     }
60     for (i = 0; i < 2 * step + 1; i++) {
61         counter[i] = av_mallocz(counter_size);
62         if (!counter[i]) {
63             ret = AVERROR(ENOMEM);
64             goto end;
65         }
66     }
67     for (i = 0; i < 2 * step + 1; i++) {
68         memset(temp1_counter, 0, counter_size);
69         temp1_counter[i] = 1;
70         for (z = 0; z < step * 2; z += 2) {
71             add_mask_counter(temp2_counter, counter[z], temp1_counter, step * 2);
72             memcpy(counter[z], temp1_counter, counter_size);
73             add_mask_counter(temp1_counter, counter[z + 1], temp2_counter, step * 2);
74             memcpy(counter[z + 1], temp2_counter, counter_size);
75         }
76     }
77     memcpy(mask, temp1_counter, counter_size);
78 end:
79     av_freep(&temp1_counter);
80     av_freep(&temp2_counter);
81     for (i = 0; i < 2 * step + 1; i++) {
82         av_freep(&counter[i]);
83     }
84     av_freep(&counter);
85     return ret;
86 }
87
88 static int compute_mask_matrix(cl_mem cl_mask_matrix, int step_x, int step_y)
89 {
90     int i, j, ret = 0;
91     uint32_t *mask_matrix, *mask_x, *mask_y;
92     size_t size_matrix = sizeof(uint32_t) * (2 * step_x + 1) * (2 * step_y + 1);
93     mask_x = av_mallocz(sizeof(uint32_t) * (2 * step_x + 1));
94     if (!mask_x) {
95         ret = AVERROR(ENOMEM);
96         goto end;
97     }
98     mask_y = av_mallocz(sizeof(uint32_t) * (2 * step_y + 1));
99     if (!mask_y) {
100         ret = AVERROR(ENOMEM);
101         goto end;
102     }
103     mask_matrix = av_mallocz(size_matrix);
104     if (!mask_matrix) {
105         ret = AVERROR(ENOMEM);
106         goto end;
107     }
108     ret = compute_mask(step_x, mask_x);
109     if (ret < 0)
110         goto end;
111     ret = compute_mask(step_y, mask_y);
112     if (ret < 0)
113         goto end;
114     for (j = 0; j < 2 * step_y + 1; j++) {
115         for (i = 0; i < 2 * step_x + 1; i++) {
116             mask_matrix[i + j * (2 * step_x + 1)] = mask_y[j] * mask_x[i];
117         }
118     }
119     ret = av_opencl_buffer_write(cl_mask_matrix, (uint8_t *)mask_matrix, size_matrix);
120 end:
121     av_freep(&mask_x);
122     av_freep(&mask_y);
123     av_freep(&mask_matrix);
124     return ret;
125 }
126
127 static int generate_mask(AVFilterContext *ctx)
128 {
129     UnsharpContext *unsharp = ctx->priv;
130     int i, ret = 0, step_x[2], step_y[2];
131     cl_mem mask_matrix[2];
132     mask_matrix[0] = unsharp->opencl_ctx.cl_luma_mask;
133     mask_matrix[1] = unsharp->opencl_ctx.cl_chroma_mask;
134     step_x[0] = unsharp->luma.steps_x;
135     step_x[1] = unsharp->chroma.steps_x;
136     step_y[0] = unsharp->luma.steps_y;
137     step_y[1] = unsharp->chroma.steps_y;
138     if (!mask_matrix[0] || !mask_matrix[1]) {
139         av_log(ctx, AV_LOG_ERROR, "Luma mask and chroma mask should not be NULL\n");
140         return AVERROR(EINVAL);
141     }
142     for (i = 0; i < 2; i++) {
143         ret = compute_mask_matrix(mask_matrix[i], step_x[i], step_y[i]);
144         if (ret < 0)
145             return ret;
146     }
147     return ret;
148 }
149
150 int ff_opencl_apply_unsharp(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
151 {
152     int ret;
153     AVFilterLink *link = ctx->inputs[0];
154     UnsharpContext *unsharp = ctx->priv;
155     cl_int status;
156     int cw = FF_CEIL_RSHIFT(link->w, unsharp->hsub);
157     int ch = FF_CEIL_RSHIFT(link->h, unsharp->vsub);
158     const size_t global_work_size = link->w * link->h + 2 * ch * cw;
159     FFOpenclParam opencl_param = {0};
160
161     opencl_param.ctx = ctx;
162     opencl_param.kernel = unsharp->opencl_ctx.kernel_env.kernel;
163     ret = ff_opencl_set_parameter(&opencl_param,
164                                   FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_inbuf),
165                                   FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_outbuf),
166                                   FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_luma_mask),
167                                   FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_chroma_mask),
168                                   FF_OPENCL_PARAM_INFO(unsharp->luma.amount),
169                                   FF_OPENCL_PARAM_INFO(unsharp->chroma.amount),
170                                   FF_OPENCL_PARAM_INFO(unsharp->luma.steps_x),
171                                   FF_OPENCL_PARAM_INFO(unsharp->luma.steps_y),
172                                   FF_OPENCL_PARAM_INFO(unsharp->chroma.steps_x),
173                                   FF_OPENCL_PARAM_INFO(unsharp->chroma.steps_y),
174                                   FF_OPENCL_PARAM_INFO(unsharp->luma.scalebits),
175                                   FF_OPENCL_PARAM_INFO(unsharp->chroma.scalebits),
176                                   FF_OPENCL_PARAM_INFO(unsharp->luma.halfscale),
177                                   FF_OPENCL_PARAM_INFO(unsharp->chroma.halfscale),
178                                   FF_OPENCL_PARAM_INFO(in->linesize[0]),
179                                   FF_OPENCL_PARAM_INFO(in->linesize[1]),
180                                   FF_OPENCL_PARAM_INFO(out->linesize[0]),
181                                   FF_OPENCL_PARAM_INFO(out->linesize[1]),
182                                   FF_OPENCL_PARAM_INFO(link->h),
183                                   FF_OPENCL_PARAM_INFO(link->w),
184                                   FF_OPENCL_PARAM_INFO(ch),
185                                   FF_OPENCL_PARAM_INFO(cw),
186                                   NULL);
187     if (ret < 0)
188         return ret;
189     status = clEnqueueNDRangeKernel(unsharp->opencl_ctx.kernel_env.command_queue,
190                                     unsharp->opencl_ctx.kernel_env.kernel, 1, NULL,
191                                     &global_work_size, NULL, 0, NULL, NULL);
192     if (status != CL_SUCCESS) {
193         av_log(ctx, AV_LOG_ERROR, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status));
194         return AVERROR_EXTERNAL;
195     }
196     clFinish(unsharp->opencl_ctx.kernel_env.command_queue);
197     return av_opencl_buffer_read_image(out->data, unsharp->opencl_ctx.out_plane_size,
198                                        unsharp->opencl_ctx.plane_num, unsharp->opencl_ctx.cl_outbuf,
199                                        unsharp->opencl_ctx.cl_outbuf_size);
200 }
201
202 int ff_opencl_unsharp_init(AVFilterContext *ctx)
203 {
204     int ret = 0;
205     UnsharpContext *unsharp = ctx->priv;
206     ret = av_opencl_init(NULL);
207     if (ret < 0)
208         return ret;
209     ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_luma_mask,
210                                   sizeof(uint32_t) * (2 * unsharp->luma.steps_x + 1) * (2 * unsharp->luma.steps_y + 1),
211                                   CL_MEM_READ_ONLY, NULL);
212     if (ret < 0)
213         return ret;
214     ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_chroma_mask,
215                                   sizeof(uint32_t) * (2 * unsharp->chroma.steps_x + 1) * (2 * unsharp->chroma.steps_y + 1),
216                                   CL_MEM_READ_ONLY, NULL);
217     if (ret < 0)
218         return ret;
219     ret = generate_mask(ctx);
220     if (ret < 0)
221         return ret;
222     unsharp->opencl_ctx.plane_num = PLANE_NUM;
223     if (!unsharp->opencl_ctx.kernel_env.kernel) {
224         ret = av_opencl_create_kernel(&unsharp->opencl_ctx.kernel_env, "unsharp");
225         if (ret < 0) {
226             av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel with name 'unsharp'\n");
227             return ret;
228         }
229     }
230     return ret;
231 }
232
233 void ff_opencl_unsharp_uninit(AVFilterContext *ctx)
234 {
235     UnsharpContext *unsharp = ctx->priv;
236     av_opencl_buffer_release(&unsharp->opencl_ctx.cl_inbuf);
237     av_opencl_buffer_release(&unsharp->opencl_ctx.cl_outbuf);
238     av_opencl_buffer_release(&unsharp->opencl_ctx.cl_luma_mask);
239     av_opencl_buffer_release(&unsharp->opencl_ctx.cl_chroma_mask);
240     av_opencl_release_kernel(&unsharp->opencl_ctx.kernel_env);
241     av_opencl_uninit();
242 }
243
244 int ff_opencl_unsharp_process_inout_buf(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
245 {
246     int ret = 0;
247     AVFilterLink *link = ctx->inputs[0];
248     UnsharpContext *unsharp = ctx->priv;
249     int ch = FF_CEIL_RSHIFT(link->h, unsharp->vsub);
250
251     if ((!unsharp->opencl_ctx.cl_inbuf) || (!unsharp->opencl_ctx.cl_outbuf)) {
252         unsharp->opencl_ctx.in_plane_size[0]  = (in->linesize[0] * in->height);
253         unsharp->opencl_ctx.in_plane_size[1]  = (in->linesize[1] * ch);
254         unsharp->opencl_ctx.in_plane_size[2]  = (in->linesize[2] * ch);
255         unsharp->opencl_ctx.out_plane_size[0] = (out->linesize[0] * out->height);
256         unsharp->opencl_ctx.out_plane_size[1] = (out->linesize[1] * ch);
257         unsharp->opencl_ctx.out_plane_size[2] = (out->linesize[2] * ch);
258         unsharp->opencl_ctx.cl_inbuf_size  = unsharp->opencl_ctx.in_plane_size[0] +
259                                              unsharp->opencl_ctx.in_plane_size[1] +
260                                              unsharp->opencl_ctx.in_plane_size[2];
261         unsharp->opencl_ctx.cl_outbuf_size = unsharp->opencl_ctx.out_plane_size[0] +
262                                              unsharp->opencl_ctx.out_plane_size[1] +
263                                              unsharp->opencl_ctx.out_plane_size[2];
264         if (!unsharp->opencl_ctx.cl_inbuf) {
265             ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_inbuf,
266                                           unsharp->opencl_ctx.cl_inbuf_size,
267                                           CL_MEM_READ_ONLY, NULL);
268             if (ret < 0)
269                 return ret;
270         }
271         if (!unsharp->opencl_ctx.cl_outbuf) {
272             ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_outbuf,
273                                           unsharp->opencl_ctx.cl_outbuf_size,
274                                           CL_MEM_READ_WRITE, NULL);
275             if (ret < 0)
276                 return ret;
277         }
278     }
279     return av_opencl_buffer_write_image(unsharp->opencl_ctx.cl_inbuf,
280                                         unsharp->opencl_ctx.cl_inbuf_size,
281                                         0, in->data, unsharp->opencl_ctx.in_plane_size,
282                                         unsharp->opencl_ctx.plane_num);
283 }