2 * This file is part of FFmpeg.
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 #include "libavutil/avassert.h"
20 #include "libavutil/buffer.h"
21 #include "libavutil/common.h"
22 #include "libavutil/hwcontext.h"
23 #include "libavutil/hwcontext_opencl.h"
24 #include "libavutil/log.h"
25 #include "libavutil/mathematics.h"
26 #include "libavutil/mem.h"
27 #include "libavutil/pixdesc.h"
28 #include "libavutil/opt.h"
31 #include "framesync.h"
34 #include "opencl_source.h"
37 typedef struct OverlayOpenCLContext {
38 OpenCLFilterContext ocf;
42 cl_command_queue command_queue;
53 } OverlayOpenCLContext;
55 static int overlay_opencl_load(AVFilterContext *avctx,
56 enum AVPixelFormat main_format,
57 enum AVPixelFormat overlay_format)
59 OverlayOpenCLContext *ctx = avctx->priv;
61 const char *source = ff_opencl_source_overlay;
63 const AVPixFmtDescriptor *main_desc, *overlay_desc;
64 int err, i, main_planes, overlay_planes;
66 main_desc = av_pix_fmt_desc_get(main_format);
67 overlay_desc = av_pix_fmt_desc_get(overlay_format);
69 main_planes = overlay_planes = 0;
70 for (i = 0; i < main_desc->nb_components; i++)
71 main_planes = FFMAX(main_planes,
72 main_desc->comp[i].plane + 1);
73 for (i = 0; i < overlay_desc->nb_components; i++)
74 overlay_planes = FFMAX(overlay_planes,
75 overlay_desc->comp[i].plane + 1);
77 ctx->nb_planes = main_planes;
78 ctx->x_subsample = 1 << main_desc->log2_chroma_w;
79 ctx->y_subsample = 1 << main_desc->log2_chroma_h;
81 if (ctx->x_position % ctx->x_subsample ||
82 ctx->y_position % ctx->y_subsample) {
83 av_log(avctx, AV_LOG_WARNING, "Warning: overlay position (%d, %d) "
84 "does not match subsampling (%d, %d).\n",
85 ctx->x_position, ctx->y_position,
86 ctx->x_subsample, ctx->y_subsample);
89 if (main_planes == overlay_planes) {
90 if (main_desc->nb_components == overlay_desc->nb_components)
91 kernel = "overlay_no_alpha";
93 kernel = "overlay_internal_alpha";
94 ctx->alpha_separate = 0;
96 kernel = "overlay_external_alpha";
97 ctx->alpha_separate = 1;
100 av_log(avctx, AV_LOG_DEBUG, "Using kernel %s.\n", kernel);
102 err = ff_opencl_filter_load_program(avctx, &source, 1);
106 ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
107 ctx->ocf.hwctx->device_id,
109 if (!ctx->command_queue) {
110 av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL "
111 "command queue: %d.\n", cle);
116 ctx->kernel = clCreateKernel(ctx->ocf.program, kernel, &cle);
118 av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
123 ctx->initialised = 1;
127 if (ctx->command_queue)
128 clReleaseCommandQueue(ctx->command_queue);
130 clReleaseKernel(ctx->kernel);
134 static int overlay_opencl_blend(FFFrameSync *fs)
136 AVFilterContext *avctx = fs->parent;
137 AVFilterLink *outlink = avctx->outputs[0];
138 OverlayOpenCLContext *ctx = avctx->priv;
139 AVFrame *input_main, *input_overlay;
143 size_t global_work[2];
147 err = ff_framesync_get_frame(fs, 0, &input_main, 0);
150 err = ff_framesync_get_frame(fs, 1, &input_overlay, 0);
154 if (!ctx->initialised) {
155 AVHWFramesContext *main_fc =
156 (AVHWFramesContext*)input_main->hw_frames_ctx->data;
157 AVHWFramesContext *overlay_fc =
158 (AVHWFramesContext*)input_overlay->hw_frames_ctx->data;
160 err = overlay_opencl_load(avctx, main_fc->sw_format,
161 overlay_fc->sw_format);
166 output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
168 err = AVERROR(ENOMEM);
172 for (plane = 0; plane < ctx->nb_planes; plane++) {
175 mem = (cl_mem)output->data[plane];
176 cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem);
177 if (cle != CL_SUCCESS)
178 goto fail_kernel_arg;
180 mem = (cl_mem)input_main->data[plane];
181 cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem);
182 if (cle != CL_SUCCESS)
183 goto fail_kernel_arg;
185 mem = (cl_mem)input_overlay->data[plane];
186 cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem);
187 if (cle != CL_SUCCESS)
188 goto fail_kernel_arg;
190 if (ctx->alpha_separate) {
191 mem = (cl_mem)input_overlay->data[ctx->nb_planes];
192 cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem);
193 if (cle != CL_SUCCESS)
194 goto fail_kernel_arg;
197 x = ctx->x_position / (plane == 0 ? 1 : ctx->x_subsample);
198 y = ctx->y_position / (plane == 0 ? 1 : ctx->y_subsample);
200 cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_int), &x);
201 if (cle != CL_SUCCESS)
202 goto fail_kernel_arg;
203 cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_int), &y);
204 if (cle != CL_SUCCESS)
205 goto fail_kernel_arg;
207 if (ctx->alpha_separate) {
208 cl_int alpha_adj_x = plane == 0 ? 1 : ctx->x_subsample;
209 cl_int alpha_adj_y = plane == 0 ? 1 : ctx->y_subsample;
211 cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_int), &alpha_adj_x);
212 if (cle != CL_SUCCESS)
213 goto fail_kernel_arg;
214 cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_int), &alpha_adj_y);
215 if (cle != CL_SUCCESS)
216 goto fail_kernel_arg;
219 global_work[0] = output->width;
220 global_work[1] = output->height;
222 cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
223 global_work, NULL, 0, NULL, NULL);
224 if (cle != CL_SUCCESS) {
225 av_log(avctx, AV_LOG_ERROR, "Failed to enqueue "
226 "overlay kernel for plane %d: %d.\n", cle, plane);
232 cle = clFinish(ctx->command_queue);
233 if (cle != CL_SUCCESS) {
234 av_log(avctx, AV_LOG_ERROR, "Failed to finish "
235 "command queue: %d.\n", cle);
240 err = av_frame_copy_props(output, input_main);
242 av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
243 av_get_pix_fmt_name(output->format),
244 output->width, output->height, output->pts);
246 return ff_filter_frame(outlink, output);
249 av_log(avctx, AV_LOG_ERROR, "Failed to set kernel arg %d: %d.\n",
256 static int overlay_opencl_config_output(AVFilterLink *outlink)
258 AVFilterContext *avctx = outlink->src;
259 OverlayOpenCLContext *ctx = avctx->priv;
262 err = ff_opencl_filter_config_output(outlink);
266 err = ff_framesync_init_dualinput(&ctx->fs, avctx);
270 return ff_framesync_configure(&ctx->fs);
273 static av_cold int overlay_opencl_init(AVFilterContext *avctx)
275 OverlayOpenCLContext *ctx = avctx->priv;
277 ctx->fs.on_event = &overlay_opencl_blend;
279 return ff_opencl_filter_init(avctx);
282 static int overlay_opencl_activate(AVFilterContext *avctx)
284 OverlayOpenCLContext *ctx = avctx->priv;
286 return ff_framesync_activate(&ctx->fs);
289 static av_cold void overlay_opencl_uninit(AVFilterContext *avctx)
291 OverlayOpenCLContext *ctx = avctx->priv;
295 cle = clReleaseKernel(ctx->kernel);
296 if (cle != CL_SUCCESS)
297 av_log(avctx, AV_LOG_ERROR, "Failed to release "
298 "kernel: %d.\n", cle);
301 if (ctx->command_queue) {
302 cle = clReleaseCommandQueue(ctx->command_queue);
303 if (cle != CL_SUCCESS)
304 av_log(avctx, AV_LOG_ERROR, "Failed to release "
305 "command queue: %d.\n", cle);
308 ff_opencl_filter_uninit(avctx);
310 ff_framesync_uninit(&ctx->fs);
313 #define OFFSET(x) offsetof(OverlayOpenCLContext, x)
314 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
315 static const AVOption overlay_opencl_options[] = {
316 { "x", "Overlay x position",
317 OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS },
318 { "y", "Overlay y position",
319 OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS },
323 AVFILTER_DEFINE_CLASS(overlay_opencl);
325 static const AVFilterPad overlay_opencl_inputs[] = {
328 .type = AVMEDIA_TYPE_VIDEO,
329 .config_props = &ff_opencl_filter_config_input,
333 .type = AVMEDIA_TYPE_VIDEO,
334 .config_props = &ff_opencl_filter_config_input,
339 static const AVFilterPad overlay_opencl_outputs[] = {
342 .type = AVMEDIA_TYPE_VIDEO,
343 .config_props = &overlay_opencl_config_output,
348 AVFilter ff_vf_overlay_opencl = {
349 .name = "overlay_opencl",
350 .description = NULL_IF_CONFIG_SMALL("Overlay one video on top of another"),
351 .priv_size = sizeof(OverlayOpenCLContext),
352 .priv_class = &overlay_opencl_class,
353 .init = &overlay_opencl_init,
354 .uninit = &overlay_opencl_uninit,
355 .query_formats = &ff_opencl_filter_query_formats,
356 .activate = &overlay_opencl_activate,
357 .inputs = overlay_opencl_inputs,
358 .outputs = overlay_opencl_outputs,
359 .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,