2 * This file is part of FFmpeg.
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 #include "libavutil/log.h"
20 #include "libavutil/mem.h"
21 #include "libavutil/opt.h"
22 #include "libavutil/pixdesc.h"
25 #include "framesync.h"
28 #include "opencl_source.h"
/*
 * Private context of the overlay_opencl filter.
 *
 * NOTE(review): only two members are visible in this view, yet the functions
 * in this file also access kernel, initialised, fs, nb_planes, x_subsample,
 * y_subsample, alpha_separate, x_position and y_position through this type;
 * their declarations appear to be missing from this extract.
 */
31 typedef struct OverlayOpenCLContext {
/* Shared OpenCL filter state; its hwctx and program members are read when
 * the command queue and the kernel are created. */
32 OpenCLFilterContext ocf;
/* Queue on which the per-plane overlay kernel is enqueued and then finished. */
36 cl_command_queue command_queue;
47 } OverlayOpenCLContext;
/*
 * Prepare the OpenCL side of the filter for a given pair of pixel formats.
 *
 * Derives the plane count and chroma subsampling factors from the main
 * format, picks one of three kernel names depending on how the overlay
 * format compares with the main format, loads the overlay program source,
 * and creates the command queue and kernel stored in the filter context.
 *
 * avctx:          filter instance whose priv data is an OverlayOpenCLContext
 * main_format:    pixel format of the main input (the caller passes the
 *                 sw_format of the main hardware frames context)
 * overlay_format: pixel format of the overlay input (the caller passes the
 *                 sw_format of the overlay hardware frames context)
 *
 * NOTE(review): several lines of this function (braces, some declarations,
 * else keywords, error checks, labels and return statements) are not
 * visible in this view, so the exact return values cannot be stated here.
 */
49 static int overlay_opencl_load(AVFilterContext *avctx,
50 enum AVPixelFormat main_format,
51 enum AVPixelFormat overlay_format)
53 OverlayOpenCLContext *ctx = avctx->priv;
55 const char *source = ff_opencl_source_overlay;
57 const AVPixFmtDescriptor *main_desc, *overlay_desc;
58 int err, i, main_planes, overlay_planes;
60 main_desc = av_pix_fmt_desc_get(main_format);
61 overlay_desc = av_pix_fmt_desc_get(overlay_format);
/* Plane count of each format = highest plane index used by any of its
 * components, plus one. */
63 main_planes = overlay_planes = 0;
64 for (i = 0; i < main_desc->nb_components; i++)
65 main_planes = FFMAX(main_planes,
66 main_desc->comp[i].plane + 1);
67 for (i = 0; i < overlay_desc->nb_components; i++)
68 overlay_planes = FFMAX(overlay_planes,
69 overlay_desc->comp[i].plane + 1);
/* The main format decides how many planes are processed and the
 * subsampling factors, computed as powers of two from the log2 fields. */
71 ctx->nb_planes = main_planes;
72 ctx->x_subsample = 1 << main_desc->log2_chroma_w;
73 ctx->y_subsample = 1 << main_desc->log2_chroma_h;
/* A position that is not a multiple of the subsampling factors is reported
 * with a warning. */
75 if (ctx->x_position % ctx->x_subsample ||
76 ctx->y_position % ctx->y_subsample) {
77 av_log(avctx, AV_LOG_WARNING, "Warning: overlay position (%d, %d) "
78 "does not match subsampling (%d, %d).\n",
79 ctx->x_position, ctx->y_position,
80 ctx->x_subsample, ctx->y_subsample);
/*
 * Kernel selection.  With equal plane counts alpha_separate is cleared and
 * the kernel is overlay_no_alpha when the component counts also match;
 * overlay_internal_alpha is the other candidate on that path.  The
 * overlay_external_alpha kernel is paired with alpha_separate = 1.
 * NOTE(review): the else keywords separating these branches are not
 * visible in this view; presumably external alpha is chosen when the plane
 * counts differ - confirm against the full file.
 */
83 if (main_planes == overlay_planes) {
84 if (main_desc->nb_components == overlay_desc->nb_components)
85 kernel = "overlay_no_alpha";
87 kernel = "overlay_internal_alpha";
88 ctx->alpha_separate = 0;
90 kernel = "overlay_external_alpha";
91 ctx->alpha_separate = 1;
94 av_log(avctx, AV_LOG_DEBUG, "Using kernel %s.\n", kernel);
/* Load the single overlay program source into the filter. */
96 err = ff_opencl_filter_load_program(avctx, &source, 1);
/* Create the command queue on the filter's OpenCL context and device.
 * NOTE(review): CL_FAIL_ON_ERROR is defined outside this view; presumably
 * it logs the message and leaves through the cleanup path with the given
 * error code - confirm against its definition. */
100 ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
101 ctx->ocf.hwctx->device_id,
103 CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
104 "command queue %d.\n", cle);
/* Create the kernel selected above from the loaded program. */
106 ctx->kernel = clCreateKernel(ctx->ocf.program, kernel, &cle);
107 CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
/* Checked by overlay_opencl_blend() so that loading happens only once. */
109 ctx->initialised = 1;
/* NOTE(review): presumably the error-cleanup path (its label is not visible
 * here).  The released handles are not reset in the visible lines, while
 * overlay_opencl_uninit() releases the same members again - confirm this
 * cannot lead to a double release. */
113 if (ctx->command_queue)
114 clReleaseCommandQueue(ctx->command_queue);
116 clReleaseKernel(ctx->kernel);
/*
 * Frame-sync event callback (installed as fs.on_event by
 * overlay_opencl_init()).
 *
 * Fetches the current main and overlay frames, loads the OpenCL kernel on
 * first use, allocates an output frame, runs the overlay kernel once per
 * plane, waits for the queue to finish, copies frame properties from the
 * main input and passes the output frame to the output link.
 *
 * fs: frame-sync state whose parent is the filter instance.
 *
 * Returns the result of ff_filter_frame() on the path that reaches it.
 * NOTE(review): several lines (declarations of output, mem, cle, x, y,
 * kernel_arg, err and plane, error checks, labels, other return statements)
 * are not visible in this view.
 */
120 static int overlay_opencl_blend(FFFrameSync *fs)
122 AVFilterContext *avctx = fs->parent;
123 AVFilterLink *outlink = avctx->outputs[0];
124 OverlayOpenCLContext *ctx = avctx->priv;
125 AVFrame *input_main, *input_overlay;
129 size_t global_work[2];
/* Input 0 supplies the main frame, input 1 the overlay frame. */
133 err = ff_framesync_get_frame(fs, 0, &input_main, 0);
136 err = ff_framesync_get_frame(fs, 1, &input_overlay, 0);
/* Lazy initialisation: the kernel is chosen from the software formats of
 * the two hardware frames contexts the first time frames are available. */
140 if (!ctx->initialised) {
141 AVHWFramesContext *main_fc =
142 (AVHWFramesContext*)input_main->hw_frames_ctx->data;
143 AVHWFramesContext *overlay_fc =
144 (AVHWFramesContext*)input_overlay->hw_frames_ctx->data;
146 err = overlay_opencl_load(avctx, main_fc->sw_format,
147 overlay_fc->sw_format);
/* Output frame sized to the output link.
 * NOTE(review): the ENOMEM assignment presumably belongs to a failed
 * allocation check that is not visible here. */
152 output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
154 err = AVERROR(ENOMEM);
/*
 * One kernel launch per plane.  Arguments are set in this order: output
 * plane, main plane, overlay plane, [separate alpha plane], x, y,
 * [alpha_adj_x, alpha_adj_y]; the bracketed ones only when alpha_separate
 * is set.
 * NOTE(review): the statements that reset and advance kernel_arg between
 * arguments are not visible in this view.
 */
158 for (plane = 0; plane < ctx->nb_planes; plane++) {
161 mem = (cl_mem)output->data[plane];
162 CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem);
165 mem = (cl_mem)input_main->data[plane];
166 CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem);
169 mem = (cl_mem)input_overlay->data[plane];
170 CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem);
/* With separate alpha, the overlay plane at index nb_planes (one past the
 * planes of the main format) is passed as the alpha source. */
173 if (ctx->alpha_separate) {
174 mem = (cl_mem)input_overlay->data[ctx->nb_planes];
175 CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem);
/* Plane 0 uses the position as given; other planes use it divided by the
 * subsampling factors. */
179 x = ctx->x_position / (plane == 0 ? 1 : ctx->x_subsample);
180 y = ctx->y_position / (plane == 0 ? 1 : ctx->y_subsample);
182 CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &x);
184 CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &y);
/* Alpha adjustment factors: 1 for plane 0, the subsampling factors for the
 * other planes. */
187 if (ctx->alpha_separate) {
188 cl_int alpha_adj_x = plane == 0 ? 1 : ctx->x_subsample;
189 cl_int alpha_adj_y = plane == 0 ? 1 : ctx->y_subsample;
191 CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &alpha_adj_x);
193 CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &alpha_adj_y);
/* Fill global_work for this launch.
 * NOTE(review): the remaining arguments of this call are not visible in
 * this view. */
197 err = ff_opencl_filter_work_size_from_image(avctx, global_work,
/* Two-dimensional launch with no explicit local work size and no event
 * dependencies. */
202 cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
203 global_work, NULL, 0, NULL, NULL);
204 CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue overlay kernel "
205 "for plane %d: %d.\n", plane, cle);
/* Wait for the enqueued kernels before the output frame is handed on. */
208 cle = clFinish(ctx->command_queue);
209 CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
/* Output frame properties come from the main input.
 * NOTE(review): err is not checked in the visible lines before the frame
 * is forwarded - confirm whether a failure here should abort. */
211 err = av_frame_copy_props(output, input_main);
213 av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
214 av_get_pix_fmt_name(output->format),
215 output->width, output->height, output->pts);
217 return ff_filter_frame(outlink, output);
/* NOTE(review): presumably the failure path (label not visible): the
 * output frame is freed. */
220 av_frame_free(&output);
/*
 * Output-pad configuration callback.
 *
 * Runs the generic OpenCL filter output configuration, then initialises the
 * dual-input frame sync stored in the private context and returns the
 * result of configuring it.
 * NOTE(review): the declaration of err and the checks on the two
 * intermediate results are not visible in this view.
 */
224 static int overlay_opencl_config_output(AVFilterLink *outlink)
226 AVFilterContext *avctx = outlink->src;
227 OverlayOpenCLContext *ctx = avctx->priv;
230 err = ff_opencl_filter_config_output(outlink);
234 err = ff_framesync_init_dualinput(&ctx->fs, avctx);
238 return ff_framesync_configure(&ctx->fs);
/*
 * Filter init callback: installs overlay_opencl_blend() as the frame-sync
 * event handler and returns the result of the generic OpenCL filter init.
 */
241 static av_cold int overlay_opencl_init(AVFilterContext *avctx)
243 OverlayOpenCLContext *ctx = avctx->priv;
245 ctx->fs.on_event = &overlay_opencl_blend;
247 return ff_opencl_filter_init(avctx);
/*
 * Filter activate callback: delegates to ff_framesync_activate() on the
 * frame sync held in the private context and returns its result.
 */
250 static int overlay_opencl_activate(AVFilterContext *avctx)
252 OverlayOpenCLContext *ctx = avctx->priv;
254 return ff_framesync_activate(&ctx->fs);
/*
 * Filter uninit callback: releases the kernel and the command queue,
 * logging (but not propagating) any release failure, then tears down the
 * generic OpenCL filter state and the frame sync.
 * NOTE(review): the declaration of cle and the guard around the kernel
 * release are not visible in this view.
 */
257 static av_cold void overlay_opencl_uninit(AVFilterContext *avctx)
259 OverlayOpenCLContext *ctx = avctx->priv;
263 cle = clReleaseKernel(ctx->kernel);
264 if (cle != CL_SUCCESS)
265 av_log(avctx, AV_LOG_ERROR, "Failed to release "
266 "kernel: %d.\n", cle);
/* The queue is only released when one was created. */
269 if (ctx->command_queue) {
270 cle = clReleaseCommandQueue(ctx->command_queue);
271 if (cle != CL_SUCCESS)
272 av_log(avctx, AV_LOG_ERROR, "Failed to release "
273 "command queue: %d.\n", cle);
276 ff_opencl_filter_uninit(avctx);
278 ff_framesync_uninit(&ctx->fs);
/* Byte offset of a member of the private context, used by the option table. */
281 #define OFFSET(x) offsetof(OverlayOpenCLContext, x)
/* Flags applied to both options below. */
282 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/*
 * User options: the overlay position "x" and "y", stored in x_position and
 * y_position.  Both are integers defaulting to 0 with range 0..INT_MAX.
 * NOTE(review): the table terminator and closing brace are not visible in
 * this view.
 */
283 static const AVOption overlay_opencl_options[] = {
284 { "x", "Overlay x position",
285 OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS },
286 { "y", "Overlay y position",
287 OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS },
/* Defines the class object referenced as overlay_opencl_class in the
 * filter definition. */
291 AVFILTER_DEFINE_CLASS(overlay_opencl);
/*
 * Input pads: two video inputs, each configured through
 * ff_opencl_filter_config_input.
 * NOTE(review): the pad names and entry delimiters are not visible in this
 * view.  The blend callback reads input 0 as the main frame and input 1 as
 * the overlay frame; presumably the pads are listed in that order.
 */
293 static const AVFilterPad overlay_opencl_inputs[] = {
296 .type = AVMEDIA_TYPE_VIDEO,
297 .config_props = &ff_opencl_filter_config_input,
301 .type = AVMEDIA_TYPE_VIDEO,
302 .config_props = &ff_opencl_filter_config_input,
/*
 * Output pads: a single video output configured by
 * overlay_opencl_config_output().
 * NOTE(review): the pad name and entry delimiters are not visible in this
 * view.
 */
307 static const AVFilterPad overlay_opencl_outputs[] = {
310 .type = AVMEDIA_TYPE_VIDEO,
311 .config_props = &overlay_opencl_config_output,
/*
 * Filter definition for "overlay_opencl": ties together the private context
 * size and option class, the init/uninit/activate callbacks, the generic
 * OpenCL format query, and the input and output pad tables defined in this
 * file.
 * NOTE(review): the closing line of this initializer is not visible in
 * this view.
 */
316 AVFilter ff_vf_overlay_opencl = {
317 .name = "overlay_opencl",
318 .description = NULL_IF_CONFIG_SMALL("Overlay one video on top of another"),
319 .priv_size = sizeof(OverlayOpenCLContext),
320 .priv_class = &overlay_opencl_class,
321 .init = &overlay_opencl_init,
322 .uninit = &overlay_opencl_uninit,
323 .query_formats = &ff_opencl_filter_query_formats,
324 .activate = &overlay_opencl_activate,
325 .inputs = overlay_opencl_inputs,
326 .outputs = overlay_opencl_outputs,
327 .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,