2 * Copyright (C) 2018 Philip Langdale <philipl@overt.org>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/avassert.h"
23 #include "libavutil/hwcontext_cuda.h"
/* Kernel module image defined outside this file; config_output() hands it to
 * cuModuleLoadData(). Presumably PTX text, going by the name. */
27 extern char vf_yadif_cuda_ptx[];
/* Private per-instance state of the yadif_cuda filter (reached via ctx->priv). */
29 typedef struct DeintCUDAContext {
/* CUDA device context, read from the device that device_ref points at. */
32 AVCUDADeviceContext *hwctx;
/* Own reference to the device of the input frames; dropped in uninit. */
33 AVBufferRef *device_ref;
/* Own reference to the input link's hardware frames context. */
34 AVBufferRef *input_frames_ref;
/* Convenience pointer into input_frames_ref->data; not separately owned. */
35 AVHWFramesContext *input_frames;
/* Kernel handles looked up by name in config_output(). filter() picks the
 * uchar or ushort variant by sample size and the plain or "2" variant by
 * the number of channels in the plane. */
40 CUfunction cu_func_uchar;
41 CUfunction cu_func_uchar2;
42 CUfunction cu_func_ushort;
43 CUfunction cu_func_ushort2;
/* Integer division of a by b rounded up (for non-negative a and positive b).
 * Both arguments are expanded more than once, so avoid side effects. */
46 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
/* Round a up to a multiple of b. The mask form is only correct when b is a
 * power of two. Arguments are expanded more than once. */
47 #define ALIGN_UP(a, b) (((a) + (b) - 1) & ~((b) - 1))
/*
 * Log the outcome of a CUDA driver call and translate a failure into an
 * FFmpeg error code.
 *
 * avctx: context used for av_log().
 * err:   result code the driver call returned.
 * func:  text describing the call; CHECK_CU passes the stringified expression.
 *
 * On failure, logs "<func> failed", appends the CUDA error name and
 * description when both could be obtained, and returns AVERROR_EXTERNAL.
 *
 * NOTE(review): callers in this file compare the returned value against
 * CUDA_SUCCESS, so the success return has to equal CUDA_SUCCESS - confirm.
 */
51 static int check_cu(AVFilterContext *avctx, CUresult err, const char *func)
54 const char *err_string;
// The call has already happened by the time this runs (err is its result),
// so this trace line is emitted after the fact.
56 av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func);
58 if (err == CUDA_SUCCESS)
// Ask the driver for a symbolic name and a human-readable description.
61 cuGetErrorName(err, &err_name);
62 cuGetErrorString(err, &err_string);
// The message is built from several av_log() calls; only the last one
// terminates the line.
64 av_log(avctx, AV_LOG_ERROR, "%s failed", func);
65 if (err_name && err_string)
66 av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string);
67 av_log(avctx, AV_LOG_ERROR, "\n");
69 return AVERROR_EXTERNAL;
/* Evaluate the CUDA driver call x, then pass its result and its source text
 * to check_cu(). Requires a variable named ctx in the calling scope. */
72 #define CHECK_CU(x) check_cu(ctx, (x), #x)
/*
 * Deinterlace a single plane on the GPU.
 *
 * Wraps the prev, cur and next source planes in CUDA texture objects,
 * launches the kernel func on the filter's stream, then destroys the
 * texture objects.
 *
 * ctx:     filter context; ctx->priv holds the DeintCUDAContext.
 * func:    kernel to launch.
 * prev, cur, next: device pointers to the same plane of three source frames.
 * format, channels: element format and channel count used for the textures.
 * src_width, src_height, src_pitch: source plane geometry for the textures.
 * dst_width, dst_height, dst_pitch: destination geometry given to the kernel.
 *
 * dst, parity and tff are forwarded to the kernel through args[].
 */
74 static CUresult call_kernel(AVFilterContext *ctx, CUfunction func,
75 CUdeviceptr prev, CUdeviceptr cur, CUdeviceptr next,
76 CUarray_format format, int channels,
77 int src_width, // Width is pixels per channel
78 int src_height, // Height is pixels per channel
79 int src_pitch, // Pitch is bytes
81 int dst_width, // Width is pixels per channel
82 int dst_height, // Height is pixels per channel
83 int dst_pitch, // Pitch is pixels per channel
86 DeintCUDAContext *s = ctx->priv;
// Texture handles start at 0 and are filled in by cuTexObjectCreate().
87 CUtexObject tex_prev = 0, tex_cur = 0, tex_next = 0;
// Bit 1 (value 2) of the yadif mode is handed to the kernel as a flag.
89 int skip_spatial_check = s->yadif.mode&2;
// Kernel arguments are passed as an array of pointers to the values.
// NOTE(review): the order must match the kernel's parameter list, which is
// not visible in this file - confirm against the kernel source.
91 void *args[] = { &dst, &tex_prev, &tex_cur, &tex_next,
92 &dst_width, &dst_height, &dst_pitch,
93 &src_width, &src_height, &parity, &tff,
94 &skip_spatial_check };
// Sample with point filtering and return raw integer values.
96 CUDA_TEXTURE_DESC tex_desc = {
97 .filterMode = CU_TR_FILTER_MODE_POINT,
98 .flags = CU_TRSF_READ_AS_INTEGER,
// Shared description of a pitched 2D plane; only devPtr differs between the
// three textures and is set just before each creation call below.
101 CUDA_RESOURCE_DESC res_desc = {
102 .resType = CU_RESOURCE_TYPE_PITCH2D,
103 .res.pitch2D.format = format,
104 .res.pitch2D.numChannels = channels,
105 .res.pitch2D.width = src_width,
106 .res.pitch2D.height = src_height,
107 .res.pitch2D.pitchInBytes = src_pitch,
// Texture over the previous frame's plane.
110 res_desc.res.pitch2D.devPtr = (CUdeviceptr)prev;
111 err = CHECK_CU(cuTexObjectCreate(&tex_prev, &res_desc, &tex_desc, NULL));
112 if (err != CUDA_SUCCESS) {
// Texture over the current frame's plane.
116 res_desc.res.pitch2D.devPtr = (CUdeviceptr)cur;
117 err = CHECK_CU(cuTexObjectCreate(&tex_cur, &res_desc, &tex_desc, NULL));
118 if (err != CUDA_SUCCESS) {
// Texture over the next frame's plane.
122 res_desc.res.pitch2D.devPtr = (CUdeviceptr)next;
123 err = CHECK_CU(cuTexObjectCreate(&tex_next, &res_desc, &tex_desc, NULL));
124 if (err != CUDA_SUCCESS) {
// Grid size covers the destination plane, rounded up to whole blocks in
// both dimensions; the launch is queued on the filter's stream.
128 err = CHECK_CU(cuLaunchKernel(func,
129 DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
131 0, s->stream, args, NULL));
// Destroy results are logged by CHECK_CU; they are not assigned to err.
135 CHECK_CU(cuTexObjectDestroy(tex_prev));
137 CHECK_CU(cuTexObjectDestroy(tex_cur));
139 CHECK_CU(cuTexObjectDestroy(tex_next));
/*
 * Produce one deinterlaced frame in dst.
 *
 * Makes the filter's CUDA context current, calls call_kernel() once per
 * plane of the software pixel format using the prev, cur and next frames
 * held in the YADIFContext, waits for the stream to finish, and pops the
 * context again.
 */
144 static void filter(AVFilterContext *ctx, AVFrame *dst,
147 DeintCUDAContext *s = ctx->priv;
148 YADIFContext *y = &s->yadif;
153 err = CHECK_CU(cuCtxPushCurrent(s->cu_ctx));
154 if (err != CUDA_SUCCESS) {
// Iterate over components, but do the work per plane (see the check below).
158 for (i = 0; i < y->csp->nb_components; i++) {
160 CUarray_format format;
161 int pixel_size, channels;
162 const AVComponentDescriptor *comp = &y->csp->comp[i];
164 if (comp->plane < i) {
165 // We process planes as a whole, so don't reprocess
166 // them for additional components
// Bytes per sample, then the number of such samples in one pixel step of
// this plane (more than one when components are interleaved).
170 pixel_size = (comp->depth + comp->shift) / 8;
171 channels = comp->step / pixel_size;
// Only 1- or 2-byte samples with at most two channels have kernels.
172 if (pixel_size > 2 || channels > 2) {
173 av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", y->csp->name);
// Select kernel and texture element format by sample size: the uchar kernels
// go with UNSIGNED_INT8 and the ushort kernels with UNSIGNED_INT16; the
// one- or two-channel variant is chosen from channels.
176 switch (pixel_size) {
178 func = channels == 1 ? s->cu_func_uchar : s->cu_func_uchar2;
179 format = CU_AD_FORMAT_UNSIGNED_INT8;
182 func = channels == 1 ? s->cu_func_ushort : s->cu_func_ushort2;
183 format = CU_AD_FORMAT_UNSIGNED_INT16;
186 av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", y->csp->name);
189 av_log(ctx, AV_LOG_TRACE,
190 "Deinterlacing plane %d: pixel_size: %d channels: %d\n",
191 comp->plane, pixel_size, channels);
// NOTE(review): the CUresult returned by call_kernel() is not captured here,
// so a failed launch is only visible through the log.
192 call_kernel(ctx, func,
// Plane pointers are indexed with the component index i.
193 (CUdeviceptr)y->prev->data[i],
194 (CUdeviceptr)y->cur->data[i],
195 (CUdeviceptr)y->next->data[i],
// Every component except the first is scaled down by the chroma shifts,
// rounding up.
197 AV_CEIL_RSHIFT(y->cur->width, i ? y->csp->log2_chroma_w : 0),
198 AV_CEIL_RSHIFT(y->cur->height, i ? y->csp->log2_chroma_h : 0),
200 (CUdeviceptr)dst->data[i],
201 AV_CEIL_RSHIFT(dst->width, i ? y->csp->log2_chroma_w : 0),
202 AV_CEIL_RSHIFT(dst->height, i ? y->csp->log2_chroma_h : 0),
// Destination pitch is converted from bytes to pixel steps.
203 dst->linesize[i] / comp->step,
// Block until the work queued on the stream has completed.
207 err = CHECK_CU(cuStreamSynchronize(s->stream));
208 if (err != CUDA_SUCCESS) {
// Undo the cuCtxPushCurrent() from the top of the function.
213 CHECK_CU(cuCtxPopCurrent(&dummy));
/*
 * Filter uninit callback: unloads the CUDA module, frees the frames cached
 * in the YADIFContext and drops the buffer references held by the filter.
 */
217 static av_cold void deint_cuda_uninit(AVFilterContext *ctx)
220 DeintCUDAContext *s = ctx->priv;
221 YADIFContext *y = &s->yadif;
// The module is unloaded with the filter's CUDA context made current;
// failures are logged by CHECK_CU and otherwise ignored.
224 CHECK_CU(cuCtxPushCurrent(s->cu_ctx));
225 CHECK_CU(cuModuleUnload(s->cu_module));
226 CHECK_CU(cuCtxPopCurrent(&dummy));
// Release the three frames the deinterlacer keeps between calls.
229 av_frame_free(&y->prev);
230 av_frame_free(&y->cur);
231 av_frame_free(&y->next);
233 av_buffer_unref(&s->device_ref);
// input_frames points into input_frames_ref's data, so clear it together
// with the reference.
235 av_buffer_unref(&s->input_frames_ref);
236 s->input_frames = NULL;
/*
 * Format negotiation callback: offers AV_PIX_FMT_CUDA as the only pixel
 * format on both the first input and the first output.
 */
239 static int deint_cuda_query_formats(AVFilterContext *ctx)
// AV_PIX_FMT_NONE terminates the list.
241 enum AVPixelFormat pix_fmts[] = {
242 AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE,
// A separate list is built for each side; a negative result from
// ff_formats_ref() is treated as an error.
246 if ((ret = ff_formats_ref(ff_make_format_list(pix_fmts),
247 &ctx->inputs[0]->out_formats)) < 0)
249 if ((ret = ff_formats_ref(ff_make_format_list(pix_fmts),
250 &ctx->outputs[0]->in_formats)) < 0)
/*
 * Input link configuration: takes a reference to the link's hardware frames
 * context and caches a typed pointer to it in the filter state.
 *
 * Returns AVERROR(EINVAL) when the link carries no hardware frames context
 * and AVERROR(ENOMEM) when the reference cannot be created.
 */
256 static int config_input(AVFilterLink *inlink)
258 AVFilterContext *ctx = inlink->dst;
259 DeintCUDAContext *s = ctx->priv;
// Without a hardware frames context there is no device to run on.
261 if (!inlink->hw_frames_ctx) {
262 av_log(ctx, AV_LOG_ERROR, "A hardware frames reference is "
263 "required to associate the processing device.\n");
264 return AVERROR(EINVAL);
// Keep our own reference so the context outlives the link's reference.
267 s->input_frames_ref = av_buffer_ref(inlink->hw_frames_ctx);
268 if (!s->input_frames_ref) {
269 av_log(ctx, AV_LOG_ERROR, "A input frames reference create "
271 return AVERROR(ENOMEM);
273 s->input_frames = (AVHWFramesContext*)s->input_frames_ref->data;
/*
 * Output link configuration.
 *
 * References the device of the input frames, creates and initialises an
 * output hardware frames context with the input's software format and size,
 * sets the output link's time base, size and frame rate, rejects video
 * smaller than 3x3, and loads the CUDA module and its four kernels.
 */
278 static int config_output(AVFilterLink *link)
280 AVHWFramesContext *output_frames;
281 AVFilterContext *ctx = link->src;
282 DeintCUDAContext *s = ctx->priv;
283 YADIFContext *y = &s->yadif;
// config_input() must have stored the input frames context already.
288 av_assert0(s->input_frames);
289 s->device_ref = av_buffer_ref(s->input_frames->device_ref);
290 if (!s->device_ref) {
291 av_log(ctx, AV_LOG_ERROR, "A device reference create "
293 return AVERROR(ENOMEM);
// Cache the CUDA context and stream of that device for later driver calls.
295 s->hwctx = ((AVHWDeviceContext*)s->device_ref->data)->hwctx;
296 s->cu_ctx = s->hwctx->cuda_ctx;
297 s->stream = s->hwctx->stream;
// Output frames are allocated on the same device as the input.
299 link->hw_frames_ctx = av_hwframe_ctx_alloc(s->device_ref);
300 if (!link->hw_frames_ctx) {
301 av_log(ctx, AV_LOG_ERROR, "Failed to create HW frame context "
303 ret = AVERROR(ENOMEM);
307 output_frames = (AVHWFramesContext*)link->hw_frames_ctx->data;
// Same software format and dimensions as the input.
309 output_frames->format = AV_PIX_FMT_CUDA;
310 output_frames->sw_format = s->input_frames->sw_format;
311 output_frames->width = ctx->inputs[0]->w;
312 output_frames->height = ctx->inputs[0]->h;
314 output_frames->initial_pool_size = 4;
// NOTE(review): the meaning of the literal 10 is defined by
// ff_filter_init_hw_frames(), which is not visible here - confirm.
316 ret = ff_filter_init_hw_frames(ctx, link, 10);
320 ret = av_hwframe_ctx_init(link->hw_frames_ctx);
322 av_log(ctx, AV_LOG_ERROR, "Failed to initialise CUDA frame "
323 "context for output: %d\n", ret);
// Doubling the denominator halves the tick length of the output time base
// relative to the input.
327 link->time_base.num = ctx->inputs[0]->time_base.num;
328 link->time_base.den = ctx->inputs[0]->time_base.den * 2;
329 link->w = ctx->inputs[0]->w;
330 link->h = ctx->inputs[0]->h;
// Output frame rate is the input frame rate multiplied by a rational factor.
333 link->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate,
336 if (link->w < 3 || link->h < 3) {
337 av_log(ctx, AV_LOG_ERROR, "Video of less than 3 columns or lines is not supported\n");
338 ret = AVERROR(EINVAL);
// Pixel format descriptor that filter() uses to walk components and planes.
342 y->csp = av_pix_fmt_desc_get(output_frames->sw_format);
// Module loading and kernel lookup run with the filter's context current.
// A failed push is reported as AVERROR_EXTERNAL; a failed module load or
// kernel lookup is reported as AVERROR_INVALIDDATA.
345 err = CHECK_CU(cuCtxPushCurrent(s->cu_ctx));
346 if (err != CUDA_SUCCESS) {
347 ret = AVERROR_EXTERNAL;
351 err = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx));
352 if (err != CUDA_SUCCESS) {
353 ret = AVERROR_INVALIDDATA;
357 err = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "yadif_uchar"));
358 if (err != CUDA_SUCCESS) {
359 ret = AVERROR_INVALIDDATA;
363 err = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "yadif_uchar2"));
364 if (err != CUDA_SUCCESS) {
365 ret = AVERROR_INVALIDDATA;
369 err= CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "yadif_ushort"));
370 if (err != CUDA_SUCCESS) {
371 ret = AVERROR_INVALIDDATA;
375 err = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "yadif_ushort2"));
376 if (err != CUDA_SUCCESS) {
377 ret = AVERROR_INVALIDDATA;
// Undo the cuCtxPushCurrent() above.
382 CHECK_CU(cuCtxPopCurrent(&dummy));
/* AVClass for the filter's private context. The option table is
 * ff_yadif_options, which is defined outside this file. */
387 static const AVClass yadif_cuda_class = {
388 .class_name = "yadif_cuda",
389 .item_name = av_default_item_name,
390 .option = ff_yadif_options,
391 .version = LIBAVUTIL_VERSION_INT,
392 .category = AV_CLASS_CATEGORY_FILTER,
/* Input pads: one video pad. Incoming frames go to ff_yadif_filter_frame()
 * and the link is configured by config_input(). */
395 static const AVFilterPad deint_cuda_inputs[] = {
398 .type = AVMEDIA_TYPE_VIDEO,
399 .filter_frame = ff_yadif_filter_frame,
400 .config_props = config_input,
/* Output pads: one video pad. Frame requests go to ff_yadif_request_frame()
 * and the link is configured by config_output(). */
405 static const AVFilterPad deint_cuda_outputs[] = {
408 .type = AVMEDIA_TYPE_VIDEO,
409 .request_frame = ff_yadif_request_frame,
410 .config_props = config_output,
/* Filter definition for "yadif_cuda": ties together the private context,
 * its AVClass, the callbacks and the pad tables defined in this file. */
415 AVFilter ff_vf_yadif_cuda = {
416 .name = "yadif_cuda",
417 .description = NULL_IF_CONFIG_SMALL("Deinterlace CUDA frames"),
418 .priv_size = sizeof(DeintCUDAContext),
419 .priv_class = &yadif_cuda_class,
420 .uninit = deint_cuda_uninit,
421 .query_formats = deint_cuda_query_formats,
422 .inputs = deint_cuda_inputs,
423 .outputs = deint_cuda_outputs,
// NOTE(review): going by the flag names, timeline support is handled inside
// the filter and the filter works on hardware frames - confirm against the
// libavfilter flag documentation.
424 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
425 .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,