git.sesse.net Git - ffmpeg/blob - libavfilter/vf_scale_cuda.c

   1 /*
   2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20 * DEALINGS IN THE SOFTWARE.
  21 */
  22
  23 #include <cuda.h>
  24 #include <stdio.h>
  25 #include <string.h>
  26
  27 #include "libavutil/avstring.h"
  28 #include "libavutil/common.h"
  29 #include "libavutil/hwcontext.h"
  30 #include "libavutil/hwcontext_cuda_internal.h"
  31 #include "libavutil/internal.h"
  32 #include "libavutil/opt.h"
  33 #include "libavutil/pixdesc.h"
  34
  35 #include "avfilter.h"
  36 #include "formats.h"
  37 #include "internal.h"
  38 #include "scale.h"
  39 #include "video.h"
  40
  41 static const enum AVPixelFormat supported_formats[] = {
  42     AV_PIX_FMT_YUV420P,
  43     AV_PIX_FMT_NV12,
  44     AV_PIX_FMT_YUV444P,
  45     AV_PIX_FMT_P010,
  46     AV_PIX_FMT_P016
  47 };
  48
  49 #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
  50 #define ALIGN_UP(a, b) (((a) + (b) - 1) & ~((b) - 1))
  51 #define NUM_BUFFERS 2
  52 #define BLOCKX 32
  53 #define BLOCKY 16
  54
  55 typedef struct CUDAScaleContext {
  56     const AVClass *class;
  57     enum AVPixelFormat in_fmt;
  58     enum AVPixelFormat out_fmt;
  59
  60     struct {
  61         int width;
  62         int height;
  63     } planes_in[3], planes_out[3];
  64
  65     AVBufferRef *frames_ctx;
  66     AVFrame     *frame;
  67
  68     AVFrame *tmp_frame;
  69     int passthrough;
  70
  71     /**
  72      * Output sw format. AV_PIX_FMT_NONE for no conversion.
  73      */
  74     enum AVPixelFormat format;
  75
  76     char *w_expr;               ///< width  expression string
  77     char *h_expr;               ///< height expression string
  78
  79     CUcontext   cu_ctx;
  80     CUevent     cu_event;
  81     CUmodule    cu_module;
  82     CUfunction  cu_func_uchar;
  83     CUfunction  cu_func_uchar2;
  84     CUfunction  cu_func_uchar4;
  85     CUfunction  cu_func_ushort;
  86     CUfunction  cu_func_ushort2;
  87     CUfunction  cu_func_ushort4;
  88     CUtexref    cu_tex_uchar;
  89     CUtexref    cu_tex_uchar2;
  90     CUtexref    cu_tex_uchar4;
  91     CUtexref    cu_tex_ushort;
  92     CUtexref    cu_tex_ushort2;
  93     CUtexref    cu_tex_ushort4;
  94
  95     CUdeviceptr srcBuffer;
  96     CUdeviceptr dstBuffer;
  97     int         tex_alignment;
  98 } CUDAScaleContext;
  99
 100 static av_cold int cudascale_init(AVFilterContext *ctx)
 101 {
 102     CUDAScaleContext *s = ctx->priv;
 103
 104     s->format = AV_PIX_FMT_NONE;
 105     s->frame = av_frame_alloc();
 106     if (!s->frame)
 107         return AVERROR(ENOMEM);
 108
 109     s->tmp_frame = av_frame_alloc();
 110     if (!s->tmp_frame)
 111         return AVERROR(ENOMEM);
 112
 113     return 0;
 114 }
 115
 116 static av_cold void cudascale_uninit(AVFilterContext *ctx)
 117 {
 118     CUDAScaleContext *s = ctx->priv;
 119
 120     av_frame_free(&s->frame);
 121     av_buffer_unref(&s->frames_ctx);
 122     av_frame_free(&s->tmp_frame);
 123 }
 124
 125 static int cudascale_query_formats(AVFilterContext *ctx)
 126 {
 127     static const enum AVPixelFormat pixel_formats[] = {
 128         AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE,
 129     };
 130     AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats);
 131
 132     return ff_set_common_formats(ctx, pix_fmts);
 133 }
 134
 135 static av_cold int init_stage(CUDAScaleContext *s, AVBufferRef *device_ctx)
 136 {
 137     AVBufferRef *out_ref = NULL;
 138     AVHWFramesContext *out_ctx;
 139     int in_sw, in_sh, out_sw, out_sh;
 140     int ret, i;
 141
 142     av_pix_fmt_get_chroma_sub_sample(s->in_fmt,  &in_sw,  &in_sh);
 143     av_pix_fmt_get_chroma_sub_sample(s->out_fmt, &out_sw, &out_sh);
 144     if (!s->planes_out[0].width) {
 145         s->planes_out[0].width  = s->planes_in[0].width;
 146         s->planes_out[0].height = s->planes_in[0].height;
 147     }
 148
 149     for (i = 1; i < FF_ARRAY_ELEMS(s->planes_in); i++) {
 150         s->planes_in[i].width   = s->planes_in[0].width   >> in_sw;
 151         s->planes_in[i].height  = s->planes_in[0].height  >> in_sh;
 152         s->planes_out[i].width  = s->planes_out[0].width  >> out_sw;
 153         s->planes_out[i].height = s->planes_out[0].height >> out_sh;
 154     }
 155
 156     out_ref = av_hwframe_ctx_alloc(device_ctx);
 157     if (!out_ref)
 158         return AVERROR(ENOMEM);
 159     out_ctx = (AVHWFramesContext*)out_ref->data;
 160
 161     out_ctx->format    = AV_PIX_FMT_CUDA;
 162     out_ctx->sw_format = s->out_fmt;
 163     out_ctx->width     = FFALIGN(s->planes_out[0].width,  32);
 164     out_ctx->height    = FFALIGN(s->planes_out[0].height, 32);
 165
 166     ret = av_hwframe_ctx_init(out_ref);
 167     if (ret < 0)
 168         goto fail;
 169
 170     av_frame_unref(s->frame);
 171     ret = av_hwframe_get_buffer(out_ref, s->frame, 0);
 172     if (ret < 0)
 173         goto fail;
 174
 175     s->frame->width  = s->planes_out[0].width;
 176     s->frame->height = s->planes_out[0].height;
 177
 178     av_buffer_unref(&s->frames_ctx);
 179     s->frames_ctx = out_ref;
 180
 181     return 0;
 182 fail:
 183     av_buffer_unref(&out_ref);
 184     return ret;
 185 }
 186
 187 static int format_is_supported(enum AVPixelFormat fmt)
 188 {
 189     int i;
 190
 191     for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
 192         if (supported_formats[i] == fmt)
 193             return 1;
 194     return 0;
 195 }
 196
 197 static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height,
 198                                          int out_width, int out_height)
 199 {
 200     CUDAScaleContext *s = ctx->priv;
 201
 202     AVHWFramesContext *in_frames_ctx;
 203
 204     enum AVPixelFormat in_format;
 205     enum AVPixelFormat out_format;
 206     int ret;
 207
 208     /* check that we have a hw context */
 209     if (!ctx->inputs[0]->hw_frames_ctx) {
 210         av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");
 211         return AVERROR(EINVAL);
 212     }
 213     in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data;
 214     in_format     = in_frames_ctx->sw_format;
 215     out_format    = (s->format == AV_PIX_FMT_NONE) ? in_format : s->format;
 216
 217     if (!format_is_supported(in_format)) {
 218         av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
 219                av_get_pix_fmt_name(in_format));
 220         return AVERROR(ENOSYS);
 221     }
 222     if (!format_is_supported(out_format)) {
 223         av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
 224                av_get_pix_fmt_name(out_format));
 225         return AVERROR(ENOSYS);
 226     }
 227
 228     if (in_width == out_width && in_height == out_height)
 229         s->passthrough = 1;
 230
 231     s->in_fmt = in_format;
 232     s->out_fmt = out_format;
 233
 234     s->planes_in[0].width   = in_width;
 235     s->planes_in[0].height  = in_height;
 236     s->planes_out[0].width  = out_width;
 237     s->planes_out[0].height = out_height;
 238
 239     ret = init_stage(s, in_frames_ctx->device_ref);
 240     if (ret < 0)
 241         return ret;
 242
 243     ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->frames_ctx);
 244     if (!ctx->outputs[0]->hw_frames_ctx)
 245         return AVERROR(ENOMEM);
 246
 247     return 0;
 248 }
 249
 250 static av_cold int cudascale_config_props(AVFilterLink *outlink)
 251 {
 252     AVFilterContext *ctx = outlink->src;
 253     AVFilterLink *inlink = outlink->src->inputs[0];
 254     CUDAScaleContext *s  = ctx->priv;
 255     AVHWFramesContext     *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
 256     AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
 257     CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
 258     CUresult err;
 259     int w, h;
 260     int ret;
 261
 262     extern char vf_scale_cuda_ptx[];
 263
 264     err = cuCtxPushCurrent(cuda_ctx);
 265     if (err != CUDA_SUCCESS) {
 266         av_log(ctx, AV_LOG_ERROR, "Error pushing cuda context\n");
 267         ret = AVERROR_UNKNOWN;
 268         goto fail;
 269     }
 270
 271     err = cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx);
 272     if (err != CUDA_SUCCESS) {
 273         av_log(ctx, AV_LOG_ERROR, "Error loading module data\n");
 274         ret = AVERROR_UNKNOWN;
 275         goto fail;
 276     }
 277
 278     cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar");
 279     cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2");
 280     cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4");
 281     cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort");
 282     cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2");
 283     cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4");
 284
 285     cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex");
 286     cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex");
 287     cuModuleGetTexRef(&s->cu_tex_uchar4, s->cu_module, "uchar4_tex");
 288     cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex");
 289     cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex");
 290     cuModuleGetTexRef(&s->cu_tex_ushort4, s->cu_module, "ushort4_tex");
 291
 292     cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER);
 293     cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER);
 294     cuTexRefSetFlags(s->cu_tex_uchar4, CU_TRSF_READ_AS_INTEGER);
 295     cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER);
 296     cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER);
 297     cuTexRefSetFlags(s->cu_tex_ushort4, CU_TRSF_READ_AS_INTEGER);
 298
 299     cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR);
 300     cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR);
 301     cuTexRefSetFilterMode(s->cu_tex_uchar4, CU_TR_FILTER_MODE_LINEAR);
 302     cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR);
 303     cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR);
 304     cuTexRefSetFilterMode(s->cu_tex_ushort4, CU_TR_FILTER_MODE_LINEAR);
 305
 306     cuCtxPopCurrent(&dummy);
 307
 308     if ((ret = ff_scale_eval_dimensions(s,
 309                                         s->w_expr, s->h_expr,
 310                                         inlink, outlink,
 311                                         &w, &h)) < 0)
 312         goto fail;
 313
 314     if (((int64_t)h * inlink->w) > INT_MAX  ||
 315         ((int64_t)w * inlink->h) > INT_MAX)
 316         av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n");
 317
 318     outlink->w = w;
 319     outlink->h = h;
 320
 321     ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h);
 322     if (ret < 0)
 323         return ret;
 324
 325     av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d\n",
 326            inlink->w, inlink->h, outlink->w, outlink->h);
 327
 328     if (inlink->sample_aspect_ratio.num) {
 329         outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w,
 330                                                              outlink->w*inlink->h},
 331                                                 inlink->sample_aspect_ratio);
 332     } else {
 333         outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
 334     }
 335
 336     return 0;
 337
 338 fail:
 339     return ret;
 340 }
 341
 342 static int call_resize_kernel(CUDAScaleContext *s, CUfunction func, CUtexref tex, int channels,
 343                               uint8_t *src_dptr, int src_width, int src_height, int src_pitch,
 344                               uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch,
 345                               int pixel_size)
 346 {
 347     CUdeviceptr src_devptr = (CUdeviceptr)src_dptr;
 348     CUdeviceptr dst_devptr = (CUdeviceptr)dst_dptr;
 349     void *args_uchar[] = { &dst_devptr, &dst_width, &dst_height, &dst_pitch, &src_width, &src_height };
 350     CUDA_ARRAY_DESCRIPTOR desc;
 351
 352     desc.Width  = src_width;
 353     desc.Height = src_height;
 354     desc.NumChannels = channels;
 355     if (pixel_size == 1) {
 356         desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
 357     } else {
 358         desc.Format = CU_AD_FORMAT_UNSIGNED_INT16;
 359     }
 360
 361     cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch * pixel_size);
 362     cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, 0, args_uchar, NULL);
 363
 364     return 0;
 365 }
 366
 367 static int scalecuda_resize(AVFilterContext *ctx,
 368                             AVFrame *out, AVFrame *in)
 369 {
 370     AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data;
 371     CUDAScaleContext *s = ctx->priv;
 372
 373     switch (in_frames_ctx->sw_format) {
 374     case AV_PIX_FMT_YUV420P:
 375         call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
 376                            in->data[0], in->width, in->height, in->linesize[0],
 377                            out->data[0], out->width, out->height, out->linesize[0],
 378                            1);
 379         call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
 380                            in->data[0]+in->linesize[0]*in->height, in->width/2, in->height/2, in->linesize[0]/2,
 381                            out->data[0]+out->linesize[0]*out->height, out->width/2, out->height/2, out->linesize[0]/2,
 382                            1);
 383         call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
 384                            in->data[0]+ ALIGN_UP((in->linesize[0]*in->height*5)/4, s->tex_alignment), in->width/2, in->height/2, in->linesize[0]/2,
 385                            out->data[0]+(out->linesize[0]*out->height*5)/4, out->width/2, out->height/2, out->linesize[0]/2,
 386                            1);
 387         break;
 388     case AV_PIX_FMT_YUV444P:
 389         call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
 390                            in->data[0], in->width, in->height, in->linesize[0],
 391                            out->data[0], out->width, out->height, out->linesize[0],
 392                            1);
 393         call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
 394                            in->data[0]+in->linesize[0]*in->height, in->width, in->height, in->linesize[0],
 395                            out->data[0]+out->linesize[0]*out->height, out->width, out->height, out->linesize[0],
 396                            1);
 397         call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
 398                            in->data[0]+in->linesize[0]*in->height*2, in->width, in->height, in->linesize[0],
 399                            out->data[0]+out->linesize[0]*out->height*2, out->width, out->height, out->linesize[0],
 400                            1);
 401         break;
 402     case AV_PIX_FMT_NV12:
 403         call_resize_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1,
 404                            in->data[0], in->width, in->height, in->linesize[0],
 405                            out->data[0], out->width, out->height, out->linesize[0],
 406                            1);
 407         call_resize_kernel(s, s->cu_func_uchar2, s->cu_tex_uchar2, 2,
 408                            in->data[1], in->width/2, in->height/2, in->linesize[1],
 409                            out->data[0] + out->linesize[0] * ((out->height + 31) & ~0x1f), out->width/2, out->height/2, out->linesize[1]/2,
 410                            1);
 411         break;
 412     case AV_PIX_FMT_P010LE:
 413         call_resize_kernel(s, s->cu_func_ushort, s->cu_tex_ushort, 1,
 414                            in->data[0], in->width, in->height, in->linesize[0]/2,
 415                            out->data[0], out->width, out->height, out->linesize[0]/2,
 416                            2);
 417         call_resize_kernel(s, s->cu_func_ushort2, s->cu_tex_ushort2, 2,
 418                            in->data[1], in->width / 2, in->height / 2, in->linesize[1]/2,
 419                            out->data[0] + out->linesize[0] * ((out->height + 31) & ~0x1f), out->width / 2, out->height / 2, out->linesize[1] / 4,
 420                            2);
 421         break;
 422     case AV_PIX_FMT_P016LE:
 423         call_resize_kernel(s, s->cu_func_ushort, s->cu_tex_ushort, 1,
 424                            in->data[0], in->width, in->height, in->linesize[0] / 2,
 425                            out->data[0], out->width, out->height, out->linesize[0] / 2,
 426                            2);
 427         call_resize_kernel(s, s->cu_func_ushort2, s->cu_tex_ushort2, 2,
 428                            in->data[1], in->width / 2, in->height / 2, in->linesize[1] / 2,
 429                            out->data[0] + out->linesize[0] * ((out->height + 31) & ~0x1f), out->width / 2, out->height / 2, out->linesize[1] / 4,
 430                            2);
 431         break;
 432     default:
 433         return AVERROR_BUG;
 434     }
 435
 436     return 0;
 437 }
 438
 439 static int cudascale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
 440 {
 441     CUDAScaleContext *s = ctx->priv;
 442     AVFrame *src = in;
 443     int ret;
 444
 445     ret = scalecuda_resize(ctx, s->frame, src);
 446     if (ret < 0)
 447         return ret;
 448
 449     src = s->frame;
 450     ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0);
 451     if (ret < 0)
 452         return ret;
 453
 454     av_frame_move_ref(out, s->frame);
 455     av_frame_move_ref(s->frame, s->tmp_frame);
 456
 457     ret = av_frame_copy_props(out, in);
 458     if (ret < 0)
 459         return ret;
 460
 461     return 0;
 462 }
 463
 464 static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in)
 465 {
 466     AVFilterContext              *ctx = link->dst;
 467     CUDAScaleContext               *s = ctx->priv;
 468     AVFilterLink             *outlink = ctx->outputs[0];
 469     AVHWFramesContext     *frames_ctx = (AVHWFramesContext*)s->frames_ctx->data;
 470     AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
 471
 472     AVFrame *out = NULL;
 473     CUresult err;
 474     CUcontext dummy;
 475     int ret = 0;
 476
 477     out = av_frame_alloc();
 478     if (!out) {
 479         ret = AVERROR(ENOMEM);
 480         goto fail;
 481     }
 482
 483     err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
 484     if (err != CUDA_SUCCESS) {
 485         ret = AVERROR_UNKNOWN;
 486         goto fail;
 487     }
 488
 489     ret = cudascale_scale(ctx, out, in);
 490
 491     cuCtxPopCurrent(&dummy);
 492     if (ret < 0)
 493         goto fail;
 494
 495     av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
 496               (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
 497               (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
 498               INT_MAX);
 499
 500     av_frame_free(&in);
 501     return ff_filter_frame(outlink, out);
 502 fail:
 503     av_frame_free(&in);
 504     av_frame_free(&out);
 505     return ret;
 506 }
 507
 508 #define OFFSET(x) offsetof(CUDAScaleContext, x)
 509 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
 510 static const AVOption options[] = {
 511     { "w",      "Output video width",  OFFSET(w_expr),     AV_OPT_TYPE_STRING, { .str = "iw"   }, .flags = FLAGS },
 512     { "h",      "Output video height", OFFSET(h_expr),     AV_OPT_TYPE_STRING, { .str = "ih"   }, .flags = FLAGS },
 513     { NULL },
 514 };
 515
 516 static const AVClass cudascale_class = {
 517     .class_name = "cudascale",
 518     .item_name  = av_default_item_name,
 519     .option     = options,
 520     .version    = LIBAVUTIL_VERSION_INT,
 521 };
 522
 523 static const AVFilterPad cudascale_inputs[] = {
 524     {
 525         .name        = "default",
 526         .type        = AVMEDIA_TYPE_VIDEO,
 527         .filter_frame = cudascale_filter_frame,
 528     },
 529     { NULL }
 530 };
 531
 532 static const AVFilterPad cudascale_outputs[] = {
 533     {
 534         .name         = "default",
 535         .type         = AVMEDIA_TYPE_VIDEO,
 536         .config_props = cudascale_config_props,
 537     },
 538     { NULL }
 539 };
 540
 541 AVFilter ff_vf_scale_cuda = {
 542     .name      = "scale_cuda",
 543     .description = NULL_IF_CONFIG_SMALL("GPU accelerated video resizer"),
 544
 545     .init          = cudascale_init,
 546     .uninit        = cudascale_uninit,
 547     .query_formats = cudascale_query_formats,
 548
 549     .priv_size = sizeof(CUDAScaleContext),
 550     .priv_class = &cudascale_class,
 551
 552     .inputs    = cudascale_inputs,
 553     .outputs   = cudascale_outputs,
 554
 555     .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
 556 };