git.sesse.net Git - ffmpeg/blob - libavfilter/vf_vmafmotion.c

   1 /*
   2  * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
   3  * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * Calculate VMAF Motion score.
  25  */
  26
  27 #include "libavutil/opt.h"
  28 #include "libavutil/pixdesc.h"
  29 #include "avfilter.h"
  30 #include "drawutils.h"
  31 #include "formats.h"
  32 #include "internal.h"
  33 #include "vmaf_motion.h"
  34
/* Number of fractional bits of the fixed-point filter taps:
 * ff_vmafmotion_init() multiplies each FILTER_5 entry by (1 << BIT_SHIFT),
 * and convolution_x() shifts its accumulated sums right by BIT_SHIFT. */
#define BIT_SHIFT 15

/* Symmetric 5-tap kernel in floating point; the taps add up to ~1.0.
 * It is converted to 16-bit integer taps in ff_vmafmotion_init(). */
static const float FILTER_5[5] = {
    0.054488685,
    0.244201342,
    0.402619947,
    0.244201342,
    0.054488685
};
  44
/* Private context of the vmafmotion filter. */
typedef struct VMAFMotionContext {
    const AVClass *class;   /* class pointer; see vmafmotion_class / .priv_class */
    VMAFMotionData data;    /* state handed to ff_vmafmotion_init/process/uninit */
    FILE *stats_file;       /* stream opened in init(); may be stdout, NULL if unused */
    char *stats_file_str;   /* value of the "stats_file" option, NULL by default */
} VMAFMotionContext;
  51
/* Byte offset of an option's backing field inside VMAFMotionContext. */
#define OFFSET(x) offsetof(VMAFMotionContext, x)
/* Flags shared by every option of this filter. */
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

/* User-visible options; the table is terminated by a NULL entry. */
static const AVOption vmafmotion_options[] = {
    /* Path of the per-frame stats file; init() treats "-" as stdout. */
    {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
    { NULL }
};

/* Defines vmafmotion_class, which is referenced by ff_vf_vmafmotion.priv_class. */
AVFILTER_DEFINE_CLASS(vmafmotion);
  61
  62 static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
  63                           int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
  64 {
  65     ptrdiff_t img1_stride = _img1_stride / sizeof(*img1);
  66     ptrdiff_t img2_stride = _img2_stride / sizeof(*img2);
  67     uint64_t sum = 0;
  68     int i, j;
  69
  70     for (i = 0; i < h; i++) {
  71         for (j = 0; j < w; j++) {
  72             sum += abs(img1[j] - img2[j]);
  73         }
  74         img1 += img1_stride;
  75         img2 += img2_stride;
  76     }
  77
  78     return sum;
  79 }
  80
  81 static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src,
  82                           uint16_t *dst, int w, int h, ptrdiff_t _src_stride,
  83                           ptrdiff_t _dst_stride)
  84 {
  85     ptrdiff_t src_stride = _src_stride / sizeof(*src);
  86     ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
  87     int radius = filt_w / 2;
  88     int borders_left = radius;
  89     int borders_right = w - (filt_w - radius);
  90     int i, j, k;
  91     int sum = 0;
  92
  93     for (i = 0; i < h; i++) {
  94         for (j = 0; j < borders_left; j++) {
  95             sum = 0;
  96             for (k = 0; k < filt_w; k++) {
  97                 int j_tap = FFABS(j - radius + k);
  98                 if (j_tap >= w) {
  99                     j_tap = w - (j_tap - w + 1);
 100                 }
 101                 sum += filter[k] * src[i * src_stride + j_tap];
 102             }
 103             dst[i * dst_stride + j] = sum >> BIT_SHIFT;
 104         }
 105
 106         for (j = borders_left; j < borders_right; j++) {
 107             int sum = 0;
 108             for (k = 0; k < filt_w; k++) {
 109                 sum += filter[k] * src[i * src_stride + j - radius + k];
 110             }
 111             dst[i * dst_stride + j] = sum >> BIT_SHIFT;
 112         }
 113
 114         for (j = borders_right; j < w; j++) {
 115             sum = 0;
 116             for (k = 0; k < filt_w; k++) {
 117                 int j_tap = FFABS(j - radius + k);
 118                 if (j_tap >= w) {
 119                     j_tap = w - (j_tap - w + 1);
 120                 }
 121                 sum += filter[k] * src[i * src_stride + j_tap];
 122             }
 123             dst[i * dst_stride + j] = sum >> BIT_SHIFT;
 124         }
 125     }
 126 }
 127
 128 #define conv_y_fn(type, bits) \
 129 static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
 130                                       const uint8_t *_src, uint16_t *dst, \
 131                                       int w, int h, ptrdiff_t _src_stride, \
 132                                       ptrdiff_t _dst_stride) \
 133 { \
 134     const type *src = (const type *) _src; \
 135     ptrdiff_t src_stride = _src_stride / sizeof(*src); \
 136     ptrdiff_t dst_stride = _dst_stride / sizeof(*dst); \
 137     int radius = filt_w / 2; \
 138     int borders_top = radius; \
 139     int borders_bottom = h - (filt_w - radius); \
 140     int i, j, k; \
 141     int sum = 0; \
 142     \
 143     for (i = 0; i < borders_top; i++) { \
 144         for (j = 0; j < w; j++) { \
 145             sum = 0; \
 146             for (k = 0; k < filt_w; k++) { \
 147                 int i_tap = FFABS(i - radius + k); \
 148                 if (i_tap >= h) { \
 149                     i_tap = h - (i_tap - h + 1); \
 150                 } \
 151                 sum += filter[k] * src[i_tap * src_stride + j]; \
 152             } \
 153             dst[i * dst_stride + j] = sum >> bits; \
 154         } \
 155     } \
 156     for (i = borders_top; i < borders_bottom; i++) { \
 157         for (j = 0; j < w; j++) { \
 158             sum = 0; \
 159             for (k = 0; k < filt_w; k++) { \
 160                 sum += filter[k] * src[(i - radius + k) * src_stride + j]; \
 161             } \
 162             dst[i * dst_stride + j] = sum >> bits; \
 163         } \
 164     } \
 165     for (i = borders_bottom; i < h; i++) { \
 166         for (j = 0; j < w; j++) { \
 167             sum = 0; \
 168             for (k = 0; k < filt_w; k++) { \
 169                 int i_tap = FFABS(i - radius + k); \
 170                 if (i_tap >= h) { \
 171                     i_tap = h - (i_tap - h + 1); \
 172                 } \
 173                 sum += filter[k] * src[i_tap * src_stride + j]; \
 174             } \
 175             dst[i * dst_stride + j] = sum >> bits; \
 176         } \
 177     } \
 178 }
 179
 180 conv_y_fn(uint8_t, 8);
 181 conv_y_fn(uint16_t, 10);
 182
 183 static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
 184     dsp->convolution_x = convolution_x;
 185     dsp->convolution_y = bpp == 10 ? convolution_y_10bit : convolution_y_8bit;
 186     dsp->sad = image_sad;
 187 }
 188
 189 double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
 190 {
 191     double score;
 192
 193     s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
 194                              s->width, s->height, ref->linesize[0], s->stride);
 195     s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
 196                              s->width, s->height, s->stride, s->stride);
 197
 198     if (!s->nb_frames) {
 199         score = 0.0;
 200     } else {
 201         uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0],
 202                                       s->width, s->height, s->stride, s->stride);
 203         // the output score is always normalized to 8 bits
 204         score = (double) (sad * 1.0 / (s->width * s->height << (BIT_SHIFT - 8)));
 205     }
 206
 207     FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
 208     s->nb_frames++;
 209     s->motion_sum += score;
 210
 211     return score;
 212 }
 213
 214 static void set_meta(AVDictionary **metadata, const char *key, float d)
 215 {
 216     char value[128];
 217     snprintf(value, sizeof(value), "%0.2f", d);
 218     av_dict_set(metadata, key, value, 0);
 219 }
 220
 221 static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
 222 {
 223     VMAFMotionContext *s = ctx->priv;
 224     double score;
 225
 226     score = ff_vmafmotion_process(&s->data, ref);
 227     set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);
 228     if (s->stats_file) {
 229         fprintf(s->stats_file,
 230                 "n:%"PRId64" motion:%0.2lf\n", s->data.nb_frames, score);
 231     }
 232 }
 233
 234
 235 int ff_vmafmotion_init(VMAFMotionData *s,
 236                        int w, int h, enum AVPixelFormat fmt)
 237 {
 238     size_t data_sz;
 239     int i;
 240     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
 241
 242     s->width = w;
 243     s->height = h;
 244     s->stride = FFALIGN(w * sizeof(uint16_t), 32);
 245
 246     data_sz = (size_t) s->stride * h;
 247     if (!(s->blur_data[0] = av_malloc(data_sz)) ||
 248         !(s->blur_data[1] = av_malloc(data_sz)) ||
 249         !(s->temp_data    = av_malloc(data_sz))) {
 250         return AVERROR(ENOMEM);
 251     }
 252
 253     for (i = 0; i < 5; i++) {
 254         s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
 255     }
 256
 257     vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);
 258
 259     return 0;
 260 }
 261
 262 static int query_formats(AVFilterContext *ctx)
 263 {
 264     AVFilterFormats *fmts_list = NULL;
 265     int format, ret;
 266
 267     for (format = 0; av_pix_fmt_desc_get(format); format++) {
 268         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
 269         if (!(desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_HWACCEL | AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_PAL)) &&
 270             (desc->flags & AV_PIX_FMT_FLAG_PLANAR || desc->nb_components == 1) &&
 271             (!(desc->flags & AV_PIX_FMT_FLAG_BE) == !HAVE_BIGENDIAN || desc->comp[0].depth == 8) &&
 272             (desc->comp[0].depth == 8 || desc->comp[0].depth == 10) &&
 273             (ret = ff_add_format(&fmts_list, format)) < 0)
 274             return ret;
 275     }
 276
 277     return ff_set_common_formats(ctx, fmts_list);
 278 }
 279
 280 static int config_input_ref(AVFilterLink *inlink)
 281 {
 282     AVFilterContext *ctx  = inlink->dst;
 283     VMAFMotionContext *s = ctx->priv;
 284
 285     return ff_vmafmotion_init(&s->data, ctx->inputs[0]->w,
 286                               ctx->inputs[0]->h, ctx->inputs[0]->format);
 287 }
 288
 289 double ff_vmafmotion_uninit(VMAFMotionData *s)
 290 {
 291     av_free(s->blur_data[0]);
 292     av_free(s->blur_data[1]);
 293     av_free(s->temp_data);
 294
 295     return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
 296 }
 297
 298 static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
 299 {
 300     AVFilterContext *ctx = inlink->dst;
 301     do_vmafmotion(ctx, ref);
 302     return ff_filter_frame(ctx->outputs[0], ref);
 303 }
 304
 305 static av_cold int init(AVFilterContext *ctx)
 306 {
 307     VMAFMotionContext *s = ctx->priv;
 308
 309     if (s->stats_file_str) {
 310         if (!strcmp(s->stats_file_str, "-")) {
 311             s->stats_file = stdout;
 312         } else {
 313             s->stats_file = fopen(s->stats_file_str, "w");
 314             if (!s->stats_file) {
 315                 int err = AVERROR(errno);
 316                 char buf[128];
 317                 av_strerror(err, buf, sizeof(buf));
 318                 av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
 319                        s->stats_file_str, buf);
 320                 return err;
 321             }
 322         }
 323     }
 324
 325     return 0;
 326 }
 327
 328 static av_cold void uninit(AVFilterContext *ctx)
 329 {
 330     VMAFMotionContext *s = ctx->priv;
 331     double avg_motion = ff_vmafmotion_uninit(&s->data);
 332
 333     if (s->data.nb_frames > 0) {
 334         av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
 335     }
 336
 337     if (s->stats_file && s->stats_file != stdout)
 338         fclose(s->stats_file);
 339 }
 340
/* Input pads: a single video pad whose frames are scored by filter_frame()
 * and whose configuration runs config_input_ref(). NULL-terminated. */
static const AVFilterPad vmafmotion_inputs[] = {
    {
        .name         = "reference",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
        .config_props = config_input_ref,
    },
    { NULL }
};
 350
/* Output pads: a single video pad; filter_frame() forwards each input frame
 * to it. NULL-terminated. */
static const AVFilterPad vmafmotion_outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_VIDEO,
    },
    { NULL }
};
 358
/* Filter definition tying together the callbacks, private context, option
 * class and pads declared in this file. */
AVFilter ff_vf_vmafmotion = {
    .name          = "vmafmotion",
    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .priv_size     = sizeof(VMAFMotionContext),
    .priv_class    = &vmafmotion_class,
    .inputs        = vmafmotion_inputs,
    .outputs       = vmafmotion_outputs,
};