git.sesse.net Git - ffmpeg/blob - libavfilter/vf_libvmaf.c

   1 /*
   2  * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
   3  * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * Calculate the VMAF between two input videos.
  25  */
  26
  27 #include <inttypes.h>
  28 #include <pthread.h>
  29 #include <libvmaf.h>
  30 #include "libavutil/avstring.h"
  31 #include "libavutil/opt.h"
  32 #include "libavutil/pixdesc.h"
  33 #include "avfilter.h"
  34 #include "dualinput.h"
  35 #include "drawutils.h"
  36 #include "formats.h"
  37 #include "internal.h"
  38 #include "video.h"
  39
  40 typedef struct LIBVMAFContext {
  41     const AVClass *class;
  42     FFDualInputContext dinput;
  43     const AVPixFmtDescriptor *desc;
  44     char *format;
  45     int width;
  46     int height;
  47     double vmaf_score;
  48     pthread_t vmaf_thread;
  49     pthread_mutex_t lock;
  50     pthread_cond_t cond;
  51     int eof;
  52     AVFrame *gmain;
  53     AVFrame *gref;
  54     int frame_set;
  55     char *model_path;
  56     char *log_path;
  57     char *log_fmt;
  58     int disable_clip;
  59     int disable_avx;
  60     int enable_transform;
  61     int phone_model;
  62     int psnr;
  63     int ssim;
  64     int ms_ssim;
  65     char *pool;
  66 } LIBVMAFContext;
  67
  /* Byte offset of member x inside the filter's private context. */
  #define OFFSET(x) offsetof(LIBVMAFContext, x)
  /*
   * Flags shared by every option entry. The expansion is parenthesized so
   * that it always behaves as a single operand, whatever operator sits
   * next to the macro at the point of use.
   */
  #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
  70
  71 static const AVOption libvmaf_options[] = {
  72     {"model_path",  "Set the model to be used for computing vmaf.",                     OFFSET(model_path), AV_OPT_TYPE_STRING, {.str="/usr/local/share/model/vmaf_v0.6.1.pkl"}, 0, 1, FLAGS},
  73     {"log_path",  "Set the file path to be used to store logs.",                        OFFSET(log_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
  74     {"log_fmt",  "Set the format of the log (xml or json).",                            OFFSET(log_fmt), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
  75     {"enable_transform",  "Enables transform for computing vmaf.",                      OFFSET(enable_transform), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
  76     {"phone_model",  "Invokes the phone model that will generate higher VMAF scores.",  OFFSET(phone_model), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
  77     {"psnr",  "Enables computing psnr along with vmaf.",                                OFFSET(psnr), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
  78     {"ssim",  "Enables computing ssim along with vmaf.",                                OFFSET(ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
  79     {"ms_ssim",  "Enables computing ms-ssim along with vmaf.",                          OFFSET(ms_ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
  80     {"pool",  "Set the pool method to be used for computing vmaf.",                     OFFSET(pool), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
  81     { NULL }
  82 };
  83
  84 AVFILTER_DEFINE_CLASS(libvmaf);
  85
  86 #define read_frame_fn(type, bits)                                               \
  87     static int read_frame_##bits##bit(float *ref_data, float *main_data,            \
  88                                       float *temp_data, int stride,             \
  89                                       double *score, void *ctx)                 \
  90 {                                                                               \
  91     LIBVMAFContext *s = (LIBVMAFContext *) ctx;                                       \
  92     int ret;                                                                    \
  93     \
  94     pthread_mutex_lock(&s->lock);                                               \
  95     \
  96     while (!s->frame_set && !s->eof) {                                          \
  97         pthread_cond_wait(&s->cond, &s->lock);                                  \
  98     }                                                                           \
  99     \
 100     if (s->frame_set) {                                                         \
 101         int ref_stride = s->gref->linesize[0];                                  \
 102         int main_stride = s->gmain->linesize[0];                                \
 103         \
 104         const type *ref_ptr = (const type *) s->gref->data[0];                  \
 105         const type *main_ptr = (const type *) s->gmain->data[0];                \
 106         \
 107         float *ptr = ref_data;                                                  \
 108         \
 109         int h = s->height;                                                      \
 110         int w = s->width;                                                       \
 111         \
 112         int i,j;                                                                \
 113         \
 114         for (i = 0; i < h; i++) {                                               \
 115             for ( j = 0; j < w; j++) {                                          \
 116                 ptr[j] = (float)ref_ptr[j];                                     \
 117             }                                                                   \
 118             ref_ptr += ref_stride / sizeof(*ref_ptr);                           \
 119             ptr += stride / sizeof(*ptr);                                       \
 120         }                                                                       \
 121         \
 122         ptr = main_data;                                                        \
 123         \
 124         for (i = 0; i < h; i++) {                                               \
 125             for (j = 0; j < w; j++) {                                           \
 126                 ptr[j] = (float)main_ptr[j];                                    \
 127             }                                                                   \
 128             main_ptr += main_stride / sizeof(*main_ptr);                        \
 129             ptr += stride / sizeof(*ptr);                                       \
 130         }                                                                       \
 131     }                                                                           \
 132     \
 133     ret = !s->frame_set;                                                        \
 134     \
 135     s->frame_set = 0;                                                           \
 136     \
 137     pthread_cond_signal(&s->cond);                                              \
 138     pthread_mutex_unlock(&s->lock);                                             \
 139     \
 140     if (ret) {                                                                  \
 141         return 2;                                                               \
 142     }                                                                           \
 143     \
 144     return 0;                                                                   \
 145 }
 146
 147 read_frame_fn(uint8_t, 8);
 148 read_frame_fn(uint16_t, 10);
 149
 150 static void compute_vmaf_score(LIBVMAFContext *s)
 151 {
 152     int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
 153                       int stride, double *score, void *ctx);
 154
 155     if (s->desc->comp[0].depth <= 8) {
 156         read_frame = read_frame_8bit;
 157     } else {
 158         read_frame = read_frame_10bit;
 159     }
 160
 161     s->vmaf_score = compute_vmaf(s->format, s->width, s->height, read_frame, s,
 162                                  s->model_path, s->log_path, s->log_fmt, 0, 0,
 163                                  s->enable_transform, s->phone_model, s->psnr,
 164                                  s->ssim, s->ms_ssim, s->pool);
 165 }
 166
 167 static void *call_vmaf(void *ctx)
 168 {
 169     LIBVMAFContext *s = (LIBVMAFContext *) ctx;
 170     compute_vmaf_score(s);
 171     av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n",s->vmaf_score);
 172     pthread_exit(NULL);
 173 }
 174
 175 static AVFrame *do_vmaf(AVFilterContext *ctx, AVFrame *main, const AVFrame *ref)
 176 {
 177     LIBVMAFContext *s = ctx->priv;
 178
 179     pthread_mutex_lock(&s->lock);
 180
 181     while (s->frame_set != 0) {
 182         pthread_cond_wait(&s->cond, &s->lock);
 183     }
 184
 185     av_frame_ref(s->gref, ref);
 186     av_frame_ref(s->gmain, main);
 187
 188     s->frame_set = 1;
 189
 190     pthread_cond_signal(&s->cond);
 191     pthread_mutex_unlock(&s->lock);
 192
 193     return main;
 194 }
 195
 196 static av_cold int init(AVFilterContext *ctx)
 197 {
 198     LIBVMAFContext *s = ctx->priv;
 199
 200     s->gref = av_frame_alloc();
 201     s->gmain = av_frame_alloc();
 202
 203     pthread_mutex_init(&s->lock, NULL);
 204     pthread_cond_init (&s->cond, NULL);
 205
 206     s->dinput.process = do_vmaf;
 207     return 0;
 208 }
 209
 210 static int query_formats(AVFilterContext *ctx)
 211 {
 212     static const enum AVPixelFormat pix_fmts[] = {
 213         AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
 214         AV_PIX_FMT_YUV444P10LE, AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV420P10LE,
 215         AV_PIX_FMT_NONE
 216     };
 217
 218     AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
 219     if (!fmts_list)
 220         return AVERROR(ENOMEM);
 221     return ff_set_common_formats(ctx, fmts_list);
 222 }
 223
 224
 225 static int config_input_ref(AVFilterLink *inlink)
 226 {
 227     AVFilterContext *ctx  = inlink->dst;
 228     LIBVMAFContext *s = ctx->priv;
 229     int th;
 230
 231     if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
 232         ctx->inputs[0]->h != ctx->inputs[1]->h) {
 233         av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
 234         return AVERROR(EINVAL);
 235     }
 236     if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
 237         av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
 238         return AVERROR(EINVAL);
 239     }
 240
 241     s->desc = av_pix_fmt_desc_get(inlink->format);
 242     s->width = ctx->inputs[0]->w;
 243     s->height = ctx->inputs[0]->h;
 244
 245     th = pthread_create(&s->vmaf_thread, NULL, call_vmaf, (void *) s);
 246     if (th) {
 247         av_log(ctx, AV_LOG_ERROR, "Thread creation failed.\n");
 248         return AVERROR(EINVAL);
 249     }
 250
 251     return 0;
 252 }
 253
 254
 255 static int config_output(AVFilterLink *outlink)
 256 {
 257     AVFilterContext *ctx = outlink->src;
 258     LIBVMAFContext *s = ctx->priv;
 259     AVFilterLink *mainlink = ctx->inputs[0];
 260     int ret;
 261
 262     outlink->w = mainlink->w;
 263     outlink->h = mainlink->h;
 264     outlink->time_base = mainlink->time_base;
 265     outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
 266     outlink->frame_rate = mainlink->frame_rate;
 267     if ((ret = ff_dualinput_init(ctx, &s->dinput)) < 0)
 268         return ret;
 269
 270     return 0;
 271 }
 272
 273 static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
 274 {
 275     LIBVMAFContext *s = inlink->dst->priv;
 276     return ff_dualinput_filter_frame(&s->dinput, inlink, inpicref);
 277 }
 278
 279 static int request_frame(AVFilterLink *outlink)
 280 {
 281     LIBVMAFContext *s = outlink->src->priv;
 282     return ff_dualinput_request_frame(&s->dinput, outlink);
 283 }
 284
 285 static av_cold void uninit(AVFilterContext *ctx)
 286 {
 287     LIBVMAFContext *s = ctx->priv;
 288
 289     ff_dualinput_uninit(&s->dinput);
 290
 291     pthread_mutex_lock(&s->lock);
 292     s->eof = 1;
 293     pthread_cond_signal(&s->cond);
 294     pthread_mutex_unlock(&s->lock);
 295
 296     pthread_join(s->vmaf_thread, NULL);
 297
 298     av_frame_free(&s->gref);
 299     av_frame_free(&s->gmain);
 300
 301     pthread_mutex_destroy(&s->lock);
 302     pthread_cond_destroy(&s->cond);
 303 }
 304
 305 static const AVFilterPad libvmaf_inputs[] = {
 306     {
 307         .name         = "main",
 308         .type         = AVMEDIA_TYPE_VIDEO,
 309         .filter_frame = filter_frame,
 310     },{
 311         .name         = "reference",
 312         .type         = AVMEDIA_TYPE_VIDEO,
 313         .filter_frame = filter_frame,
 314         .config_props = config_input_ref,
 315     },
 316     { NULL }
 317 };
 318
 319 static const AVFilterPad libvmaf_outputs[] = {
 320     {
 321         .name          = "default",
 322         .type          = AVMEDIA_TYPE_VIDEO,
 323         .config_props  = config_output,
 324         .request_frame = request_frame,
 325     },
 326     { NULL }
 327 };
 328
 329 AVFilter ff_vf_libvmaf = {
 330     .name          = "libvmaf",
 331     .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF between two video streams."),
 332     .init          = init,
 333     .uninit        = uninit,
 334     .query_formats = query_formats,
 335     .priv_size     = sizeof(LIBVMAFContext),
 336     .priv_class    = &libvmaf_class,
 337     .inputs        = libvmaf_inputs,
 338     .outputs       = libvmaf_outputs,
 339 };