git.sesse.net Git - ffmpeg/blob - libavfilter/vf_libvmaf.c
Merge commit '5b6213ef6bf5e0781c83e86926eb0b33a98dc185'
[ffmpeg] / libavfilter / vf_libvmaf.c
1 /*
2  * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
3  * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * Calculate the VMAF between two input videos.
25  */
26
27 #include <pthread.h>
28 #include <libvmaf.h>
29 #include "libavutil/avstring.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/pixdesc.h"
32 #include "avfilter.h"
33 #include "drawutils.h"
34 #include "formats.h"
35 #include "framesync.h"
36 #include "internal.h"
37 #include "video.h"
38
39 typedef struct LIBVMAFContext {
40     const AVClass *class;
41     FFFrameSync fs;
42     const AVPixFmtDescriptor *desc;
43     int width;
44     int height;
45     double vmaf_score;
46     pthread_t vmaf_thread;
47     pthread_mutex_t lock;
48     pthread_cond_t cond;
49     int eof;
50     AVFrame *gmain;
51     AVFrame *gref;
52     int frame_set;
53     char *model_path;
54     char *log_path;
55     char *log_fmt;
56     int disable_clip;
57     int disable_avx;
58     int enable_transform;
59     int phone_model;
60     int psnr;
61     int ssim;
62     int ms_ssim;
63     char *pool;
64     int error;
65 } LIBVMAFContext;
66
/* Byte offset of field x inside LIBVMAFContext, used by the AVOption table. */
#define OFFSET(x) offsetof(LIBVMAFContext, x)
/*
 * Flags shared by every option of this filter. The expansion is parenthesized
 * so that it stays a single operand wherever the macro is used, e.g. in
 * "FLAGS & mask".
 */
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
69
70 static const AVOption libvmaf_options[] = {
71     {"model_path",  "Set the model to be used for computing vmaf.",                     OFFSET(model_path), AV_OPT_TYPE_STRING, {.str="/usr/local/share/model/vmaf_v0.6.1.pkl"}, 0, 1, FLAGS},
72     {"log_path",  "Set the file path to be used to store logs.",                        OFFSET(log_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
73     {"log_fmt",  "Set the format of the log (xml or json).",                            OFFSET(log_fmt), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
74     {"enable_transform",  "Enables transform for computing vmaf.",                      OFFSET(enable_transform), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
75     {"phone_model",  "Invokes the phone model that will generate higher VMAF scores.",  OFFSET(phone_model), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
76     {"psnr",  "Enables computing psnr along with vmaf.",                                OFFSET(psnr), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
77     {"ssim",  "Enables computing ssim along with vmaf.",                                OFFSET(ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
78     {"ms_ssim",  "Enables computing ms-ssim along with vmaf.",                          OFFSET(ms_ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
79     {"pool",  "Set the pool method to be used for computing vmaf.",                     OFFSET(pool), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
80     { NULL }
81 };
82
83 FRAMESYNC_DEFINE_CLASS(libvmaf, LIBVMAFContext, fs);
84
85 #define read_frame_fn(type, bits)                                               \
86     static int read_frame_##bits##bit(float *ref_data, float *main_data,        \
87                                       float *temp_data, int stride, void *ctx)  \
88 {                                                                               \
89     LIBVMAFContext *s = (LIBVMAFContext *) ctx;                                 \
90     int ret;                                                                    \
91     \
92     pthread_mutex_lock(&s->lock);                                               \
93     \
94     while (!s->frame_set && !s->eof) {                                          \
95         pthread_cond_wait(&s->cond, &s->lock);                                  \
96     }                                                                           \
97     \
98     if (s->frame_set) {                                                         \
99         int ref_stride = s->gref->linesize[0];                                  \
100         int main_stride = s->gmain->linesize[0];                                \
101         \
102         const type *ref_ptr = (const type *) s->gref->data[0];                  \
103         const type *main_ptr = (const type *) s->gmain->data[0];                \
104         \
105         float *ptr = ref_data;                                                  \
106         \
107         int h = s->height;                                                      \
108         int w = s->width;                                                       \
109         \
110         int i,j;                                                                \
111         \
112         for (i = 0; i < h; i++) {                                               \
113             for ( j = 0; j < w; j++) {                                          \
114                 ptr[j] = (float)ref_ptr[j];                                     \
115             }                                                                   \
116             ref_ptr += ref_stride / sizeof(*ref_ptr);                           \
117             ptr += stride / sizeof(*ptr);                                       \
118         }                                                                       \
119         \
120         ptr = main_data;                                                        \
121         \
122         for (i = 0; i < h; i++) {                                               \
123             for (j = 0; j < w; j++) {                                           \
124                 ptr[j] = (float)main_ptr[j];                                    \
125             }                                                                   \
126             main_ptr += main_stride / sizeof(*main_ptr);                        \
127             ptr += stride / sizeof(*ptr);                                       \
128         }                                                                       \
129     }                                                                           \
130     \
131     ret = !s->frame_set;                                                        \
132     \
133     s->frame_set = 0;                                                           \
134     \
135     pthread_cond_signal(&s->cond);                                              \
136     pthread_mutex_unlock(&s->lock);                                             \
137     \
138     if (ret) {                                                                  \
139         return 2;                                                               \
140     }                                                                           \
141     \
142     return 0;                                                                   \
143 }
144
145 read_frame_fn(uint8_t, 8);
146 read_frame_fn(uint16_t, 10);
147
148 static void compute_vmaf_score(LIBVMAFContext *s)
149 {
150     int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
151                       int stride, void *ctx);
152     char *format;
153
154     if (s->desc->comp[0].depth <= 8) {
155         read_frame = read_frame_8bit;
156     } else {
157         read_frame = read_frame_10bit;
158     }
159
160     format = (char *) s->desc->name;
161
162     s->error = compute_vmaf(&s->vmaf_score, format, s->width, s->height,
163                             read_frame, s, s->model_path, s->log_path,
164                             s->log_fmt, 0, 0, s->enable_transform,
165                             s->phone_model, s->psnr, s->ssim,
166                             s->ms_ssim, s->pool);
167 }
168
169 static void *call_vmaf(void *ctx)
170 {
171     LIBVMAFContext *s = (LIBVMAFContext *) ctx;
172     compute_vmaf_score(s);
173     if (!s->error) {
174         av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n",s->vmaf_score);
175     } else {
176         pthread_mutex_lock(&s->lock);
177         pthread_cond_signal(&s->cond);
178         pthread_mutex_unlock(&s->lock);
179     }
180     pthread_exit(NULL);
181     return NULL;
182 }
183
184 static int do_vmaf(FFFrameSync *fs)
185 {
186     AVFilterContext *ctx = fs->parent;
187     LIBVMAFContext *s = ctx->priv;
188     AVFrame *master, *ref;
189     int ret;
190
191     ret = ff_framesync_dualinput_get(fs, &master, &ref);
192     if (ret < 0)
193         return ret;
194     if (!ref)
195         return ff_filter_frame(ctx->outputs[0], master);
196
197     pthread_mutex_lock(&s->lock);
198
199     while (s->frame_set && !s->error) {
200         pthread_cond_wait(&s->cond, &s->lock);
201     }
202
203     if (s->error) {
204         av_log(ctx, AV_LOG_ERROR,
205                "libvmaf encountered an error, check log for details\n");
206         pthread_mutex_unlock(&s->lock);
207         return AVERROR(EINVAL);
208     }
209
210     av_frame_ref(s->gref, ref);
211     av_frame_ref(s->gmain, master);
212
213     s->frame_set = 1;
214
215     pthread_cond_signal(&s->cond);
216     pthread_mutex_unlock(&s->lock);
217
218     return ff_filter_frame(ctx->outputs[0], master);
219 }
220
221 static av_cold int init(AVFilterContext *ctx)
222 {
223     LIBVMAFContext *s = ctx->priv;
224
225     s->gref = av_frame_alloc();
226     s->gmain = av_frame_alloc();
227     s->error = 0;
228
229     pthread_mutex_init(&s->lock, NULL);
230     pthread_cond_init (&s->cond, NULL);
231
232     s->fs.on_event = do_vmaf;
233     return 0;
234 }
235
236 static int query_formats(AVFilterContext *ctx)
237 {
238     static const enum AVPixelFormat pix_fmts[] = {
239         AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
240         AV_PIX_FMT_YUV444P10LE, AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV420P10LE,
241         AV_PIX_FMT_NONE
242     };
243
244     AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
245     if (!fmts_list)
246         return AVERROR(ENOMEM);
247     return ff_set_common_formats(ctx, fmts_list);
248 }
249
250
251 static int config_input_ref(AVFilterLink *inlink)
252 {
253     AVFilterContext *ctx  = inlink->dst;
254     LIBVMAFContext *s = ctx->priv;
255     int th;
256
257     if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
258         ctx->inputs[0]->h != ctx->inputs[1]->h) {
259         av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
260         return AVERROR(EINVAL);
261     }
262     if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
263         av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
264         return AVERROR(EINVAL);
265     }
266
267     s->desc = av_pix_fmt_desc_get(inlink->format);
268     s->width = ctx->inputs[0]->w;
269     s->height = ctx->inputs[0]->h;
270
271     th = pthread_create(&s->vmaf_thread, NULL, call_vmaf, (void *) s);
272     if (th) {
273         av_log(ctx, AV_LOG_ERROR, "Thread creation failed.\n");
274         return AVERROR(EINVAL);
275     }
276
277     return 0;
278 }
279
280 static int config_output(AVFilterLink *outlink)
281 {
282     AVFilterContext *ctx = outlink->src;
283     LIBVMAFContext *s = ctx->priv;
284     AVFilterLink *mainlink = ctx->inputs[0];
285     int ret;
286
287     ret = ff_framesync_init_dualinput(&s->fs, ctx);
288     if (ret < 0)
289         return ret;
290     outlink->w = mainlink->w;
291     outlink->h = mainlink->h;
292     outlink->time_base = mainlink->time_base;
293     outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
294     outlink->frame_rate = mainlink->frame_rate;
295     if ((ret = ff_framesync_configure(&s->fs)) < 0)
296         return ret;
297
298     return 0;
299 }
300
301 static int activate(AVFilterContext *ctx)
302 {
303     LIBVMAFContext *s = ctx->priv;
304     return ff_framesync_activate(&s->fs);
305 }
306
307 static av_cold void uninit(AVFilterContext *ctx)
308 {
309     LIBVMAFContext *s = ctx->priv;
310
311     ff_framesync_uninit(&s->fs);
312
313     pthread_mutex_lock(&s->lock);
314     s->eof = 1;
315     pthread_cond_signal(&s->cond);
316     pthread_mutex_unlock(&s->lock);
317
318     pthread_join(s->vmaf_thread, NULL);
319
320     av_frame_free(&s->gref);
321     av_frame_free(&s->gmain);
322
323     pthread_mutex_destroy(&s->lock);
324     pthread_cond_destroy(&s->cond);
325 }
326
327 static const AVFilterPad libvmaf_inputs[] = {
328     {
329         .name         = "main",
330         .type         = AVMEDIA_TYPE_VIDEO,
331     },{
332         .name         = "reference",
333         .type         = AVMEDIA_TYPE_VIDEO,
334         .config_props = config_input_ref,
335     },
336     { NULL }
337 };
338
339 static const AVFilterPad libvmaf_outputs[] = {
340     {
341         .name          = "default",
342         .type          = AVMEDIA_TYPE_VIDEO,
343         .config_props  = config_output,
344     },
345     { NULL }
346 };
347
348 AVFilter ff_vf_libvmaf = {
349     .name          = "libvmaf",
350     .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF between two video streams."),
351     .preinit       = libvmaf_framesync_preinit,
352     .init          = init,
353     .uninit        = uninit,
354     .query_formats = query_formats,
355     .activate      = activate,
356     .priv_size     = sizeof(LIBVMAFContext),
357     .priv_class    = &libvmaf_class,
358     .inputs        = libvmaf_inputs,
359     .outputs       = libvmaf_outputs,
360 };