]> git.sesse.net Git - ffmpeg/blob - libavfilter/vf_libvmaf.c
Merge commit '07a2b155949eb267cdfc7805f42c7b3375f9c7c5'
[ffmpeg] / libavfilter / vf_libvmaf.c
1 /*
2  * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
3  * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * Calculate the VMAF between two input videos.
25  */
26
27 #include <pthread.h>
28 #include <libvmaf.h>
29 #include "libavutil/avstring.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/pixdesc.h"
32 #include "avfilter.h"
33 #include "drawutils.h"
34 #include "formats.h"
35 #include "framesync.h"
36 #include "internal.h"
37 #include "video.h"
38
39 typedef struct LIBVMAFContext {
40     const AVClass *class;
41     FFFrameSync fs;
42     const AVPixFmtDescriptor *desc;
43     char *format;
44     int width;
45     int height;
46     double vmaf_score;
47     pthread_t vmaf_thread;
48     pthread_mutex_t lock;
49     pthread_cond_t cond;
50     int eof;
51     AVFrame *gmain;
52     AVFrame *gref;
53     int frame_set;
54     char *model_path;
55     char *log_path;
56     char *log_fmt;
57     int disable_clip;
58     int disable_avx;
59     int enable_transform;
60     int phone_model;
61     int psnr;
62     int ssim;
63     int ms_ssim;
64     char *pool;
65 } LIBVMAFContext;
66
/* Byte offset of an option's backing field inside the private context. */
#define OFFSET(x) offsetof(LIBVMAFContext, x)
/*
 * Flags shared by every option of this filter. The expansion is parenthesized
 * so it stays a single operand when combined with other operators.
 */
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
69
70 static const AVOption libvmaf_options[] = {
71     {"model_path",  "Set the model to be used for computing vmaf.",                     OFFSET(model_path), AV_OPT_TYPE_STRING, {.str="/usr/local/share/model/vmaf_v0.6.1.pkl"}, 0, 1, FLAGS},
72     {"log_path",  "Set the file path to be used to store logs.",                        OFFSET(log_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
73     {"log_fmt",  "Set the format of the log (xml or json).",                            OFFSET(log_fmt), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
74     {"enable_transform",  "Enables transform for computing vmaf.",                      OFFSET(enable_transform), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
75     {"phone_model",  "Invokes the phone model that will generate higher VMAF scores.",  OFFSET(phone_model), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
76     {"psnr",  "Enables computing psnr along with vmaf.",                                OFFSET(psnr), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
77     {"ssim",  "Enables computing ssim along with vmaf.",                                OFFSET(ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
78     {"ms_ssim",  "Enables computing ms-ssim along with vmaf.",                          OFFSET(ms_ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
79     {"pool",  "Set the pool method to be used for computing vmaf.",                     OFFSET(pool), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
80     { NULL }
81 };
82
83 FRAMESYNC_DEFINE_CLASS(libvmaf, LIBVMAFContext, fs);
84
85 #define read_frame_fn(type, bits)                                               \
86     static int read_frame_##bits##bit(float *ref_data, float *main_data,        \
87                                       float *temp_data, int stride, void *ctx)  \
88 {                                                                               \
89     LIBVMAFContext *s = (LIBVMAFContext *) ctx;                                 \
90     int ret;                                                                    \
91     \
92     pthread_mutex_lock(&s->lock);                                               \
93     \
94     while (!s->frame_set && !s->eof) {                                          \
95         pthread_cond_wait(&s->cond, &s->lock);                                  \
96     }                                                                           \
97     \
98     if (s->frame_set) {                                                         \
99         int ref_stride = s->gref->linesize[0];                                  \
100         int main_stride = s->gmain->linesize[0];                                \
101         \
102         const type *ref_ptr = (const type *) s->gref->data[0];                  \
103         const type *main_ptr = (const type *) s->gmain->data[0];                \
104         \
105         float *ptr = ref_data;                                                  \
106         \
107         int h = s->height;                                                      \
108         int w = s->width;                                                       \
109         \
110         int i,j;                                                                \
111         \
112         for (i = 0; i < h; i++) {                                               \
113             for ( j = 0; j < w; j++) {                                          \
114                 ptr[j] = (float)ref_ptr[j];                                     \
115             }                                                                   \
116             ref_ptr += ref_stride / sizeof(*ref_ptr);                           \
117             ptr += stride / sizeof(*ptr);                                       \
118         }                                                                       \
119         \
120         ptr = main_data;                                                        \
121         \
122         for (i = 0; i < h; i++) {                                               \
123             for (j = 0; j < w; j++) {                                           \
124                 ptr[j] = (float)main_ptr[j];                                    \
125             }                                                                   \
126             main_ptr += main_stride / sizeof(*main_ptr);                        \
127             ptr += stride / sizeof(*ptr);                                       \
128         }                                                                       \
129     }                                                                           \
130     \
131     ret = !s->frame_set;                                                        \
132     \
133     s->frame_set = 0;                                                           \
134     \
135     pthread_cond_signal(&s->cond);                                              \
136     pthread_mutex_unlock(&s->lock);                                             \
137     \
138     if (ret) {                                                                  \
139         return 2;                                                               \
140     }                                                                           \
141     \
142     return 0;                                                                   \
143 }
144
145 read_frame_fn(uint8_t, 8);
146 read_frame_fn(uint16_t, 10);
147
148 static void compute_vmaf_score(LIBVMAFContext *s)
149 {
150     int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
151                       int stride, void *ctx);
152
153     if (s->desc->comp[0].depth <= 8) {
154         read_frame = read_frame_8bit;
155     } else {
156         read_frame = read_frame_10bit;
157     }
158
159     s->vmaf_score = compute_vmaf(s->format, s->width, s->height, read_frame, s,
160                                  s->model_path, s->log_path, s->log_fmt, 0, 0,
161                                  s->enable_transform, s->phone_model, s->psnr,
162                                  s->ssim, s->ms_ssim, s->pool);
163 }
164
165 static void *call_vmaf(void *ctx)
166 {
167     LIBVMAFContext *s = (LIBVMAFContext *) ctx;
168     compute_vmaf_score(s);
169     av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n",s->vmaf_score);
170     pthread_exit(NULL);
171 }
172
173 static int do_vmaf(FFFrameSync *fs)
174 {
175     AVFilterContext *ctx = fs->parent;
176     LIBVMAFContext *s = ctx->priv;
177     AVFrame *main, *ref;
178     int ret;
179
180     ret = ff_framesync_dualinput_get(fs, &main, &ref);
181     if (ret < 0)
182         return ret;
183     if (!ref)
184         return ff_filter_frame(ctx->outputs[0], main);
185
186     pthread_mutex_lock(&s->lock);
187
188     while (s->frame_set != 0) {
189         pthread_cond_wait(&s->cond, &s->lock);
190     }
191
192     av_frame_ref(s->gref, ref);
193     av_frame_ref(s->gmain, main);
194
195     s->frame_set = 1;
196
197     pthread_cond_signal(&s->cond);
198     pthread_mutex_unlock(&s->lock);
199
200     return ff_filter_frame(ctx->outputs[0], main);
201 }
202
203 static av_cold int init(AVFilterContext *ctx)
204 {
205     LIBVMAFContext *s = ctx->priv;
206
207     s->gref = av_frame_alloc();
208     s->gmain = av_frame_alloc();
209
210     pthread_mutex_init(&s->lock, NULL);
211     pthread_cond_init (&s->cond, NULL);
212
213     s->fs.on_event = do_vmaf;
214     return 0;
215 }
216
217 static int query_formats(AVFilterContext *ctx)
218 {
219     static const enum AVPixelFormat pix_fmts[] = {
220         AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
221         AV_PIX_FMT_YUV444P10LE, AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV420P10LE,
222         AV_PIX_FMT_NONE
223     };
224
225     AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
226     if (!fmts_list)
227         return AVERROR(ENOMEM);
228     return ff_set_common_formats(ctx, fmts_list);
229 }
230
231
232 static int config_input_ref(AVFilterLink *inlink)
233 {
234     AVFilterContext *ctx  = inlink->dst;
235     LIBVMAFContext *s = ctx->priv;
236     int th;
237
238     if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
239         ctx->inputs[0]->h != ctx->inputs[1]->h) {
240         av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
241         return AVERROR(EINVAL);
242     }
243     if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
244         av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
245         return AVERROR(EINVAL);
246     }
247
248     s->desc = av_pix_fmt_desc_get(inlink->format);
249     s->width = ctx->inputs[0]->w;
250     s->height = ctx->inputs[0]->h;
251
252     th = pthread_create(&s->vmaf_thread, NULL, call_vmaf, (void *) s);
253     if (th) {
254         av_log(ctx, AV_LOG_ERROR, "Thread creation failed.\n");
255         return AVERROR(EINVAL);
256     }
257
258     return 0;
259 }
260
261
262 static int config_output(AVFilterLink *outlink)
263 {
264     AVFilterContext *ctx = outlink->src;
265     LIBVMAFContext *s = ctx->priv;
266     AVFilterLink *mainlink = ctx->inputs[0];
267     int ret;
268
269     ret = ff_framesync_init_dualinput(&s->fs, ctx);
270     if (ret < 0)
271         return ret;
272     outlink->w = mainlink->w;
273     outlink->h = mainlink->h;
274     outlink->time_base = mainlink->time_base;
275     outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
276     outlink->frame_rate = mainlink->frame_rate;
277     if ((ret = ff_framesync_configure(&s->fs)) < 0)
278         return ret;
279
280     return 0;
281 }
282
283 static int activate(AVFilterContext *ctx)
284 {
285     LIBVMAFContext *s = ctx->priv;
286     return ff_framesync_activate(&s->fs);
287 }
288
289 static av_cold void uninit(AVFilterContext *ctx)
290 {
291     LIBVMAFContext *s = ctx->priv;
292
293     ff_framesync_uninit(&s->fs);
294
295     pthread_mutex_lock(&s->lock);
296     s->eof = 1;
297     pthread_cond_signal(&s->cond);
298     pthread_mutex_unlock(&s->lock);
299
300     pthread_join(s->vmaf_thread, NULL);
301
302     av_frame_free(&s->gref);
303     av_frame_free(&s->gmain);
304
305     pthread_mutex_destroy(&s->lock);
306     pthread_cond_destroy(&s->cond);
307 }
308
309 static const AVFilterPad libvmaf_inputs[] = {
310     {
311         .name         = "main",
312         .type         = AVMEDIA_TYPE_VIDEO,
313     },{
314         .name         = "reference",
315         .type         = AVMEDIA_TYPE_VIDEO,
316         .config_props = config_input_ref,
317     },
318     { NULL }
319 };
320
321 static const AVFilterPad libvmaf_outputs[] = {
322     {
323         .name          = "default",
324         .type          = AVMEDIA_TYPE_VIDEO,
325         .config_props  = config_output,
326     },
327     { NULL }
328 };
329
330 AVFilter ff_vf_libvmaf = {
331     .name          = "libvmaf",
332     .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF between two video streams."),
333     .preinit       = libvmaf_framesync_preinit,
334     .init          = init,
335     .uninit        = uninit,
336     .query_formats = query_formats,
337     .activate      = activate,
338     .priv_size     = sizeof(LIBVMAFContext),
339     .priv_class    = &libvmaf_class,
340     .inputs        = libvmaf_inputs,
341     .outputs       = libvmaf_outputs,
342 };