]> git.sesse.net Git - ffmpeg/blob - libavfilter/vf_libvmaf.c
vf_zscale: Fix alpha destination graph for floating point pixel formats
[ffmpeg] / libavfilter / vf_libvmaf.c
1 /*
2  * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
3  * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * Calculate the VMAF between two input videos.
25  */
26
27 #include <pthread.h>
28 #include <libvmaf.h>
29 #include "libavutil/avstring.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/pixdesc.h"
32 #include "avfilter.h"
33 #include "drawutils.h"
34 #include "formats.h"
35 #include "framesync.h"
36 #include "internal.h"
37 #include "video.h"
38
39 typedef struct LIBVMAFContext {
40     const AVClass *class;
41     FFFrameSync fs;
42     const AVPixFmtDescriptor *desc;
43     int width;
44     int height;
45     double vmaf_score;
46     pthread_t vmaf_thread;
47     pthread_mutex_t lock;
48     pthread_cond_t cond;
49     int eof;
50     AVFrame *gmain;
51     AVFrame *gref;
52     int frame_set;
53     char *model_path;
54     char *log_path;
55     char *log_fmt;
56     int disable_clip;
57     int disable_avx;
58     int enable_transform;
59     int phone_model;
60     int psnr;
61     int ssim;
62     int ms_ssim;
63     char *pool;
64 } LIBVMAFContext;
65
66 #define OFFSET(x) offsetof(LIBVMAFContext, x)
67 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
68
69 static const AVOption libvmaf_options[] = {
70     {"model_path",  "Set the model to be used for computing vmaf.",                     OFFSET(model_path), AV_OPT_TYPE_STRING, {.str="/usr/local/share/model/vmaf_v0.6.1.pkl"}, 0, 1, FLAGS},
71     {"log_path",  "Set the file path to be used to store logs.",                        OFFSET(log_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
72     {"log_fmt",  "Set the format of the log (xml or json).",                            OFFSET(log_fmt), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
73     {"enable_transform",  "Enables transform for computing vmaf.",                      OFFSET(enable_transform), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
74     {"phone_model",  "Invokes the phone model that will generate higher VMAF scores.",  OFFSET(phone_model), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
75     {"psnr",  "Enables computing psnr along with vmaf.",                                OFFSET(psnr), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
76     {"ssim",  "Enables computing ssim along with vmaf.",                                OFFSET(ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
77     {"ms_ssim",  "Enables computing ms-ssim along with vmaf.",                          OFFSET(ms_ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
78     {"pool",  "Set the pool method to be used for computing vmaf.",                     OFFSET(pool), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
79     { NULL }
80 };
81
82 FRAMESYNC_DEFINE_CLASS(libvmaf, LIBVMAFContext, fs);
83
84 #define read_frame_fn(type, bits)                                               \
85     static int read_frame_##bits##bit(float *ref_data, float *main_data,        \
86                                       float *temp_data, int stride, void *ctx)  \
87 {                                                                               \
88     LIBVMAFContext *s = (LIBVMAFContext *) ctx;                                 \
89     int ret;                                                                    \
90     \
91     pthread_mutex_lock(&s->lock);                                               \
92     \
93     while (!s->frame_set && !s->eof) {                                          \
94         pthread_cond_wait(&s->cond, &s->lock);                                  \
95     }                                                                           \
96     \
97     if (s->frame_set) {                                                         \
98         int ref_stride = s->gref->linesize[0];                                  \
99         int main_stride = s->gmain->linesize[0];                                \
100         \
101         const type *ref_ptr = (const type *) s->gref->data[0];                  \
102         const type *main_ptr = (const type *) s->gmain->data[0];                \
103         \
104         float *ptr = ref_data;                                                  \
105         \
106         int h = s->height;                                                      \
107         int w = s->width;                                                       \
108         \
109         int i,j;                                                                \
110         \
111         for (i = 0; i < h; i++) {                                               \
112             for ( j = 0; j < w; j++) {                                          \
113                 ptr[j] = (float)ref_ptr[j];                                     \
114             }                                                                   \
115             ref_ptr += ref_stride / sizeof(*ref_ptr);                           \
116             ptr += stride / sizeof(*ptr);                                       \
117         }                                                                       \
118         \
119         ptr = main_data;                                                        \
120         \
121         for (i = 0; i < h; i++) {                                               \
122             for (j = 0; j < w; j++) {                                           \
123                 ptr[j] = (float)main_ptr[j];                                    \
124             }                                                                   \
125             main_ptr += main_stride / sizeof(*main_ptr);                        \
126             ptr += stride / sizeof(*ptr);                                       \
127         }                                                                       \
128     }                                                                           \
129     \
130     ret = !s->frame_set;                                                        \
131     \
132     s->frame_set = 0;                                                           \
133     \
134     pthread_cond_signal(&s->cond);                                              \
135     pthread_mutex_unlock(&s->lock);                                             \
136     \
137     if (ret) {                                                                  \
138         return 2;                                                               \
139     }                                                                           \
140     \
141     return 0;                                                                   \
142 }
143
144 read_frame_fn(uint8_t, 8);
145 read_frame_fn(uint16_t, 10);
146
147 static void compute_vmaf_score(LIBVMAFContext *s)
148 {
149     int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
150                       int stride, void *ctx);
151     char *format;
152
153     if (s->desc->comp[0].depth <= 8) {
154         read_frame = read_frame_8bit;
155     } else {
156         read_frame = read_frame_10bit;
157     }
158
159     format = (char *) s->desc->name;
160
161     s->vmaf_score = compute_vmaf(format, s->width, s->height, read_frame, s,
162                                  s->model_path, s->log_path, s->log_fmt, 0, 0,
163                                  s->enable_transform, s->phone_model, s->psnr,
164                                  s->ssim, s->ms_ssim, s->pool);
165 }
166
167 static void *call_vmaf(void *ctx)
168 {
169     LIBVMAFContext *s = (LIBVMAFContext *) ctx;
170     compute_vmaf_score(s);
171     av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n",s->vmaf_score);
172     pthread_exit(NULL);
173 }
174
175 static int do_vmaf(FFFrameSync *fs)
176 {
177     AVFilterContext *ctx = fs->parent;
178     LIBVMAFContext *s = ctx->priv;
179     AVFrame *master, *ref;
180     int ret;
181
182     ret = ff_framesync_dualinput_get(fs, &master, &ref);
183     if (ret < 0)
184         return ret;
185     if (!ref)
186         return ff_filter_frame(ctx->outputs[0], master);
187
188     pthread_mutex_lock(&s->lock);
189
190     while (s->frame_set != 0) {
191         pthread_cond_wait(&s->cond, &s->lock);
192     }
193
194     av_frame_ref(s->gref, ref);
195     av_frame_ref(s->gmain, master);
196
197     s->frame_set = 1;
198
199     pthread_cond_signal(&s->cond);
200     pthread_mutex_unlock(&s->lock);
201
202     return ff_filter_frame(ctx->outputs[0], master);
203 }
204
205 static av_cold int init(AVFilterContext *ctx)
206 {
207     LIBVMAFContext *s = ctx->priv;
208
209     s->gref = av_frame_alloc();
210     s->gmain = av_frame_alloc();
211
212     pthread_mutex_init(&s->lock, NULL);
213     pthread_cond_init (&s->cond, NULL);
214
215     s->fs.on_event = do_vmaf;
216     return 0;
217 }
218
219 static int query_formats(AVFilterContext *ctx)
220 {
221     static const enum AVPixelFormat pix_fmts[] = {
222         AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
223         AV_PIX_FMT_YUV444P10LE, AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV420P10LE,
224         AV_PIX_FMT_NONE
225     };
226
227     AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
228     if (!fmts_list)
229         return AVERROR(ENOMEM);
230     return ff_set_common_formats(ctx, fmts_list);
231 }
232
233
234 static int config_input_ref(AVFilterLink *inlink)
235 {
236     AVFilterContext *ctx  = inlink->dst;
237     LIBVMAFContext *s = ctx->priv;
238     int th;
239
240     if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
241         ctx->inputs[0]->h != ctx->inputs[1]->h) {
242         av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
243         return AVERROR(EINVAL);
244     }
245     if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
246         av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
247         return AVERROR(EINVAL);
248     }
249
250     s->desc = av_pix_fmt_desc_get(inlink->format);
251     s->width = ctx->inputs[0]->w;
252     s->height = ctx->inputs[0]->h;
253
254     th = pthread_create(&s->vmaf_thread, NULL, call_vmaf, (void *) s);
255     if (th) {
256         av_log(ctx, AV_LOG_ERROR, "Thread creation failed.\n");
257         return AVERROR(EINVAL);
258     }
259
260     return 0;
261 }
262
263 static int config_output(AVFilterLink *outlink)
264 {
265     AVFilterContext *ctx = outlink->src;
266     LIBVMAFContext *s = ctx->priv;
267     AVFilterLink *mainlink = ctx->inputs[0];
268     int ret;
269
270     ret = ff_framesync_init_dualinput(&s->fs, ctx);
271     if (ret < 0)
272         return ret;
273     outlink->w = mainlink->w;
274     outlink->h = mainlink->h;
275     outlink->time_base = mainlink->time_base;
276     outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
277     outlink->frame_rate = mainlink->frame_rate;
278     if ((ret = ff_framesync_configure(&s->fs)) < 0)
279         return ret;
280
281     return 0;
282 }
283
284 static int activate(AVFilterContext *ctx)
285 {
286     LIBVMAFContext *s = ctx->priv;
287     return ff_framesync_activate(&s->fs);
288 }
289
290 static av_cold void uninit(AVFilterContext *ctx)
291 {
292     LIBVMAFContext *s = ctx->priv;
293
294     ff_framesync_uninit(&s->fs);
295
296     pthread_mutex_lock(&s->lock);
297     s->eof = 1;
298     pthread_cond_signal(&s->cond);
299     pthread_mutex_unlock(&s->lock);
300
301     pthread_join(s->vmaf_thread, NULL);
302
303     av_frame_free(&s->gref);
304     av_frame_free(&s->gmain);
305
306     pthread_mutex_destroy(&s->lock);
307     pthread_cond_destroy(&s->cond);
308 }
309
310 static const AVFilterPad libvmaf_inputs[] = {
311     {
312         .name         = "main",
313         .type         = AVMEDIA_TYPE_VIDEO,
314     },{
315         .name         = "reference",
316         .type         = AVMEDIA_TYPE_VIDEO,
317         .config_props = config_input_ref,
318     },
319     { NULL }
320 };
321
322 static const AVFilterPad libvmaf_outputs[] = {
323     {
324         .name          = "default",
325         .type          = AVMEDIA_TYPE_VIDEO,
326         .config_props  = config_output,
327     },
328     { NULL }
329 };
330
331 AVFilter ff_vf_libvmaf = {
332     .name          = "libvmaf",
333     .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF between two video streams."),
334     .preinit       = libvmaf_framesync_preinit,
335     .init          = init,
336     .uninit        = uninit,
337     .query_formats = query_formats,
338     .activate      = activate,
339     .priv_size     = sizeof(LIBVMAFContext),
340     .priv_class    = &libvmaf_class,
341     .inputs        = libvmaf_inputs,
342     .outputs       = libvmaf_outputs,
343 };