git.sesse.net Git - ffmpeg/blob - libavfilter/vf_identity.c

   1 /*
   2  * Copyright (c) 2021 Paul B Mahol
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
/**
 * @file
 * Calculate the Identity between two input videos.
 */
  25
  26 #include "libavutil/avstring.h"
  27 #include "libavutil/opt.h"
  28 #include "libavutil/pixdesc.h"
  29 #include "avfilter.h"
  30 #include "drawutils.h"
  31 #include "formats.h"
  32 #include "framesync.h"
  33 #include "internal.h"
  34 #include "video.h"
  35 #include "scene_sad.h"
  36
  37 typedef struct IdentityContext {
  38     const AVClass *class;
  39     FFFrameSync fs;
  40     double score, min_score, max_score, score_comp[4];
  41     uint64_t nb_frames;
  42     int is_rgb;
  43     int is_msad;
  44     uint8_t rgba_map[4];
  45     int max[4];
  46     char comps[4];
  47     int nb_components;
  48     int nb_threads;
  49     int planewidth[4];
  50     int planeheight[4];
  51     uint64_t **scores;
  52     unsigned (*filter_line)(const uint8_t *buf, const uint8_t *ref, int w);
  53     int (*filter_slice)(AVFilterContext *ctx, void *arg,
  54                         int jobnr, int nb_jobs);
  55     ff_scene_sad_fn sad;
  56 } IdentityContext;
  57
  58 #define OFFSET(x) offsetof(IdentityContext, x)
  59 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
  60
  61 static unsigned identity_line_8bit(const uint8_t *main_line,  const uint8_t *ref_line, int outw)
  62 {
  63     unsigned score = 0;
  64
  65     for (int j = 0; j < outw; j++)
  66         score += main_line[j] == ref_line[j];
  67
  68     return score;
  69 }
  70
  71 static unsigned identity_line_16bit(const uint8_t *mmain_line, const uint8_t *rref_line, int outw)
  72 {
  73     const uint16_t *main_line = (const uint16_t *)mmain_line;
  74     const uint16_t *ref_line = (const uint16_t *)rref_line;
  75     unsigned score = 0;
  76
  77     for (int j = 0; j < outw; j++)
  78         score += main_line[j] == ref_line[j];
  79
  80     return score;
  81 }
  82
  83 typedef struct ThreadData {
  84     const uint8_t *main_data[4];
  85     const uint8_t *ref_data[4];
  86     int main_linesize[4];
  87     int ref_linesize[4];
  88     int planewidth[4];
  89     int planeheight[4];
  90     uint64_t **score;
  91     int nb_components;
  92 } ThreadData;
  93
  94 static
  95 int compute_images_msad(AVFilterContext *ctx, void *arg,
  96                         int jobnr, int nb_jobs)
  97 {
  98     IdentityContext *s = ctx->priv;
  99     ThreadData *td = arg;
 100     uint64_t *score = td->score[jobnr];
 101
 102     for (int c = 0; c < td->nb_components; c++) {
 103         const int outw = td->planewidth[c];
 104         const int outh = td->planeheight[c];
 105         const int slice_start = (outh * jobnr) / nb_jobs;
 106         const int slice_end = (outh * (jobnr+1)) / nb_jobs;
 107         const int ref_linesize = td->ref_linesize[c];
 108         const int main_linesize = td->main_linesize[c];
 109         const uint8_t *main_line = td->main_data[c] + main_linesize * slice_start;
 110         const uint8_t *ref_line = td->ref_data[c] + ref_linesize * slice_start;
 111         uint64_t m = 0;
 112
 113         s->sad(main_line, main_linesize, ref_line, ref_linesize,
 114                outw, slice_end - slice_start, &m);
 115
 116         score[c] = m;
 117     }
 118
 119     return 0;
 120 }
 121
 122 static
 123 int compute_images_identity(AVFilterContext *ctx, void *arg,
 124                             int jobnr, int nb_jobs)
 125 {
 126     IdentityContext *s = ctx->priv;
 127     ThreadData *td = arg;
 128     uint64_t *score = td->score[jobnr];
 129
 130     for (int c = 0; c < td->nb_components; c++) {
 131         const int outw = td->planewidth[c];
 132         const int outh = td->planeheight[c];
 133         const int slice_start = (outh * jobnr) / nb_jobs;
 134         const int slice_end = (outh * (jobnr+1)) / nb_jobs;
 135         const int ref_linesize = td->ref_linesize[c];
 136         const int main_linesize = td->main_linesize[c];
 137         const uint8_t *main_line = td->main_data[c] + main_linesize * slice_start;
 138         const uint8_t *ref_line = td->ref_data[c] + ref_linesize * slice_start;
 139         uint64_t m = 0;
 140
 141         for (int i = slice_start; i < slice_end; i++) {
 142             m += s->filter_line(main_line, ref_line, outw);
 143             ref_line += ref_linesize;
 144             main_line += main_linesize;
 145         }
 146         score[c] = m;
 147     }
 148
 149     return 0;
 150 }
 151
 152 static void set_meta(AVFilterContext *ctx,
 153                      AVDictionary **metadata, const char *key, char comp, float d)
 154 {
 155     char value[128];
 156     snprintf(value, sizeof(value), "%f", d);
 157     if (comp) {
 158         char key2[128];
 159         snprintf(key2, sizeof(key2), "lavfi.%s.%s%s%c",
 160                  ctx->filter->name, ctx->filter->name, key, comp);
 161         av_dict_set(metadata, key2, value, 0);
 162     } else {
 163         char key2[128];
 164         snprintf(key2, sizeof(key2), "lavfi.%s.%s%s",
 165                  ctx->filter->name, ctx->filter->name, key);
 166         av_dict_set(metadata, key2, value, 0);
 167     }
 168 }
 169
 170 static int do_identity(FFFrameSync *fs)
 171 {
 172     AVFilterContext *ctx = fs->parent;
 173     IdentityContext *s = ctx->priv;
 174     AVFrame *master, *ref;
 175     double comp_score[4], score = 0.;
 176     uint64_t comp_sum[4] = { 0 };
 177     AVDictionary **metadata;
 178     ThreadData td;
 179     int ret;
 180
 181     ret = ff_framesync_dualinput_get(fs, &master, &ref);
 182     if (ret < 0)
 183         return ret;
 184     if (ctx->is_disabled || !ref)
 185         return ff_filter_frame(ctx->outputs[0], master);
 186     metadata = &master->metadata;
 187
 188     td.nb_components = s->nb_components;
 189     td.score = s->scores;
 190     for (int c = 0; c < s->nb_components; c++) {
 191         td.main_data[c] = master->data[c];
 192         td.ref_data[c] = ref->data[c];
 193         td.main_linesize[c] = master->linesize[c];
 194         td.ref_linesize[c] = ref->linesize[c];
 195         td.planewidth[c] = s->planewidth[c];
 196         td.planeheight[c] = s->planeheight[c];
 197     }
 198
 199     ctx->internal->execute(ctx, s->filter_slice, &td, NULL, FFMIN(s->planeheight[1], s->nb_threads));
 200
 201     for (int j = 0; j < s->nb_threads; j++) {
 202         for (int c = 0; c < s->nb_components; c++)
 203             comp_sum[c] += s->scores[j][c];
 204     }
 205
 206     for (int c = 0; c < s->nb_components; c++)
 207         comp_score[c] = comp_sum[c] / ((double)s->planewidth[c] * s->planeheight[c]);
 208
 209     for (int c = 0; c < s->nb_components && s->is_msad; c++)
 210         comp_score[c] /= (double)s->max[c];
 211
 212     for (int c = 0; c < s->nb_components; c++)
 213         score += comp_score[c];
 214     score /= s->nb_components;
 215
 216     s->min_score = FFMIN(s->min_score, score);
 217     s->max_score = FFMAX(s->max_score, score);
 218
 219     s->score += score;
 220
 221     for (int j = 0; j < s->nb_components; j++)
 222         s->score_comp[j] += comp_score[j];
 223     s->nb_frames++;
 224
 225     for (int j = 0; j < s->nb_components; j++) {
 226         int c = s->is_rgb ? s->rgba_map[j] : j;
 227         set_meta(ctx, metadata, ".", s->comps[j], comp_score[c]);
 228     }
 229     set_meta(ctx, metadata, "_avg", 0, score);
 230
 231     return ff_filter_frame(ctx->outputs[0], master);
 232 }
 233
 234 static av_cold int init(AVFilterContext *ctx)
 235 {
 236     IdentityContext *s = ctx->priv;
 237
 238     s->fs.on_event = do_identity;
 239
 240     return 0;
 241 }
 242
 243 static int query_formats(AVFilterContext *ctx)
 244 {
 245     static const enum AVPixelFormat pix_fmts[] = {
 246         AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
 247 #define PF_NOALPHA(suf) AV_PIX_FMT_YUV420##suf,  AV_PIX_FMT_YUV422##suf,  AV_PIX_FMT_YUV444##suf
 248 #define PF_ALPHA(suf)   AV_PIX_FMT_YUVA420##suf, AV_PIX_FMT_YUVA422##suf, AV_PIX_FMT_YUVA444##suf
 249 #define PF(suf)         PF_NOALPHA(suf), PF_ALPHA(suf)
 250         PF(P), PF(P9), PF(P10), PF_NOALPHA(P12), PF_NOALPHA(P14), PF(P16),
 251         AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
 252         AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
 253         AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
 254         AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
 255         AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
 256         AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
 257         AV_PIX_FMT_NONE
 258     };
 259
 260     AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
 261     if (!fmts_list)
 262         return AVERROR(ENOMEM);
 263     return ff_set_common_formats(ctx, fmts_list);
 264 }
 265
 266 static int config_input_ref(AVFilterLink *inlink)
 267 {
 268     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
 269     AVFilterContext *ctx  = inlink->dst;
 270     IdentityContext *s = ctx->priv;
 271
 272     s->nb_threads = ff_filter_get_nb_threads(ctx);
 273     s->nb_components = desc->nb_components;
 274     if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
 275         ctx->inputs[0]->h != ctx->inputs[1]->h) {
 276         av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
 277         return AVERROR(EINVAL);
 278     }
 279     if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
 280         av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
 281         return AVERROR(EINVAL);
 282     }
 283
 284     s->is_rgb = ff_fill_rgba_map(s->rgba_map, inlink->format) >= 0;
 285     s->comps[0] = s->is_rgb ? 'R' : 'Y' ;
 286     s->comps[1] = s->is_rgb ? 'G' : 'U' ;
 287     s->comps[2] = s->is_rgb ? 'B' : 'V' ;
 288     s->comps[3] = 'A';
 289
 290     s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
 291     s->planeheight[0] = s->planeheight[3] = inlink->h;
 292     s->planewidth[1]  = s->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
 293     s->planewidth[0]  = s->planewidth[3]  = inlink->w;
 294
 295     s->scores = av_calloc(s->nb_threads, sizeof(*s->scores));
 296     if (!s->scores)
 297         return AVERROR(ENOMEM);
 298
 299     for (int t = 0; t < s->nb_threads && s->scores; t++) {
 300         s->scores[t] = av_calloc(s->nb_components, sizeof(*s->scores[0]));
 301         if (!s->scores[t])
 302             return AVERROR(ENOMEM);
 303     }
 304
 305     s->min_score = +INFINITY;
 306     s->max_score = -INFINITY;
 307
 308     s->max[0] = (1 << desc->comp[0].depth) - 1;
 309     s->max[1] = (1 << desc->comp[1].depth) - 1;
 310     s->max[2] = (1 << desc->comp[2].depth) - 1;
 311     s->max[3] = (1 << desc->comp[3].depth) - 1;
 312
 313     s->is_msad = !strcmp(ctx->filter->name, "msad");
 314     s->filter_slice = !s->is_msad ? compute_images_identity : compute_images_msad;
 315     s->filter_line = desc->comp[0].depth > 8 ? identity_line_16bit : identity_line_8bit;
 316
 317     s->sad = ff_scene_sad_get_fn(desc->comp[0].depth <= 8 ? 8 : 16);
 318     if (!s->sad)
 319         return AVERROR(EINVAL);
 320
 321     return 0;
 322 }
 323
 324 static int config_output(AVFilterLink *outlink)
 325 {
 326     AVFilterContext *ctx = outlink->src;
 327     IdentityContext *s = ctx->priv;
 328     AVFilterLink *mainlink = ctx->inputs[0];
 329     int ret;
 330
 331     ret = ff_framesync_init_dualinput(&s->fs, ctx);
 332     if (ret < 0)
 333         return ret;
 334     outlink->w = mainlink->w;
 335     outlink->h = mainlink->h;
 336     outlink->time_base = mainlink->time_base;
 337     outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
 338     outlink->frame_rate = mainlink->frame_rate;
 339     if ((ret = ff_framesync_configure(&s->fs)) < 0)
 340         return ret;
 341
 342     outlink->time_base = s->fs.time_base;
 343
 344     if (av_cmp_q(mainlink->time_base, outlink->time_base) ||
 345         av_cmp_q(ctx->inputs[1]->time_base, outlink->time_base))
 346         av_log(ctx, AV_LOG_WARNING, "not matching timebases found between first input: %d/%d and second input %d/%d, results may be incorrect!\n",
 347                mainlink->time_base.num, mainlink->time_base.den,
 348                ctx->inputs[1]->time_base.num, ctx->inputs[1]->time_base.den);
 349
 350     return 0;
 351 }
 352
 353 static int activate(AVFilterContext *ctx)
 354 {
 355     IdentityContext *s = ctx->priv;
 356     return ff_framesync_activate(&s->fs);
 357 }
 358
 359 static av_cold void uninit(AVFilterContext *ctx)
 360 {
 361     IdentityContext *s = ctx->priv;
 362
 363     if (s->nb_frames > 0) {
 364         char buf[256];
 365
 366         buf[0] = 0;
 367         for (int j = 0; j < s->nb_components; j++) {
 368             int c = s->is_rgb ? s->rgba_map[j] : j;
 369             av_strlcatf(buf, sizeof(buf), " %c:%f", s->comps[j], s->score_comp[c] / s->nb_frames);
 370         }
 371
 372         av_log(ctx, AV_LOG_INFO, "%s%s average:%f min:%f max:%f\n",
 373                ctx->filter->name,
 374                buf,
 375                s->score / s->nb_frames,
 376                s->min_score,
 377                s->max_score);
 378     }
 379
 380     ff_framesync_uninit(&s->fs);
 381     for (int t = 0; t < s->nb_threads && s->scores; t++)
 382         av_freep(&s->scores[t]);
 383     av_freep(&s->scores);
 384 }
 385
/*
 * Input pads shared by both filters: "main" is the first input and
 * "reference" the second. Only the reference pad has a config_props
 * callback (config_input_ref).
 */
static const AVFilterPad identity_inputs[] = {
    {
        .name         = "main",
        .type         = AVMEDIA_TYPE_VIDEO,
    },{
        .name         = "reference",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_input_ref,
    },
    { NULL }
};
 397
/* Single video output pad shared by both filters, configured by config_output(). */
static const AVFilterPad identity_outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_VIDEO,
        .config_props  = config_output,
    },
    { NULL }
};
 406
/*
 * Option table containing only the terminating entry. Both identity_options
 * and msad_options are defined as aliases of this table.
 */
static const AVOption options[] = {
    { NULL }
};
 410
#if CONFIG_IDENTITY_FILTER

/* FRAMESYNC_DEFINE_CLASS looks for identity_options; alias it to the shared table. */
#define identity_options options
FRAMESYNC_DEFINE_CLASS(identity, IdentityContext, fs);

/*
 * Definition of the "identity" filter. It uses the same callbacks and pads
 * as the msad filter; config_input_ref() selects the behavior from the
 * filter name.
 */
AVFilter ff_vf_identity = {
    .name          = "identity",
    .description   = NULL_IF_CONFIG_SMALL("Calculate the Identity between two video streams."),
    .preinit       = identity_framesync_preinit,
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .activate      = activate,
    .priv_size     = sizeof(IdentityContext),
    .priv_class    = &identity_class,
    .inputs        = identity_inputs,
    .outputs       = identity_outputs,
    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
};

#endif /* CONFIG_IDENTITY_FILTER */
 432
#if CONFIG_MSAD_FILTER

/* FRAMESYNC_DEFINE_CLASS looks for msad_options; alias it to the shared table. */
#define msad_options options
FRAMESYNC_DEFINE_CLASS(msad, IdentityContext, fs);

/*
 * Definition of the "msad" filter. It uses the same callbacks and pads as
 * the identity filter; config_input_ref() detects the name "msad" and
 * selects compute_images_msad() as the slice worker.
 */
AVFilter ff_vf_msad = {
    .name          = "msad",
    .description   = NULL_IF_CONFIG_SMALL("Calculate the MSAD between two video streams."),
    .preinit       = msad_framesync_preinit,
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .activate      = activate,
    .priv_size     = sizeof(IdentityContext),
    .priv_class    = &msad_class,
    .inputs        = identity_inputs,
    .outputs       = identity_outputs,
    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
};

#endif /* CONFIG_MSAD_FILTER */