git.sesse.net Git - ffmpeg/blob - libavfilter/vf_thumbnail.c

   1 /*
   2  * Copyright (c) 2011 Smartjog S.A.S, Clément Bœsch <clement.boesch@smartjog.com>
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 /**
  22  * @file
  23  * Potential thumbnail lookup filter to reduce the risk of an inappropriate
  24  * selection (such as a black frame) we could get with an absolute seek.
  25  *
  26  * Simplified version of algorithm by Vadim Zaliva <lord@crocodile.org>.
  27  * @see http://notbrainsurgery.livejournal.com/29773.html
  28  */
  29
  30 #include "libavutil/opt.h"
  31 #include "libavutil/pixdesc.h"
  32 #include "avfilter.h"
  33 #include "internal.h"
  34
  35 #define HIST_SIZE (3*256)
  36
  37 struct thumb_frame {
  38     AVFrame *buf;               ///< cached frame
  39     int histogram[HIST_SIZE];   ///< RGB color distribution histogram of the frame
  40 };
  41
  42 typedef struct ThumbContext {
  43     const AVClass *class;
  44     int n;                      ///< current frame
  45     int n_frames;               ///< number of frames for analysis
  46     struct thumb_frame *frames; ///< the n_frames frames
  47     AVRational tb;              ///< copy of the input timebase to ease access
  48
  49     int planewidth[4];
  50     int planeheight[4];
  51 } ThumbContext;
  52
  53 #define OFFSET(x) offsetof(ThumbContext, x)
  54 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
  55
  56 static const AVOption thumbnail_options[] = {
  57     { "n", "set the frames batch size", OFFSET(n_frames), AV_OPT_TYPE_INT, {.i64=100}, 2, INT_MAX, FLAGS },
  58     { NULL }
  59 };
  60
  61 AVFILTER_DEFINE_CLASS(thumbnail);
  62
  63 static av_cold int init(AVFilterContext *ctx)
  64 {
  65     ThumbContext *s = ctx->priv;
  66
  67     s->frames = av_calloc(s->n_frames, sizeof(*s->frames));
  68     if (!s->frames) {
  69         av_log(ctx, AV_LOG_ERROR,
  70                "Allocation failure, try to lower the number of frames\n");
  71         return AVERROR(ENOMEM);
  72     }
  73     av_log(ctx, AV_LOG_VERBOSE, "batch size: %d frames\n", s->n_frames);
  74     return 0;
  75 }
  76
  77 /**
  78  * @brief        Compute Sum-square deviation to estimate "closeness".
  79  * @param hist   color distribution histogram
  80  * @param median average color distribution histogram
  81  * @return       sum of squared errors
  82  */
  83 static double frame_sum_square_err(const int *hist, const double *median)
  84 {
  85     int i;
  86     double err, sum_sq_err = 0;
  87
  88     for (i = 0; i < HIST_SIZE; i++) {
  89         err = median[i] - (double)hist[i];
  90         sum_sq_err += err*err;
  91     }
  92     return sum_sq_err;
  93 }
  94
  95 static AVFrame *get_best_frame(AVFilterContext *ctx)
  96 {
  97     AVFrame *picref;
  98     ThumbContext *s = ctx->priv;
  99     int i, j, best_frame_idx = 0;
 100     int nb_frames = s->n;
 101     double avg_hist[HIST_SIZE] = {0}, sq_err, min_sq_err = -1;
 102
 103     // average histogram of the N frames
 104     for (j = 0; j < FF_ARRAY_ELEMS(avg_hist); j++) {
 105         for (i = 0; i < nb_frames; i++)
 106             avg_hist[j] += (double)s->frames[i].histogram[j];
 107         avg_hist[j] /= nb_frames;
 108     }
 109
 110     // find the frame closer to the average using the sum of squared errors
 111     for (i = 0; i < nb_frames; i++) {
 112         sq_err = frame_sum_square_err(s->frames[i].histogram, avg_hist);
 113         if (i == 0 || sq_err < min_sq_err)
 114             best_frame_idx = i, min_sq_err = sq_err;
 115     }
 116
 117     // free and reset everything (except the best frame buffer)
 118     for (i = 0; i < nb_frames; i++) {
 119         memset(s->frames[i].histogram, 0, sizeof(s->frames[i].histogram));
 120         if (i != best_frame_idx)
 121             av_frame_free(&s->frames[i].buf);
 122     }
 123     s->n = 0;
 124
 125     // raise the chosen one
 126     picref = s->frames[best_frame_idx].buf;
 127     av_log(ctx, AV_LOG_INFO, "frame id #%d (pts_time=%f) selected "
 128            "from a set of %d images\n", best_frame_idx,
 129            picref->pts * av_q2d(s->tb), nb_frames);
 130     s->frames[best_frame_idx].buf = NULL;
 131
 132     return picref;
 133 }
 134
 135 static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 136 {
 137     int i, j;
 138     AVFilterContext *ctx  = inlink->dst;
 139     ThumbContext *s   = ctx->priv;
 140     AVFilterLink *outlink = ctx->outputs[0];
 141     int *hist = s->frames[s->n].histogram;
 142     const uint8_t *p = frame->data[0];
 143
 144     // keep a reference of each frame
 145     s->frames[s->n].buf = frame;
 146
 147     // update current frame histogram
 148     switch (inlink->format) {
 149     case AV_PIX_FMT_RGB24:
 150     case AV_PIX_FMT_BGR24:
 151         for (j = 0; j < inlink->h; j++) {
 152             for (i = 0; i < inlink->w; i++) {
 153                 hist[0*256 + p[i*3    ]]++;
 154                 hist[1*256 + p[i*3 + 1]]++;
 155                 hist[2*256 + p[i*3 + 2]]++;
 156             }
 157             p += frame->linesize[0];
 158         }
 159         break;
 160     default:
 161         for (int plane = 0; plane < 3; plane++) {
 162             const uint8_t *p = frame->data[plane];
 163             for (j = 0; j < s->planeheight[plane]; j++) {
 164                 for (i = 0; i < s->planewidth[plane]; i++)
 165                     hist[256*plane + p[i]]++;
 166                 p += frame->linesize[plane];
 167             }
 168         }
 169         break;
 170     }
 171
 172     // no selection until the buffer of N frames is filled up
 173     s->n++;
 174     if (s->n < s->n_frames)
 175         return 0;
 176
 177     return ff_filter_frame(outlink, get_best_frame(ctx));
 178 }
 179
 180 static av_cold void uninit(AVFilterContext *ctx)
 181 {
 182     int i;
 183     ThumbContext *s = ctx->priv;
 184     for (i = 0; i < s->n_frames && s->frames && s->frames[i].buf; i++)
 185         av_frame_free(&s->frames[i].buf);
 186     av_freep(&s->frames);
 187 }
 188
 189 static int request_frame(AVFilterLink *link)
 190 {
 191     AVFilterContext *ctx = link->src;
 192     ThumbContext *s = ctx->priv;
 193     int ret = ff_request_frame(ctx->inputs[0]);
 194
 195     if (ret == AVERROR_EOF && s->n) {
 196         ret = ff_filter_frame(link, get_best_frame(ctx));
 197         if (ret < 0)
 198             return ret;
 199         ret = AVERROR_EOF;
 200     }
 201     if (ret < 0)
 202         return ret;
 203     return 0;
 204 }
 205
 206 static int config_props(AVFilterLink *inlink)
 207 {
 208     AVFilterContext *ctx = inlink->dst;
 209     ThumbContext *s = ctx->priv;
 210     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
 211
 212     s->tb = inlink->time_base;
 213     s->planewidth[1]  = s->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
 214     s->planewidth[0]  = s->planewidth[3]  = inlink->w;
 215     s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
 216     s->planeheight[0] = s->planeheight[3] = inlink->h;
 217
 218     return 0;
 219 }
 220
 221 static int query_formats(AVFilterContext *ctx)
 222 {
 223     static const enum AVPixelFormat pix_fmts[] = {
 224         AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
 225         AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
 226         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
 227         AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
 228         AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
 229         AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
 230         AV_PIX_FMT_YUVJ411P,
 231         AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
 232         AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
 233         AV_PIX_FMT_NONE
 234     };
 235     AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
 236     if (!fmts_list)
 237         return AVERROR(ENOMEM);
 238     return ff_set_common_formats(ctx, fmts_list);
 239 }
 240
 241 static const AVFilterPad thumbnail_inputs[] = {
 242     {
 243         .name         = "default",
 244         .type         = AVMEDIA_TYPE_VIDEO,
 245         .config_props = config_props,
 246         .filter_frame = filter_frame,
 247     },
 248     { NULL }
 249 };
 250
 251 static const AVFilterPad thumbnail_outputs[] = {
 252     {
 253         .name          = "default",
 254         .type          = AVMEDIA_TYPE_VIDEO,
 255         .request_frame = request_frame,
 256     },
 257     { NULL }
 258 };
 259
 260 AVFilter ff_vf_thumbnail = {
 261     .name          = "thumbnail",
 262     .description   = NULL_IF_CONFIG_SMALL("Select the most representative frame in a given sequence of consecutive frames."),
 263     .priv_size     = sizeof(ThumbContext),
 264     .init          = init,
 265     .uninit        = uninit,
 266     .query_formats = query_formats,
 267     .inputs        = thumbnail_inputs,
 268     .outputs       = thumbnail_outputs,
 269     .priv_class    = &thumbnail_class,
 270     .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
 271 };