git.sesse.net Git - ffmpeg/blob - libavfilter/vf_normalize.c

   1 /*
   2  * Copyright (c) 2017 Richard Ling
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 /*
  22  * Normalize RGB video (aka histogram stretching, contrast stretching).
  23  * See: https://en.wikipedia.org/wiki/Normalization_(image_processing)
  24  *
  25  * For each channel of each frame, the filter computes the input range and maps
  26  * it linearly to the user-specified output range. The output range defaults
  27  * to the full dynamic range from pure black to pure white.
  28  *
  29  * Naively maximising the dynamic range of each frame of video in isolation
  30  * may cause flickering (rapid changes in brightness of static objects in the
  31  * scene) when small dark or bright objects enter or leave the scene. This
  32  * filter can apply temporal smoothing to the input range to reduce flickering.
  33  * Temporal smoothing is similar to the auto-exposure (automatic gain control)
  34  * on a video camera, which performs the same function; and, like a video
  35  * camera, it may cause a period of over- or under-exposure of the video.
  36  *
  37  * The filter can normalize the R,G,B channels independently, which may cause
  38  * color shifting, or link them together as a single channel, which prevents
  39  * color shifting. More precisely, linked normalization preserves hue (as it's
  40  * defined in HSV/HSL color spaces) while independent normalization does not.
  41  * Independent normalization can be used to remove color casts, such as the
  42  * blue cast from underwater video, restoring more natural colors. The filter
  43  * can also combine independent and linked normalization in any ratio.
  44  *
  45  * Finally the overall strength of the filter can be adjusted, from no effect
  46  * to full normalization.
  47  *
  48  * The 5 AVOptions are:
  49  *   blackpt,   Colors which define the output range. The minimum input value
  50  *   whitept    is mapped to the blackpt. The maximum input value is mapped to
  51  *              the whitept. The defaults are black and white respectively.
  52  *              Specifying white for blackpt and black for whitept will give
  53  *              color-inverted, normalized video. Shades of grey can be used
  54  *              to reduce the dynamic range (contrast). Specifying saturated
  55  *              colors here can create some interesting effects.
  56  *
  57  *   smoothing  The amount of temporal smoothing, expressed in frames (>=0).
  58  *              The minimum and maximum input values of each channel are
  59  *              smoothed using a rolling average over the current frame and
  60  *              that many previous frames of video.  Defaults to 0 (no temporal
  61  *              smoothing).
  62  *
  63  *   independence
  64  *              Controls the ratio of independent (color shifting) channel
  65  *              normalization to linked (color preserving) normalization. 0.0
  66  *              is fully linked, 1.0 is fully independent. Defaults to fully
  67  *              independent.
  68  *
  69  *   strength   Overall strength of the filter. 1.0 is full strength. 0.0 is
  70  *              a rather expensive no-op. Values in between can give a gentle
  71  *              boost to low-contrast video without creating an artificial
  72  *              over-processed look. The default is full strength.
  73  */
  74
  75 #include "libavutil/imgutils.h"
  76 #include "libavutil/opt.h"
  77 #include "libavutil/pixdesc.h"
  78 #include "avfilter.h"
  79 #include "drawutils.h"
  80 #include "formats.h"
  81 #include "internal.h"
  82 #include "video.h"
  83
  84 typedef struct NormalizeHistory {
  85     uint8_t *history;       // History entries.
  86     uint32_t history_sum;   // Sum of history entries.
  87 } NormalizeHistory;
  88
  89 typedef struct NormalizeLocal {
  90     uint8_t in;     // Original input byte value for this frame.
  91     float smoothed; // Smoothed input value [0,255].
  92     float out;      // Output value [0,255]
  93 } NormalizeLocal;
  94
  95 typedef struct NormalizeContext {
  96     const AVClass *class;
  97
  98     // Storage for the corresponding AVOptions
  99     uint8_t blackpt[4];
 100     uint8_t whitept[4];
 101     int smoothing;
 102     float independence;
 103     float strength;
 104
 105     uint8_t co[4];      // Offsets to R,G,B,A bytes respectively in each pixel
 106     int num_components; // Number of components in the pixel format
 107     int step;
 108     int history_len;    // Number of frames to average; based on smoothing factor
 109     int frame_num;      // Increments on each frame, starting from 0.
 110
 111     // Per-extremum, per-channel history, for temporal smoothing.
 112     NormalizeHistory min[3], max[3];           // Min and max for each channel in {R,G,B}.
 113     uint8_t *history_mem;       // Single allocation for above history entries
 114
 115     uint8_t lut[3][256];    // Lookup table
 116
 117     void (*find_min_max)(struct NormalizeContext *s, AVFrame *in, NormalizeLocal min[3], NormalizeLocal max[3]);
 118     void (*process)(struct NormalizeContext *s, AVFrame *in, AVFrame *out);
 119 } NormalizeContext;
 120
 121 #define OFFSET(x) offsetof(NormalizeContext, x)
 122 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 123 #define FLAGSR AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
 124
 125 static const AVOption normalize_options[] = {
 126     { "blackpt",  "output color to which darkest input color is mapped",  OFFSET(blackpt), AV_OPT_TYPE_COLOR, { .str = "black" }, CHAR_MIN, CHAR_MAX, FLAGSR },
 127     { "whitept",  "output color to which brightest input color is mapped",  OFFSET(whitept), AV_OPT_TYPE_COLOR, { .str = "white" }, CHAR_MIN, CHAR_MAX, FLAGSR },
 128     { "smoothing",  "amount of temporal smoothing of the input range, to reduce flicker", OFFSET(smoothing), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX/8, FLAGS },
 129     { "independence", "proportion of independent to linked channel normalization", OFFSET(independence), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 1.0, FLAGSR },
 130     { "strength", "strength of filter, from no effect to full normalization", OFFSET(strength), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 1.0, FLAGSR },
 131     { NULL }
 132 };
 133
 134 AVFILTER_DEFINE_CLASS(normalize);
 135
 136 static void find_min_max(NormalizeContext *s, AVFrame *in, NormalizeLocal min[3], NormalizeLocal max[3])
 137 {
 138     for (int c = 0; c < 3; c++)
 139         min[c].in = max[c].in = in->data[0][s->co[c]];
 140     for (int y = 0; y < in->height; y++) {
 141         uint8_t *inp = in->data[0] + y * in->linesize[0];
 142         for (int x = 0; x < in->width; x++) {
 143             for (int c = 0; c < 3; c++) {
 144                 min[c].in = FFMIN(min[c].in, inp[s->co[c]]);
 145                 max[c].in = FFMAX(max[c].in, inp[s->co[c]]);
 146             }
 147             inp += s->step;
 148         }
 149     }
 150 }
 151
 152 static void process(NormalizeContext *s, AVFrame *in, AVFrame *out)
 153 {
 154     for (int y = 0; y < in->height; y++) {
 155         uint8_t *inp = in->data[0] + y * in->linesize[0];
 156         uint8_t *outp = out->data[0] + y * out->linesize[0];
 157         for (int x = 0; x < in->width; x++) {
 158             for (int c = 0; c < 3; c++)
 159                 outp[s->co[c]] = s->lut[c][inp[s->co[c]]];
 160             if (s->num_components == 4)
 161                 // Copy alpha as-is.
 162                 outp[s->co[3]] = inp[s->co[3]];
 163             inp += s->step;
 164             outp += s->step;
 165         }
 166     }
 167 }
 168
 169 static void find_min_max_planar(NormalizeContext *s, AVFrame *in, NormalizeLocal min[3], NormalizeLocal max[3])
 170 {
 171     min[0].in = max[0].in = in->data[2][0];
 172     min[1].in = max[1].in = in->data[0][0];
 173     min[2].in = max[2].in = in->data[1][0];
 174     for (int y = 0; y < in->height; y++) {
 175         uint8_t *inrp = in->data[2] + y * in->linesize[2];
 176         uint8_t *ingp = in->data[0] + y * in->linesize[0];
 177         uint8_t *inbp = in->data[1] + y * in->linesize[1];
 178         for (int x = 0; x < in->width; x++) {
 179             min[0].in = FFMIN(min[0].in, inrp[x]);
 180             max[0].in = FFMAX(max[0].in, inrp[x]);
 181             min[1].in = FFMIN(min[1].in, ingp[x]);
 182             max[1].in = FFMAX(max[1].in, ingp[x]);
 183             min[2].in = FFMIN(min[2].in, inbp[x]);
 184             max[2].in = FFMAX(max[2].in, inbp[x]);
 185         }
 186     }
 187 }
 188
 189 static void process_planar(NormalizeContext *s, AVFrame *in, AVFrame *out)
 190 {
 191     for (int y = 0; y < in->height; y++) {
 192         uint8_t *inrp = in->data[2] + y * in->linesize[2];
 193         uint8_t *ingp = in->data[0] + y * in->linesize[0];
 194         uint8_t *inbp = in->data[1] + y * in->linesize[1];
 195         uint8_t *inap = in->data[3] + y * in->linesize[3];
 196         uint8_t *outrp = out->data[2] + y * out->linesize[2];
 197         uint8_t *outgp = out->data[0] + y * out->linesize[0];
 198         uint8_t *outbp = out->data[1] + y * out->linesize[1];
 199         uint8_t *outap = out->data[3] + y * out->linesize[3];
 200         for (int x = 0; x < in->width; x++) {
 201             outrp[x] = s->lut[0][inrp[x]];
 202             outgp[x] = s->lut[1][ingp[x]];
 203             outbp[x] = s->lut[2][inbp[x]];
 204             if (s->num_components == 4)
 205                 outap[x] = inap[x];
 206         }
 207     }
 208 }
 209
 210 // This function is the main guts of the filter. Normalizes the input frame
 211 // into the output frame. The frames are known to have the same dimensions
 212 // and pixel format.
 213 static void normalize(NormalizeContext *s, AVFrame *in, AVFrame *out)
 214 {
 215     // Per-extremum, per-channel local variables.
 216     NormalizeLocal min[3], max[3];   // Min and max for each channel in {R,G,B}.
 217
 218     float rgb_min_smoothed; // Min input range for linked normalization
 219     float rgb_max_smoothed; // Max input range for linked normalization
 220     int c;
 221
 222     // First, scan the input frame to find, for each channel, the minimum
 223     // (min.in) and maximum (max.in) values present in the channel.
 224     s->find_min_max(s, in, min, max);
 225
 226     // Next, for each channel, push min.in and max.in into their respective
 227     // histories, to determine the min.smoothed and max.smoothed for this frame.
 228     {
 229         int history_idx = s->frame_num % s->history_len;
 230         // Assume the history is not yet full; num_history_vals is the number
 231         // of frames received so far including the current frame.
 232         int num_history_vals = s->frame_num + 1;
 233         if (s->frame_num >= s->history_len) {
 234             //The history is full; drop oldest value and cap num_history_vals.
 235             for (c = 0; c < 3; c++) {
 236                 s->min[c].history_sum -= s->min[c].history[history_idx];
 237                 s->max[c].history_sum -= s->max[c].history[history_idx];
 238             }
 239             num_history_vals = s->history_len;
 240         }
 241         // For each extremum, update history_sum and calculate smoothed value
 242         // as the rolling average of the history entries.
 243         for (c = 0; c < 3; c++) {
 244             s->min[c].history_sum += (s->min[c].history[history_idx] = min[c].in);
 245             min[c].smoothed = s->min[c].history_sum / (float)num_history_vals;
 246             s->max[c].history_sum += (s->max[c].history[history_idx] = max[c].in);
 247             max[c].smoothed = s->max[c].history_sum / (float)num_history_vals;
 248         }
 249     }
 250
 251     // Determine the input range for linked normalization. This is simply the
 252     // minimum of the per-channel minimums, and the maximum of the per-channel
 253     // maximums.
 254     rgb_min_smoothed = FFMIN3(min[0].smoothed, min[1].smoothed, min[2].smoothed);
 255     rgb_max_smoothed = FFMAX3(max[0].smoothed, max[1].smoothed, max[2].smoothed);
 256
 257     // Now, process each channel to determine the input and output range and
 258     // build the lookup tables.
 259     for (c = 0; c < 3; c++) {
 260         int in_val;
 261         // Adjust the input range for this channel [min.smoothed,max.smoothed]
 262         // by mixing in the correct proportion of the linked normalization
 263         // input range [rgb_min_smoothed,rgb_max_smoothed].
 264         min[c].smoothed = (min[c].smoothed  *         s->independence)
 265                         + (rgb_min_smoothed * (1.0f - s->independence));
 266         max[c].smoothed = (max[c].smoothed  *         s->independence)
 267                         + (rgb_max_smoothed * (1.0f - s->independence));
 268
 269         // Calculate the output range [min.out,max.out] as a ratio of the full-
 270         // strength output range [blackpt,whitept] and the original input range
 271         // [min.in,max.in], based on the user-specified filter strength.
 272         min[c].out = (s->blackpt[c] *         s->strength)
 273                    + (min[c].in     * (1.0f - s->strength));
 274         max[c].out = (s->whitept[c] *         s->strength)
 275                    + (max[c].in     * (1.0f - s->strength));
 276
 277         // Now, build a lookup table which linearly maps the adjusted input range
 278         // [min.smoothed,max.smoothed] to the output range [min.out,max.out].
 279         // Perform the linear interpolation for each x:
 280         //     lut[x] = (int)(float(x - min.smoothed) * scale + max.out + 0.5)
 281         // where scale = (max.out - min.out) / (max.smoothed - min.smoothed)
 282         if (min[c].smoothed == max[c].smoothed) {
 283             // There is no dynamic range to expand. No mapping for this channel.
 284             for (in_val = min[c].in; in_val <= max[c].in; in_val++)
 285                 s->lut[c][in_val] = min[c].out;
 286         } else {
 287             // We must set lookup values for all values in the original input
 288             // range [min.in,max.in]. Since the original input range may be
 289             // larger than [min.smoothed,max.smoothed], some output values may
 290             // fall outside the [0,255] dynamic range. We need to clamp them.
 291             float scale = (max[c].out - min[c].out) / (max[c].smoothed - min[c].smoothed);
 292             for (in_val = min[c].in; in_val <= max[c].in; in_val++) {
 293                 int out_val = (in_val - min[c].smoothed) * scale + min[c].out + 0.5f;
 294                 out_val = FFMAX(out_val, 0);
 295                 out_val = FFMIN(out_val, 255);
 296                 s->lut[c][in_val] = out_val;
 297             }
 298         }
 299     }
 300
 301     // Finally, process the pixels of the input frame using the lookup tables.
 302     s->process(s, in, out);
 303
 304     s->frame_num++;
 305 }
 306
 307 // Now we define all the functions accessible from the ff_vf_normalize class,
 308 // which is ffmpeg's interface to our filter.  See doc/filter_design.txt and
 309 // doc/writing_filters.txt for descriptions of what these interface functions
 310 // are expected to do.
 311
 312 // Set the pixel formats that our filter supports. We should be able to process
 313 // any 8-bit RGB formats. 16-bit support might be useful one day.
 314 static int query_formats(AVFilterContext *ctx)
 315 {
 316     static const enum AVPixelFormat pixel_fmts[] = {
 317         AV_PIX_FMT_RGB24,
 318         AV_PIX_FMT_BGR24,
 319         AV_PIX_FMT_ARGB,
 320         AV_PIX_FMT_RGBA,
 321         AV_PIX_FMT_ABGR,
 322         AV_PIX_FMT_BGRA,
 323         AV_PIX_FMT_0RGB,
 324         AV_PIX_FMT_RGB0,
 325         AV_PIX_FMT_0BGR,
 326         AV_PIX_FMT_BGR0,
 327         AV_PIX_FMT_GBRAP,
 328         AV_PIX_FMT_GBRP,
 329         AV_PIX_FMT_NONE
 330     };
 331     // According to filter_design.txt, using ff_set_common_formats() this way
 332     // ensures the pixel formats of the input and output will be the same. That
 333     // saves a bit of effort possibly needing to handle format conversions.
 334     AVFilterFormats *formats = ff_make_format_list(pixel_fmts);
 335     if (!formats)
 336         return AVERROR(ENOMEM);
 337     return ff_set_common_formats(ctx, formats);
 338 }
 339
 340 // At this point we know the pixel format used for both input and output.  We
 341 // can also access the frame rate of the input video and allocate some memory
 342 // appropriately
 343 static int config_input(AVFilterLink *inlink)
 344 {
 345     NormalizeContext *s = inlink->dst->priv;
 346     // Store offsets to R,G,B,A bytes respectively in each pixel
 347     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
 348     int c, planar;
 349
 350     ff_fill_rgba_map(s->co, inlink->format);
 351     s->num_components = desc->nb_components;
 352     s->step = av_get_padded_bits_per_pixel(desc) >> 3;
 353     // Convert smoothing value to history_len (a count of frames to average,
 354     // must be at least 1).  Currently this is a direct assignment, but the
 355     // smoothing value was originally envisaged as a number of seconds.  In
 356     // future it would be nice to set history_len using a number of seconds,
 357     // but VFR video is currently an obstacle to doing so.
 358     s->history_len = s->smoothing + 1;
 359     // Allocate the history buffers -- there are 6 -- one for each extrema.
 360     // s->smoothing is limited to INT_MAX/8, so that (s->history_len * 6)
 361     // can't overflow on 32bit causing a too-small allocation.
 362     s->history_mem = av_malloc(s->history_len * 6);
 363     if (s->history_mem == NULL)
 364         return AVERROR(ENOMEM);
 365
 366     for (c = 0; c < 3; c++) {
 367         s->min[c].history = s->history_mem + (c*2)   * s->history_len;
 368         s->max[c].history = s->history_mem + (c*2+1) * s->history_len;
 369     }
 370
 371     planar = desc->flags & AV_PIX_FMT_FLAG_PLANAR;
 372
 373     s->find_min_max = planar ? find_min_max_planar : find_min_max;
 374     s->process = planar? process_planar : process;
 375
 376     return 0;
 377 }
 378
 379 // Free any memory allocations here
 380 static av_cold void uninit(AVFilterContext *ctx)
 381 {
 382     NormalizeContext *s = ctx->priv;
 383
 384     av_freep(&s->history_mem);
 385 }
 386
 387 // This function is pretty much standard from doc/writing_filters.txt.  It
 388 // tries to do in-place filtering where possible, only allocating a new output
 389 // frame when absolutely necessary.
 390 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 391 {
 392     AVFilterContext *ctx = inlink->dst;
 393     AVFilterLink *outlink = ctx->outputs[0];
 394     NormalizeContext *s = ctx->priv;
 395     AVFrame *out;
 396     // Set 'direct' if we can modify the input frame in-place.  Otherwise we
 397     // need to retrieve a new frame from the output link.
 398     int direct = av_frame_is_writable(in) && !ctx->is_disabled;
 399
 400     if (direct) {
 401         out = in;
 402     } else {
 403         out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
 404         if (!out) {
 405             av_frame_free(&in);
 406             return AVERROR(ENOMEM);
 407         }
 408         av_frame_copy_props(out, in);
 409     }
 410
 411     // Now we've got the input and output frames (which may be the same frame)
 412     // perform the filtering with our custom function.
 413     normalize(s, in, out);
 414
 415     if (ctx->is_disabled) {
 416         av_frame_free(&out);
 417         return ff_filter_frame(outlink, in);
 418     }
 419
 420     if (!direct)
 421         av_frame_free(&in);
 422
 423     return ff_filter_frame(outlink, out);
 424 }
 425
 426 static const AVFilterPad inputs[] = {
 427     {
 428         .name         = "default",
 429         .type         = AVMEDIA_TYPE_VIDEO,
 430         .filter_frame = filter_frame,
 431         .config_props = config_input,
 432     },
 433     { NULL }
 434 };
 435
 436 static const AVFilterPad outputs[] = {
 437     {
 438         .name = "default",
 439         .type = AVMEDIA_TYPE_VIDEO,
 440     },
 441     { NULL }
 442 };
 443
 444 AVFilter ff_vf_normalize = {
 445     .name          = "normalize",
 446     .description   = NULL_IF_CONFIG_SMALL("Normalize RGB video."),
 447     .priv_size     = sizeof(NormalizeContext),
 448     .priv_class    = &normalize_class,
 449     .uninit        = uninit,
 450     .query_formats = query_formats,
 451     .inputs        = inputs,
 452     .outputs       = outputs,
 453     .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
 454     .process_command = ff_filter_process_command,
 455 };