/*
 * Copyright (c) 2017 Richard Ling
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/*
 * Normalize RGB video (aka histogram stretching, contrast stretching).
 * See: https://en.wikipedia.org/wiki/Normalization_(image_processing)
 *
 * For each channel of each frame, the filter computes the input range and maps
 * it linearly to the user-specified output range. The output range defaults
 * to the full dynamic range from pure black to pure white.
 *
 * Naively maximising the dynamic range of each frame of video in isolation
 * may cause flickering (rapid changes in brightness of static objects in the
 * scene) when small dark or bright objects enter or leave the scene. This
 * filter can apply temporal smoothing to the input range to reduce flickering.
 * Temporal smoothing is similar to the auto-exposure (automatic gain control)
 * on a video camera, which performs the same function; and, like a video
 * camera, it may cause a period of over- or under-exposure of the video.
 *
 * The filter can normalize the R,G,B channels independently, which may cause
 * color shifting, or link them together as a single channel, which prevents
 * color shifting. More precisely, linked normalization preserves hue (as it's
 * defined in HSV/HSL color spaces) while independent normalization does not.
 * Independent normalization can be used to remove color casts, such as the
 * blue cast from underwater video, restoring more natural colors. The filter
 * can also combine independent and linked normalization in any ratio.
 *
 * Finally the overall strength of the filter can be adjusted, from no effect
 * to full normalization.
 *
 * The 5 AVOptions are:
 *   blackpt,   Colors which define the output range. The minimum input value
 *   whitept    is mapped to the blackpt. The maximum input value is mapped to
 *              the whitept. The defaults are black and white respectively.
 *              Specifying white for blackpt and black for whitept will give
 *              color-inverted, normalized video. Shades of grey can be used
 *              to reduce the dynamic range (contrast). Specifying saturated
 *              colors here can create some interesting effects.
 *
 *   smoothing  The amount of temporal smoothing, expressed in frames (>=0).
 *              The minimum and maximum input values of each channel are
 *              smoothed using a rolling average over the current frame and
 *              that many previous frames of video. Defaults to 0 (no temporal
 *              smoothing).
 *
 *   independence
 *              Controls the ratio of independent (color shifting) channel
 *              normalization to linked (color preserving) normalization. 0.0
 *              is fully linked, 1.0 is fully independent. Defaults to fully
 *              independent.
 *
 *   strength   Overall strength of the filter. 1.0 is full strength. 0.0 is
 *              a rather expensive no-op. Values in between can give a gentle
 *              boost to low-contrast video without creating an artificial
 *              over-processed look. The default is full strength.
 */
75 #include "libavutil/imgutils.h"
76 #include "libavutil/opt.h"
77 #include "libavutil/pixdesc.h"
79 #include "drawutils.h"
/**
 * Rolling history of one extremum (a minimum or a maximum) of one channel,
 * used for temporal smoothing.
 */
typedef struct NormalizeHistory {
    uint8_t *history;       // History entries, one byte per remembered frame.
    // Sum of the history entries. Kept 64 bits wide: the "smoothing" option
    // allows up to INT_MAX/8 + 1 entries of up to 255 each, and that total
    // does not fit in 32 bits.
    uint64_t history_sum;
} NormalizeHistory;
/**
 * Per-frame working values for one extremum (minimum or maximum) of one
 * channel.
 */
typedef struct NormalizeLocal {
    uint8_t in;     // Original input byte value for this frame.
    float smoothed; // Smoothed input value [0,255].
    float out;      // Output value [0,255]
} NormalizeLocal;
95 typedef struct NormalizeContext {
98 // Storage for the corresponding AVOptions
105 uint8_t co[4]; // Offsets to R,G,B,A bytes respectively in each pixel
106 int num_components; // Number of components in the pixel format
108 int history_len; // Number of frames to average; based on smoothing factor
109 int frame_num; // Increments on each frame, starting from 0.
111 // Per-extremum, per-channel history, for temporal smoothing.
112 NormalizeHistory min[3], max[3]; // Min and max for each channel in {R,G,B}.
113 uint8_t *history_mem; // Single allocation for above history entries
115 uint8_t lut[3][256]; // Lookup table
117 void (*find_min_max)(struct NormalizeContext *s, AVFrame *in, NormalizeLocal min[3], NormalizeLocal max[3]);
118 void (*process)(struct NormalizeContext *s, AVFrame *in, AVFrame *out);
// Byte offset of an option's storage member within NormalizeContext.
#define OFFSET(x) offsetof(NormalizeContext, x)
// Option flag sets. Parenthesized so the expansion stays a single operand
// whatever operators surround it at the point of use.
#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM)
// Same as FLAGS, plus AV_OPT_FLAG_RUNTIME_PARAM.
#define FLAGSR (AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM)
125 static const AVOption normalize_options[] = {
126 { "blackpt", "output color to which darkest input color is mapped", OFFSET(blackpt), AV_OPT_TYPE_COLOR, { .str = "black" }, CHAR_MIN, CHAR_MAX, FLAGSR },
127 { "whitept", "output color to which brightest input color is mapped", OFFSET(whitept), AV_OPT_TYPE_COLOR, { .str = "white" }, CHAR_MIN, CHAR_MAX, FLAGSR },
128 { "smoothing", "amount of temporal smoothing of the input range, to reduce flicker", OFFSET(smoothing), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX/8, FLAGS },
129 { "independence", "proportion of independent to linked channel normalization", OFFSET(independence), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 1.0, FLAGSR },
130 { "strength", "strength of filter, from no effect to full normalization", OFFSET(strength), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 1.0, FLAGSR },
134 AVFILTER_DEFINE_CLASS(normalize);
136 static void find_min_max(NormalizeContext *s, AVFrame *in, NormalizeLocal min[3], NormalizeLocal max[3])
138 for (int c = 0; c < 3; c++)
139 min[c].in = max[c].in = in->data[0][s->co[c]];
140 for (int y = 0; y < in->height; y++) {
141 uint8_t *inp = in->data[0] + y * in->linesize[0];
142 for (int x = 0; x < in->width; x++) {
143 for (int c = 0; c < 3; c++) {
144 min[c].in = FFMIN(min[c].in, inp[s->co[c]]);
145 max[c].in = FFMAX(max[c].in, inp[s->co[c]]);
152 static void process(NormalizeContext *s, AVFrame *in, AVFrame *out)
154 for (int y = 0; y < in->height; y++) {
155 uint8_t *inp = in->data[0] + y * in->linesize[0];
156 uint8_t *outp = out->data[0] + y * out->linesize[0];
157 for (int x = 0; x < in->width; x++) {
158 for (int c = 0; c < 3; c++)
159 outp[s->co[c]] = s->lut[c][inp[s->co[c]]];
160 if (s->num_components == 4)
162 outp[s->co[3]] = inp[s->co[3]];
169 static void find_min_max_planar(NormalizeContext *s, AVFrame *in, NormalizeLocal min[3], NormalizeLocal max[3])
171 min[0].in = max[0].in = in->data[2][0];
172 min[1].in = max[1].in = in->data[0][0];
173 min[2].in = max[2].in = in->data[1][0];
174 for (int y = 0; y < in->height; y++) {
175 uint8_t *inrp = in->data[2] + y * in->linesize[2];
176 uint8_t *ingp = in->data[0] + y * in->linesize[0];
177 uint8_t *inbp = in->data[1] + y * in->linesize[1];
178 for (int x = 0; x < in->width; x++) {
179 min[0].in = FFMIN(min[0].in, inrp[x]);
180 max[0].in = FFMAX(max[0].in, inrp[x]);
181 min[1].in = FFMIN(min[1].in, ingp[x]);
182 max[1].in = FFMAX(max[1].in, ingp[x]);
183 min[2].in = FFMIN(min[2].in, inbp[x]);
184 max[2].in = FFMAX(max[2].in, inbp[x]);
189 static void process_planar(NormalizeContext *s, AVFrame *in, AVFrame *out)
191 for (int y = 0; y < in->height; y++) {
192 uint8_t *inrp = in->data[2] + y * in->linesize[2];
193 uint8_t *ingp = in->data[0] + y * in->linesize[0];
194 uint8_t *inbp = in->data[1] + y * in->linesize[1];
195 uint8_t *inap = in->data[3] + y * in->linesize[3];
196 uint8_t *outrp = out->data[2] + y * out->linesize[2];
197 uint8_t *outgp = out->data[0] + y * out->linesize[0];
198 uint8_t *outbp = out->data[1] + y * out->linesize[1];
199 uint8_t *outap = out->data[3] + y * out->linesize[3];
200 for (int x = 0; x < in->width; x++) {
201 outrp[x] = s->lut[0][inrp[x]];
202 outgp[x] = s->lut[1][ingp[x]];
203 outbp[x] = s->lut[2][inbp[x]];
204 if (s->num_components == 4)
210 // This function is the main guts of the filter. Normalizes the input frame
211 // into the output frame. The frames are known to have the same dimensions
213 static void normalize(NormalizeContext *s, AVFrame *in, AVFrame *out)
215 // Per-extremum, per-channel local variables.
216 NormalizeLocal min[3], max[3]; // Min and max for each channel in {R,G,B}.
218 float rgb_min_smoothed; // Min input range for linked normalization
219 float rgb_max_smoothed; // Max input range for linked normalization
222 // First, scan the input frame to find, for each channel, the minimum
223 // (min.in) and maximum (max.in) values present in the channel.
224 s->find_min_max(s, in, min, max);
226 // Next, for each channel, push min.in and max.in into their respective
227 // histories, to determine the min.smoothed and max.smoothed for this frame.
229 int history_idx = s->frame_num % s->history_len;
230 // Assume the history is not yet full; num_history_vals is the number
231 // of frames received so far including the current frame.
232 int num_history_vals = s->frame_num + 1;
233 if (s->frame_num >= s->history_len) {
234 //The history is full; drop oldest value and cap num_history_vals.
235 for (c = 0; c < 3; c++) {
236 s->min[c].history_sum -= s->min[c].history[history_idx];
237 s->max[c].history_sum -= s->max[c].history[history_idx];
239 num_history_vals = s->history_len;
241 // For each extremum, update history_sum and calculate smoothed value
242 // as the rolling average of the history entries.
243 for (c = 0; c < 3; c++) {
244 s->min[c].history_sum += (s->min[c].history[history_idx] = min[c].in);
245 min[c].smoothed = s->min[c].history_sum / (float)num_history_vals;
246 s->max[c].history_sum += (s->max[c].history[history_idx] = max[c].in);
247 max[c].smoothed = s->max[c].history_sum / (float)num_history_vals;
251 // Determine the input range for linked normalization. This is simply the
252 // minimum of the per-channel minimums, and the maximum of the per-channel
254 rgb_min_smoothed = FFMIN3(min[0].smoothed, min[1].smoothed, min[2].smoothed);
255 rgb_max_smoothed = FFMAX3(max[0].smoothed, max[1].smoothed, max[2].smoothed);
257 // Now, process each channel to determine the input and output range and
258 // build the lookup tables.
259 for (c = 0; c < 3; c++) {
261 // Adjust the input range for this channel [min.smoothed,max.smoothed]
262 // by mixing in the correct proportion of the linked normalization
263 // input range [rgb_min_smoothed,rgb_max_smoothed].
264 min[c].smoothed = (min[c].smoothed * s->independence)
265 + (rgb_min_smoothed * (1.0f - s->independence));
266 max[c].smoothed = (max[c].smoothed * s->independence)
267 + (rgb_max_smoothed * (1.0f - s->independence));
269 // Calculate the output range [min.out,max.out] as a ratio of the full-
270 // strength output range [blackpt,whitept] and the original input range
271 // [min.in,max.in], based on the user-specified filter strength.
272 min[c].out = (s->blackpt[c] * s->strength)
273 + (min[c].in * (1.0f - s->strength));
274 max[c].out = (s->whitept[c] * s->strength)
275 + (max[c].in * (1.0f - s->strength));
277 // Now, build a lookup table which linearly maps the adjusted input range
278 // [min.smoothed,max.smoothed] to the output range [min.out,max.out].
279 // Perform the linear interpolation for each x:
280 // lut[x] = (int)(float(x - min.smoothed) * scale + max.out + 0.5)
281 // where scale = (max.out - min.out) / (max.smoothed - min.smoothed)
282 if (min[c].smoothed == max[c].smoothed) {
283 // There is no dynamic range to expand. No mapping for this channel.
284 for (in_val = min[c].in; in_val <= max[c].in; in_val++)
285 s->lut[c][in_val] = min[c].out;
287 // We must set lookup values for all values in the original input
288 // range [min.in,max.in]. Since the original input range may be
289 // larger than [min.smoothed,max.smoothed], some output values may
290 // fall outside the [0,255] dynamic range. We need to clamp them.
291 float scale = (max[c].out - min[c].out) / (max[c].smoothed - min[c].smoothed);
292 for (in_val = min[c].in; in_val <= max[c].in; in_val++) {
293 int out_val = (in_val - min[c].smoothed) * scale + min[c].out + 0.5f;
294 out_val = FFMAX(out_val, 0);
295 out_val = FFMIN(out_val, 255);
296 s->lut[c][in_val] = out_val;
301 // Finally, process the pixels of the input frame using the lookup tables.
302 s->process(s, in, out);
307 // Now we define all the functions accessible from the ff_vf_normalize class,
308 // which is ffmpeg's interface to our filter. See doc/filter_design.txt and
309 // doc/writing_filters.txt for descriptions of what these interface functions
310 // are expected to do.
312 // Set the pixel formats that our filter supports. We should be able to process
313 // any 8-bit RGB formats. 16-bit support might be useful one day.
// NOTE(review): the number at the start of each line and the gaps between
// those numbers show that lines are missing from this copy. The entries of
// pixel_fmts[] and the condition guarding the AVERROR(ENOMEM) return are not
// visible here; restore them from the upstream file rather than guessing.
314 static int query_formats(AVFilterContext *ctx)
316 static const enum AVPixelFormat pixel_fmts[] = {
331 // According to filter_design.txt, using ff_set_common_formats() this way
332 // ensures the pixel formats of the input and output will be the same. That
333 // saves a bit of effort possibly needing to handle format conversions.
// Build a format list from pixel_fmts and install it as the common format
// set for ctx.
334 AVFilterFormats *formats = ff_make_format_list(pixel_fmts);
336 return AVERROR(ENOMEM);
337 return ff_set_common_formats(ctx, formats);
340 // At this point we know the pixel format used for both input and output. We
341 // can also access the frame rate of the input video and allocate some memory
343 static int config_input(AVFilterLink *inlink)
345 NormalizeContext *s = inlink->dst->priv;
346 // Store offsets to R,G,B,A bytes respectively in each pixel
347 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
350 ff_fill_rgba_map(s->co, inlink->format);
351 s->num_components = desc->nb_components;
352 s->step = av_get_padded_bits_per_pixel(desc) >> 3;
353 // Convert smoothing value to history_len (a count of frames to average,
354 // must be at least 1). Currently this is a direct assignment, but the
355 // smoothing value was originally envisaged as a number of seconds. In
356 // future it would be nice to set history_len using a number of seconds,
357 // but VFR video is currently an obstacle to doing so.
358 s->history_len = s->smoothing + 1;
359 // Allocate the history buffers -- there are 6 -- one for each extrema.
360 // s->smoothing is limited to INT_MAX/8, so that (s->history_len * 6)
361 // can't overflow on 32bit causing a too-small allocation.
362 s->history_mem = av_malloc(s->history_len * 6);
363 if (s->history_mem == NULL)
364 return AVERROR(ENOMEM);
366 for (c = 0; c < 3; c++) {
367 s->min[c].history = s->history_mem + (c*2) * s->history_len;
368 s->max[c].history = s->history_mem + (c*2+1) * s->history_len;
371 planar = desc->flags & AV_PIX_FMT_FLAG_PLANAR;
373 s->find_min_max = planar ? find_min_max_planar : find_min_max;
374 s->process = planar? process_planar : process;
379 // Free any memory allocations here
380 static av_cold void uninit(AVFilterContext *ctx)
382 NormalizeContext *s = ctx->priv;
384 av_freep(&s->history_mem);
387 // This function is pretty much standard from doc/writing_filters.txt. It
388 // tries to do in-place filtering where possible, only allocating a new output
389 // frame when absolutely necessary.
390 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
392 AVFilterContext *ctx = inlink->dst;
393 AVFilterLink *outlink = ctx->outputs[0];
394 NormalizeContext *s = ctx->priv;
396 // Set 'direct' if we can modify the input frame in-place. Otherwise we
397 // need to retrieve a new frame from the output link.
398 int direct = av_frame_is_writable(in) && !ctx->is_disabled;
403 out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
406 return AVERROR(ENOMEM);
408 av_frame_copy_props(out, in);
411 // Now we've got the input and output frames (which may be the same frame)
412 // perform the filtering with our custom function.
413 normalize(s, in, out);
415 if (ctx->is_disabled) {
417 return ff_filter_frame(outlink, in);
423 return ff_filter_frame(outlink, out);
// Input pad table: one video pad whose frames are handled by filter_frame()
// and whose link is configured by config_input().
// NOTE(review): lines are missing from this copy (see the gaps in the leading
// numbers); the entry's braces, any .name member and any array terminator are
// not visible here -- restore them from the upstream file.
426 static const AVFilterPad inputs[] = {
429 .type = AVMEDIA_TYPE_VIDEO,
430 .filter_frame = filter_frame,
431 .config_props = config_input,
// Output pad table: one video pad.
// NOTE(review): lines are missing from this copy (see the gaps in the leading
// numbers); the entry's braces, any .name member and any array terminator are
// not visible here -- restore them from the upstream file.
436 static const AVFilterPad outputs[] = {
439 .type = AVMEDIA_TYPE_VIDEO,
// Filter definition exported to libavfilter.
// NOTE(review): lines are missing from this copy (see the gaps in the leading
// numbers). No .name, .uninit, .inputs or .outputs members and no closing
// brace are visible here, although uninit(), inputs[] and outputs[] are
// defined above -- restore the missing members from the upstream file.
444 AVFilter ff_vf_normalize = {
446 .description = NULL_IF_CONFIG_SMALL("Normalize RGB video."),
447 .priv_size = sizeof(NormalizeContext),
448 .priv_class = &normalize_class,
450 .query_formats = query_formats,
// Timeline support is handled inside filter_frame(), which tests
// ctx->is_disabled itself.
453 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
454 .process_command = ff_filter_process_command,