git.sesse.net Git - ffmpeg/blob - libavfilter/af_volumedetect.c

   1 /*
   2  * Copyright (c) 2012 Nicolas George
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public License
   8  * as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public License
  17  * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include "libavutil/channel_layout.h"
  22 #include "libavutil/avassert.h"
  23 #include "audio.h"
  24 #include "avfilter.h"
  25 #include "internal.h"
  26
  27 typedef struct {
  28     /**
  29      * Number of samples at each PCM value.
  30      * histogram[0x8000 + i] is the number of samples at value i.
  31      * The extra element is there for symmetry.
  32      */
  33     uint64_t histogram[0x10001];
  34 } VolDetectContext;
  35
  36 static int query_formats(AVFilterContext *ctx)
  37 {
  38     static const enum AVSampleFormat sample_fmts[] = {
  39         AV_SAMPLE_FMT_S16,
  40         AV_SAMPLE_FMT_S16P,
  41         AV_SAMPLE_FMT_NONE
  42     };
  43     AVFilterFormats *formats;
  44
  45     if (!(formats = ff_make_format_list(sample_fmts)))
  46         return AVERROR(ENOMEM);
  47     ff_set_common_formats(ctx, formats);
  48
  49     return 0;
  50 }
  51
  52 static int filter_frame(AVFilterLink *inlink, AVFrame *samples)
  53 {
  54     AVFilterContext *ctx = inlink->dst;
  55     VolDetectContext *vd = ctx->priv;
  56     int64_t layout  = samples->channel_layout;
  57     int nb_samples  = samples->nb_samples;
  58     int nb_channels = av_get_channel_layout_nb_channels(layout);
  59     int nb_planes   = nb_channels;
  60     int plane, i;
  61     int16_t *pcm;
  62
  63     if (!av_sample_fmt_is_planar(samples->format)) {
  64         nb_samples *= nb_channels;
  65         nb_planes = 1;
  66     }
  67     for (plane = 0; plane < nb_planes; plane++) {
  68         pcm = (int16_t *)samples->extended_data[plane];
  69         for (i = 0; i < nb_samples; i++)
  70             vd->histogram[pcm[i] + 0x8000]++;
  71     }
  72
  73     return ff_filter_frame(inlink->dst->outputs[0], samples);
  74 }
  75
  76 #define MAX_DB 91
  77
  78 static inline double logdb(uint64_t v)
  79 {
  80     double d = v / (double)(0x8000 * 0x8000);
  81     if (!v)
  82         return MAX_DB;
  83     return log(d) * -4.3429448190325182765112891891660508229; /* -10/log(10) */
  84 }
  85
  86 static void print_stats(AVFilterContext *ctx)
  87 {
  88     VolDetectContext *vd = ctx->priv;
  89     int i, max_volume, shift;
  90     uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
  91     uint64_t histdb[MAX_DB + 1] = { 0 };
  92
  93     for (i = 0; i < 0x10000; i++)
  94         nb_samples += vd->histogram[i];
  95     av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples);
  96     if (!nb_samples)
  97         return;
  98
  99     /* If nb_samples > 1<<34, there is a risk of overflow in the
 100        multiplication or the sum: shift all histogram values to avoid that.
 101        The total number of samples must be recomputed to avoid rounding
 102        errors. */
 103     shift = av_log2(nb_samples >> 33);
 104     for (i = 0; i < 0x10000; i++) {
 105         nb_samples_shift += vd->histogram[i] >> shift;
 106         power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
 107     }
 108     if (!nb_samples_shift)
 109         return;
 110     power = (power + nb_samples_shift / 2) / nb_samples_shift;
 111     av_assert0(power <= 0x8000 * 0x8000);
 112     av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));
 113
 114     max_volume = 0x8000;
 115     while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
 116                              !vd->histogram[0x8000 - max_volume])
 117         max_volume--;
 118     av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));
 119
 120     for (i = 0; i < 0x10000; i++)
 121         histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
 122     for (i = 0; i <= MAX_DB && !histdb[i]; i++);
 123     for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
 124         av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]);
 125         sum += histdb[i];
 126     }
 127 }
 128
 129 static av_cold void uninit(AVFilterContext *ctx)
 130 {
 131     print_stats(ctx);
 132 }
 133
 134 static const AVFilterPad volumedetect_inputs[] = {
 135     {
 136         .name         = "default",
 137         .type         = AVMEDIA_TYPE_AUDIO,
 138         .filter_frame = filter_frame,
 139     },
 140     { NULL }
 141 };
 142
 143 static const AVFilterPad volumedetect_outputs[] = {
 144     {
 145         .name = "default",
 146         .type = AVMEDIA_TYPE_AUDIO,
 147     },
 148     { NULL }
 149 };
 150
 151 AVFilter ff_af_volumedetect = {
 152     .name          = "volumedetect",
 153     .description   = NULL_IF_CONFIG_SMALL("Detect audio volume."),
 154     .priv_size     = sizeof(VolDetectContext),
 155     .query_formats = query_formats,
 156     .uninit        = uninit,
 157     .inputs        = volumedetect_inputs,
 158     .outputs       = volumedetect_outputs,
 159 };