git.sesse.net Git - ffmpeg/blob - libavfilter/af_afftfilt.c

   1 /*
   2  * Copyright (c) 2016 Paul B Mahol
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU Lesser General Public License as published
   8  * by the Free Software Foundation; either version 2.1 of the License,
   9  * or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include "libavutil/audio_fifo.h"
  22 #include "libavutil/avstring.h"
  23 #include "libavfilter/internal.h"
  24 #include "libavutil/common.h"
  25 #include "libavutil/opt.h"
  26 #include "libavcodec/avfft.h"
  27 #include "libavutil/eval.h"
  28 #include "audio.h"
  29 #include "window_func.h"
  30
  31 typedef struct AFFTFiltContext {
  32     const AVClass *class;
  33     char *real_str;
  34     char *img_str;
  35     int fft_bits;
  36
  37     FFTContext *fft, *ifft;
  38     FFTComplex **fft_data;
  39     int nb_exprs;
  40     int window_size;
  41     AVExpr **real;
  42     AVExpr **imag;
  43     AVAudioFifo *fifo;
  44     int64_t pts;
  45     int hop_size;
  46     float overlap;
  47     AVFrame *buffer;
  48     int start, end;
  49     int win_func;
  50     float win_scale;
  51     float *window_func_lut;
  52 } AFFTFiltContext;
  53
  54 static const char *const var_names[] = {            "sr",     "b",       "nb",        "ch",        "chs",   "pts",        NULL };
  55 enum                                   { VAR_SAMPLE_RATE, VAR_BIN, VAR_NBBINS, VAR_CHANNEL, VAR_CHANNELS, VAR_PTS, VAR_VARS_NB };
  56
  57 #define OFFSET(x) offsetof(AFFTFiltContext, x)
  58 #define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
  59
  60 static const AVOption afftfilt_options[] = {
  61     { "real", "set channels real expressions",       OFFSET(real_str), AV_OPT_TYPE_STRING, {.str = "1" }, 0, 0, A },
  62     { "imag",  "set channels imaginary expressions", OFFSET(img_str),  AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, A },
  63     { "win_size", "set window size", OFFSET(fft_bits), AV_OPT_TYPE_INT, {.i64=12}, 4, 17, A, "fft" },
  64         { "w16",    0, 0, AV_OPT_TYPE_CONST, {.i64=4},  0, 0, A, "fft" },
  65         { "w32",    0, 0, AV_OPT_TYPE_CONST, {.i64=5},  0, 0, A, "fft" },
  66         { "w64",    0, 0, AV_OPT_TYPE_CONST, {.i64=6},  0, 0, A, "fft" },
  67         { "w128",   0, 0, AV_OPT_TYPE_CONST, {.i64=7},  0, 0, A, "fft" },
  68         { "w256",   0, 0, AV_OPT_TYPE_CONST, {.i64=8},  0, 0, A, "fft" },
  69         { "w512",   0, 0, AV_OPT_TYPE_CONST, {.i64=9},  0, 0, A, "fft" },
  70         { "w1024",  0, 0, AV_OPT_TYPE_CONST, {.i64=10}, 0, 0, A, "fft" },
  71         { "w2048",  0, 0, AV_OPT_TYPE_CONST, {.i64=11}, 0, 0, A, "fft" },
  72         { "w4096",  0, 0, AV_OPT_TYPE_CONST, {.i64=12}, 0, 0, A, "fft" },
  73         { "w8192",  0, 0, AV_OPT_TYPE_CONST, {.i64=13}, 0, 0, A, "fft" },
  74         { "w16384", 0, 0, AV_OPT_TYPE_CONST, {.i64=14}, 0, 0, A, "fft" },
  75         { "w32768", 0, 0, AV_OPT_TYPE_CONST, {.i64=15}, 0, 0, A, "fft" },
  76         { "w65536", 0, 0, AV_OPT_TYPE_CONST, {.i64=16}, 0, 0, A, "fft" },
  77         { "w131072",0, 0, AV_OPT_TYPE_CONST, {.i64=17}, 0, 0, A, "fft" },
  78     { "win_func", "set window function", OFFSET(win_func), AV_OPT_TYPE_INT, {.i64 = WFUNC_HANNING}, 0, NB_WFUNC-1, A, "win_func" },
  79         { "rect",     "Rectangular",      0, AV_OPT_TYPE_CONST, {.i64=WFUNC_RECT},     0, 0, A, "win_func" },
  80         { "bartlett", "Bartlett",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BARTLETT}, 0, 0, A, "win_func" },
  81         { "hann",     "Hann",             0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING},  0, 0, A, "win_func" },
  82         { "hanning",  "Hanning",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING},  0, 0, A, "win_func" },
  83         { "hamming",  "Hamming",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HAMMING},  0, 0, A, "win_func" },
  84         { "sine",     "Sine",             0, AV_OPT_TYPE_CONST, {.i64=WFUNC_SINE},     0, 0, A, "win_func" },
  85     { "overlap", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_FLOAT, {.dbl=0.75}, 0,  1, A },
  86     { NULL },
  87 };
  88
  89 AVFILTER_DEFINE_CLASS(afftfilt);
  90
  91 static int config_input(AVFilterLink *inlink)
  92 {
  93     AVFilterContext *ctx = inlink->dst;
  94     AFFTFiltContext *s = ctx->priv;
  95     char *saveptr = NULL;
  96     int ret = 0, ch, i;
  97     float overlap;
  98     char *args;
  99     const char *last_expr = "1";
 100
 101     s->fft  = av_fft_init(s->fft_bits, 0);
 102     s->ifft = av_fft_init(s->fft_bits, 1);
 103     if (!s->fft || !s->ifft)
 104         return AVERROR(ENOMEM);
 105
 106     s->window_size = 1 << s->fft_bits;
 107
 108     s->fft_data = av_calloc(inlink->channels, sizeof(*s->fft_data));
 109     if (!s->fft_data)
 110         return AVERROR(ENOMEM);
 111
 112     for (ch = 0; ch < inlink->channels; ch++) {
 113         s->fft_data[ch] = av_calloc(s->window_size, sizeof(**s->fft_data));
 114         if (!s->fft_data[ch])
 115             return AVERROR(ENOMEM);
 116     }
 117
 118     s->real = av_calloc(inlink->channels, sizeof(*s->real));
 119     if (!s->real)
 120         return AVERROR(ENOMEM);
 121
 122     s->imag = av_calloc(inlink->channels, sizeof(*s->imag));
 123     if (!s->imag)
 124         return AVERROR(ENOMEM);
 125
 126     args = av_strdup(s->real_str);
 127     if (!args)
 128         return AVERROR(ENOMEM);
 129
 130     for (ch = 0; ch < inlink->channels; ch++) {
 131         char *arg = av_strtok(ch == 0 ? args : NULL, "|", &saveptr);
 132
 133         ret = av_expr_parse(&s->real[ch], arg ? arg : last_expr, var_names,
 134                             NULL, NULL, NULL, NULL, 0, ctx);
 135         if (ret < 0)
 136             break;
 137         if (arg)
 138             last_expr = arg;
 139         s->nb_exprs++;
 140     }
 141
 142     av_free(args);
 143
 144     args = av_strdup(s->img_str ? s->img_str : s->real_str);
 145     if (!args)
 146         return AVERROR(ENOMEM);
 147
 148     for (ch = 0; ch < inlink->channels; ch++) {
 149         char *arg = av_strtok(ch == 0 ? args : NULL, "|", &saveptr);
 150
 151         ret = av_expr_parse(&s->imag[ch], arg ? arg : last_expr, var_names,
 152                             NULL, NULL, NULL, NULL, 0, ctx);
 153         if (ret < 0)
 154             break;
 155         if (arg)
 156             last_expr = arg;
 157     }
 158
 159     av_free(args);
 160
 161     s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->window_size);
 162     if (!s->fifo)
 163         return AVERROR(ENOMEM);
 164
 165     s->window_func_lut = av_realloc_f(s->window_func_lut, s->window_size,
 166                                       sizeof(*s->window_func_lut));
 167     if (!s->window_func_lut)
 168         return AVERROR(ENOMEM);
 169     generate_window_func(s->window_func_lut, s->window_size, s->win_func, &overlap);
 170     if (s->overlap == 1)
 171         s->overlap = overlap;
 172
 173     for (s->win_scale = 0, i = 0; i < s->window_size; i++) {
 174         s->win_scale += s->window_func_lut[i] * s->window_func_lut[i];
 175     }
 176
 177     s->hop_size = s->window_size * (1 - s->overlap);
 178     if (s->hop_size <= 0)
 179         return AVERROR(EINVAL);
 180
 181     s->buffer = ff_get_audio_buffer(inlink, s->window_size * 2);
 182     if (!s->buffer)
 183         return AVERROR(ENOMEM);
 184
 185     return ret;
 186 }
 187
 188 static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 189 {
 190     AVFilterContext *ctx = inlink->dst;
 191     AVFilterLink *outlink = ctx->outputs[0];
 192     AFFTFiltContext *s = ctx->priv;
 193     const int window_size = s->window_size;
 194     const float f = 1. / s->win_scale;
 195     double values[VAR_VARS_NB];
 196     AVFrame *out, *in = NULL;
 197     int ch, n, ret, i, j, k;
 198     int start = s->start, end = s->end;
 199
 200     ret = av_audio_fifo_write(s->fifo, (void **)frame->extended_data, frame->nb_samples);
 201     av_frame_free(&frame);
 202     if (ret < 0)
 203         return ret;
 204
 205     while (av_audio_fifo_size(s->fifo) >= window_size) {
 206         if (!in) {
 207             in = ff_get_audio_buffer(outlink, window_size);
 208             if (!in)
 209                 return AVERROR(ENOMEM);
 210         }
 211
 212         ret = av_audio_fifo_peek(s->fifo, (void **)in->extended_data, window_size);
 213         if (ret < 0)
 214             break;
 215
 216         for (ch = 0; ch < inlink->channels; ch++) {
 217             const float *src = (float *)in->extended_data[ch];
 218             FFTComplex *fft_data = s->fft_data[ch];
 219
 220             for (n = 0; n < in->nb_samples; n++) {
 221                 fft_data[n].re = src[n] * s->window_func_lut[n];
 222                 fft_data[n].im = 0;
 223             }
 224
 225             for (; n < window_size; n++) {
 226                 fft_data[n].re = 0;
 227                 fft_data[n].im = 0;
 228             }
 229         }
 230
 231         values[VAR_PTS]         = s->pts;
 232         values[VAR_SAMPLE_RATE] = inlink->sample_rate;
 233         values[VAR_NBBINS]      = window_size / 2;
 234         values[VAR_CHANNELS]    = inlink->channels;
 235
 236         for (ch = 0; ch < inlink->channels; ch++) {
 237             FFTComplex *fft_data = s->fft_data[ch];
 238             float *buf = (float *)s->buffer->extended_data[ch];
 239             int x;
 240
 241             values[VAR_CHANNEL] = ch;
 242
 243             av_fft_permute(s->fft, fft_data);
 244             av_fft_calc(s->fft, fft_data);
 245
 246             for (n = 0; n < window_size / 2; n++) {
 247                 float fr, fi;
 248
 249                 values[VAR_BIN] = n;
 250
 251                 fr = av_expr_eval(s->real[ch], values, s);
 252                 fi = av_expr_eval(s->imag[ch], values, s);
 253
 254                 fft_data[n].re *= fr;
 255                 fft_data[n].im *= fi;
 256             }
 257
 258             for (n = window_size / 2 + 1, x = window_size / 2 - 1; n < window_size; n++, x--) {
 259                 fft_data[n].re =  fft_data[x].re;
 260                 fft_data[n].im = -fft_data[x].im;
 261             }
 262
 263             av_fft_permute(s->ifft, fft_data);
 264             av_fft_calc(s->ifft, fft_data);
 265
 266             start = s->start;
 267             end = s->end;
 268             k = end;
 269             for (i = 0, j = start; j < k && i < window_size; i++, j++) {
 270                 buf[j] += s->fft_data[ch][i].re * f;
 271             }
 272
 273             for (; i < window_size; i++, j++) {
 274                 buf[j] = s->fft_data[ch][i].re * f;
 275             }
 276
 277             start += s->hop_size;
 278             end = j;
 279         }
 280
 281         s->start = start;
 282         s->end = end;
 283
 284         if (start >= window_size) {
 285             float *dst, *buf;
 286
 287             start -= window_size;
 288             end   -= window_size;
 289
 290             s->start = start;
 291             s->end = end;
 292
 293             out = ff_get_audio_buffer(outlink, window_size);
 294             if (!out) {
 295                 ret = AVERROR(ENOMEM);
 296                 break;
 297             }
 298
 299             out->pts = s->pts;
 300             s->pts += window_size;
 301
 302             for (ch = 0; ch < inlink->channels; ch++) {
 303                 dst = (float *)out->extended_data[ch];
 304                 buf = (float *)s->buffer->extended_data[ch];
 305
 306                 for (n = 0; n < window_size; n++) {
 307                     dst[n] = buf[n] * (1 - s->overlap);
 308                 }
 309                 memmove(buf, buf + window_size, window_size * 4);
 310             }
 311
 312             ret = ff_filter_frame(outlink, out);
 313             if (ret < 0)
 314                 break;
 315         }
 316
 317         av_audio_fifo_drain(s->fifo, s->hop_size);
 318     }
 319
 320     av_frame_free(&in);
 321     return ret < 0 ? ret : 0;
 322 }
 323
 324 static int query_formats(AVFilterContext *ctx)
 325 {
 326     AVFilterFormats *formats;
 327     AVFilterChannelLayouts *layouts;
 328     static const enum AVSampleFormat sample_fmts[] = {
 329         AV_SAMPLE_FMT_FLTP,
 330         AV_SAMPLE_FMT_NONE
 331     };
 332     int ret;
 333
 334     layouts = ff_all_channel_counts();
 335     if (!layouts)
 336         return AVERROR(ENOMEM);
 337     ret = ff_set_common_channel_layouts(ctx, layouts);
 338     if (ret < 0)
 339         return ret;
 340
 341     formats = ff_make_format_list(sample_fmts);
 342     if (!formats)
 343         return AVERROR(ENOMEM);
 344     ret = ff_set_common_formats(ctx, formats);
 345     if (ret < 0)
 346         return ret;
 347
 348     formats = ff_all_samplerates();
 349     if (!formats)
 350         return AVERROR(ENOMEM);
 351     return ff_set_common_samplerates(ctx, formats);
 352 }
 353
 354 static av_cold void uninit(AVFilterContext *ctx)
 355 {
 356     AFFTFiltContext *s = ctx->priv;
 357     int i;
 358
 359     av_fft_end(s->fft);
 360     av_fft_end(s->ifft);
 361
 362     for (i = 0; i < s->nb_exprs; i++) {
 363         if (s->fft_data)
 364             av_freep(&s->fft_data[i]);
 365     }
 366     av_freep(&s->fft_data);
 367
 368     for (i = 0; i < s->nb_exprs; i++) {
 369         av_expr_free(s->real[i]);
 370         av_expr_free(s->imag[i]);
 371     }
 372
 373     av_freep(&s->real);
 374     av_freep(&s->imag);
 375     av_frame_free(&s->buffer);
 376     av_freep(&s->window_func_lut);
 377
 378     av_audio_fifo_free(s->fifo);
 379 }
 380
 381 static const AVFilterPad inputs[] = {
 382     {
 383         .name         = "default",
 384         .type         = AVMEDIA_TYPE_AUDIO,
 385         .config_props = config_input,
 386         .filter_frame = filter_frame,
 387     },
 388     { NULL }
 389 };
 390
 391 static const AVFilterPad outputs[] = {
 392     {
 393         .name = "default",
 394         .type = AVMEDIA_TYPE_AUDIO,
 395     },
 396     { NULL }
 397 };
 398
 399 AVFilter ff_af_afftfilt = {
 400     .name            = "afftfilt",
 401     .description     = NULL_IF_CONFIG_SMALL("Apply arbitrary expressions to samples in frequency domain."),
 402     .priv_size       = sizeof(AFFTFiltContext),
 403     .priv_class      = &afftfilt_class,
 404     .inputs          = inputs,
 405     .outputs         = outputs,
 406     .query_formats   = query_formats,
 407     .uninit          = uninit,
 408 };