diff --git a/libavfilter/af_sofalizer.c b/libavfilter/af_sofalizer.c
index d9098d7679d..8a0397f54e3 100644
--- a/libavfilter/af_sofalizer.c
+++ b/libavfilter/af_sofalizer.c
@@ -35,6 +35,7 @@
 #include "libavutil/intmath.h"
 #include "libavutil/opt.h"
 #include "avfilter.h"
+#include "filters.h"
 #include "internal.h"
 #include "audio.h"
 
@@ -42,9 +43,13 @@
 #define FREQUENCY_DOMAIN 1
 
 typedef struct MySofa {  /* contains data of one SOFA file */
-    struct MYSOFA_EASY *easy;
-    int n_samples;       /* length of one impulse response (IR) */
+    struct MYSOFA_HRTF *hrtf;
+    struct MYSOFA_LOOKUP *lookup;
+    struct MYSOFA_NEIGHBORHOOD *neighborhood;
+    int ir_samples;      /* length of one impulse response (IR) */
+    int n_samples;       /* ir_samples to next power of 2 */
     float *lir, *rir;    /* IRs (time-domain) */
+    float *fir;
     int max_delay;
 } MySofa;
 
@@ -77,6 +82,7 @@ typedef struct SOFAlizerContext {
     int buffer_length;      /* is: longest IR plus max. delay in all SOFA files */
                             /* then choose next power of 2 */
     int n_fft;              /* number of samples in one FFT block */
+    int nb_samples;
 
     /* netCDF variables */
     int *delay[2];          /* broadband delay for each channel/IR to be convolved */
@@ -84,7 +90,8 @@ typedef struct SOFAlizerContext {
     float *data_ir[2];      /* IRs for all channels to be convolved */
                             /* (this excludes the LFE) */
     float *temp_src[2];
-    FFTComplex *temp_fft[2];
+    FFTComplex *temp_fft[2];  /* Array to hold FFT values */
+    FFTComplex *temp_afft[2]; /* Array to accumulate FFT values prior to IFFT */
 
     /* control variables */
     float gain;             /* filter gain (in dB) */
@@ -92,6 +99,12 @@ typedef struct SOFAlizerContext {
     float elevation;        /* elevation of virtual loudspeakers (in deg.) */
     float radius;           /* distance virtual loudspeakers to listener (in metres) */
     int type;               /* processing type */
+    int framesize;          /* size of buffer */
+    int normalize;          /* should all IRs be normalized upon import ? */
+    int interpolate;        /* should wanted IRs be interpolated from neighbors ? */
+    int minphase;           /* should all IRs be minphased upon import ? */
+    float anglestep;        /* neighbor search angle step, in angles */
+    float radstep;          /* neighbor search radius step, in meters */
 
     VirtualSpeaker vspkrpos[64];
 
@@ -103,8 +116,16 @@ typedef struct SOFAlizerContext {
 
 static int close_sofa(struct MySofa *sofa)
 {
-    mysofa_close(sofa->easy);
-    sofa->easy = NULL;
+    if (sofa->neighborhood)
+        mysofa_neighborhood_free(sofa->neighborhood);
+    sofa->neighborhood = NULL;
+    if (sofa->lookup)
+        mysofa_lookup_free(sofa->lookup);
+    sofa->lookup = NULL;
+    if (sofa->hrtf)
+        mysofa_free(sofa->hrtf);
+    sofa->hrtf = NULL;
+    av_freep(&sofa->fir);
 
     return 0;
 }
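
The teardown above mirrors the new load path in preload_sofa() below. As a minimal standalone sketch of the same libmysofa lifecycle — load, validate, convert to cartesian, build the lookup, query, free — outside any FFmpeg plumbing (file name hypothetical, error handling reduced to bail-outs):

    #include <stdio.h>
    #include <mysofa.h>

    int main(void)
    {
        int err = 0;
        struct MYSOFA_HRTF *hrtf = mysofa_load("hrtf.sofa", &err); /* hypothetical path */
        if (!hrtf || err)
            return 1;
        if (mysofa_check(hrtf) != MYSOFA_OK) {    /* validate mandatory SOFA fields */
            mysofa_free(hrtf);
            return 1;
        }
        mysofa_tocartesian(hrtf);                 /* lookup expects cartesian coordinates */
        struct MYSOFA_LOOKUP *lookup = mysofa_lookup_init(hrtf);
        if (!lookup) {
            mysofa_free(hrtf);
            return 1;
        }
        float pos[3] = { 1.0f, 0.0f, 0.0f };      /* 1 m in front of the listener */
        int nearest = mysofa_lookup(lookup, pos); /* index of the closest measurement */
        printf("nearest measurement index: %d\n", nearest);
        /* teardown in the same order as close_sofa(): lookup first, then the HRTF */
        mysofa_lookup_free(lookup);
        mysofa_free(hrtf);
        return 0;
    }
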
@@ -113,30 +134,63 @@ static int preload_sofa(AVFilterContext *ctx, char *filename, int *samplingrate)
 {
     struct SOFAlizerContext *s = ctx->priv;
     struct MYSOFA_HRTF *mysofa;
+    char *license;
     int ret;
 
     mysofa = mysofa_load(filename, &ret);
+    s->sofa.hrtf = mysofa;
     if (ret || !mysofa) {
         av_log(ctx, AV_LOG_ERROR, "Can't find SOFA-file '%s'\n", filename);
         return AVERROR(EINVAL);
     }
 
+    ret = mysofa_check(mysofa);
+    if (ret != MYSOFA_OK) {
+        av_log(ctx, AV_LOG_ERROR, "Selected SOFA file is invalid. Please select valid SOFA file.\n");
+        return ret;
+    }
+
+    if (s->normalize)
+        mysofa_loudness(s->sofa.hrtf);
+
+    if (s->minphase)
+        mysofa_minphase(s->sofa.hrtf, 0.01f);
+
+    mysofa_tocartesian(s->sofa.hrtf);
+
+    s->sofa.lookup = mysofa_lookup_init(s->sofa.hrtf);
+    if (s->sofa.lookup == NULL)
+        return AVERROR(EINVAL);
+
+    if (s->interpolate)
+        s->sofa.neighborhood = mysofa_neighborhood_init_withstepdefine(s->sofa.hrtf,
+                                                                       s->sofa.lookup,
+                                                                       s->anglestep,
+                                                                       s->radstep);
+
+    s->sofa.fir = av_calloc(s->sofa.hrtf->N * s->sofa.hrtf->R, sizeof(*s->sofa.fir));
+    if (!s->sofa.fir)
+        return AVERROR(ENOMEM);
+
     if (mysofa->DataSamplingRate.elements != 1)
         return AVERROR(EINVAL);
+    av_log(ctx, AV_LOG_DEBUG, "Original IR length: %d.\n", mysofa->N);
     *samplingrate = mysofa->DataSamplingRate.values[0];
-    s->sofa.n_samples = mysofa->N;
-    mysofa_free(mysofa);
+    license = mysofa_getAttribute(mysofa->attributes, (char *)"License");
+    if (license)
+        av_log(ctx, AV_LOG_INFO, "SOFA license: %s\n", license);
 
     return 0;
 }
 
-static int parse_channel_name(char **arg, int *rchannel, char *buf)
+static int parse_channel_name(AVFilterContext *ctx, char **arg, int *rchannel)
 {
     int len, i, channel_id = 0;
     int64_t layout, layout0;
+    char buf[8] = {0};
 
     /* try to parse a channel name, e.g. "FL" */
-    if (sscanf(*arg, "%7[A-Z]%n", buf, &len)) {
+    if (av_sscanf(*arg, "%7[A-Z]%n", buf, &len)) {
         layout0 = layout = av_get_channel_layout(buf);
         /* channel_id <- first set bit in layout */
         for (i = 32; i > 0; i >>= 1) {
@@ -146,8 +200,18 @@ static int parse_channel_name(char **arg, int *rchannel, char *buf)
             }
         }
         /* reject layouts that are not a single channel */
-        if (channel_id >= 64 || layout0 != 1LL << channel_id)
+        if (channel_id >= 64 || layout0 != 1LL << channel_id) {
+            av_log(ctx, AV_LOG_WARNING, "Failed to parse \'%s\' as channel name.\n", buf);
             return AVERROR(EINVAL);
+        }
+        *rchannel = channel_id;
+        *arg += len;
+        return 0;
+    } else if (av_sscanf(*arg, "%d%n", &channel_id, &len) == 1) {
+        if (channel_id < 0 || channel_id >= 64) {
+            av_log(ctx, AV_LOG_WARNING, "Failed to parse \'%d\' as channel number.\n", channel_id);
+            return AVERROR(EINVAL);
+        }
         *rchannel = channel_id;
         *arg += len;
         return 0;
@@ -165,20 +229,18 @@ static void parse_speaker_pos(AVFilterContext *ctx, int64_t in_channel_layout)
     p = args;
     while ((arg = av_strtok(p, "|", &tokenizer))) {
-        char buf[8];
         float azim, elev;
         int out_ch_id;
 
         p = NULL;
-        if (parse_channel_name(&arg, &out_ch_id, buf)) {
-            av_log(ctx, AV_LOG_WARNING, "Failed to parse \'%s\' as channel name.\n", buf);
+        if (parse_channel_name(ctx, &arg, &out_ch_id)) {
             continue;
         }
-        if (sscanf(arg, "%f %f", &azim, &elev) == 2) {
+        if (av_sscanf(arg, "%f %f", &azim, &elev) == 2) {
             s->vspkrpos[out_ch_id].set = 1;
             s->vspkrpos[out_ch_id].azim = azim;
             s->vspkrpos[out_ch_id].elev = elev;
-        } else if (sscanf(arg, "%f", &azim) == 1) {
+        } else if (av_sscanf(arg, "%f", &azim) == 1) {
             s->vspkrpos[out_ch_id].set = 1;
             s->vspkrpos[out_ch_id].azim = azim;
             s->vspkrpos[out_ch_id].elev = 0;
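
With the numeric fallback added to parse_channel_name() above, each '|'-separated entry of the speakers option may start with either a channel name or a decimal channel index, followed by an azimuth and an optional elevation in degrees. An illustrative option string (positions invented):

    speakers=FL 30 0|FR -30 0|2 180

would pin FL at 30 degrees azimuth, FR at -30 degrees, and the channel with index 2 directly behind the listener at the default 0 degrees elevation.
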
@@ -193,11 +255,11 @@ static int get_speaker_pos(AVFilterContext *ctx,
 {
     struct SOFAlizerContext *s = ctx->priv;
     uint64_t channels_layout = ctx->inputs[0]->channel_layout;
-    float azim[16] = { 0 };
-    float elev[16] = { 0 };
+    float azim[64] = { 0 };
+    float elev[64] = { 0 };
     int m, ch, n_conv = ctx->inputs[0]->channels; /* get no. input channels */
 
-    if (n_conv > 16)
+    if (n_conv < 0 || n_conv > 64)
         return AVERROR(EINVAL);
 
     s->lfe_channel = -1;
@@ -272,6 +334,7 @@ typedef struct ThreadData {
     float **ringbuffer;
     float **temp_src;
     FFTComplex **temp_fft;
+    FFTComplex **temp_afft;
 } ThreadData;
 
 static int sofalizer_convolute(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
@@ -286,20 +349,25 @@ static int sofalizer_convolute(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
     int *n_clippings = &td->n_clippings[jobnr];
     float *ringbuffer = td->ringbuffer[jobnr];
     float *temp_src = td->temp_src[jobnr];
-    const int n_samples = s->sofa.n_samples; /* length of one IR */
-    const float *src = (const float *)in->data[0]; /* get pointer to audio input buffer */
-    float *dst = (float *)out->data[0]; /* get pointer to audio output buffer */
+    const int ir_samples = s->sofa.ir_samples; /* length of one IR */
+    const int n_samples = s->sofa.n_samples;
+    const int planar = in->format == AV_SAMPLE_FMT_FLTP;
+    const int mult = 1 + !planar;
+    const float *src = (const float *)in->extended_data[0]; /* get pointer to audio input buffer */
+    float *dst = (float *)out->extended_data[jobnr * planar]; /* get pointer to audio output buffer */
     const int in_channels = s->n_conv; /* number of input channels */
     /* ring buffer length is: longest IR plus max. delay -> next power of 2 */
     const int buffer_length = s->buffer_length;
     /* -1 for AND instead of MODULO (applied to powers of 2): */
     const uint32_t modulo = (uint32_t)buffer_length - 1;
-    float *buffer[16]; /* holds ringbuffer for each input channel */
+    float *buffer[64]; /* holds ringbuffer for each input channel */
     int wr = *write;
     int read;
     int i, l;
 
-    dst += offset;
+    if (!planar)
+        dst += offset;
+
     for (l = 0; l < in_channels; l++) {
         /* get starting address of ringbuffer for each input channel */
         buffer[l] = ringbuffer + l * buffer_length;
@@ -309,9 +377,18 @@ static int sofalizer_convolute(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
         const float *temp_ir = ir; /* using same set of IRs for each sample */
 
         dst[0] = 0;
-        for (l = 0; l < in_channels; l++) {
-            /* write current input sample to ringbuffer (for each channel) */
-            buffer[l][wr] = src[l];
+        if (planar) {
+            for (l = 0; l < in_channels; l++) {
+                const float *srcp = (const float *)in->extended_data[l];
+
+                /* write current input sample to ringbuffer (for each channel) */
+                buffer[l][wr] = srcp[i];
+            }
+        } else {
+            for (l = 0; l < in_channels; l++) {
+                /* write current input sample to ringbuffer (for each channel) */
+                buffer[l][wr] = src[l];
+            }
         }
 
         /* loop goes through all channels to be convolved */
@@ -321,36 +398,36 @@ static int sofalizer_convolute(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
             if (l == s->lfe_channel) {
                 /* LFE is an input channel but requires no convolution */
                 /* apply gain to LFE signal and add to output buffer */
-                *dst += *(buffer[s->lfe_channel] + wr) * s->gain_lfe;
-                temp_ir += FFALIGN(n_samples, 32);
+                dst[0] += *(buffer[s->lfe_channel] + wr) * s->gain_lfe;
+                temp_ir += n_samples;
                 continue;
             }
 
             /* current read position in ringbuffer: input sample write position
              * - delay for l-th ch. + diff. betw. IR length and buffer length
             * (mod buffer length) */
-            read = (wr - delay[l] - (n_samples - 1) + buffer_length) & modulo;
+            read = (wr - delay[l] - (ir_samples - 1) + buffer_length) & modulo;
 
-            if (read + n_samples < buffer_length) {
-                memmove(temp_src, bptr + read, n_samples * sizeof(*temp_src));
+            if (read + ir_samples < buffer_length) {
+                memmove(temp_src, bptr + read, ir_samples * sizeof(*temp_src));
             } else {
-                int len = FFMIN(n_samples - (read % n_samples), buffer_length - read);
+                int len = FFMIN(n_samples - (read % ir_samples), buffer_length - read);
                 memmove(temp_src, bptr + read, len * sizeof(*temp_src));
                 memmove(temp_src + len, bptr, (n_samples - len) * sizeof(*temp_src));
             }
 
             /* multiply signal and IR, and add up the results */
-            dst[0] += s->fdsp->scalarproduct_float(temp_ir, temp_src, n_samples);
-            temp_ir += FFALIGN(n_samples, 32);
+            dst[0] += s->fdsp->scalarproduct_float(temp_ir, temp_src, FFALIGN(ir_samples, 32));
+            temp_ir += n_samples;
         }
 
         /* clippings counter */
-        if (fabs(dst[0]) > 1)
-            *n_clippings += 1;
+        if (fabsf(dst[0]) > 1)
+            n_clippings[0]++;
 
         /* move output buffer pointer by +2 to get to next sample of processed channel: */
-        dst += 2;
+        dst += mult;
         src += in_channels;
         wr  = (wr + 1) & modulo; /* update ringbuffer write position */
     }
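
The read-index arithmetic in the hunk above leans on the ring buffer length being a power of two, so wrap-around is a single AND with length-1 rather than a modulo. A self-contained sketch of that pattern with toy sizes (all names hypothetical):

    #include <stdio.h>

    #define BUF_LEN 8                 /* must be a power of 2 */
    #define MASK    (BUF_LEN - 1)     /* -1 for AND instead of MODULO */

    int main(void)
    {
        float ring[BUF_LEN] = { 0 };
        const int delay = 3, ir_len = 4;
        int wr = 0;

        for (int n = 0; n < 20; n++) {
            ring[wr] = (float)n;      /* write newest input sample */
            /* oldest sample needed is write pos - delay - (ir_len - 1);
             * adding BUF_LEN before masking keeps the index non-negative,
             * exactly like the read computation in the filter */
            int read = (wr - delay - (ir_len - 1) + BUF_LEN) & MASK;
            printf("n=%2d wr=%d read=%d\n", n, wr, read);
            wr = (wr + 1) & MASK;     /* update write position */
        }
        return 0;
    }
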
@@ -370,15 +447,17 @@ static int sofalizer_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
     FFTComplex *hrtf = s->data_hrtf[jobnr]; /* get pointers to current HRTF data */
     int *n_clippings = &td->n_clippings[jobnr];
     float *ringbuffer = td->ringbuffer[jobnr];
-    const int n_samples = s->sofa.n_samples; /* length of one IR */
-    const float *src = (const float *)in->data[0]; /* get pointer to audio input buffer */
-    float *dst = (float *)out->data[0]; /* get pointer to audio output buffer */
+    const int ir_samples = s->sofa.ir_samples; /* length of one IR */
+    const int planar = in->format == AV_SAMPLE_FMT_FLTP;
+    const int mult = 1 + !planar;
+    float *dst = (float *)out->extended_data[jobnr * planar]; /* get pointer to audio output buffer */
     const int in_channels = s->n_conv; /* number of input channels */
     /* ring buffer length is: longest IR plus max. delay -> next power of 2 */
     const int buffer_length = s->buffer_length;
     /* -1 for AND instead of MODULO (applied to powers of 2): */
     const uint32_t modulo = (uint32_t)buffer_length - 1;
     FFTComplex *fft_in = s->temp_fft[jobnr]; /* temporary array for FFT input/output data */
+    FFTComplex *fft_acc = s->temp_afft[jobnr];
     FFTContext *ifft = s->ifft[jobnr];
     FFTContext *fft = s->fft[jobnr];
     const int n_conv = s->n_conv;
@@ -389,29 +468,42 @@ static int sofalizer_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
     int n_read;
     int i, j;
 
-    dst += offset;
+    if (!planar)
+        dst += offset;
 
     /* find minimum between number of samples and output buffer length:
      * (important, if one IR is longer than the output buffer) */
-    n_read = FFMIN(s->sofa.n_samples, in->nb_samples);
+    n_read = FFMIN(ir_samples, in->nb_samples);
     for (j = 0; j < n_read; j++) {
         /* initialize output buf with saved signal from overflow buf */
-        dst[2 * j] = ringbuffer[wr];
-        ringbuffer[wr] = 0.0; /* re-set read samples to zero */
+        dst[mult * j] = ringbuffer[wr];
+        ringbuffer[wr] = 0.0f; /* re-set read samples to zero */
         /* update ringbuffer read/write position */
         wr = (wr + 1) & modulo;
     }
 
     /* initialize rest of output buffer with 0 */
     for (j = n_read; j < in->nb_samples; j++) {
-        dst[2 * j] = 0;
+        dst[mult * j] = 0;
     }
 
+    /* fill FFT accumulation with 0 */
+    memset(fft_acc, 0, sizeof(FFTComplex) * n_fft);
+
     for (i = 0; i < n_conv; i++) {
+        const float *src = (const float *)in->extended_data[i * planar]; /* get pointer to audio input buffer */
+
         if (i == s->lfe_channel) { /* LFE */
-            for (j = 0; j < in->nb_samples; j++) {
-                /* apply gain to LFE signal and add to output buffer */
-                dst[2 * j] += src[i + j * in_channels] * s->gain_lfe;
+            if (in->format == AV_SAMPLE_FMT_FLT) {
+                for (j = 0; j < in->nb_samples; j++) {
+                    /* apply gain to LFE signal and add to output buffer */
+                    dst[2 * j] += src[i + j * in_channels] * s->gain_lfe;
+                }
+            } else {
+                for (j = 0; j < in->nb_samples; j++) {
+                    /* apply gain to LFE signal and add to output buffer */
+                    dst[j] += src[j] * s->gain_lfe;
+                }
             }
             continue;
         }
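
The paired FLT/FLTP branches introduced above differ only in how sample j of input channel i is addressed: interleaved audio keeps all channels of one frame adjacent in a single buffer, planar audio keeps one buffer per channel (extended_data[i]). Reduced to two hypothetical helpers:

    /* interleaved (AV_SAMPLE_FMT_FLT): one buffer, stride = channel count */
    static float sample_interleaved(const float *buf, int nch, int i, int j)
    {
        return buf[j * nch + i];
    }

    /* planar (AV_SAMPLE_FMT_FLTP): one plane per channel */
    static float sample_planar(const float *const *planes, int i, int j)
    {
        return planes[i][j];
    }
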
@@ -423,10 +515,18 @@ static int sofalizer_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
         /* fill FFT input with 0 (we want to zero-pad) */
         memset(fft_in, 0, sizeof(FFTComplex) * n_fft);
 
-        for (j = 0; j < in->nb_samples; j++) {
-            /* prepare input for FFT */
-            /* write all samples of current input channel to FFT input array */
-            fft_in[j].re = src[j * in_channels + i];
+        if (in->format == AV_SAMPLE_FMT_FLT) {
+            for (j = 0; j < in->nb_samples; j++) {
+                /* prepare input for FFT */
+                /* write all samples of current input channel to FFT input array */
+                fft_in[j].re = src[j * in_channels + i];
+            }
+        } else {
+            for (j = 0; j < in->nb_samples; j++) {
+                /* prepare input for FFT */
+                /* write all samples of current input channel to FFT input array */
+                fft_in[j].re = src[j];
+            }
         }
 
         /* transform input signal of current channel to frequency domain */
@@ -439,37 +539,34 @@ static int sofalizer_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
             /* complex multiplication of input signal and HRTFs */
             /* output channel (real): */
-            fft_in[j].re = re * hcomplex->re - im * hcomplex->im;
+            fft_acc[j].re += re * hcomplex->re - im * hcomplex->im;
             /* output channel (imag): */
-            fft_in[j].im = re * hcomplex->im + im * hcomplex->re;
+            fft_acc[j].im += re * hcomplex->im + im * hcomplex->re;
         }
+    }
 
-        /* transform output signal of current channel back to time domain */
-        av_fft_permute(ifft, fft_in);
-        av_fft_calc(ifft, fft_in);
+    /* transform output signal of current channel back to time domain */
+    av_fft_permute(ifft, fft_acc);
+    av_fft_calc(ifft, fft_acc);
 
-        for (j = 0; j < in->nb_samples; j++) {
-            /* write output signal of current channel to output buffer */
-            dst[2 * j] += fft_in[j].re * fft_scale;
-        }
+    for (j = 0; j < in->nb_samples; j++) {
+        /* write output signal of current channel to output buffer */
+        dst[mult * j] += fft_acc[j].re * fft_scale;
+    }
 
-        for (j = 0; j < n_samples - 1; j++) { /* overflow length is IR length - 1 */
-            /* write the rest of output signal to overflow buffer */
-            int write_pos = (wr + j) & modulo;
+    for (j = 0; j < ir_samples - 1; j++) { /* overflow length is IR length - 1 */
+        /* write the rest of output signal to overflow buffer */
+        int write_pos = (wr + j) & modulo;
 
-            *(ringbuffer + write_pos) += fft_in[in->nb_samples + j].re * fft_scale;
-        }
+        *(ringbuffer + write_pos) += fft_acc[in->nb_samples + j].re * fft_scale;
     }
 
     /* go through all samples of current output buffer: count clippings */
     for (i = 0; i < out->nb_samples; i++) {
         /* clippings counter */
-        if (fabs(*dst) > 1) { /* if current output sample > 1 */
+        if (fabsf(dst[i * mult]) > 1) { /* if current output sample > 1 */
             n_clippings[0]++;
         }
-
-        /* move output buffer pointer by +2 to get to next sample of processed channel: */
-        dst += 2;
     }
 
     /* remember read/write position in ringbuffer for next call */
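
Accumulating every channel's spectrum into fft_acc before one inverse transform is what lets the rewrite run a single IFFT per job instead of one per input channel; by linearity of the FFT the result is identical. A trimmed sketch of the multiply-accumulate core, assuming a complex type with re/im fields like FFTComplex:

    typedef struct Complex { float re, im; } Complex;

    /* acc += x * h: complex multiply-accumulate over one FFT block */
    static void mac_spectrum(Complex *acc, const Complex *x, const Complex *h, int n)
    {
        for (int j = 0; j < n; j++) {
            const float re = x[j].re, im = x[j].im;
            acc[j].re += re * h[j].re - im * h[j].im;
            acc[j].im += re * h[j].im + im * h[j].re;
        }
    }
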
@@ -498,10 +595,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     td.delay = s->delay; td.ir = s->data_ir; td.n_clippings = n_clippings;
     td.ringbuffer = s->ringbuffer; td.temp_src = s->temp_src;
     td.temp_fft = s->temp_fft;
+    td.temp_afft = s->temp_afft;
 
     if (s->type == TIME_DOMAIN) {
         ctx->internal->execute(ctx, sofalizer_convolute, &td, NULL, 2);
-    } else {
+    } else if (s->type == FREQUENCY_DOMAIN) {
         ctx->internal->execute(ctx, sofalizer_fast_convolute, &td, NULL, 2);
     }
     emms_c();
@@ -516,16 +614,45 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     return ff_filter_frame(outlink, out);
 }
 
+static int activate(AVFilterContext *ctx)
+{
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    SOFAlizerContext *s = ctx->priv;
+    AVFrame *in;
+    int ret;
+
+    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
+
+    if (s->nb_samples)
+        ret = ff_inlink_consume_samples(inlink, s->nb_samples, s->nb_samples, &in);
+    else
+        ret = ff_inlink_consume_frame(inlink, &in);
+    if (ret < 0)
+        return ret;
+    if (ret > 0)
+        return filter_frame(inlink, in);
+
+    FF_FILTER_FORWARD_STATUS(inlink, outlink);
+    FF_FILTER_FORWARD_WANTED(outlink, inlink);
+
+    return FFERROR_NOT_READY;
+}
+
 static int query_formats(AVFilterContext *ctx)
 {
     struct SOFAlizerContext *s = ctx->priv;
     AVFilterFormats *formats = NULL;
     AVFilterChannelLayouts *layouts = NULL;
     int ret, sample_rates[] = { 48000, -1 };
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
+        AV_SAMPLE_FMT_NONE
+    };
 
-    ret = ff_add_format(&formats, AV_SAMPLE_FMT_FLT);
-    if (ret)
-        return ret;
+    formats = ff_make_format_list(sample_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
     ret = ff_set_common_formats(ctx, formats);
     if (ret)
         return ret;
@@ -534,7 +661,7 @@ static int query_formats(AVFilterContext *ctx)
     if (!layouts)
         return AVERROR(ENOMEM);
 
-    ret = ff_channel_layouts_ref(layouts, &ctx->inputs[0]->out_channel_layouts);
+    ret = ff_channel_layouts_ref(layouts, &ctx->inputs[0]->outcfg.channel_layouts);
     if (ret)
         return ret;
 
@@ -543,7 +670,7 @@ static int query_formats(AVFilterContext *ctx)
     if (ret)
         return ret;
 
-    ret = ff_channel_layouts_ref(layouts, &ctx->outputs[0]->in_channel_layouts);
+    ret = ff_channel_layouts_ref(layouts, &ctx->outputs[0]->incfg.channel_layouts);
     if (ret)
         return ret;
 
@@ -554,10 +681,55 @@ static int query_formats(AVFilterContext *ctx)
     return ff_set_common_samplerates(ctx, formats);
 }
 
+static int getfilter_float(AVFilterContext *ctx, float x, float y, float z,
+                           float *left, float *right,
+                           float *delay_left, float *delay_right)
+{
+    struct SOFAlizerContext *s = ctx->priv;
+    float c[3], delays[2];
+    float *fl, *fr;
+    int nearest;
+    int *neighbors;
+    float *res;
+
+    c[0] = x, c[1] = y, c[2] = z;
+    nearest = mysofa_lookup(s->sofa.lookup, c);
+    if (nearest < 0)
+        return AVERROR(EINVAL);
+
+    if (s->interpolate) {
+        neighbors = mysofa_neighborhood(s->sofa.neighborhood, nearest);
+        res = mysofa_interpolate(s->sofa.hrtf, c,
+                                 nearest, neighbors,
+                                 s->sofa.fir, delays);
+    } else {
+        if (s->sofa.hrtf->DataDelay.elements > s->sofa.hrtf->R) {
+            delays[0] = s->sofa.hrtf->DataDelay.values[nearest * s->sofa.hrtf->R];
+            delays[1] = s->sofa.hrtf->DataDelay.values[nearest * s->sofa.hrtf->R + 1];
+        } else {
+            delays[0] = s->sofa.hrtf->DataDelay.values[0];
+            delays[1] = s->sofa.hrtf->DataDelay.values[1];
+        }
+        res = s->sofa.hrtf->DataIR.values + nearest * s->sofa.hrtf->N * s->sofa.hrtf->R;
+    }
+
+    *delay_left  = delays[0];
+    *delay_right = delays[1];
+
+    fl = res;
+    fr = res + s->sofa.hrtf->N;
+
+    memcpy(left, fl, sizeof(float) * s->sofa.hrtf->N);
+    memcpy(right, fr, sizeof(float) * s->sofa.hrtf->N);
+
+    return 0;
+}
+
 static int load_data(AVFilterContext *ctx, int azim, int elev, float radius, int sample_rate)
 {
     struct SOFAlizerContext *s = ctx->priv;
     int n_samples;
+    int ir_samples;
     int n_conv = s->n_conv; /* no. channels to convolve */
     int n_fft;
     float delay_l; /* broadband delay for each IR */
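
A note on the DataDelay branch in getfilter_float() above: per the SOFA conventions (R receivers, i.e. two ears; M measurements; N samples per IR), a file may carry either a single broadband delay pair shared by all measurements or one pair per measurement; comparing elements against R distinguishes the two layouts. Isolated as a small hypothetical helper:

    static void get_delays(const struct MYSOFA_HRTF *hrtf, int nearest,
                           float *dl, float *dr)
    {
        if (hrtf->DataDelay.elements > hrtf->R) { /* one pair per measurement */
            *dl = hrtf->DataDelay.values[nearest * hrtf->R];
            *dr = hrtf->DataDelay.values[nearest * hrtf->R + 1];
        } else {                                  /* single shared pair */
            *dl = hrtf->DataDelay.values[0];
            *dr = hrtf->DataDelay.values[1];
        }
    }
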
@@ -572,39 +744,46 @@ static int load_data(AVFilterContext *ctx, int azim, int elev, float radius, int sample_rate)
     float *data_ir_r = NULL;
     int offset = 0; /* used for faster pointer arithmetics in for-loop */
     int i, j, azim_orig = azim, elev_orig = elev;
-    int filter_length, ret = 0;
+    int ret = 0;
     int n_current;
     int n_max = 0;
 
-    s->sofa.easy = mysofa_open(s->filename, sample_rate, &filter_length, &ret);
-    if (!s->sofa.easy || ret) { /* if an invalid SOFA file has been selected */
-        av_log(ctx, AV_LOG_ERROR, "Selected SOFA file is invalid. Please select valid SOFA file.\n");
-        return AVERROR_INVALIDDATA;
-    }
+    av_log(ctx, AV_LOG_DEBUG, "IR length: %d.\n", s->sofa.hrtf->N);
+    s->sofa.ir_samples = s->sofa.hrtf->N;
+    s->sofa.n_samples = 1 << (32 - ff_clz(s->sofa.ir_samples));
 
     n_samples = s->sofa.n_samples;
+    ir_samples = s->sofa.ir_samples;
+
+    if (s->type == TIME_DOMAIN) {
+        s->data_ir[0] = av_calloc(n_samples, sizeof(float) * s->n_conv);
+        s->data_ir[1] = av_calloc(n_samples, sizeof(float) * s->n_conv);
+
+        if (!s->data_ir[0] || !s->data_ir[1]) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+    }
 
-    s->data_ir[0] = av_calloc(FFALIGN(n_samples, 32), sizeof(float) * s->n_conv);
-    s->data_ir[1] = av_calloc(FFALIGN(n_samples, 32), sizeof(float) * s->n_conv);
     s->delay[0] = av_calloc(s->n_conv, sizeof(int));
     s->delay[1] = av_calloc(s->n_conv, sizeof(int));
 
-    if (!s->data_ir[0] || !s->data_ir[1] || !s->delay[0] || !s->delay[1]) {
+    if (!s->delay[0] || !s->delay[1]) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }
 
     /* get temporary IR for L and R channel */
-    data_ir_l = av_calloc(n_conv * FFALIGN(n_samples, 32), sizeof(*data_ir_l));
-    data_ir_r = av_calloc(n_conv * FFALIGN(n_samples, 32), sizeof(*data_ir_r));
+    data_ir_l = av_calloc(n_conv * n_samples, sizeof(*data_ir_l));
+    data_ir_r = av_calloc(n_conv * n_samples, sizeof(*data_ir_r));
     if (!data_ir_r || !data_ir_l) {
         ret = AVERROR(ENOMEM);
         goto fail;
     }
 
     if (s->type == TIME_DOMAIN) {
-        s->temp_src[0] = av_calloc(FFALIGN(n_samples, 32), sizeof(float));
-        s->temp_src[1] = av_calloc(FFALIGN(n_samples, 32), sizeof(float));
+        s->temp_src[0] = av_calloc(n_samples, sizeof(float));
+        s->temp_src[1] = av_calloc(n_samples, sizeof(float));
         if (!s->temp_src[0] || !s->temp_src[1]) {
             ret = AVERROR(ENOMEM);
             goto fail;
@@ -638,10 +817,12 @@ static int load_data(AVFilterContext *ctx, int azim, int elev, float radius, int sample_rate)
         mysofa_s2c(coordinates);
 
         /* get id of IR closest to desired position */
-        mysofa_getfilter_float(s->sofa.easy, coordinates[0], coordinates[1], coordinates[2],
-                               data_ir_l + FFALIGN(n_samples, 32) * i,
-                               data_ir_r + FFALIGN(n_samples, 32) * i,
-                               &delay_l, &delay_r);
+        ret = getfilter_float(ctx, coordinates[0], coordinates[1], coordinates[2],
+                              data_ir_l + n_samples * i,
+                              data_ir_r + n_samples * i,
+                              &delay_l, &delay_r);
+        if (ret < 0)
+            goto fail;
         s->delay[0][i] = delay_l * sample_rate;
         s->delay[1][i] = delay_r * sample_rate;
@@ -651,24 +832,24 @@ static int load_data(AVFilterContext *ctx, int azim, int elev, float radius, int sample_rate)
 
     /* get size of ringbuffer (longest IR plus max. delay) */
     /* then choose next power of 2 for performance optimization */
-    n_current = s->sofa.n_samples + s->sofa.max_delay;
+    n_current = n_samples + s->sofa.max_delay;
     /* length of longest IR plus max. delay */
     n_max = FFMAX(n_max, n_current);
 
     /* buffer length is longest IR plus max. delay -> next power of 2
        (32 - count leading zeros gives required exponent) */
     s->buffer_length = 1 << (32 - ff_clz(n_max));
-    s->n_fft = n_fft = 1 << (32 - ff_clz(n_max + sample_rate));
+    s->n_fft = n_fft = 1 << (32 - ff_clz(n_max + s->framesize));
 
     if (s->type == FREQUENCY_DOMAIN) {
         av_fft_end(s->fft[0]);
         av_fft_end(s->fft[1]);
-        s->fft[0] = av_fft_init(log2(s->n_fft), 0);
-        s->fft[1] = av_fft_init(log2(s->n_fft), 0);
+        s->fft[0] = av_fft_init(av_log2(s->n_fft), 0);
+        s->fft[1] = av_fft_init(av_log2(s->n_fft), 0);
         av_fft_end(s->ifft[0]);
         av_fft_end(s->ifft[1]);
-        s->ifft[0] = av_fft_init(log2(s->n_fft), 1);
-        s->ifft[1] = av_fft_init(log2(s->n_fft), 1);
+        s->ifft[0] = av_fft_init(av_log2(s->n_fft), 1);
+        s->ifft[1] = av_fft_init(av_log2(s->n_fft), 1);
 
         if (!s->fft[0] || !s->fft[1] || !s->ifft[0] || !s->ifft[1]) {
             av_log(ctx, AV_LOG_ERROR, "Unable to create FFT contexts of size %d.\n", s->n_fft);
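
Both sizes computed above use the same next-power-of-2 idiom: for a 32-bit x > 0, 1 << (32 - ff_clz(x)) yields the smallest power of two strictly greater than x. A worked example with made-up numbers, plus a portable equivalent of the idiom:

    /* say the longest IR plus max. delay is n_max = 4831 and framesize = 1024:
     *   ff_clz(4831) = 19        ->  1 << (32 - 19) = 8192  (buffer_length)
     *   ff_clz(4831 + 1024) = 19 ->  1 << 13        = 8192  (n_fft)
     */
    static unsigned next_pow2_above(unsigned x)
    {
        unsigned p = 1;
        while (p <= x)
            p <<= 1;
        return p;
    }
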
@@ -680,7 +861,7 @@ static int load_data(AVFilterContext *ctx, int azim, int elev, float radius, int sample_rate)
     if (s->type == TIME_DOMAIN) {
         s->ringbuffer[0] = av_calloc(s->buffer_length, sizeof(float) * nb_input_channels);
         s->ringbuffer[1] = av_calloc(s->buffer_length, sizeof(float) * nb_input_channels);
-    } else {
+    } else if (s->type == FREQUENCY_DOMAIN) {
         /* get temporary HRTF memory for L and R channel */
         data_hrtf_l = av_malloc_array(n_fft, sizeof(*data_hrtf_l) * n_conv);
         data_hrtf_r = av_malloc_array(n_fft, sizeof(*data_hrtf_r) * n_conv);
@@ -693,7 +874,10 @@ static int load_data(AVFilterContext *ctx, int azim, int elev, float radius, int sample_rate)
         s->ringbuffer[1] = av_calloc(s->buffer_length, sizeof(float));
         s->temp_fft[0] = av_malloc_array(s->n_fft, sizeof(FFTComplex));
         s->temp_fft[1] = av_malloc_array(s->n_fft, sizeof(FFTComplex));
-        if (!s->temp_fft[0] || !s->temp_fft[1]) {
+        s->temp_afft[0] = av_malloc_array(s->n_fft, sizeof(FFTComplex));
+        s->temp_afft[1] = av_malloc_array(s->n_fft, sizeof(FFTComplex));
+        if (!s->temp_fft[0] || !s->temp_fft[1] ||
+            !s->temp_afft[0] || !s->temp_afft[1]) {
             ret = AVERROR(ENOMEM);
             goto fail;
         }
@@ -716,28 +900,28 @@ static int load_data(AVFilterContext *ctx, int azim, int elev, float radius, int sample_rate)
     for (i = 0; i < s->n_conv; i++) {
         float *lir, *rir;
 
-        offset = i * FFALIGN(n_samples, 32); /* no. samples already written */
+        offset = i * n_samples; /* no. samples already written */
         lir = data_ir_l + offset;
         rir = data_ir_r + offset;
 
         if (s->type == TIME_DOMAIN) {
-            for (j = 0; j < n_samples; j++) {
+            for (j = 0; j < ir_samples; j++) {
                 /* load reversed IRs of the specified source position
                  * sample-by-sample for left and right ear; and apply gain */
-                s->data_ir[0][offset + j] = lir[n_samples - 1 - j] * gain_lin;
-                s->data_ir[1][offset + j] = rir[n_samples - 1 - j] * gain_lin;
+                s->data_ir[0][offset + j] = lir[ir_samples - 1 - j] * gain_lin;
+                s->data_ir[1][offset + j] = rir[ir_samples - 1 - j] * gain_lin;
             }
-        } else {
+        } else if (s->type == FREQUENCY_DOMAIN) {
             memset(fft_in_l, 0, n_fft * sizeof(*fft_in_l));
             memset(fft_in_r, 0, n_fft * sizeof(*fft_in_r));
 
             offset = i * n_fft; /* no. samples already written */
-            for (j = 0; j < n_samples; j++) {
+            for (j = 0; j < ir_samples; j++) {
                 /* load non-reversed IRs of the specified source position
                  * sample-by-sample and apply gain,
                  * L channel is loaded to real part, R channel to imag part,
-                 * IRs ared shifted by L and R delay */
+                 * IRs are shifted by L and R delay */
                 fft_in_l[s->delay[0][i] + j].re = lir[j] * gain_lin;
                 fft_in_r[s->delay[1][i] + j].re = rir[j] * gain_lin;
             }
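
Storing the time-domain IRs reversed (the TIME_DOMAIN branch above) is what lets sofalizer_convolute() express each output sample as a plain forward dot product over the ring buffer: y[n] = sum_k h[k] * x[n-k] is exactly the dot product of the reversed IR with an oldest-first block of input. In miniature (hypothetical buffers):

    /* one output sample of a naive convolution */
    static float convolve_sample(const float *x_block, /* x[n-K+1] .. x[n], oldest first */
                                 const float *h_rev,   /* h[K-1], h[K-2], ..., h[0] */
                                 int K)
    {
        float acc = 0.0f;
        for (int k = 0; k < K; k++)
            acc += h_rev[k] * x_block[k];  /* equals sum_k h[k] * x[n-k] */
        return acc;
    }
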
@@ -816,14 +1000,11 @@ static int config_input(AVFilterLink *inlink)
     SOFAlizerContext *s = ctx->priv;
     int ret;
 
-    if (s->type == FREQUENCY_DOMAIN) {
-        inlink->partial_buf_size =
-        inlink->min_samples =
-        inlink->max_samples = inlink->sample_rate;
-    }
+    if (s->type == FREQUENCY_DOMAIN)
+        s->nb_samples = s->framesize;
 
-    /* gain -3 dB per channel, -6 dB to get LFE on a similar level */
-    s->gain_lfe = expf((s->gain - 3 * inlink->channels - 6 + s->lfe_gain) / 20 * M_LN10);
+    /* gain -3 dB per channel */
+    s->gain_lfe = expf((s->gain - 3 * inlink->channels + s->lfe_gain) / 20 * M_LN10);
 
     s->n_conv = inlink->channels;
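
The gain_lfe expression above is the standard decibel-to-linear conversion 10^(dB/20), written with expf() since dB/20 * ln(10) = ln(10^(dB/20)). With gain=0, five input channels and lfegain=0, the LFE scale factor works out as (illustrative arithmetic):

    #include <math.h>

    /* gain_db = 0 - 3*5 + 0 = -15 dB; linear = 10^(-15/20) ≈ 0.178 */
    float gain_lfe = expf((0.0f - 3.0f * 5 + 0.0f) / 20.0f * (float)M_LN10);
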
@@ -846,6 +1027,10 @@ static av_cold void uninit(AVFilterContext *ctx)
     av_fft_end(s->ifft[1]);
     av_fft_end(s->fft[0]);
     av_fft_end(s->fft[1]);
+    s->ifft[0] = NULL;
+    s->ifft[1] = NULL;
+    s->fft[0] = NULL;
+    s->fft[1] = NULL;
     av_freep(&s->delay[0]);
     av_freep(&s->delay[1]);
     av_freep(&s->data_ir[0]);
@@ -856,6 +1041,8 @@ static av_cold void uninit(AVFilterContext *ctx)
     av_freep(&s->speaker_elev);
     av_freep(&s->temp_src[0]);
     av_freep(&s->temp_src[1]);
+    av_freep(&s->temp_afft[0]);
+    av_freep(&s->temp_afft[1]);
     av_freep(&s->temp_fft[0]);
     av_freep(&s->temp_fft[1]);
     av_freep(&s->data_hrtf[0]);
@@ -871,12 +1058,18 @@ static const AVOption sofalizer_options[] = {
     { "gain",      "set gain in dB",   OFFSET(gain),      AV_OPT_TYPE_FLOAT, {.dbl=0},      -20,  40, .flags = FLAGS },
     { "rotation",  "set rotation",     OFFSET(rotation),  AV_OPT_TYPE_FLOAT, {.dbl=0},     -360, 360, .flags = FLAGS },
     { "elevation", "set elevation",    OFFSET(elevation), AV_OPT_TYPE_FLOAT, {.dbl=0},      -90,  90, .flags = FLAGS },
-    { "radius",    "set radius",       OFFSET(radius),    AV_OPT_TYPE_FLOAT, {.dbl=1},        0,   3, .flags = FLAGS },
+    { "radius",    "set radius",       OFFSET(radius),    AV_OPT_TYPE_FLOAT, {.dbl=1},        0,   5, .flags = FLAGS },
     { "type",      "set processing",   OFFSET(type),      AV_OPT_TYPE_INT,   {.i64=1},        0,   1, .flags = FLAGS, "type" },
     { "time",      "time domain",      0,                 AV_OPT_TYPE_CONST, {.i64=0},        0,   0, .flags = FLAGS, "type" },
     { "freq",      "frequency domain", 0,                 AV_OPT_TYPE_CONST, {.i64=1},        0,   0, .flags = FLAGS, "type" },
     { "speakers",  "set speaker custom positions", OFFSET(speakers_pos), AV_OPT_TYPE_STRING, {.str=0}, 0, 0, .flags = FLAGS },
-    { "lfegain",   "set lfe gain",     OFFSET(lfe_gain),  AV_OPT_TYPE_FLOAT, {.dbl=0},       -9,   9, .flags = FLAGS },
+    { "lfegain",   "set lfe gain",     OFFSET(lfe_gain),  AV_OPT_TYPE_FLOAT, {.dbl=0},      -20,  40, .flags = FLAGS },
+    { "framesize", "set frame size",   OFFSET(framesize), AV_OPT_TYPE_INT,   {.i64=1024},  1024, 96000, .flags = FLAGS },
+    { "normalize", "normalize IRs",    OFFSET(normalize), AV_OPT_TYPE_BOOL,  {.i64=1},        0,   1, .flags = FLAGS },
+    { "interpolate", "interpolate IRs from neighbors", OFFSET(interpolate), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, .flags = FLAGS },
+    { "minphase",  "minphase IRs",     OFFSET(minphase),  AV_OPT_TYPE_BOOL,  {.i64=0},        0,   1, .flags = FLAGS },
+    { "anglestep", "set neighbor search angle step",  OFFSET(anglestep), AV_OPT_TYPE_FLOAT, {.dbl=.5},  0.01, 10, .flags = FLAGS },
+    { "radstep",   "set neighbor search radius step", OFFSET(radstep),   AV_OPT_TYPE_FLOAT, {.dbl=.01}, 0.01,  1, .flags = FLAGS },
     { NULL }
 };
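
Assuming the filter's existing sofa option for the file path, a frequency-domain run exercising the new switches could look roughly like this on the command line (file name hypothetical; framesize must fall in the 1024..96000 range declared above):

    ffmpeg -i in.wav -af "sofalizer=sofa=hrtf.sofa:type=freq:framesize=4096:interpolate=1:anglestep=0.5" out.wav
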
@@ -887,7 +1080,6 @@ static const AVFilterPad inputs[] = {
         .name         = "default",
         .type         = AVMEDIA_TYPE_AUDIO,
         .config_props = config_input,
-        .filter_frame = filter_frame,
     },
     { NULL }
 };
@@ -906,6 +1098,7 @@ AVFilter ff_af_sofalizer = {
     .priv_size     = sizeof(SOFAlizerContext),
     .priv_class    = &sofalizer_class,
     .init          = init,
+    .activate      = activate,
     .uninit        = uninit,
     .query_formats = query_formats,
     .inputs        = inputs,