2 Copyright (c) 2003-2004, Mark Borgerding
6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
15 #include "../_kiss_fft_guts.h"
19 Some definitions that allow real or complex filtering
22 #define MIN_FFT_LEN 2048
23 #include "kiss_fftr.h"
24 typedef kiss_fft_scalar kffsamp_t;
25 typedef kiss_fftr_cfg kfcfg_t;
26 #define FFT_ALLOC kiss_fftr_alloc
27 #define FFTFWD kiss_fftr
28 #define FFTINV kiss_fftri
30 #define MIN_FFT_LEN 1024
31 typedef kiss_fft_cpx kffsamp_t;
32 typedef kiss_fft_cfg kfcfg_t;
33 #define FFT_ALLOC kiss_fft_alloc
34 #define FFTFWD kiss_fft
35 #define FFTINV kiss_fft
38 typedef struct kiss_fastfir_state *kiss_fastfir_cfg;
42 kiss_fastfir_cfg kiss_fastfir_alloc(const kffsamp_t * imp_resp,size_t n_imp_resp,
43 size_t * nfft,void * mem,size_t*lenmem);
45 /* see do_file_filter for usage */
46 size_t kiss_fastfir( kiss_fastfir_cfg cfg, kffsamp_t * inbuf, kffsamp_t * outbuf, size_t n, size_t *offset);
53 struct kiss_fastfir_state{
58 kiss_fft_cpx * fir_freq_resp;
59 kiss_fft_cpx * freqbuf;
65 kiss_fastfir_cfg kiss_fastfir_alloc(
66 const kffsamp_t * imp_resp,size_t n_imp_resp,
67 size_t *pnfft, /* if <= 0, an appropriate size will be chosen */
68 void * mem,size_t*lenmem)
70 kiss_fastfir_cfg st = NULL;
71 size_t len_fftcfg,len_ifftcfg;
72 size_t memneeded = sizeof(struct kiss_fastfir_state);
82 /* determine fft size as next power of two at least 2x
83 the impulse response length*/
90 if ( nfft < MIN_FFT_LEN )
98 n_freq_bins = nfft/2 + 1;
103 FFT_ALLOC (nfft, 0, NULL, &len_fftcfg);
104 memneeded += len_fftcfg;
106 FFT_ALLOC (nfft, 1, NULL, &len_ifftcfg);
107 memneeded += len_ifftcfg;
109 memneeded += sizeof(kffsamp_t) * nfft;
111 memneeded += sizeof(kiss_fft_cpx) * n_freq_bins;
113 memneeded += sizeof(kiss_fft_cpx) * n_freq_bins;
115 if (lenmem == NULL) {
116 st = (kiss_fastfir_cfg) malloc (memneeded);
118 if (*lenmem >= memneeded)
119 st = (kiss_fastfir_cfg) mem;
126 st->ngood = nfft - n_imp_resp + 1;
127 st->n_freq_bins = n_freq_bins;
130 st->fftcfg = (kfcfg_t)ptr;
133 st->ifftcfg = (kfcfg_t)ptr;
136 st->tmpbuf = (kffsamp_t*)ptr;
137 ptr += sizeof(kffsamp_t) * nfft;
139 st->freqbuf = (kiss_fft_cpx*)ptr;
140 ptr += sizeof(kiss_fft_cpx) * n_freq_bins;
142 st->fir_freq_resp = (kiss_fft_cpx*)ptr;
143 ptr += sizeof(kiss_fft_cpx) * n_freq_bins;
145 FFT_ALLOC (nfft,0,st->fftcfg , &len_fftcfg);
146 FFT_ALLOC (nfft,1,st->ifftcfg , &len_ifftcfg);
148 memset(st->tmpbuf,0,sizeof(kffsamp_t)*nfft);
149 /*zero pad in the middle to left-rotate the impulse response
150 This puts the scrap samples at the end of the inverse fft'd buffer */
151 st->tmpbuf[0] = imp_resp[ n_imp_resp - 1 ];
152 for (i=0;i<n_imp_resp - 1; ++i) {
153 st->tmpbuf[ nfft - n_imp_resp + 1 + i ] = imp_resp[ i ];
156 FFTFWD(st->fftcfg,st->tmpbuf,st->fir_freq_resp);
158 /* TODO: this won't work for fixed point */
159 scale = 1.0 / st->nfft;
161 for ( i=0; i < st->n_freq_bins; ++i ) {
163 st->fir_freq_resp[i].r *= _mm_set1_ps(scale);
164 st->fir_freq_resp[i].i *= _mm_set1_ps(scale);
166 st->fir_freq_resp[i].r *= scale;
167 st->fir_freq_resp[i].i *= scale;
173 static void fastconv1buf(const kiss_fastfir_cfg st,const kffsamp_t * in,kffsamp_t * out)
176 /* multiply the frequency response of the input signal by
177 that of the fir filter*/
178 FFTFWD( st->fftcfg, in , st->freqbuf );
179 for ( i=0; i<st->n_freq_bins; ++i ) {
180 kiss_fft_cpx tmpsamp;
181 C_MUL(tmpsamp,st->freqbuf[i],st->fir_freq_resp[i]);
182 st->freqbuf[i] = tmpsamp;
185 /* perform the inverse fft*/
186 FFTINV(st->ifftcfg,st->freqbuf,out);
189 /* n : the size of inbuf and outbuf in samples
190 return value: the number of samples completely processed
191 n-retval samples should be copied to the front of the next input buffer */
192 static size_t kff_nocopy(
194 const kffsamp_t * inbuf,
199 while (n >= st->nfft ) {
200 fastconv1buf(st,inbuf,outbuf);
209 size_t kff_flush(kiss_fastfir_cfg st,const kffsamp_t * inbuf,kffsamp_t * outbuf,size_t n)
213 ntmp = kff_nocopy(st,inbuf,outbuf,n);
219 memset(st->tmpbuf,0,sizeof(kffsamp_t)*st->nfft );
220 memcpy(st->tmpbuf,inbuf,sizeof(kffsamp_t)*n );
222 fastconv1buf(st,st->tmpbuf,st->tmpbuf);
224 memcpy(outbuf,st->tmpbuf,sizeof(kffsamp_t)*( st->ngood - zpad ));
225 return ntmp + st->ngood - zpad;
229 kiss_fastfir_cfg vst,
235 size_t ntot = n_new + *offset;
237 return kff_flush(vst,inbuf,outbuf,ntot);
239 size_t nwritten = kff_nocopy(vst,inbuf,outbuf,ntot);
240 *offset = ntot - nwritten;
241 /*save the unused or underused samples at the front of the input buffer */
242 memcpy( inbuf , inbuf+nwritten , *offset * sizeof(kffsamp_t) );
247 #ifdef FAST_FILT_UTIL
249 #include <sys/types.h>
250 #include <sys/mman.h>
254 void direct_file_filter(
257 const kffsamp_t * imp_resp,
260 size_t nlag = n_imp_resp - 1;
262 const kffsamp_t *tmph;
263 kffsamp_t *buf, *circbuf;
267 size_t oldestlag = 0;
274 buf = (kffsamp_t *) malloc ( sizeof (kffsamp_t) * nbuf);
275 circbuf = (kffsamp_t *) malloc (sizeof (kffsamp_t) * nlag);
276 if (!circbuf || !buf) {
277 perror("circbuf allocation");
281 if ( fread (circbuf, sizeof (kffsamp_t), nlag, fin) != nlag ) {
282 perror ("insufficient data to overcome transient");
287 nread = fread (buf, sizeof (kffsamp_t), nbuf, fin);
291 for (k = 0; k < nread; ++k) {
292 tmph = imp_resp+nlag;
295 outval = _mm_set1_ps(0);
299 for (tap = oldestlag; tap < nlag; ++tap)
300 outval += circbuf[tap] * *tmph--;
301 for (tap = 0; tap < oldestlag; ++tap)
302 outval += circbuf[tap] * *tmph--;
303 outval += buf[k] * *tmph;
306 outval.r = outval.i = _mm_set1_ps(0);
308 outval.r = outval.i = 0;
310 for (tap = oldestlag; tap < nlag; ++tap){
311 C_MUL(tmp,circbuf[tap],*tmph);
316 for (tap = 0; tap < oldestlag; ++tap) {
317 C_MUL(tmp,circbuf[tap],*tmph);
321 C_MUL(tmp,buf[k],*tmph);
325 circbuf[oldestlag++] = buf[k];
328 if (oldestlag == nlag)
332 if (fwrite (buf, sizeof (buf[0]), nread, fout) != nread) {
333 perror ("short write");
345 const kffsamp_t * imp_resp,
352 kiss_fastfir_cfg cfg;
353 kffsamp_t *inbuf,*outbuf;
357 fdout = fileno(fout);
359 cfg=kiss_fastfir_alloc(imp_resp,n_imp_resp,&nfft,0,0);
361 /* use length to minimize buffer shift*/
362 n_samps_buf = 8*4096/sizeof(kffsamp_t);
363 n_samps_buf = nfft + 4*(nfft-n_imp_resp+1);
365 if (verbose) fprintf(stderr,"bufsize=%d\n",sizeof(kffsamp_t)*n_samps_buf );
368 /*allocate space and initialize pointers */
369 inbuf = (kffsamp_t*)malloc(sizeof(kffsamp_t)*n_samps_buf);
370 outbuf = (kffsamp_t*)malloc(sizeof(kffsamp_t)*n_samps_buf);
374 /* start reading at inbuf[idx_inbuf] */
375 nread = fread( inbuf + idx_inbuf, sizeof(kffsamp_t), n_samps_buf - idx_inbuf,fin );
377 /* If nread==0, then this is a flush.
378 The total number of samples in input is idx_inbuf + nread . */
379 nwrite = kiss_fastfir(cfg, inbuf, outbuf,nread,&idx_inbuf) * sizeof(kffsamp_t);
380 /* kiss_fastfir moved any unused samples to the front of inbuf and updated idx_inbuf */
382 if ( write(fdout, outbuf, nwrite) != nwrite ) {
383 perror("short write");
392 int main(int argc,char**argv)
401 int c=getopt(argc,argv,"n:h:i:o:vd");
411 fin = fopen(optarg,"rb");
418 fout = fopen(optarg,"w+b");
425 filtfile = fopen(optarg,"rb");
426 if (filtfile==NULL) {
435 fprintf(stderr,"usage options:\n"
436 "\t-n nfft: fft size to use\n"
437 "\t-d : use direct FIR filtering, not fast convolution\n"
438 "\t-i filename: input file\n"
439 "\t-o filename: output(filtered) file\n"
440 "\t-n nfft: fft size to use\n"
441 "\t-h filename: impulse response\n");
443 default:fprintf(stderr,"bad %c\n",c);break;
446 if (filtfile==NULL) {
447 fprintf(stderr,"You must supply the FIR coeffs via -h\n");
450 fseek(filtfile,0,SEEK_END);
451 nh = ftell(filtfile) / sizeof(kffsamp_t);
452 if (verbose) fprintf(stderr,"%d samples in FIR filter\n",nh);
453 h = (kffsamp_t*)malloc(sizeof(kffsamp_t)*nh);
454 fseek(filtfile,0,SEEK_SET);
455 fread(h,sizeof(kffsamp_t),nh,filtfile);
459 direct_file_filter( fin, fout, h,nh);
461 do_file_filter( fin, fout, h,nh,nfft);
463 if (fout!=stdout) fclose(fout);
464 if (fin!=stdin) fclose(fin);