git.sesse.net Git - ffmpeg/blob - libavcodec/fft.c

   1 /*
   2  * FFT/IFFT transforms
   3  * Copyright (c) 2002 Fabrice Bellard.
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18  */
  19 #include "dsputil.h"
  20
  21 /**
  22  * The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is
  23  * done
  24  */
  25 int fft_init(FFTContext *s, int nbits, int inverse)
  26 {
  27     int i, j, m, n;
  28     float alpha, c1, s1, s2;
  29
  30     s->nbits = nbits;
  31     n = 1 << nbits;
  32
  33     s->exptab = av_malloc((n / 2) * sizeof(FFTComplex));
  34     if (!s->exptab)
  35         goto fail;
  36     s->revtab = av_malloc(n * sizeof(uint16_t));
  37     if (!s->revtab)
  38         goto fail;
  39     s->inverse = inverse;
  40
  41     s2 = inverse ? 1.0 : -1.0;
  42
  43     for(i=0;i<(n/2);i++) {
  44         alpha = 2 * M_PI * (float)i / (float)n;
  45         c1 = cos(alpha);
  46         s1 = sin(alpha) * s2;
  47         s->exptab[i].re = c1;
  48         s->exptab[i].im = s1;
  49     }
  50     s->fft_calc = fft_calc_c;
  51     s->exptab1 = NULL;
  52
  53     /* compute constant table for HAVE_SSE version */
  54 #if (defined(HAVE_MMX) && defined(HAVE_BUILTIN_VECTOR)) || defined(HAVE_ALTIVEC)
  55     {
  56         int has_vectors;
  57
  58 #if defined(HAVE_MMX)
  59         has_vectors = mm_support() & MM_SSE;
  60 #endif
  61 #if defined(HAVE_ALTIVEC)
  62         has_vectors = mm_support() & MM_ALTIVEC;
  63 #endif
  64         if (has_vectors) {
  65             int np, nblocks, np2, l;
  66             FFTComplex *q;
  67
  68             np = 1 << nbits;
  69             nblocks = np >> 3;
  70             np2 = np >> 1;
  71             s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex));
  72             if (!s->exptab1)
  73                 goto fail;
  74             q = s->exptab1;
  75             do {
  76                 for(l = 0; l < np2; l += 2 * nblocks) {
  77                     *q++ = s->exptab[l];
  78                     *q++ = s->exptab[l + nblocks];
  79
  80                     q->re = -s->exptab[l].im;
  81                     q->im = s->exptab[l].re;
  82                     q++;
  83                     q->re = -s->exptab[l + nblocks].im;
  84                     q->im = s->exptab[l + nblocks].re;
  85                     q++;
  86                 }
  87                 nblocks = nblocks >> 1;
  88             } while (nblocks != 0);
  89             av_freep(&s->exptab);
  90 #if defined(HAVE_MMX)
  91             s->fft_calc = fft_calc_sse;
  92 #else
  93             s->fft_calc = fft_calc_altivec;
  94 #endif
  95         }
  96     }
  97 #endif
  98
  99     /* compute bit reverse table */
 100
 101     for(i=0;i<n;i++) {
 102         m=0;
 103         for(j=0;j<nbits;j++) {
 104             m |= ((i >> j) & 1) << (nbits-j-1);
 105         }
 106         s->revtab[i]=m;
 107     }
 108     return 0;
 109  fail:
 110     av_freep(&s->revtab);
 111     av_freep(&s->exptab);
 112     av_freep(&s->exptab1);
 113     return -1;
 114 }
 115
 116 /* butter fly op */
 117 #define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
 118 {\
 119   FFTSample ax, ay, bx, by;\
 120   bx=pre1;\
 121   by=pim1;\
 122   ax=qre1;\
 123   ay=qim1;\
 124   pre = (bx + ax);\
 125   pim = (by + ay);\
 126   qre = (bx - ax);\
 127   qim = (by - ay);\
 128 }
 129
 130 #define MUL16(a,b) ((a) * (b))
 131
 132 #define CMUL(pre, pim, are, aim, bre, bim) \
 133 {\
 134    pre = (MUL16(are, bre) - MUL16(aim, bim));\
 135    pim = (MUL16(are, bim) + MUL16(bre, aim));\
 136 }
 137
 138 /**
 139  * Do a complex FFT with the parameters defined in fft_init(). The
 140  * input data must be permuted before with s->revtab table. No
 141  * 1.0/sqrt(n) normalization is done.
 142  */
 143 void fft_calc_c(FFTContext *s, FFTComplex *z)
 144 {
 145     int ln = s->nbits;
 146     int j, np, np2;
 147     int nblocks, nloops;
 148     register FFTComplex *p, *q;
 149     FFTComplex *exptab = s->exptab;
 150     int l;
 151     FFTSample tmp_re, tmp_im;
 152
 153     np = 1 << ln;
 154
 155     /* pass 0 */
 156
 157     p=&z[0];
 158     j=(np >> 1);
 159     do {
 160         BF(p[0].re, p[0].im, p[1].re, p[1].im,
 161            p[0].re, p[0].im, p[1].re, p[1].im);
 162         p+=2;
 163     } while (--j != 0);
 164
 165     /* pass 1 */
 166
 167
 168     p=&z[0];
 169     j=np >> 2;
 170     if (s->inverse) {
 171         do {
 172             BF(p[0].re, p[0].im, p[2].re, p[2].im,
 173                p[0].re, p[0].im, p[2].re, p[2].im);
 174             BF(p[1].re, p[1].im, p[3].re, p[3].im,
 175                p[1].re, p[1].im, -p[3].im, p[3].re);
 176             p+=4;
 177         } while (--j != 0);
 178     } else {
 179         do {
 180             BF(p[0].re, p[0].im, p[2].re, p[2].im,
 181                p[0].re, p[0].im, p[2].re, p[2].im);
 182             BF(p[1].re, p[1].im, p[3].re, p[3].im,
 183                p[1].re, p[1].im, p[3].im, -p[3].re);
 184             p+=4;
 185         } while (--j != 0);
 186     }
 187     /* pass 2 .. ln-1 */
 188
 189     nblocks = np >> 3;
 190     nloops = 1 << 2;
 191     np2 = np >> 1;
 192     do {
 193         p = z;
 194         q = z + nloops;
 195         for (j = 0; j < nblocks; ++j) {
 196             BF(p->re, p->im, q->re, q->im,
 197                p->re, p->im, q->re, q->im);
 198
 199             p++;
 200             q++;
 201             for(l = nblocks; l < np2; l += nblocks) {
 202                 CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
 203                 BF(p->re, p->im, q->re, q->im,
 204                    p->re, p->im, tmp_re, tmp_im);
 205                 p++;
 206                 q++;
 207             }
 208
 209             p += nloops;
 210             q += nloops;
 211         }
 212         nblocks = nblocks >> 1;
 213         nloops = nloops << 1;
 214     } while (nblocks != 0);
 215 }
 216
 217 /**
 218  * Do the permutation needed BEFORE calling fft_calc()
 219  */
 220 void fft_permute(FFTContext *s, FFTComplex *z)
 221 {
 222     int j, k, np;
 223     FFTComplex tmp;
 224     const uint16_t *revtab = s->revtab;
 225
 226     /* reverse */
 227     np = 1 << s->nbits;
 228     for(j=0;j<np;j++) {
 229         k = revtab[j];
 230         if (k < j) {
 231             tmp = z[k];
 232             z[k] = z[j];
 233             z[j] = tmp;
 234         }
 235     }
 236 }
 237
 238 void fft_end(FFTContext *s)
 239 {
 240     av_freep(&s->revtab);
 241     av_freep(&s->exptab);
 242     av_freep(&s->exptab1);
 243 }
 244