X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Ffft.c;h=7ef5d37c731f3fa5724b9cc0618fa7e5127a560b;hb=99e3913d39787f7a88ca70a9b0504d586a20b3fe;hp=65eb575f36c64b0f443586b29b8b0474ae0561e8;hpb=db40a39aba6a22729279ac8915b52b182473f209;p=ffmpeg

diff --git a/libavcodec/fft.c b/libavcodec/fft.c
index 65eb575f36c..7ef5d37c731 100644
--- a/libavcodec/fft.c
+++ b/libavcodec/fft.c
@@ -2,31 +2,41 @@
  * FFT/IFFT transforms
  * Copyright (c) 2002 Fabrice Bellard.
  *
- * This library is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
  *
- * This library is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file fft.c
+ * FFT/IFFT transforms.
  */
+
 #include "dsputil.h"
 
 /**
  * The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is
- * done 
+ * done.
  */
-int fft_init(FFTContext *s, int nbits, int inverse)
+int ff_fft_init(FFTContext *s, int nbits, int inverse)
 {
     int i, j, m, n;
     float alpha, c1, s1, s2;
-    
+    int shuffle = 0;
+    int av_unused has_vectors;
+
     s->nbits = nbits;
     n = 1 << nbits;
 
@@ -39,7 +49,7 @@ int fft_init(FFTContext *s, int nbits, int inverse)
     s->inverse = inverse;
 
     s2 = inverse ? 1.0 : -1.0;
-        
+
     for(i=0;i<(n/2);i++) {
         alpha = 2 * M_PI * (float)i / (float)n;
         c1 = cos(alpha);
@@ -47,54 +57,63 @@ int fft_init(FFTContext *s, int nbits, int inverse)
         s->exptab[i].re = c1;
         s->exptab[i].im = s1;
     }
-    s->fft_calc = fft_calc_c;
+    s->fft_calc = ff_fft_calc_c;
+    s->imdct_calc = ff_imdct_calc;
     s->exptab1 = NULL;
 
+#ifdef HAVE_MMX
+    has_vectors = mm_support();
+    shuffle = 1;
+    if (has_vectors & MM_3DNOWEXT) {
+        /* Extended 3DNow! (3DNowExt) for K7/K8 */
+        s->imdct_calc = ff_imdct_calc_3dn2;
+        s->fft_calc = ff_fft_calc_3dn2;
+    } else if (has_vectors & MM_3DNOW) {
+        /* 3DNow! for K6-2/3 */
+        s->fft_calc = ff_fft_calc_3dn;
+    } else if (has_vectors & MM_SSE) {
+        /* SSE for P3/P4 */
+        s->imdct_calc = ff_imdct_calc_sse;
+        s->fft_calc = ff_fft_calc_sse;
+    } else {
+        shuffle = 0;
+    }
+#elif defined HAVE_ALTIVEC && !defined ALTIVEC_USE_REFERENCE_C_CODE
+    has_vectors = mm_support();
+    if (has_vectors & MM_ALTIVEC) {
+        s->fft_calc = ff_fft_calc_altivec;
+        shuffle = 1;
+    }
+#endif
+
     /* compute constant table for HAVE_SSE version */
-#if (defined(HAVE_MMX) && defined(HAVE_BUILTIN_VECTOR)) || defined(HAVE_ALTIVEC)
-    {
-        int has_vectors = 0;
+    if (shuffle) {
+        int np, nblocks, np2, l;
+        FFTComplex *q;
 
-#if defined(HAVE_MMX)
-        has_vectors = mm_support() & MM_SSE;
-#endif
-#if defined(HAVE_ALTIVEC) && !defined(ALTIVEC_USE_REFERENCE_C_CODE)
-        has_vectors = mm_support() & MM_ALTIVEC;
-#endif
-        if (has_vectors) {
-            int np, nblocks, np2, l;
-            FFTComplex *q;
-            
-            np = 1 << nbits;
-            nblocks = np >> 3;
-            np2 = np >> 1;
-            s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex));
-            if (!s->exptab1)
-                goto fail;
-            q = s->exptab1;
-            do {
-                for(l = 0; l < np2; l += 2 * nblocks) {
-                    *q++ = s->exptab[l];
-                    *q++ = s->exptab[l + nblocks];
-
-                    q->re = -s->exptab[l].im;
-                    q->im = s->exptab[l].re;
-                    q++;
-                    q->re = -s->exptab[l + nblocks].im;
-                    q->im = s->exptab[l + nblocks].re;
-                    q++;
-                }
-                nblocks = nblocks >> 1;
-            } while (nblocks != 0);
-            av_freep(&s->exptab);
-#if defined(HAVE_MMX)
-            s->fft_calc = fft_calc_sse;
-#else
-            s->fft_calc = fft_calc_altivec;
-#endif
-        }
+        np = 1 << nbits;
+        nblocks = np >> 3;
+        np2 = np >> 1;
+        s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex));
+        if (!s->exptab1)
+            goto fail;
+        q = s->exptab1;
+        do {
+            for(l = 0; l < np2; l += 2 * nblocks) {
+                *q++ = s->exptab[l];
+                *q++ = s->exptab[l + nblocks];
+
+                q->re = -s->exptab[l].im;
+                q->im = s->exptab[l].re;
+                q++;
+                q->re = -s->exptab[l + nblocks].im;
+                q->im = s->exptab[l + nblocks].re;
+                q++;
+            }
+            nblocks = nblocks >> 1;
+        } while (nblocks != 0);
+        av_freep(&s->exptab);
     }
-#endif
 
     /* compute bit reverse table */
 
@@ -136,15 +155,15 @@ int fft_init(FFTContext *s, int nbits, int inverse)
 }
 
 /**
- * Do a complex FFT with the parameters defined in fft_init(). The
+ * Do a complex FFT with the parameters defined in ff_fft_init(). The
  * input data must be permuted before with s->revtab table. No
- * 1.0/sqrt(n) normalization is done.  
+ * 1.0/sqrt(n) normalization is done.
  */
-void fft_calc_c(FFTContext *s, FFTComplex *z)
+void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
 {
     int ln = s->nbits;
-    int	j, np, np2;
-    int	nblocks, nloops;
+    int j, np, np2;
+    int nblocks, nloops;
     register FFTComplex *p, *q;
     FFTComplex *exptab = s->exptab;
     int l;
@@ -157,29 +176,29 @@ void fft_calc_c(FFTContext *s, FFTComplex *z)
     p=&z[0];
     j=(np >> 1);
     do {
-        BF(p[0].re, p[0].im, p[1].re, p[1].im, 
+        BF(p[0].re, p[0].im, p[1].re, p[1].im,
            p[0].re, p[0].im, p[1].re, p[1].im);
         p+=2;
     } while (--j != 0);
 
     /* pass 1 */
 
-    
+
     p=&z[0];
     j=np >> 2;
     if (s->inverse) {
         do {
-            BF(p[0].re, p[0].im, p[2].re, p[2].im, 
+            BF(p[0].re, p[0].im, p[2].re, p[2].im,
                p[0].re, p[0].im, p[2].re, p[2].im);
-            BF(p[1].re, p[1].im, p[3].re, p[3].im, 
+            BF(p[1].re, p[1].im, p[3].re, p[3].im,
                p[1].re, p[1].im, -p[3].im, p[3].re);
             p+=4;
         } while (--j != 0);
     } else {
         do {
-            BF(p[0].re, p[0].im, p[2].re, p[2].im, 
+            BF(p[0].re, p[0].im, p[2].re, p[2].im,
                p[0].re, p[0].im, p[2].re, p[2].im);
-            BF(p[1].re, p[1].im, p[3].re, p[3].im, 
+            BF(p[1].re, p[1].im, p[3].re, p[3].im,
                p[1].re, p[1].im, p[3].im, -p[3].re);
             p+=4;
         } while (--j != 0);
@@ -195,7 +214,7 @@ void fft_calc_c(FFTContext *s, FFTComplex *z)
         for (j = 0; j < nblocks; ++j) {
             BF(p->re, p->im, q->re, q->im,
                p->re, p->im, q->re, q->im);
-            
+
             p++;
             q++;
             for(l = nblocks; l < np2; l += nblocks) {
@@ -215,14 +234,14 @@ void fft_calc_c(FFTContext *s, FFTComplex *z)
 }
 
 /**
- * Do the permutation needed BEFORE calling fft_calc()
+ * Do the permutation needed BEFORE calling ff_fft_calc()
  */
-void fft_permute(FFTContext *s, FFTComplex *z)
+void ff_fft_permute(FFTContext *s, FFTComplex *z)
 {
     int j, k, np;
     FFTComplex tmp;
     const uint16_t *revtab = s->revtab;
-    
+
     /* reverse */
     np = 1 << s->nbits;
     for(j=0;j<np;j++) {
@@ -235,7 +254,7 @@ void fft_permute(FFTContext *s, FFTComplex *z)
     }
 }
 
-void fft_end(FFTContext *s)
+void ff_fft_end(FFTContext *s)
 {
     av_freep(&s->revtab);
     av_freep(&s->exptab);