/*
 * FFT transform, optimized with VSX built-in functions
 * Copyright (c) 2014 Rong Yan
 *
 * This algorithm (though not any of the implementation details) is
 * based on libdjbfft by D. J. Bernstein.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
27 #include "libavutil/cpu.h"
28 #include "libavutil/ppc/util_altivec.h"
29 #include "libavcodec/fft.h"
30 #include "libavcodec/fft-internal.h"
35 static void fft32_vsx_interleave(FFTComplex *z)
37 fft16_vsx_interleave(z);
38 fft8_vsx_interleave(z+16);
39 fft8_vsx_interleave(z+24);
40 pass_vsx_interleave(z,ff_cos_32,4);
43 static void fft64_vsx_interleave(FFTComplex *z)
45 fft32_vsx_interleave(z);
46 fft16_vsx_interleave(z+32);
47 fft16_vsx_interleave(z+48);
48 pass_vsx_interleave(z,ff_cos_64, 8);
50 static void fft128_vsx_interleave(FFTComplex *z)
52 fft64_vsx_interleave(z);
53 fft32_vsx_interleave(z+64);
54 fft32_vsx_interleave(z+96);
55 pass_vsx_interleave(z,ff_cos_128,16);
57 static void fft256_vsx_interleave(FFTComplex *z)
59 fft128_vsx_interleave(z);
60 fft64_vsx_interleave(z+128);
61 fft64_vsx_interleave(z+192);
62 pass_vsx_interleave(z,ff_cos_256,32);
64 static void fft512_vsx_interleave(FFTComplex *z)
66 fft256_vsx_interleave(z);
67 fft128_vsx_interleave(z+256);
68 fft128_vsx_interleave(z+384);
69 pass_vsx_interleave(z,ff_cos_512,64);
71 static void fft1024_vsx_interleave(FFTComplex *z)
73 fft512_vsx_interleave(z);
74 fft256_vsx_interleave(z+512);
75 fft256_vsx_interleave(z+768);
76 pass_vsx_interleave(z,ff_cos_1024,128);
79 static void fft2048_vsx_interleave(FFTComplex *z)
81 fft1024_vsx_interleave(z);
82 fft512_vsx_interleave(z+1024);
83 fft512_vsx_interleave(z+1536);
84 pass_vsx_interleave(z,ff_cos_2048,256);
86 static void fft4096_vsx_interleave(FFTComplex *z)
88 fft2048_vsx_interleave(z);
89 fft1024_vsx_interleave(z+2048);
90 fft1024_vsx_interleave(z+3072);
91 pass_vsx_interleave(z,ff_cos_4096, 512);
93 static void fft8192_vsx_interleave(FFTComplex *z)
95 fft4096_vsx_interleave(z);
96 fft2048_vsx_interleave(z+4096);
97 fft2048_vsx_interleave(z+6144);
98 pass_vsx_interleave(z,ff_cos_8192,1024);
100 static void fft16384_vsx_interleave(FFTComplex *z)
102 fft8192_vsx_interleave(z);
103 fft4096_vsx_interleave(z+8192);
104 fft4096_vsx_interleave(z+12288);
105 pass_vsx_interleave(z,ff_cos_16384,2048);
107 static void fft32768_vsx_interleave(FFTComplex *z)
109 fft16384_vsx_interleave(z);
110 fft8192_vsx_interleave(z+16384);
111 fft8192_vsx_interleave(z+24576);
112 pass_vsx_interleave(z,ff_cos_32768,4096);
114 static void fft65536_vsx_interleave(FFTComplex *z)
116 fft32768_vsx_interleave(z);
117 fft16384_vsx_interleave(z+32768);
118 fft16384_vsx_interleave(z+49152);
119 pass_vsx_interleave(z,ff_cos_65536,8192);
122 static void fft32_vsx(FFTComplex *z)
127 pass_vsx(z,ff_cos_32,4);
130 static void fft64_vsx(FFTComplex *z)
135 pass_vsx(z,ff_cos_64, 8);
137 static void fft128_vsx(FFTComplex *z)
142 pass_vsx(z,ff_cos_128,16);
144 static void fft256_vsx(FFTComplex *z)
149 pass_vsx(z,ff_cos_256,32);
151 static void fft512_vsx(FFTComplex *z)
156 pass_vsx(z,ff_cos_512,64);
158 static void fft1024_vsx(FFTComplex *z)
163 pass_vsx(z,ff_cos_1024,128);
166 static void fft2048_vsx(FFTComplex *z)
171 pass_vsx(z,ff_cos_2048,256);
173 static void fft4096_vsx(FFTComplex *z)
178 pass_vsx(z,ff_cos_4096, 512);
180 static void fft8192_vsx(FFTComplex *z)
185 pass_vsx(z,ff_cos_8192,1024);
187 static void fft16384_vsx(FFTComplex *z)
191 fft4096_vsx(z+12288);
192 pass_vsx(z,ff_cos_16384,2048);
194 static void fft32768_vsx(FFTComplex *z)
197 fft8192_vsx(z+16384);
198 fft8192_vsx(z+24576);
199 pass_vsx(z,ff_cos_32768,4096);
201 static void fft65536_vsx(FFTComplex *z)
204 fft16384_vsx(z+32768);
205 fft16384_vsx(z+49152);
206 pass_vsx(z,ff_cos_65536,8192);
209 static void (* const fft_dispatch_vsx[])(FFTComplex*) = {
210 fft4_vsx, fft8_vsx, fft16_vsx, fft32_vsx, fft64_vsx, fft128_vsx, fft256_vsx, fft512_vsx, fft1024_vsx,
211 fft2048_vsx, fft4096_vsx, fft8192_vsx, fft16384_vsx, fft32768_vsx, fft65536_vsx,
213 static void (* const fft_dispatch_vsx_interleave[])(FFTComplex*) = {
214 fft4_vsx_interleave, fft8_vsx_interleave, fft16_vsx_interleave, fft32_vsx_interleave, fft64_vsx_interleave,
215 fft128_vsx_interleave, fft256_vsx_interleave, fft512_vsx_interleave, fft1024_vsx_interleave,
216 fft2048_vsx_interleave, fft4096_vsx_interleave, fft8192_vsx_interleave, fft16384_vsx_interleave, fft32768_vsx_interleave, fft65536_vsx_interleave,
218 void ff_fft_calc_interleave_vsx(FFTContext *s, FFTComplex *z)
220 fft_dispatch_vsx_interleave[s->nbits-2](z);
222 void ff_fft_calc_vsx(FFTContext *s, FFTComplex *z)
224 fft_dispatch_vsx[s->nbits-2](z);
226 #endif /* HAVE_VSX */