]> git.sesse.net Git - ffmpeg/blob - libavcodec/ppc/fft_vsx.c
Merge commit '2a9e1c122eed66be1b26b747342b848300b226c7'
[ffmpeg] / libavcodec / ppc / fft_vsx.c
1 /*
2  * FFT transform, optimized with VSX built-in functions
3  * Copyright (c) 2014 Rong Yan
4  *
5  * This algorithm (though not any of the implementation details) is
6  * based on libdjbfft by D. J. Bernstein.
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25
26 #include "config.h"
27 #include "libavutil/cpu.h"
28 #include "libavutil/ppc/util_altivec.h"
29 #include "libavcodec/fft.h"
30 #include "libavcodec/fft-internal.h"
31 #include "fft_vsx.h"
32
33 #if HAVE_VSX
34
35 static void fft32_vsx_interleave(FFTComplex *z)
36 {
37     fft16_vsx_interleave(z);
38     fft8_vsx_interleave(z+16);
39     fft8_vsx_interleave(z+24);
40     pass_vsx_interleave(z,ff_cos_32,4);
41 }
42
43 static void fft64_vsx_interleave(FFTComplex *z)
44 {
45     fft32_vsx_interleave(z);
46     fft16_vsx_interleave(z+32);
47     fft16_vsx_interleave(z+48);
48     pass_vsx_interleave(z,ff_cos_64, 8);
49 }
50 static void fft128_vsx_interleave(FFTComplex *z)
51 {
52     fft64_vsx_interleave(z);
53     fft32_vsx_interleave(z+64);
54     fft32_vsx_interleave(z+96);
55     pass_vsx_interleave(z,ff_cos_128,16);
56 }
57 static void fft256_vsx_interleave(FFTComplex *z)
58 {
59     fft128_vsx_interleave(z);
60     fft64_vsx_interleave(z+128);
61     fft64_vsx_interleave(z+192);
62     pass_vsx_interleave(z,ff_cos_256,32);
63 }
64 static void fft512_vsx_interleave(FFTComplex *z)
65 {
66     fft256_vsx_interleave(z);
67     fft128_vsx_interleave(z+256);
68     fft128_vsx_interleave(z+384);
69     pass_vsx_interleave(z,ff_cos_512,64);
70 }
71 static void fft1024_vsx_interleave(FFTComplex *z)
72 {
73     fft512_vsx_interleave(z);
74     fft256_vsx_interleave(z+512);
75     fft256_vsx_interleave(z+768);
76     pass_vsx_interleave(z,ff_cos_1024,128);
77
78 }
79 static void fft2048_vsx_interleave(FFTComplex *z)
80 {
81     fft1024_vsx_interleave(z);
82     fft512_vsx_interleave(z+1024);
83     fft512_vsx_interleave(z+1536);
84     pass_vsx_interleave(z,ff_cos_2048,256);
85 }
86 static void fft4096_vsx_interleave(FFTComplex *z)
87 {
88     fft2048_vsx_interleave(z);
89     fft1024_vsx_interleave(z+2048);
90     fft1024_vsx_interleave(z+3072);
91     pass_vsx_interleave(z,ff_cos_4096, 512);
92 }
93 static void fft8192_vsx_interleave(FFTComplex *z)
94 {
95     fft4096_vsx_interleave(z);
96     fft2048_vsx_interleave(z+4096);
97     fft2048_vsx_interleave(z+6144);
98     pass_vsx_interleave(z,ff_cos_8192,1024);
99 }
100 static void fft16384_vsx_interleave(FFTComplex *z)
101 {
102     fft8192_vsx_interleave(z);
103     fft4096_vsx_interleave(z+8192);
104     fft4096_vsx_interleave(z+12288);
105     pass_vsx_interleave(z,ff_cos_16384,2048);
106 }
107 static void fft32768_vsx_interleave(FFTComplex *z)
108 {
109     fft16384_vsx_interleave(z);
110     fft8192_vsx_interleave(z+16384);
111     fft8192_vsx_interleave(z+24576);
112     pass_vsx_interleave(z,ff_cos_32768,4096);
113 }
114 static void fft65536_vsx_interleave(FFTComplex *z)
115 {
116     fft32768_vsx_interleave(z);
117     fft16384_vsx_interleave(z+32768);
118     fft16384_vsx_interleave(z+49152);
119     pass_vsx_interleave(z,ff_cos_65536,8192);
120 }
121
122 static void fft32_vsx(FFTComplex *z)
123 {
124     fft16_vsx(z);
125     fft8_vsx(z+16);
126     fft8_vsx(z+24);
127     pass_vsx(z,ff_cos_32,4);
128 }
129
130 static void fft64_vsx(FFTComplex *z)
131 {
132     fft32_vsx(z);
133     fft16_vsx(z+32);
134     fft16_vsx(z+48);
135     pass_vsx(z,ff_cos_64, 8);
136 }
137 static void fft128_vsx(FFTComplex *z)
138 {
139     fft64_vsx(z);
140     fft32_vsx(z+64);
141     fft32_vsx(z+96);
142     pass_vsx(z,ff_cos_128,16);
143 }
144 static void fft256_vsx(FFTComplex *z)
145 {
146     fft128_vsx(z);
147     fft64_vsx(z+128);
148     fft64_vsx(z+192);
149     pass_vsx(z,ff_cos_256,32);
150 }
151 static void fft512_vsx(FFTComplex *z)
152 {
153     fft256_vsx(z);
154     fft128_vsx(z+256);
155     fft128_vsx(z+384);
156     pass_vsx(z,ff_cos_512,64);
157 }
158 static void fft1024_vsx(FFTComplex *z)
159 {
160     fft512_vsx(z);
161     fft256_vsx(z+512);
162     fft256_vsx(z+768);
163     pass_vsx(z,ff_cos_1024,128);
164
165 }
166 static void fft2048_vsx(FFTComplex *z)
167 {
168     fft1024_vsx(z);
169     fft512_vsx(z+1024);
170     fft512_vsx(z+1536);
171     pass_vsx(z,ff_cos_2048,256);
172 }
173 static void fft4096_vsx(FFTComplex *z)
174 {
175     fft2048_vsx(z);
176     fft1024_vsx(z+2048);
177     fft1024_vsx(z+3072);
178     pass_vsx(z,ff_cos_4096, 512);
179 }
180 static void fft8192_vsx(FFTComplex *z)
181 {
182     fft4096_vsx(z);
183     fft2048_vsx(z+4096);
184     fft2048_vsx(z+6144);
185     pass_vsx(z,ff_cos_8192,1024);
186 }
187 static void fft16384_vsx(FFTComplex *z)
188 {
189     fft8192_vsx(z);
190     fft4096_vsx(z+8192);
191     fft4096_vsx(z+12288);
192     pass_vsx(z,ff_cos_16384,2048);
193 }
194 static void fft32768_vsx(FFTComplex *z)
195 {
196     fft16384_vsx(z);
197     fft8192_vsx(z+16384);
198     fft8192_vsx(z+24576);
199     pass_vsx(z,ff_cos_32768,4096);
200 }
201 static void fft65536_vsx(FFTComplex *z)
202 {
203     fft32768_vsx(z);
204     fft16384_vsx(z+32768);
205     fft16384_vsx(z+49152);
206     pass_vsx(z,ff_cos_65536,8192);
207 }
208
209 static void (* const fft_dispatch_vsx[])(FFTComplex*) = {
210     fft4_vsx, fft8_vsx, fft16_vsx, fft32_vsx, fft64_vsx, fft128_vsx, fft256_vsx, fft512_vsx, fft1024_vsx,
211     fft2048_vsx, fft4096_vsx, fft8192_vsx, fft16384_vsx, fft32768_vsx, fft65536_vsx,
212 };
213 static void (* const fft_dispatch_vsx_interleave[])(FFTComplex*) = {
214     fft4_vsx_interleave, fft8_vsx_interleave, fft16_vsx_interleave, fft32_vsx_interleave, fft64_vsx_interleave,
215     fft128_vsx_interleave, fft256_vsx_interleave, fft512_vsx_interleave, fft1024_vsx_interleave,
216     fft2048_vsx_interleave, fft4096_vsx_interleave, fft8192_vsx_interleave, fft16384_vsx_interleave, fft32768_vsx_interleave, fft65536_vsx_interleave,
217 };
218 void ff_fft_calc_interleave_vsx(FFTContext *s, FFTComplex *z)
219 {
220      fft_dispatch_vsx_interleave[s->nbits-2](z);
221 }
222 void ff_fft_calc_vsx(FFTContext *s, FFTComplex *z)
223 {
224      fft_dispatch_vsx[s->nbits-2](z);
225 }
226 #endif /* HAVE_VSX */