]> git.sesse.net Git - ffmpeg/blob - libavcodec/ppc/fft_vsx.c
avcodec/vc1: Simplify code setting and using extend_x/y
[ffmpeg] / libavcodec / ppc / fft_vsx.c
1 /*
2  * FFT  transform, optimized with VSX built-in functions
3  * Copyright (c) 2014 Rong Yan
4  *
5  * This algorithm (though not any of the implementation details) is
6  * based on libdjbfft by D. J. Bernstein.
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25
26 #include "config.h"
27 #include "libavutil/cpu.h"
28 #include "libavutil/ppc/types_altivec.h"
29 #include "libavutil/ppc/util_altivec.h"
30 #include "libavcodec/fft.h"
31 #include "libavcodec/fft-internal.h"
32 #include "fft_vsx.h"
33
34 #if HAVE_VSX
35
36 static void fft32_vsx_interleave(FFTComplex *z)
37 {
38     fft16_vsx_interleave(z);
39     fft8_vsx_interleave(z+16);
40     fft8_vsx_interleave(z+24);
41     pass_vsx_interleave(z,ff_cos_32,4);
42 }
43
44 static void fft64_vsx_interleave(FFTComplex *z)
45 {
46     fft32_vsx_interleave(z);
47     fft16_vsx_interleave(z+32);
48     fft16_vsx_interleave(z+48);
49     pass_vsx_interleave(z,ff_cos_64, 8);
50 }
51 static void fft128_vsx_interleave(FFTComplex *z)
52 {
53     fft64_vsx_interleave(z);
54     fft32_vsx_interleave(z+64);
55     fft32_vsx_interleave(z+96);
56     pass_vsx_interleave(z,ff_cos_128,16);
57 }
58 static void fft256_vsx_interleave(FFTComplex *z)
59 {
60     fft128_vsx_interleave(z);
61     fft64_vsx_interleave(z+128);
62     fft64_vsx_interleave(z+192);
63     pass_vsx_interleave(z,ff_cos_256,32);
64 }
65 static void fft512_vsx_interleave(FFTComplex *z)
66 {
67     fft256_vsx_interleave(z);
68     fft128_vsx_interleave(z+256);
69     fft128_vsx_interleave(z+384);
70     pass_vsx_interleave(z,ff_cos_512,64);
71 }
72 static void fft1024_vsx_interleave(FFTComplex *z)
73 {
74     fft512_vsx_interleave(z);
75     fft256_vsx_interleave(z+512);
76     fft256_vsx_interleave(z+768);
77     pass_vsx_interleave(z,ff_cos_1024,128);
78
79 }
80 static void fft2048_vsx_interleave(FFTComplex *z)
81 {
82     fft1024_vsx_interleave(z);
83     fft512_vsx_interleave(z+1024);
84     fft512_vsx_interleave(z+1536);
85     pass_vsx_interleave(z,ff_cos_2048,256);
86 }
87 static void fft4096_vsx_interleave(FFTComplex *z)
88 {
89     fft2048_vsx_interleave(z);
90     fft1024_vsx_interleave(z+2048);
91     fft1024_vsx_interleave(z+3072);
92     pass_vsx_interleave(z,ff_cos_4096, 512);
93 }
94 static void fft8192_vsx_interleave(FFTComplex *z)
95 {
96     fft4096_vsx_interleave(z);
97     fft2048_vsx_interleave(z+4096);
98     fft2048_vsx_interleave(z+6144);
99     pass_vsx_interleave(z,ff_cos_8192,1024);
100 }
101 static void fft16384_vsx_interleave(FFTComplex *z)
102 {
103     fft8192_vsx_interleave(z);
104     fft4096_vsx_interleave(z+8192);
105     fft4096_vsx_interleave(z+12288);
106     pass_vsx_interleave(z,ff_cos_16384,2048);
107 }
108 static void fft32768_vsx_interleave(FFTComplex *z)
109 {
110     fft16384_vsx_interleave(z);
111     fft8192_vsx_interleave(z+16384);
112     fft8192_vsx_interleave(z+24576);
113     pass_vsx_interleave(z,ff_cos_32768,4096);
114 }
115 static void fft65536_vsx_interleave(FFTComplex *z)
116 {
117     fft32768_vsx_interleave(z);
118     fft16384_vsx_interleave(z+32768);
119     fft16384_vsx_interleave(z+49152);
120     pass_vsx_interleave(z,ff_cos_65536,8192);
121 }
122
123 static void fft32_vsx(FFTComplex *z)
124 {
125     fft16_vsx(z);
126     fft8_vsx(z+16);
127     fft8_vsx(z+24);
128     pass_vsx(z,ff_cos_32,4);
129 }
130
131 static void fft64_vsx(FFTComplex *z)
132 {
133     fft32_vsx(z);
134     fft16_vsx(z+32);
135     fft16_vsx(z+48);
136     pass_vsx(z,ff_cos_64, 8);
137 }
138 static void fft128_vsx(FFTComplex *z)
139 {
140     fft64_vsx(z);
141     fft32_vsx(z+64);
142     fft32_vsx(z+96);
143     pass_vsx(z,ff_cos_128,16);
144 }
145 static void fft256_vsx(FFTComplex *z)
146 {
147     fft128_vsx(z);
148     fft64_vsx(z+128);
149     fft64_vsx(z+192);
150     pass_vsx(z,ff_cos_256,32);
151 }
152 static void fft512_vsx(FFTComplex *z)
153 {
154     fft256_vsx(z);
155     fft128_vsx(z+256);
156     fft128_vsx(z+384);
157     pass_vsx(z,ff_cos_512,64);
158 }
159 static void fft1024_vsx(FFTComplex *z)
160 {
161     fft512_vsx(z);
162     fft256_vsx(z+512);
163     fft256_vsx(z+768);
164     pass_vsx(z,ff_cos_1024,128);
165
166 }
167 static void fft2048_vsx(FFTComplex *z)
168 {
169     fft1024_vsx(z);
170     fft512_vsx(z+1024);
171     fft512_vsx(z+1536);
172     pass_vsx(z,ff_cos_2048,256);
173 }
174 static void fft4096_vsx(FFTComplex *z)
175 {
176     fft2048_vsx(z);
177     fft1024_vsx(z+2048);
178     fft1024_vsx(z+3072);
179     pass_vsx(z,ff_cos_4096, 512);
180 }
181 static void fft8192_vsx(FFTComplex *z)
182 {
183     fft4096_vsx(z);
184     fft2048_vsx(z+4096);
185     fft2048_vsx(z+6144);
186     pass_vsx(z,ff_cos_8192,1024);
187 }
188 static void fft16384_vsx(FFTComplex *z)
189 {
190     fft8192_vsx(z);
191     fft4096_vsx(z+8192);
192     fft4096_vsx(z+12288);
193     pass_vsx(z,ff_cos_16384,2048);
194 }
195 static void fft32768_vsx(FFTComplex *z)
196 {
197     fft16384_vsx(z);
198     fft8192_vsx(z+16384);
199     fft8192_vsx(z+24576);
200     pass_vsx(z,ff_cos_32768,4096);
201 }
202 static void fft65536_vsx(FFTComplex *z)
203 {
204     fft32768_vsx(z);
205     fft16384_vsx(z+32768);
206     fft16384_vsx(z+49152);
207     pass_vsx(z,ff_cos_65536,8192);
208 }
209
210 static void (* const fft_dispatch_vsx[])(FFTComplex*) = {
211     fft4_vsx, fft8_vsx, fft16_vsx, fft32_vsx, fft64_vsx, fft128_vsx, fft256_vsx, fft512_vsx, fft1024_vsx,
212     fft2048_vsx, fft4096_vsx, fft8192_vsx, fft16384_vsx, fft32768_vsx, fft65536_vsx,
213 };
214 static void (* const fft_dispatch_vsx_interleave[])(FFTComplex*) = {
215     fft4_vsx_interleave, fft8_vsx_interleave, fft16_vsx_interleave, fft32_vsx_interleave, fft64_vsx_interleave,
216     fft128_vsx_interleave, fft256_vsx_interleave, fft512_vsx_interleave, fft1024_vsx_interleave,
217     fft2048_vsx_interleave, fft4096_vsx_interleave, fft8192_vsx_interleave, fft16384_vsx_interleave, fft32768_vsx_interleave, fft65536_vsx_interleave,
218 };
219 void ff_fft_calc_interleave_vsx(FFTContext *s, FFTComplex *z)
220 {
221      fft_dispatch_vsx_interleave[s->nbits-2](z);
222 }
223 void ff_fft_calc_vsx(FFTContext *s, FFTComplex *z)
224 {
225      fft_dispatch_vsx[s->nbits-2](z);
226 }
227 #endif /* HAVE_VSX */