1 /*****************************************************************************
3 *****************************************************************************
4 * Copyright (C) 2004 the VideoLAN team
7 * Authors: Cyril Deguet <asmax@videolan.org>
8 * code from projectM http://xmms-projectm.sourceforge.net
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
28 Fast Fourier/Cosine/Sine Transform
30 data length :power of 2
36 cdft: Complex Discrete Fourier Transform
37 rdft: Real Discrete Fourier Transform
38 ddct: Discrete Cosine Transform
39 ddst: Discrete Sine Transform
40 dfct: Cosine Transform of RDFT (Real Symmetric DFT)
41 dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
43 void cdft(int, int, double *, int *, double *);
44 void rdft(int, int, double *, int *, double *);
45 void ddct(int, int, double *, int *, double *);
46 void ddst(int, int, double *, int *, double *);
47 void dfct(int, double *, double *, int *, double *);
48 void dfst(int, double *, double *, int *, double *);
50 USE_CDFT_PTHREADS : default=not defined
51 CDFT_THREADS_BEGIN_N : must be >= 512, default=8192
52 CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536
53 USE_CDFT_WINTHREADS : default=not defined
54 CDFT_THREADS_BEGIN_N : must be >= 512, default=32768
55 CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288
58 -------- Complex DFT (Discrete Fourier Transform) --------
61 X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
63 X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
64 (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
67 ip[0] = 0; // first time only
68 cdft(2*n, 1, a, ip, w);
70 ip[0] = 0; // first time only
71 cdft(2*n, -1, a, ip, w);
73 2*n :data length (int)
74 n >= 1, n = power of 2
75 a[0...2*n-1] :input/output data (double *)
78 a[2*j+1] = Im(x[j]), 0<=j<n
81 a[2*k+1] = Im(X[k]), 0<=k<n
82 ip[0...*] :work area for bit reversal (int *)
83 length of ip >= 2+sqrt(n)
86 2+(1<<(int)(log(n+0.5)/log(2))/2).
87 ip[0],ip[1] are pointers of the cos/sin table.
88 w[0...n/2-1] :cos/sin table (double *)
89 w[],ip[] are initialized if ip[0] == 0.
92 cdft(2*n, -1, a, ip, w);
94 cdft(2*n, 1, a, ip, w);
95 for (j = 0; j <= 2 * n - 1; j++) {
101 -------- Real DFT / Inverse of Real DFT --------
104 R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
105 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
106 <case2> IRDFT (excluding scale)
107 a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
108 sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
109 sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
112 ip[0] = 0; // first time only
113 rdft(n, 1, a, ip, w);
115 ip[0] = 0; // first time only
116 rdft(n, -1, a, ip, w);
119 n >= 2, n = power of 2
120 a[0...n-1] :input/output data (double *)
123 a[2*k] = R[k], 0<=k<n/2
124 a[2*k+1] = I[k], 0<k<n/2
128 a[2*j] = R[j], 0<=j<n/2
129 a[2*j+1] = I[j], 0<j<n/2
131 ip[0...*] :work area for bit reversal (int *)
132 length of ip >= 2+sqrt(n/2)
135 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
136 ip[0],ip[1] are pointers of the cos/sin table.
137 w[0...n/2-1] :cos/sin table (double *)
138 w[],ip[] are initialized if ip[0] == 0.
141 rdft(n, 1, a, ip, w);
143 rdft(n, -1, a, ip, w);
144 for (j = 0; j <= n - 1; j++) {
150 -------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
152 <case1> IDCT (excluding scale)
153 C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
155 C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
158 ip[0] = 0; // first time only
159 ddct(n, 1, a, ip, w);
161 ip[0] = 0; // first time only
162 ddct(n, -1, a, ip, w);
165 n >= 2, n = power of 2
166 a[0...n-1] :input/output data (double *)
169 ip[0...*] :work area for bit reversal (int *)
170 length of ip >= 2+sqrt(n/2)
173 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
174 ip[0],ip[1] are pointers of the cos/sin table.
175 w[0...n*5/4-1] :cos/sin table (double *)
176 w[],ip[] are initialized if ip[0] == 0.
179 ddct(n, -1, a, ip, w);
182 ddct(n, 1, a, ip, w);
183 for (j = 0; j <= n - 1; j++) {
189 -------- DST (Discrete Sine Transform) / Inverse of DST --------
191 <case1> IDST (excluding scale)
192 S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
194 S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
197 ip[0] = 0; // first time only
198 ddst(n, 1, a, ip, w);
200 ip[0] = 0; // first time only
201 ddst(n, -1, a, ip, w);
204 n >= 2, n = power of 2
205 a[0...n-1] :input/output data (double *)
216 ip[0...*] :work area for bit reversal (int *)
217 length of ip >= 2+sqrt(n/2)
220 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
221 ip[0],ip[1] are pointers of the cos/sin table.
222 w[0...n*5/4-1] :cos/sin table (double *)
223 w[],ip[] are initialized if ip[0] == 0.
226 ddst(n, -1, a, ip, w);
229 ddst(n, 1, a, ip, w);
230 for (j = 0; j <= n - 1; j++) {
236 -------- Cosine Transform of RDFT (Real Symmetric DFT) --------
238 C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
240 ip[0] = 0; // first time only
241 dfct(n, a, t, ip, w);
243 n :data length - 1 (int)
244 n >= 2, n = power of 2
245 a[0...n] :input/output data (double *)
248 t[0...n/2] :work area (double *)
249 ip[0...*] :work area for bit reversal (int *)
250 length of ip >= 2+sqrt(n/4)
253 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
254 ip[0],ip[1] are pointers of the cos/sin table.
255 w[0...n*5/8-1] :cos/sin table (double *)
256 w[],ip[] are initialized if ip[0] == 0.
261 dfct(n, a, t, ip, w);
265 dfct(n, a, t, ip, w);
266 for (j = 0; j <= n; j++) {
272 -------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
274 S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
276 ip[0] = 0; // first time only
277 dfst(n, a, t, ip, w);
279 n :data length + 1 (int)
280 n >= 2, n = power of 2
281 a[0...n-1] :input/output data (double *)
284 (a[0] is used for work area)
285 t[0...n/2-1] :work area (double *)
286 ip[0...*] :work area for bit reversal (int *)
287 length of ip >= 2+sqrt(n/4)
290 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
291 ip[0],ip[1] are pointers of the cos/sin table.
292 w[0...n*5/8-1] :cos/sin table (double *)
293 w[],ip[] are initialized if ip[0] == 0.
296 dfst(n, a, t, ip, w);
298 dfst(n, a, t, ip, w);
299 for (j = 1; j <= n - 1; j++) {
306 The cos/sin table is recalculated when the larger table required.
307 w[] and ip[] are compatible with all routines.
311 void cdft(int n, int isgn, double *a, int *ip, double *w)
313 void makewt(int nw, int *ip, double *w);
314 void cftfsub(int n, double *a, int *ip, int nw, double *w);
315 void cftbsub(int n, double *a, int *ip, int nw, double *w);
324 cftfsub(n, a, ip, nw, w);
326 cftbsub(n, a, ip, nw, w);
331 void rdft(int n, int isgn, double *a, int *ip, double *w)
333 void makewt(int nw, int *ip, double *w);
334 void makect(int nc, int *ip, double *c);
335 void cftfsub(int n, double *a, int *ip, int nw, double *w);
336 void cftbsub(int n, double *a, int *ip, int nw, double *w);
337 void rftfsub(int n, double *a, int nc, double *c);
338 void rftbsub(int n, double *a, int nc, double *c);
350 makect(nc, ip, w + nw);
354 cftfsub(n, a, ip, nw, w);
355 rftfsub(n, a, nc, w + nw);
357 cftfsub(n, a, ip, nw, w);
363 a[1] = 0.5 * (a[0] - a[1]);
366 rftbsub(n, a, nc, w + nw);
367 cftbsub(n, a, ip, nw, w);
369 cftbsub(n, a, ip, nw, w);
375 void ddct(int n, int isgn, double *a, int *ip, double *w)
377 void makewt(int nw, int *ip, double *w);
378 void makect(int nc, int *ip, double *c);
379 void cftfsub(int n, double *a, int *ip, int nw, double *w);
380 void cftbsub(int n, double *a, int *ip, int nw, double *w);
381 void rftfsub(int n, double *a, int nc, double *c);
382 void rftbsub(int n, double *a, int nc, double *c);
383 void dctsub(int n, double *a, int nc, double *c);
395 makect(nc, ip, w + nw);
399 for (j = n - 2; j >= 2; j -= 2) {
400 a[j + 1] = a[j] - a[j - 1];
406 rftbsub(n, a, nc, w + nw);
407 cftbsub(n, a, ip, nw, w);
409 cftbsub(n, a, ip, nw, w);
412 dctsub(n, a, nc, w + nw);
415 cftfsub(n, a, ip, nw, w);
416 rftfsub(n, a, nc, w + nw);
418 cftfsub(n, a, ip, nw, w);
422 for (j = 2; j < n; j += 2) {
423 a[j - 1] = a[j] - a[j + 1];
431 void ddst(int n, int isgn, double *a, int *ip, double *w)
433 void makewt(int nw, int *ip, double *w);
434 void makect(int nc, int *ip, double *c);
435 void cftfsub(int n, double *a, int *ip, int nw, double *w);
436 void cftbsub(int n, double *a, int *ip, int nw, double *w);
437 void rftfsub(int n, double *a, int nc, double *c);
438 void rftbsub(int n, double *a, int nc, double *c);
439 void dstsub(int n, double *a, int nc, double *c);
451 makect(nc, ip, w + nw);
455 for (j = n - 2; j >= 2; j -= 2) {
456 a[j + 1] = -a[j] - a[j - 1];
462 rftbsub(n, a, nc, w + nw);
463 cftbsub(n, a, ip, nw, w);
465 cftbsub(n, a, ip, nw, w);
468 dstsub(n, a, nc, w + nw);
471 cftfsub(n, a, ip, nw, w);
472 rftfsub(n, a, nc, w + nw);
474 cftfsub(n, a, ip, nw, w);
478 for (j = 2; j < n; j += 2) {
479 a[j - 1] = -a[j] - a[j + 1];
487 void dfct(int n, double *a, double *t, int *ip, double *w)
489 void makewt(int nw, int *ip, double *w);
490 void makect(int nc, int *ip, double *c);
491 void cftfsub(int n, double *a, int *ip, int nw, double *w);
492 void rftfsub(int n, double *a, int nc, double *c);
493 void dctsub(int n, double *a, int nc, double *c);
494 int j, k, l, m, mh, nw, nc;
495 double xr, xi, yr, yi;
505 makect(nc, ip, w + nw);
515 for (j = 1; j < mh; j++) {
517 xr = a[j] - a[n - j];
518 xi = a[j] + a[n - j];
519 yr = a[k] - a[n - k];
520 yi = a[k] + a[n - k];
526 t[mh] = a[mh] + a[n - mh];
528 dctsub(m, a, nc, w + nw);
530 cftfsub(m, a, ip, nw, w);
531 rftfsub(m, a, nc, w + nw);
533 cftfsub(m, a, ip, nw, w);
535 a[n - 1] = a[0] - a[1];
537 for (j = m - 2; j >= 2; j -= 2) {
538 a[2 * j + 1] = a[j] + a[j + 1];
539 a[2 * j - 1] = a[j] - a[j + 1];
544 dctsub(m, t, nc, w + nw);
546 cftfsub(m, t, ip, nw, w);
547 rftfsub(m, t, nc, w + nw);
549 cftfsub(m, t, ip, nw, w);
551 a[n - l] = t[0] - t[1];
554 for (j = 2; j < m; j += 2) {
556 a[k - l] = t[j] - t[j + 1];
557 a[k + l] = t[j] + t[j + 1];
561 for (j = 0; j < mh; j++) {
563 t[j] = t[m + k] - t[m + j];
564 t[k] = t[m + k] + t[m + j];
580 void dfst(int n, double *a, double *t, int *ip, double *w)
582 void makewt(int nw, int *ip, double *w);
583 void makect(int nc, int *ip, double *c);
584 void cftfsub(int n, double *a, int *ip, int nw, double *w);
585 void rftfsub(int n, double *a, int nc, double *c);
586 void dstsub(int n, double *a, int nc, double *c);
587 int j, k, l, m, mh, nw, nc;
588 double xr, xi, yr, yi;
598 makect(nc, ip, w + nw);
603 for (j = 1; j < mh; j++) {
605 xr = a[j] + a[n - j];
606 xi = a[j] - a[n - j];
607 yr = a[k] + a[n - k];
608 yi = a[k] - a[n - k];
614 t[0] = a[mh] - a[n - mh];
617 dstsub(m, a, nc, w + nw);
619 cftfsub(m, a, ip, nw, w);
620 rftfsub(m, a, nc, w + nw);
622 cftfsub(m, a, ip, nw, w);
624 a[n - 1] = a[1] - a[0];
626 for (j = m - 2; j >= 2; j -= 2) {
627 a[2 * j + 1] = a[j] - a[j + 1];
628 a[2 * j - 1] = -a[j] - a[j + 1];
633 dstsub(m, t, nc, w + nw);
635 cftfsub(m, t, ip, nw, w);
636 rftfsub(m, t, nc, w + nw);
638 cftfsub(m, t, ip, nw, w);
640 a[n - l] = t[1] - t[0];
643 for (j = 2; j < m; j += 2) {
645 a[k - l] = -t[j] - t[j + 1];
646 a[k + l] = t[j] - t[j + 1];
650 for (j = 1; j < mh; j++) {
652 t[j] = t[m + k] + t[m + j];
653 t[k] = t[m + k] - t[m + j];
664 /* -------- initializing routines -------- */
669 void makewt(int nw, int *ip, double *w)
671 void makeipt(int nw, int *ip);
672 int j, nwh, nw0, nw1;
673 double delta, wn4r, wk1r, wk1i, wk3r, wk3i;
679 delta = atan(1.0) / nwh;
680 wn4r = cos(delta * nwh);
684 w[2] = cos(delta * 2);
685 w[3] = sin(delta * 2);
686 } else if (nwh > 4) {
688 w[2] = 0.5 / cos(delta * 2);
689 w[3] = 0.5 / cos(delta * 6);
690 for (j = 4; j < nwh; j += 4) {
691 w[j] = cos(delta * j);
692 w[j + 1] = sin(delta * j);
693 w[j + 2] = cos(3 * delta * j);
694 w[j + 3] = -sin(3 * delta * j);
708 } else if (nwh > 4) {
711 w[nw1 + 2] = 0.5 / wk1r;
712 w[nw1 + 3] = 0.5 / wk3r;
713 for (j = 4; j < nwh; j += 4) {
714 wk1r = w[nw0 + 2 * j];
715 wk1i = w[nw0 + 2 * j + 1];
716 wk3r = w[nw0 + 2 * j + 2];
717 wk3i = w[nw0 + 2 * j + 3];
719 w[nw1 + j + 1] = wk1i;
720 w[nw1 + j + 2] = wk3r;
721 w[nw1 + j + 3] = wk3i;
730 void makeipt(int nw, int *ip)
732 int j, l, m, m2, p, q;
737 for (l = nw; l > 32; l >>= 2) {
740 for (j = m; j < m2; j++) {
750 void makect(int nc, int *ip, double *c)
758 delta = atan(1.0) / nch;
759 c[0] = cos(delta * nch);
761 for (j = 1; j < nch; j++) {
762 c[j] = 0.5 * cos(delta * j);
763 c[nc - j] = 0.5 * sin(delta * j);
769 /* -------- child routines -------- */
772 #ifdef USE_CDFT_PTHREADS
773 #define USE_CDFT_THREADS
774 #ifndef CDFT_THREADS_BEGIN_N
775 #define CDFT_THREADS_BEGIN_N 8192
777 #ifndef CDFT_4THREADS_BEGIN_N
778 #define CDFT_4THREADS_BEGIN_N 65536
783 #define cdft_thread_t pthread_t
784 #define cdft_thread_create(thp,func,argp) { \
785 if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \
786 fprintf(stderr, "cdft thread error\n"); \
790 #define cdft_thread_wait(th) { \
791 if (pthread_join(th, NULL) != 0) { \
792 fprintf(stderr, "cdft thread error\n"); \
796 #endif /* USE_CDFT_PTHREADS */
799 #ifdef USE_CDFT_WINTHREADS
800 #define USE_CDFT_THREADS
801 #ifndef CDFT_THREADS_BEGIN_N
802 #define CDFT_THREADS_BEGIN_N 32768
804 #ifndef CDFT_4THREADS_BEGIN_N
805 #define CDFT_4THREADS_BEGIN_N 524288
810 #define cdft_thread_t HANDLE
811 #define cdft_thread_create(thp,func,argp) { \
813 *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \
815 fprintf(stderr, "cdft thread error\n"); \
819 #define cdft_thread_wait(th) { \
820 WaitForSingleObject(th, INFINITE); \
823 #endif /* USE_CDFT_WINTHREADS */
826 void cftfsub(int n, double *a, int *ip, int nw, double *w)
828 void bitrv2(int n, int *ip, double *a);
829 void bitrv216(double *a);
830 void bitrv208(double *a);
831 void cftf1st(int n, double *a, double *w);
832 void cftrec4(int n, double *a, int nw, double *w);
833 void cftleaf(int n, int isplt, double *a, int nw, double *w);
834 void cftfx41(int n, double *a, int nw, double *w);
835 void cftf161(double *a, double *w);
836 void cftf081(double *a, double *w);
837 void cftf040(double *a);
838 void cftx020(double *a);
839 #ifdef USE_CDFT_THREADS
840 void cftrec4_th(int n, double *a, int nw, double *w);
841 #endif /* USE_CDFT_THREADS */
845 cftf1st(n, a, &w[nw - (n >> 2)]);
846 #ifdef USE_CDFT_THREADS
847 if (n > CDFT_THREADS_BEGIN_N) {
848 cftrec4_th(n, a, nw, w);
850 #endif /* USE_CDFT_THREADS */
852 cftrec4(n, a, nw, w);
853 } else if (n > 128) {
854 cftleaf(n, 1, a, nw, w);
856 cftfx41(n, a, nw, w);
859 } else if (n == 32) {
860 cftf161(a, &w[nw - 8]);
874 void cftbsub(int n, double *a, int *ip, int nw, double *w)
876 void bitrv2conj(int n, int *ip, double *a);
877 void bitrv216neg(double *a);
878 void bitrv208neg(double *a);
879 void cftb1st(int n, double *a, double *w);
880 void cftrec4(int n, double *a, int nw, double *w);
881 void cftleaf(int n, int isplt, double *a, int nw, double *w);
882 void cftfx41(int n, double *a, int nw, double *w);
883 void cftf161(double *a, double *w);
884 void cftf081(double *a, double *w);
885 void cftb040(double *a);
886 void cftx020(double *a);
887 #ifdef USE_CDFT_THREADS
888 void cftrec4_th(int n, double *a, int nw, double *w);
889 #endif /* USE_CDFT_THREADS */
893 cftb1st(n, a, &w[nw - (n >> 2)]);
894 #ifdef USE_CDFT_THREADS
895 if (n > CDFT_THREADS_BEGIN_N) {
896 cftrec4_th(n, a, nw, w);
898 #endif /* USE_CDFT_THREADS */
900 cftrec4(n, a, nw, w);
901 } else if (n > 128) {
902 cftleaf(n, 1, a, nw, w);
904 cftfx41(n, a, nw, w);
906 bitrv2conj(n, ip, a);
907 } else if (n == 32) {
908 cftf161(a, &w[nw - 8]);
922 void bitrv2(int n, int *ip, double *a)
924 int j, j1, k, k1, l, m, nh, nm;
925 double xr, xi, yr, yi;
928 for (l = n >> 2; l > 8; l >>= 2) {
934 for (k = 0; k < m; k++) {
935 for (j = 0; j < k; j++) {
936 j1 = 4 * j + 2 * ip[m + k];
937 k1 = 4 * k + 2 * ip[m + j];
1097 k1 = 4 * k + 2 * ip[m + k];
1160 for (k = 0; k < m; k++) {
1161 for (j = 0; j < k; j++) {
1162 j1 = 4 * j + ip[m + k];
1163 k1 = 4 * k + ip[m + j];
1243 k1 = 4 * k + ip[m + k];
1269 void bitrv2conj(int n, int *ip, double *a)
1271 int j, j1, k, k1, l, m, nh, nm;
1272 double xr, xi, yr, yi;
1275 for (l = n >> 2; l > 8; l >>= 2) {
1281 for (k = 0; k < m; k++) {
1282 for (j = 0; j < k; j++) {
1283 j1 = 4 * j + 2 * ip[m + k];
1284 k1 = 4 * k + 2 * ip[m + j];
1444 k1 = 4 * k + 2 * ip[m + k];
1447 a[j1 - 1] = -a[j1 - 1];
1456 a[k1 + 3] = -a[k1 + 3];
1499 a[j1 - 1] = -a[j1 - 1];
1508 a[k1 + 3] = -a[k1 + 3];
1511 for (k = 0; k < m; k++) {
1512 for (j = 0; j < k; j++) {
1513 j1 = 4 * j + ip[m + k];
1514 k1 = 4 * k + ip[m + j];
1594 k1 = 4 * k + ip[m + k];
1597 a[j1 - 1] = -a[j1 - 1];
1606 a[k1 + 3] = -a[k1 + 3];
1609 a[j1 - 1] = -a[j1 - 1];
1618 a[k1 + 3] = -a[k1 + 3];
1624 void bitrv216(double *a)
1626 double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1627 x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i,
1628 x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i;
1681 void bitrv216neg(double *a)
1683 double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1684 x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i,
1685 x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i,
1686 x13r, x13i, x14r, x14i, x15r, x15i;
1751 void bitrv208(double *a)
1753 double x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i;
1774 void bitrv208neg(double *a)
1776 double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1777 x5r, x5i, x6r, x6i, x7r, x7i;
1810 void cftf1st(int n, double *a, double *w)
1812 int j, j0, j1, j2, j3, k, m, mh;
1813 double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i,
1814 wd1r, wd1i, wd3r, wd3i;
1815 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
1816 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i;
1824 x0i = a[1] + a[j2 + 1];
1826 x1i = a[1] - a[j2 + 1];
1827 x2r = a[j1] + a[j3];
1828 x2i = a[j1 + 1] + a[j3 + 1];
1829 x3r = a[j1] - a[j3];
1830 x3i = a[j1 + 1] - a[j3 + 1];
1834 a[j1 + 1] = x0i - x2i;
1836 a[j2 + 1] = x1i + x3r;
1838 a[j3 + 1] = x1i - x3r;
1847 for (j = 2; j < mh - 2; j += 4) {
1849 wk1r = csc1 * (wd1r + w[k]);
1850 wk1i = csc1 * (wd1i + w[k + 1]);
1851 wk3r = csc3 * (wd3r + w[k + 2]);
1852 wk3i = csc3 * (wd3i + w[k + 3]);
1861 x0i = a[j + 1] + a[j2 + 1];
1863 x1i = a[j + 1] - a[j2 + 1];
1864 y0r = a[j + 2] + a[j2 + 2];
1865 y0i = a[j + 3] + a[j2 + 3];
1866 y1r = a[j + 2] - a[j2 + 2];
1867 y1i = a[j + 3] - a[j2 + 3];
1868 x2r = a[j1] + a[j3];
1869 x2i = a[j1 + 1] + a[j3 + 1];
1870 x3r = a[j1] - a[j3];
1871 x3i = a[j1 + 1] - a[j3 + 1];
1872 y2r = a[j1 + 2] + a[j3 + 2];
1873 y2i = a[j1 + 3] + a[j3 + 3];
1874 y3r = a[j1 + 2] - a[j3 + 2];
1875 y3i = a[j1 + 3] - a[j3 + 3];
1877 a[j + 1] = x0i + x2i;
1878 a[j + 2] = y0r + y2r;
1879 a[j + 3] = y0i + y2i;
1881 a[j1 + 1] = x0i - x2i;
1882 a[j1 + 2] = y0r - y2r;
1883 a[j1 + 3] = y0i - y2i;
1886 a[j2] = wk1r * x0r - wk1i * x0i;
1887 a[j2 + 1] = wk1r * x0i + wk1i * x0r;
1890 a[j2 + 2] = wd1r * x0r - wd1i * x0i;
1891 a[j2 + 3] = wd1r * x0i + wd1i * x0r;
1894 a[j3] = wk3r * x0r + wk3i * x0i;
1895 a[j3 + 1] = wk3r * x0i - wk3i * x0r;
1898 a[j3 + 2] = wd3r * x0r + wd3i * x0i;
1899 a[j3 + 3] = wd3r * x0i - wd3i * x0r;
1904 x0r = a[j0] + a[j2];
1905 x0i = a[j0 + 1] + a[j2 + 1];
1906 x1r = a[j0] - a[j2];
1907 x1i = a[j0 + 1] - a[j2 + 1];
1908 y0r = a[j0 - 2] + a[j2 - 2];
1909 y0i = a[j0 - 1] + a[j2 - 1];
1910 y1r = a[j0 - 2] - a[j2 - 2];
1911 y1i = a[j0 - 1] - a[j2 - 1];
1912 x2r = a[j1] + a[j3];
1913 x2i = a[j1 + 1] + a[j3 + 1];
1914 x3r = a[j1] - a[j3];
1915 x3i = a[j1 + 1] - a[j3 + 1];
1916 y2r = a[j1 - 2] + a[j3 - 2];
1917 y2i = a[j1 - 1] + a[j3 - 1];
1918 y3r = a[j1 - 2] - a[j3 - 2];
1919 y3i = a[j1 - 1] - a[j3 - 1];
1921 a[j0 + 1] = x0i + x2i;
1922 a[j0 - 2] = y0r + y2r;
1923 a[j0 - 1] = y0i + y2i;
1925 a[j1 + 1] = x0i - x2i;
1926 a[j1 - 2] = y0r - y2r;
1927 a[j1 - 1] = y0i - y2i;
1930 a[j2] = wk1i * x0r - wk1r * x0i;
1931 a[j2 + 1] = wk1i * x0i + wk1r * x0r;
1934 a[j2 - 2] = wd1i * x0r - wd1r * x0i;
1935 a[j2 - 1] = wd1i * x0i + wd1r * x0r;
1938 a[j3] = wk3i * x0r + wk3r * x0i;
1939 a[j3 + 1] = wk3i * x0i - wk3r * x0r;
1942 a[j3 - 2] = wd3i * x0r + wd3r * x0i;
1943 a[j3 - 1] = wd3i * x0i - wd3r * x0r;
1945 wk1r = csc1 * (wd1r + wn4r);
1946 wk1i = csc1 * (wd1i + wn4r);
1947 wk3r = csc3 * (wd3r - wn4r);
1948 wk3i = csc3 * (wd3i - wn4r);
1953 x0r = a[j0 - 2] + a[j2 - 2];
1954 x0i = a[j0 - 1] + a[j2 - 1];
1955 x1r = a[j0 - 2] - a[j2 - 2];
1956 x1i = a[j0 - 1] - a[j2 - 1];
1957 x2r = a[j1 - 2] + a[j3 - 2];
1958 x2i = a[j1 - 1] + a[j3 - 1];
1959 x3r = a[j1 - 2] - a[j3 - 2];
1960 x3i = a[j1 - 1] - a[j3 - 1];
1961 a[j0 - 2] = x0r + x2r;
1962 a[j0 - 1] = x0i + x2i;
1963 a[j1 - 2] = x0r - x2r;
1964 a[j1 - 1] = x0i - x2i;
1967 a[j2 - 2] = wk1r * x0r - wk1i * x0i;
1968 a[j2 - 1] = wk1r * x0i + wk1i * x0r;
1971 a[j3 - 2] = wk3r * x0r + wk3i * x0i;
1972 a[j3 - 1] = wk3r * x0i - wk3i * x0r;
1973 x0r = a[j0] + a[j2];
1974 x0i = a[j0 + 1] + a[j2 + 1];
1975 x1r = a[j0] - a[j2];
1976 x1i = a[j0 + 1] - a[j2 + 1];
1977 x2r = a[j1] + a[j3];
1978 x2i = a[j1 + 1] + a[j3 + 1];
1979 x3r = a[j1] - a[j3];
1980 x3i = a[j1 + 1] - a[j3 + 1];
1982 a[j0 + 1] = x0i + x2i;
1984 a[j1 + 1] = x0i - x2i;
1987 a[j2] = wn4r * (x0r - x0i);
1988 a[j2 + 1] = wn4r * (x0i + x0r);
1991 a[j3] = -wn4r * (x0r + x0i);
1992 a[j3 + 1] = -wn4r * (x0i - x0r);
1993 x0r = a[j0 + 2] + a[j2 + 2];
1994 x0i = a[j0 + 3] + a[j2 + 3];
1995 x1r = a[j0 + 2] - a[j2 + 2];
1996 x1i = a[j0 + 3] - a[j2 + 3];
1997 x2r = a[j1 + 2] + a[j3 + 2];
1998 x2i = a[j1 + 3] + a[j3 + 3];
1999 x3r = a[j1 + 2] - a[j3 + 2];
2000 x3i = a[j1 + 3] - a[j3 + 3];
2001 a[j0 + 2] = x0r + x2r;
2002 a[j0 + 3] = x0i + x2i;
2003 a[j1 + 2] = x0r - x2r;
2004 a[j1 + 3] = x0i - x2i;
2007 a[j2 + 2] = wk1i * x0r - wk1r * x0i;
2008 a[j2 + 3] = wk1i * x0i + wk1r * x0r;
2011 a[j3 + 2] = wk3i * x0r + wk3r * x0i;
2012 a[j3 + 3] = wk3i * x0i - wk3r * x0r;
2016 void cftb1st(int n, double *a, double *w)
2018 int j, j0, j1, j2, j3, k, m, mh;
2019 double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i,
2020 wd1r, wd1i, wd3r, wd3i;
2021 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
2022 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i;
2030 x0i = -a[1] - a[j2 + 1];
2032 x1i = -a[1] + a[j2 + 1];
2033 x2r = a[j1] + a[j3];
2034 x2i = a[j1 + 1] + a[j3 + 1];
2035 x3r = a[j1] - a[j3];
2036 x3i = a[j1 + 1] - a[j3 + 1];
2040 a[j1 + 1] = x0i + x2i;
2042 a[j2 + 1] = x1i + x3r;
2044 a[j3 + 1] = x1i - x3r;
2053 for (j = 2; j < mh - 2; j += 4) {
2055 wk1r = csc1 * (wd1r + w[k]);
2056 wk1i = csc1 * (wd1i + w[k + 1]);
2057 wk3r = csc3 * (wd3r + w[k + 2]);
2058 wk3i = csc3 * (wd3i + w[k + 3]);
2067 x0i = -a[j + 1] - a[j2 + 1];
2069 x1i = -a[j + 1] + a[j2 + 1];
2070 y0r = a[j + 2] + a[j2 + 2];
2071 y0i = -a[j + 3] - a[j2 + 3];
2072 y1r = a[j + 2] - a[j2 + 2];
2073 y1i = -a[j + 3] + a[j2 + 3];
2074 x2r = a[j1] + a[j3];
2075 x2i = a[j1 + 1] + a[j3 + 1];
2076 x3r = a[j1] - a[j3];
2077 x3i = a[j1 + 1] - a[j3 + 1];
2078 y2r = a[j1 + 2] + a[j3 + 2];
2079 y2i = a[j1 + 3] + a[j3 + 3];
2080 y3r = a[j1 + 2] - a[j3 + 2];
2081 y3i = a[j1 + 3] - a[j3 + 3];
2083 a[j + 1] = x0i - x2i;
2084 a[j + 2] = y0r + y2r;
2085 a[j + 3] = y0i - y2i;
2087 a[j1 + 1] = x0i + x2i;
2088 a[j1 + 2] = y0r - y2r;
2089 a[j1 + 3] = y0i + y2i;
2092 a[j2] = wk1r * x0r - wk1i * x0i;
2093 a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2096 a[j2 + 2] = wd1r * x0r - wd1i * x0i;
2097 a[j2 + 3] = wd1r * x0i + wd1i * x0r;
2100 a[j3] = wk3r * x0r + wk3i * x0i;
2101 a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2104 a[j3 + 2] = wd3r * x0r + wd3i * x0i;
2105 a[j3 + 3] = wd3r * x0i - wd3i * x0r;
2110 x0r = a[j0] + a[j2];
2111 x0i = -a[j0 + 1] - a[j2 + 1];
2112 x1r = a[j0] - a[j2];
2113 x1i = -a[j0 + 1] + a[j2 + 1];
2114 y0r = a[j0 - 2] + a[j2 - 2];
2115 y0i = -a[j0 - 1] - a[j2 - 1];
2116 y1r = a[j0 - 2] - a[j2 - 2];
2117 y1i = -a[j0 - 1] + a[j2 - 1];
2118 x2r = a[j1] + a[j3];
2119 x2i = a[j1 + 1] + a[j3 + 1];
2120 x3r = a[j1] - a[j3];
2121 x3i = a[j1 + 1] - a[j3 + 1];
2122 y2r = a[j1 - 2] + a[j3 - 2];
2123 y2i = a[j1 - 1] + a[j3 - 1];
2124 y3r = a[j1 - 2] - a[j3 - 2];
2125 y3i = a[j1 - 1] - a[j3 - 1];
2127 a[j0 + 1] = x0i - x2i;
2128 a[j0 - 2] = y0r + y2r;
2129 a[j0 - 1] = y0i - y2i;
2131 a[j1 + 1] = x0i + x2i;
2132 a[j1 - 2] = y0r - y2r;
2133 a[j1 - 1] = y0i + y2i;
2136 a[j2] = wk1i * x0r - wk1r * x0i;
2137 a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2140 a[j2 - 2] = wd1i * x0r - wd1r * x0i;
2141 a[j2 - 1] = wd1i * x0i + wd1r * x0r;
2144 a[j3] = wk3i * x0r + wk3r * x0i;
2145 a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2148 a[j3 - 2] = wd3i * x0r + wd3r * x0i;
2149 a[j3 - 1] = wd3i * x0i - wd3r * x0r;
2151 wk1r = csc1 * (wd1r + wn4r);
2152 wk1i = csc1 * (wd1i + wn4r);
2153 wk3r = csc3 * (wd3r - wn4r);
2154 wk3i = csc3 * (wd3i - wn4r);
2159 x0r = a[j0 - 2] + a[j2 - 2];
2160 x0i = -a[j0 - 1] - a[j2 - 1];
2161 x1r = a[j0 - 2] - a[j2 - 2];
2162 x1i = -a[j0 - 1] + a[j2 - 1];
2163 x2r = a[j1 - 2] + a[j3 - 2];
2164 x2i = a[j1 - 1] + a[j3 - 1];
2165 x3r = a[j1 - 2] - a[j3 - 2];
2166 x3i = a[j1 - 1] - a[j3 - 1];
2167 a[j0 - 2] = x0r + x2r;
2168 a[j0 - 1] = x0i - x2i;
2169 a[j1 - 2] = x0r - x2r;
2170 a[j1 - 1] = x0i + x2i;
2173 a[j2 - 2] = wk1r * x0r - wk1i * x0i;
2174 a[j2 - 1] = wk1r * x0i + wk1i * x0r;
2177 a[j3 - 2] = wk3r * x0r + wk3i * x0i;
2178 a[j3 - 1] = wk3r * x0i - wk3i * x0r;
2179 x0r = a[j0] + a[j2];
2180 x0i = -a[j0 + 1] - a[j2 + 1];
2181 x1r = a[j0] - a[j2];
2182 x1i = -a[j0 + 1] + a[j2 + 1];
2183 x2r = a[j1] + a[j3];
2184 x2i = a[j1 + 1] + a[j3 + 1];
2185 x3r = a[j1] - a[j3];
2186 x3i = a[j1 + 1] - a[j3 + 1];
2188 a[j0 + 1] = x0i - x2i;
2190 a[j1 + 1] = x0i + x2i;
2193 a[j2] = wn4r * (x0r - x0i);
2194 a[j2 + 1] = wn4r * (x0i + x0r);
2197 a[j3] = -wn4r * (x0r + x0i);
2198 a[j3 + 1] = -wn4r * (x0i - x0r);
2199 x0r = a[j0 + 2] + a[j2 + 2];
2200 x0i = -a[j0 + 3] - a[j2 + 3];
2201 x1r = a[j0 + 2] - a[j2 + 2];
2202 x1i = -a[j0 + 3] + a[j2 + 3];
2203 x2r = a[j1 + 2] + a[j3 + 2];
2204 x2i = a[j1 + 3] + a[j3 + 3];
2205 x3r = a[j1 + 2] - a[j3 + 2];
2206 x3i = a[j1 + 3] - a[j3 + 3];
2207 a[j0 + 2] = x0r + x2r;
2208 a[j0 + 3] = x0i - x2i;
2209 a[j1 + 2] = x0r - x2r;
2210 a[j1 + 3] = x0i + x2i;
2213 a[j2 + 2] = wk1i * x0r - wk1r * x0i;
2214 a[j2 + 3] = wk1i * x0i + wk1r * x0r;
2217 a[j3 + 2] = wk3i * x0r + wk3r * x0i;
2218 a[j3 + 3] = wk3i * x0i - wk3r * x0r;
2222 #ifdef USE_CDFT_THREADS
2223 struct cdft_arg_st {
2230 typedef struct cdft_arg_st cdft_arg_t;
2233 void cftrec4_th(int n, double *a, int nw, double *w)
2235 void *cftrec1_th(void *p);
2236 void *cftrec2_th(void *p);
2237 int i, idiv4, m, nthread;
2238 cdft_thread_t th[4];
2244 if (n > CDFT_4THREADS_BEGIN_N) {
2249 for (i = 0; i < nthread; i++) {
2252 ag[i].a = &a[i * m];
2256 cdft_thread_create(&th[i], cftrec1_th, &ag[i]);
2258 cdft_thread_create(&th[i], cftrec2_th, &ag[i]);
2261 for (i = 0; i < nthread; i++) {
2262 cdft_thread_wait(th[i]);
2267 void *cftrec1_th(void *p)
2269 int cfttree(int n, int j, int k, double *a, int nw, double *w);
2270 void cftleaf(int n, int isplt, double *a, int nw, double *w);
2271 void cftmdl1(int n, double *a, double *w);
2272 int isplt, j, k, m, n, n0, nw;
2275 n0 = ((cdft_arg_t *) p)->n0;
2276 n = ((cdft_arg_t *) p)->n;
2277 a = ((cdft_arg_t *) p)->a;
2278 nw = ((cdft_arg_t *) p)->nw;
2279 w = ((cdft_arg_t *) p)->w;
2283 cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]);
2285 cftleaf(m, 1, &a[n - m], nw, w);
2287 for (j = n - m; j > 0; j -= m) {
2289 isplt = cfttree(m, j, k, a, nw, w);
2290 cftleaf(m, isplt, &a[j - m], nw, w);
2296 void *cftrec2_th(void *p)
2298 int cfttree(int n, int j, int k, double *a, int nw, double *w);
2299 void cftleaf(int n, int isplt, double *a, int nw, double *w);
2300 void cftmdl2(int n, double *a, double *w);
2301 int isplt, j, k, m, n, n0, nw;
2304 n0 = ((cdft_arg_t *) p)->n0;
2305 n = ((cdft_arg_t *) p)->n;
2306 a = ((cdft_arg_t *) p)->a;
2307 nw = ((cdft_arg_t *) p)->nw;
2308 w = ((cdft_arg_t *) p)->w;
2314 cftmdl2(m, &a[n - m], &w[nw - m]);
2316 cftleaf(m, 0, &a[n - m], nw, w);
2318 for (j = n - m; j > 0; j -= m) {
2320 isplt = cfttree(m, j, k, a, nw, w);
2321 cftleaf(m, isplt, &a[j - m], nw, w);
2325 #endif /* USE_CDFT_THREADS */
2328 void cftrec4(int n, double *a, int nw, double *w)
2330 int cfttree(int n, int j, int k, double *a, int nw, double *w);
2331 void cftleaf(int n, int isplt, double *a, int nw, double *w);
2332 void cftmdl1(int n, double *a, double *w);
2338 cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]);
2340 cftleaf(m, 1, &a[n - m], nw, w);
2342 for (j = n - m; j > 0; j -= m) {
2344 isplt = cfttree(m, j, k, a, nw, w);
2345 cftleaf(m, isplt, &a[j - m], nw, w);
2350 int cfttree(int n, int j, int k, double *a, int nw, double *w)
2352 void cftmdl1(int n, double *a, double *w);
2353 void cftmdl2(int n, double *a, double *w);
2359 cftmdl1(n, &a[j - n], &w[nw - (n >> 1)]);
2361 cftmdl2(n, &a[j - n], &w[nw - n]);
2365 for (i = k; (i & 3) == 0; i >>= 2) {
2371 cftmdl1(m, &a[j - m], &w[nw - (m >> 1)]);
2376 cftmdl2(m, &a[j - m], &w[nw - m]);
2385 void cftleaf(int n, int isplt, double *a, int nw, double *w)
2387 void cftmdl1(int n, double *a, double *w);
2388 void cftmdl2(int n, double *a, double *w);
2389 void cftf161(double *a, double *w);
2390 void cftf162(double *a, double *w);
2391 void cftf081(double *a, double *w);
2392 void cftf082(double *a, double *w);
2395 cftmdl1(128, a, &w[nw - 64]);
2396 cftf161(a, &w[nw - 8]);
2397 cftf162(&a[32], &w[nw - 32]);
2398 cftf161(&a[64], &w[nw - 8]);
2399 cftf161(&a[96], &w[nw - 8]);
2400 cftmdl2(128, &a[128], &w[nw - 128]);
2401 cftf161(&a[128], &w[nw - 8]);
2402 cftf162(&a[160], &w[nw - 32]);
2403 cftf161(&a[192], &w[nw - 8]);
2404 cftf162(&a[224], &w[nw - 32]);
2405 cftmdl1(128, &a[256], &w[nw - 64]);
2406 cftf161(&a[256], &w[nw - 8]);
2407 cftf162(&a[288], &w[nw - 32]);
2408 cftf161(&a[320], &w[nw - 8]);
2409 cftf161(&a[352], &w[nw - 8]);
2411 cftmdl1(128, &a[384], &w[nw - 64]);
2412 cftf161(&a[480], &w[nw - 8]);
2414 cftmdl2(128, &a[384], &w[nw - 128]);
2415 cftf162(&a[480], &w[nw - 32]);
2417 cftf161(&a[384], &w[nw - 8]);
2418 cftf162(&a[416], &w[nw - 32]);
2419 cftf161(&a[448], &w[nw - 8]);
2421 cftmdl1(64, a, &w[nw - 32]);
2422 cftf081(a, &w[nw - 8]);
2423 cftf082(&a[16], &w[nw - 8]);
2424 cftf081(&a[32], &w[nw - 8]);
2425 cftf081(&a[48], &w[nw - 8]);
2426 cftmdl2(64, &a[64], &w[nw - 64]);
2427 cftf081(&a[64], &w[nw - 8]);
2428 cftf082(&a[80], &w[nw - 8]);
2429 cftf081(&a[96], &w[nw - 8]);
2430 cftf082(&a[112], &w[nw - 8]);
2431 cftmdl1(64, &a[128], &w[nw - 32]);
2432 cftf081(&a[128], &w[nw - 8]);
2433 cftf082(&a[144], &w[nw - 8]);
2434 cftf081(&a[160], &w[nw - 8]);
2435 cftf081(&a[176], &w[nw - 8]);
2437 cftmdl1(64, &a[192], &w[nw - 32]);
2438 cftf081(&a[240], &w[nw - 8]);
2440 cftmdl2(64, &a[192], &w[nw - 64]);
2441 cftf082(&a[240], &w[nw - 8]);
2443 cftf081(&a[192], &w[nw - 8]);
2444 cftf082(&a[208], &w[nw - 8]);
2445 cftf081(&a[224], &w[nw - 8]);
2450 void cftmdl1(int n, double *a, double *w)
2452 int j, j0, j1, j2, j3, k, m, mh;
2453 double wn4r, wk1r, wk1i, wk3r, wk3i;
2454 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2462 x0i = a[1] + a[j2 + 1];
2464 x1i = a[1] - a[j2 + 1];
2465 x2r = a[j1] + a[j3];
2466 x2i = a[j1 + 1] + a[j3 + 1];
2467 x3r = a[j1] - a[j3];
2468 x3i = a[j1 + 1] - a[j3 + 1];
2472 a[j1 + 1] = x0i - x2i;
2474 a[j2 + 1] = x1i + x3r;
2476 a[j3 + 1] = x1i - x3r;
2479 for (j = 2; j < mh; j += 2) {
2489 x0i = a[j + 1] + a[j2 + 1];
2491 x1i = a[j + 1] - a[j2 + 1];
2492 x2r = a[j1] + a[j3];
2493 x2i = a[j1 + 1] + a[j3 + 1];
2494 x3r = a[j1] - a[j3];
2495 x3i = a[j1 + 1] - a[j3 + 1];
2497 a[j + 1] = x0i + x2i;
2499 a[j1 + 1] = x0i - x2i;
2502 a[j2] = wk1r * x0r - wk1i * x0i;
2503 a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2506 a[j3] = wk3r * x0r + wk3i * x0i;
2507 a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2512 x0r = a[j0] + a[j2];
2513 x0i = a[j0 + 1] + a[j2 + 1];
2514 x1r = a[j0] - a[j2];
2515 x1i = a[j0 + 1] - a[j2 + 1];
2516 x2r = a[j1] + a[j3];
2517 x2i = a[j1 + 1] + a[j3 + 1];
2518 x3r = a[j1] - a[j3];
2519 x3i = a[j1 + 1] - a[j3 + 1];
2521 a[j0 + 1] = x0i + x2i;
2523 a[j1 + 1] = x0i - x2i;
2526 a[j2] = wk1i * x0r - wk1r * x0i;
2527 a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2530 a[j3] = wk3i * x0r + wk3r * x0i;
2531 a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2537 x0r = a[j0] + a[j2];
2538 x0i = a[j0 + 1] + a[j2 + 1];
2539 x1r = a[j0] - a[j2];
2540 x1i = a[j0 + 1] - a[j2 + 1];
2541 x2r = a[j1] + a[j3];
2542 x2i = a[j1 + 1] + a[j3 + 1];
2543 x3r = a[j1] - a[j3];
2544 x3i = a[j1 + 1] - a[j3 + 1];
2546 a[j0 + 1] = x0i + x2i;
2548 a[j1 + 1] = x0i - x2i;
2551 a[j2] = wn4r * (x0r - x0i);
2552 a[j2 + 1] = wn4r * (x0i + x0r);
2555 a[j3] = -wn4r * (x0r + x0i);
2556 a[j3 + 1] = -wn4r * (x0i - x0r);
2560 void cftmdl2(int n, double *a, double *w)
2562 int j, j0, j1, j2, j3, k, kr, m, mh;
2563 double wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
2564 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;
2572 x0r = a[0] - a[j2 + 1];
2574 x1r = a[0] + a[j2 + 1];
2576 x2r = a[j1] - a[j3 + 1];
2577 x2i = a[j1 + 1] + a[j3];
2578 x3r = a[j1] + a[j3 + 1];
2579 x3i = a[j1 + 1] - a[j3];
2580 y0r = wn4r * (x2r - x2i);
2581 y0i = wn4r * (x2i + x2r);
2585 a[j1 + 1] = x0i - y0i;
2586 y0r = wn4r * (x3r - x3i);
2587 y0i = wn4r * (x3i + x3r);
2589 a[j2 + 1] = x1i + y0r;
2591 a[j3 + 1] = x1i - y0r;
2594 for (j = 2; j < mh; j += 2) {
2608 x0r = a[j] - a[j2 + 1];
2609 x0i = a[j + 1] + a[j2];
2610 x1r = a[j] + a[j2 + 1];
2611 x1i = a[j + 1] - a[j2];
2612 x2r = a[j1] - a[j3 + 1];
2613 x2i = a[j1 + 1] + a[j3];
2614 x3r = a[j1] + a[j3 + 1];
2615 x3i = a[j1 + 1] - a[j3];
2616 y0r = wk1r * x0r - wk1i * x0i;
2617 y0i = wk1r * x0i + wk1i * x0r;
2618 y2r = wd1r * x2r - wd1i * x2i;
2619 y2i = wd1r * x2i + wd1i * x2r;
2621 a[j + 1] = y0i + y2i;
2623 a[j1 + 1] = y0i - y2i;
2624 y0r = wk3r * x1r + wk3i * x1i;
2625 y0i = wk3r * x1i - wk3i * x1r;
2626 y2r = wd3r * x3r + wd3i * x3i;
2627 y2i = wd3r * x3i - wd3i * x3r;
2629 a[j2 + 1] = y0i + y2i;
2631 a[j3 + 1] = y0i - y2i;
2636 x0r = a[j0] - a[j2 + 1];
2637 x0i = a[j0 + 1] + a[j2];
2638 x1r = a[j0] + a[j2 + 1];
2639 x1i = a[j0 + 1] - a[j2];
2640 x2r = a[j1] - a[j3 + 1];
2641 x2i = a[j1 + 1] + a[j3];
2642 x3r = a[j1] + a[j3 + 1];
2643 x3i = a[j1 + 1] - a[j3];
2644 y0r = wd1i * x0r - wd1r * x0i;
2645 y0i = wd1i * x0i + wd1r * x0r;
2646 y2r = wk1i * x2r - wk1r * x2i;
2647 y2i = wk1i * x2i + wk1r * x2r;
2649 a[j0 + 1] = y0i + y2i;
2651 a[j1 + 1] = y0i - y2i;
2652 y0r = wd3i * x1r + wd3r * x1i;
2653 y0i = wd3i * x1i - wd3r * x1r;
2654 y2r = wk3i * x3r + wk3r * x3i;
2655 y2i = wk3i * x3i - wk3r * x3r;
2657 a[j2 + 1] = y0i + y2i;
2659 a[j3 + 1] = y0i - y2i;
2667 x0r = a[j0] - a[j2 + 1];
2668 x0i = a[j0 + 1] + a[j2];
2669 x1r = a[j0] + a[j2 + 1];
2670 x1i = a[j0 + 1] - a[j2];
2671 x2r = a[j1] - a[j3 + 1];
2672 x2i = a[j1 + 1] + a[j3];
2673 x3r = a[j1] + a[j3 + 1];
2674 x3i = a[j1 + 1] - a[j3];
2675 y0r = wk1r * x0r - wk1i * x0i;
2676 y0i = wk1r * x0i + wk1i * x0r;
2677 y2r = wk1i * x2r - wk1r * x2i;
2678 y2i = wk1i * x2i + wk1r * x2r;
2680 a[j0 + 1] = y0i + y2i;
2682 a[j1 + 1] = y0i - y2i;
2683 y0r = wk1i * x1r - wk1r * x1i;
2684 y0i = wk1i * x1i + wk1r * x1r;
2685 y2r = wk1r * x3r - wk1i * x3i;
2686 y2i = wk1r * x3i + wk1i * x3r;
2688 a[j2 + 1] = y0i - y2i;
2690 a[j3 + 1] = y0i + y2i;
2694 void cftfx41(int n, double *a, int nw, double *w)
2696 void cftf161(double *a, double *w);
2697 void cftf162(double *a, double *w);
2698 void cftf081(double *a, double *w);
2699 void cftf082(double *a, double *w);
2702 cftf161(a, &w[nw - 8]);
2703 cftf162(&a[32], &w[nw - 32]);
2704 cftf161(&a[64], &w[nw - 8]);
2705 cftf161(&a[96], &w[nw - 8]);
2707 cftf081(a, &w[nw - 8]);
2708 cftf082(&a[16], &w[nw - 8]);
2709 cftf081(&a[32], &w[nw - 8]);
2710 cftf081(&a[48], &w[nw - 8]);
2715 void cftf161(double *a, double *w)
2717 double wn4r, wk1r, wk1i,
2718 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
2719 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
2720 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
2721 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
2722 y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
2747 x2r = a[10] + a[26];
2748 x2i = a[11] + a[27];
2749 x3r = a[10] - a[26];
2750 x3i = a[11] - a[27];
2757 y9r = wk1r * x0r - wk1i * x0i;
2758 y9i = wk1r * x0i + wk1i * x0r;
2761 y13r = wk1i * x0r - wk1r * x0i;
2762 y13i = wk1i * x0i + wk1r * x0r;
2767 x2r = a[12] + a[28];
2768 x2i = a[13] + a[29];
2769 x3r = a[12] - a[28];
2770 x3i = a[13] - a[29];
2777 y10r = wn4r * (x0r - x0i);
2778 y10i = wn4r * (x0i + x0r);
2781 y14r = wn4r * (x0r + x0i);
2782 y14i = wn4r * (x0i - x0r);
2787 x2r = a[14] + a[30];
2788 x2i = a[15] + a[31];
2789 x3r = a[14] - a[30];
2790 x3i = a[15] - a[31];
2797 y11r = wk1i * x0r - wk1r * x0i;
2798 y11i = wk1i * x0i + wk1r * x0r;
2801 y15r = wk1r * x0r - wk1i * x0i;
2802 y15i = wk1r * x0i + wk1i * x0r;
2837 x2r = wn4r * (x0r - x0i);
2838 x2i = wn4r * (x0i + x0r);
2841 x3r = wn4r * (x0r - x0i);
2842 x3i = wn4r * (x0i + x0r);
2874 void cftf162(double *a, double *w)
2876 double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i,
2877 x0r, x0i, x1r, x1i, x2r, x2i,
2878 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
2879 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
2880 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
2881 y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
2894 x2r = wn4r * (x0r - x0i);
2895 x2i = wn4r * (x0i + x0r);
2904 x2r = wn4r * (x0r - x0i);
2905 x2i = wn4r * (x0i + x0r);
2912 x1r = wk1r * x0r - wk1i * x0i;
2913 x1i = wk1r * x0i + wk1i * x0r;
2914 x0r = a[10] - a[27];
2915 x0i = a[11] + a[26];
2916 x2r = wk3i * x0r - wk3r * x0i;
2917 x2i = wk3i * x0i + wk3r * x0r;
2924 x1r = wk3r * x0r - wk3i * x0i;
2925 x1i = wk3r * x0i + wk3i * x0r;
2926 x0r = a[10] + a[27];
2927 x0i = a[11] - a[26];
2928 x2r = wk1r * x0r + wk1i * x0i;
2929 x2i = wk1r * x0i - wk1i * x0r;
2936 x1r = wk2r * x0r - wk2i * x0i;
2937 x1i = wk2r * x0i + wk2i * x0r;
2938 x0r = a[12] - a[29];
2939 x0i = a[13] + a[28];
2940 x2r = wk2i * x0r - wk2r * x0i;
2941 x2i = wk2i * x0i + wk2r * x0r;
2948 x1r = wk2i * x0r - wk2r * x0i;
2949 x1i = wk2i * x0i + wk2r * x0r;
2950 x0r = a[12] + a[29];
2951 x0i = a[13] - a[28];
2952 x2r = wk2r * x0r - wk2i * x0i;
2953 x2i = wk2r * x0i + wk2i * x0r;
2960 x1r = wk3r * x0r - wk3i * x0i;
2961 x1i = wk3r * x0i + wk3i * x0r;
2962 x0r = a[14] - a[31];
2963 x0i = a[15] + a[30];
2964 x2r = wk1i * x0r - wk1r * x0i;
2965 x2i = wk1i * x0i + wk1r * x0r;
2972 x1r = wk1i * x0r + wk1r * x0i;
2973 x1i = wk1i * x0i - wk1r * x0r;
2974 x0r = a[14] + a[31];
2975 x0i = a[15] - a[30];
2976 x2r = wk3i * x0r - wk3r * x0i;
2977 x2i = wk3i * x0i + wk3r * x0r;
3002 x2r = wn4r * (x0r - x0i);
3003 x2i = wn4r * (x0i + x0r);
3012 x2r = wn4r * (x0r - x0i);
3013 x2i = wn4r * (x0i + x0r);
3038 x2r = wn4r * (x0r - x0i);
3039 x2i = wn4r * (x0i + x0r);
3048 x2r = wn4r * (x0r - x0i);
3049 x2i = wn4r * (x0i + x0r);
3057 void cftf081(double *a, double *w)
3059 double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
3060 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
3061 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3096 y5r = wn4r * (x0r - x0i);
3097 y5i = wn4r * (x0r + x0i);
3098 y7r = wn4r * (x2r - x2i);
3099 y7i = wn4r * (x2r + x2i);
3119 void cftf082(double *a, double *w)
3121 double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i,
3122 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
3123 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3134 y2r = wn4r * (x0r - x0i);
3135 y2i = wn4r * (x0i + x0r);
3138 y3r = wn4r * (x0r - x0i);
3139 y3i = wn4r * (x0i + x0r);
3142 y4r = wk1r * x0r - wk1i * x0i;
3143 y4i = wk1r * x0i + wk1i * x0r;
3146 y5r = wk1i * x0r - wk1r * x0i;
3147 y5i = wk1i * x0i + wk1r * x0r;
3150 y6r = wk1i * x0r - wk1r * x0i;
3151 y6i = wk1i * x0i + wk1r * x0r;
3154 y7r = wk1r * x0r - wk1i * x0i;
3155 y7i = wk1r * x0i + wk1i * x0r;
3191 void cftf040(double *a)
3193 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3214 void cftb040(double *a)
3216 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3237 void cftx020(double *a)
3250 void rftfsub(int n, double *a, int nc, double *c)
3252 int j, k, kk, ks, m;
3253 double wkr, wki, xr, xi, yr, yi;
3258 for (j = 2; j < m; j += 2) {
3261 wkr = 0.5 - c[nc - kk];
3264 xi = a[j + 1] + a[k + 1];
3265 yr = wkr * xr - wki * xi;
3266 yi = wkr * xi + wki * xr;
3275 void rftbsub(int n, double *a, int nc, double *c)
3277 int j, k, kk, ks, m;
3278 double wkr, wki, xr, xi, yr, yi;
3283 for (j = 2; j < m; j += 2) {
3286 wkr = 0.5 - c[nc - kk];
3289 xi = a[j + 1] + a[k + 1];
3290 yr = wkr * xr + wki * xi;
3291 yi = wkr * xi - wki * xr;
3300 void dctsub(int n, double *a, int nc, double *c)
3302 int j, k, kk, ks, m;
3303 double wkr, wki, xr;
3308 for (j = 1; j < m; j++) {
3311 wkr = c[kk] - c[nc - kk];
3312 wki = c[kk] + c[nc - kk];
3313 xr = wki * a[j] - wkr * a[k];
3314 a[j] = wkr * a[j] + wki * a[k];
3321 void dstsub(int n, double *a, int nc, double *c)
3323 int j, k, kk, ks, m;
3324 double wkr, wki, xr;
3329 for (j = 1; j < m; j++) {
3332 wkr = c[kk] - c[nc - kk];
3333 wki = c[kk] + c[nc - kk];
3334 xr = wki * a[k] - wkr * a[j];
3335 a[k] = wkr * a[k] + wki * a[j];