1 /*****************************************************************************
3 *****************************************************************************
4 * Copyright (C) 2004 the VideoLAN team
7 * Authors: Cyril Deguet <asmax@videolan.org>
8 * code from projectM http://xmms-projectm.sourceforge.net
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
28 Fast Fourier/Cosine/Sine Transform
30 data length :power of 2
36 cdft: Complex Discrete Fourier Transform
37 rdft: Real Discrete Fourier Transform
38 ddct: Discrete Cosine Transform
39 ddst: Discrete Sine Transform
40 dfct: Cosine Transform of RDFT (Real Symmetric DFT)
41 dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
43 void cdft(int, int, double *, int *, double *);
44 void rdft(int, int, double *, int *, double *);
45 void ddct(int, int, double *, int *, double *);
46 void ddst(int, int, double *, int *, double *);
47 void dfct(int, double *, double *, int *, double *);
48 void dfst(int, double *, double *, int *, double *);
50 USE_CDFT_PTHREADS : default=not defined
51 CDFT_THREADS_BEGIN_N : must be >= 512, default=8192
52 CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536
53 USE_CDFT_WINTHREADS : default=not defined
54 CDFT_THREADS_BEGIN_N : must be >= 512, default=32768
55 CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288
58 -------- Complex DFT (Discrete Fourier Transform) --------
61 X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
63 X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
64 (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
67 ip[0] = 0; // first time only
68 cdft(2*n, 1, a, ip, w);
70 ip[0] = 0; // first time only
71 cdft(2*n, -1, a, ip, w);
73 2*n :data length (int)
74 n >= 1, n = power of 2
75 a[0...2*n-1] :input/output data (double *)
78 a[2*j+1] = Im(x[j]), 0<=j<n
81 a[2*k+1] = Im(X[k]), 0<=k<n
82 ip[0...*] :work area for bit reversal (int *)
83 length of ip >= 2+sqrt(n)
86 2+(1<<(int)(log(n+0.5)/log(2))/2).
87 ip[0],ip[1] are pointers of the cos/sin table.
88 w[0...n/2-1] :cos/sin table (double *)
89 w[],ip[] are initialized if ip[0] == 0.
92 cdft(2*n, -1, a, ip, w);
94 cdft(2*n, 1, a, ip, w);
95 for (j = 0; j <= 2 * n - 1; j++) {
101 -------- Real DFT / Inverse of Real DFT --------
104 R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
105 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
106 <case2> IRDFT (excluding scale)
107 a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
108 sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
109 sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
112 ip[0] = 0; // first time only
113 rdft(n, 1, a, ip, w);
115 ip[0] = 0; // first time only
116 rdft(n, -1, a, ip, w);
119 n >= 2, n = power of 2
120 a[0...n-1] :input/output data (double *)
123 a[2*k] = R[k], 0<=k<n/2
124 a[2*k+1] = I[k], 0<k<n/2
128 a[2*j] = R[j], 0<=j<n/2
129 a[2*j+1] = I[j], 0<j<n/2
131 ip[0...*] :work area for bit reversal (int *)
132 length of ip >= 2+sqrt(n/2)
135 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
136 ip[0],ip[1] are pointers of the cos/sin table.
137 w[0...n/2-1] :cos/sin table (double *)
138 w[],ip[] are initialized if ip[0] == 0.
141 rdft(n, 1, a, ip, w);
143 rdft(n, -1, a, ip, w);
144 for (j = 0; j <= n - 1; j++) {
150 -------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
152 <case1> IDCT (excluding scale)
153 C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
155 C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
158 ip[0] = 0; // first time only
159 ddct(n, 1, a, ip, w);
161 ip[0] = 0; // first time only
162 ddct(n, -1, a, ip, w);
165 n >= 2, n = power of 2
166 a[0...n-1] :input/output data (double *)
169 ip[0...*] :work area for bit reversal (int *)
170 length of ip >= 2+sqrt(n/2)
173 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
174 ip[0],ip[1] are pointers of the cos/sin table.
175 w[0...n*5/4-1] :cos/sin table (double *)
176 w[],ip[] are initialized if ip[0] == 0.
179 ddct(n, -1, a, ip, w);
182 ddct(n, 1, a, ip, w);
183 for (j = 0; j <= n - 1; j++) {
189 -------- DST (Discrete Sine Transform) / Inverse of DST --------
191 <case1> IDST (excluding scale)
192 S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
194 S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
197 ip[0] = 0; // first time only
198 ddst(n, 1, a, ip, w);
200 ip[0] = 0; // first time only
201 ddst(n, -1, a, ip, w);
204 n >= 2, n = power of 2
205 a[0...n-1] :input/output data (double *)
216 ip[0...*] :work area for bit reversal (int *)
217 length of ip >= 2+sqrt(n/2)
220 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
221 ip[0],ip[1] are pointers of the cos/sin table.
222 w[0...n*5/4-1] :cos/sin table (double *)
223 w[],ip[] are initialized if ip[0] == 0.
226 ddst(n, -1, a, ip, w);
229 ddst(n, 1, a, ip, w);
230 for (j = 0; j <= n - 1; j++) {
236 -------- Cosine Transform of RDFT (Real Symmetric DFT) --------
238 C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
240 ip[0] = 0; // first time only
241 dfct(n, a, t, ip, w);
243 n :data length - 1 (int)
244 n >= 2, n = power of 2
245 a[0...n] :input/output data (double *)
248 t[0...n/2] :work area (double *)
249 ip[0...*] :work area for bit reversal (int *)
250 length of ip >= 2+sqrt(n/4)
253 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
254 ip[0],ip[1] are pointers of the cos/sin table.
255 w[0...n*5/8-1] :cos/sin table (double *)
256 w[],ip[] are initialized if ip[0] == 0.
261 dfct(n, a, t, ip, w);
265 dfct(n, a, t, ip, w);
266 for (j = 0; j <= n; j++) {
272 -------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
274 S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
276 ip[0] = 0; // first time only
277 dfst(n, a, t, ip, w);
279 n :data length + 1 (int)
280 n >= 2, n = power of 2
281 a[0...n-1] :input/output data (double *)
284 (a[0] is used for work area)
285 t[0...n/2-1] :work area (double *)
286 ip[0...*] :work area for bit reversal (int *)
287 length of ip >= 2+sqrt(n/4)
290 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
291 ip[0],ip[1] are pointers of the cos/sin table.
292 w[0...n*5/8-1] :cos/sin table (double *)
293 w[],ip[] are initialized if ip[0] == 0.
296 dfst(n, a, t, ip, w);
298 dfst(n, a, t, ip, w);
299 for (j = 1; j <= n - 1; j++) {
306 The cos/sin table is recalculated when the larger table required.
307 w[] and ip[] are compatible with all routines.
311 void cdft(int n, int isgn, double *a, int *ip, double *w)
313 void makewt(int nw, int *ip, double *w);
314 void cftfsub(int n, double *a, int *ip, int nw, double *w);
315 void cftbsub(int n, double *a, int *ip, int nw, double *w);
324 cftfsub(n, a, ip, nw, w);
326 cftbsub(n, a, ip, nw, w);
331 void rdft(int n, int isgn, double *a, int *ip, double *w)
333 void makewt(int nw, int *ip, double *w);
334 void makect(int nc, int *ip, double *c);
335 void cftfsub(int n, double *a, int *ip, int nw, double *w);
336 void cftbsub(int n, double *a, int *ip, int nw, double *w);
337 void rftfsub(int n, double *a, int nc, double *c);
338 void rftbsub(int n, double *a, int nc, double *c);
350 makect(nc, ip, w + nw);
354 cftfsub(n, a, ip, nw, w);
355 rftfsub(n, a, nc, w + nw);
357 cftfsub(n, a, ip, nw, w);
363 a[1] = 0.5 * (a[0] - a[1]);
366 rftbsub(n, a, nc, w + nw);
367 cftbsub(n, a, ip, nw, w);
369 cftbsub(n, a, ip, nw, w);
375 void ddct(int n, int isgn, double *a, int *ip, double *w)
377 void makewt(int nw, int *ip, double *w);
378 void makect(int nc, int *ip, double *c);
379 void cftfsub(int n, double *a, int *ip, int nw, double *w);
380 void cftbsub(int n, double *a, int *ip, int nw, double *w);
381 void rftfsub(int n, double *a, int nc, double *c);
382 void rftbsub(int n, double *a, int nc, double *c);
383 void dctsub(int n, double *a, int nc, double *c);
395 makect(nc, ip, w + nw);
399 for (j = n - 2; j >= 2; j -= 2) {
400 a[j + 1] = a[j] - a[j - 1];
406 rftbsub(n, a, nc, w + nw);
407 cftbsub(n, a, ip, nw, w);
409 cftbsub(n, a, ip, nw, w);
412 dctsub(n, a, nc, w + nw);
415 cftfsub(n, a, ip, nw, w);
416 rftfsub(n, a, nc, w + nw);
418 cftfsub(n, a, ip, nw, w);
422 for (j = 2; j < n; j += 2) {
423 a[j - 1] = a[j] - a[j + 1];
431 void ddst(int n, int isgn, double *a, int *ip, double *w)
433 void makewt(int nw, int *ip, double *w);
434 void makect(int nc, int *ip, double *c);
435 void cftfsub(int n, double *a, int *ip, int nw, double *w);
436 void cftbsub(int n, double *a, int *ip, int nw, double *w);
437 void rftfsub(int n, double *a, int nc, double *c);
438 void rftbsub(int n, double *a, int nc, double *c);
439 void dstsub(int n, double *a, int nc, double *c);
451 makect(nc, ip, w + nw);
455 for (j = n - 2; j >= 2; j -= 2) {
456 a[j + 1] = -a[j] - a[j - 1];
462 rftbsub(n, a, nc, w + nw);
463 cftbsub(n, a, ip, nw, w);
465 cftbsub(n, a, ip, nw, w);
468 dstsub(n, a, nc, w + nw);
471 cftfsub(n, a, ip, nw, w);
472 rftfsub(n, a, nc, w + nw);
474 cftfsub(n, a, ip, nw, w);
478 for (j = 2; j < n; j += 2) {
479 a[j - 1] = -a[j] - a[j + 1];
487 void dfct(int n, double *a, double *t, int *ip, double *w)
489 void makewt(int nw, int *ip, double *w);
490 void makect(int nc, int *ip, double *c);
491 void cftfsub(int n, double *a, int *ip, int nw, double *w);
492 void rftfsub(int n, double *a, int nc, double *c);
493 void dctsub(int n, double *a, int nc, double *c);
494 int j, k, l, m, mh, nw, nc;
495 double xr, xi, yr, yi;
505 makect(nc, ip, w + nw);
515 for (j = 1; j < mh; j++) {
517 xr = a[j] - a[n - j];
518 xi = a[j] + a[n - j];
519 yr = a[k] - a[n - k];
520 yi = a[k] + a[n - k];
526 t[mh] = a[mh] + a[n - mh];
528 dctsub(m, a, nc, w + nw);
530 cftfsub(m, a, ip, nw, w);
531 rftfsub(m, a, nc, w + nw);
533 cftfsub(m, a, ip, nw, w);
535 a[n - 1] = a[0] - a[1];
537 for (j = m - 2; j >= 2; j -= 2) {
538 a[2 * j + 1] = a[j] + a[j + 1];
539 a[2 * j - 1] = a[j] - a[j + 1];
544 dctsub(m, t, nc, w + nw);
546 cftfsub(m, t, ip, nw, w);
547 rftfsub(m, t, nc, w + nw);
549 cftfsub(m, t, ip, nw, w);
551 a[n - l] = t[0] - t[1];
554 for (j = 2; j < m; j += 2) {
556 a[k - l] = t[j] - t[j + 1];
557 a[k + l] = t[j] + t[j + 1];
561 for (j = 0; j < mh; j++) {
563 t[j] = t[m + k] - t[m + j];
564 t[k] = t[m + k] + t[m + j];
580 void dfst(int n, double *a, double *t, int *ip, double *w)
582 void makewt(int nw, int *ip, double *w);
583 void makect(int nc, int *ip, double *c);
584 void cftfsub(int n, double *a, int *ip, int nw, double *w);
585 void rftfsub(int n, double *a, int nc, double *c);
586 void dstsub(int n, double *a, int nc, double *c);
587 int j, k, l, m, mh, nw, nc;
588 double xr, xi, yr, yi;
598 makect(nc, ip, w + nw);
603 for (j = 1; j < mh; j++) {
605 xr = a[j] + a[n - j];
606 xi = a[j] - a[n - j];
607 yr = a[k] + a[n - k];
608 yi = a[k] - a[n - k];
614 t[0] = a[mh] - a[n - mh];
617 dstsub(m, a, nc, w + nw);
619 cftfsub(m, a, ip, nw, w);
620 rftfsub(m, a, nc, w + nw);
622 cftfsub(m, a, ip, nw, w);
624 a[n - 1] = a[1] - a[0];
626 for (j = m - 2; j >= 2; j -= 2) {
627 a[2 * j + 1] = a[j] - a[j + 1];
628 a[2 * j - 1] = -a[j] - a[j + 1];
633 dstsub(m, t, nc, w + nw);
635 cftfsub(m, t, ip, nw, w);
636 rftfsub(m, t, nc, w + nw);
638 cftfsub(m, t, ip, nw, w);
640 a[n - l] = t[1] - t[0];
643 for (j = 2; j < m; j += 2) {
645 a[k - l] = -t[j] - t[j + 1];
646 a[k + l] = t[j] - t[j + 1];
650 for (j = 1; j < mh; j++) {
652 t[j] = t[m + k] + t[m + j];
653 t[k] = t[m + k] - t[m + j];
664 /* -------- initializing routines -------- */
669 void makewt(int nw, int *ip, double *w)
671 void makeipt(int nw, int *ip);
672 int j, nwh, nw0, nw1;
673 double delta, wn4r, wk1r, wk1i, wk3r, wk3i;
679 delta = atan(1.0) / nwh;
680 wn4r = cos(delta * nwh);
684 w[2] = cos(delta * 2);
685 w[3] = sin(delta * 2);
686 } else if (nwh > 4) {
688 w[2] = 0.5 / cos(delta * 2);
689 w[3] = 0.5 / cos(delta * 6);
690 for (j = 4; j < nwh; j += 4) {
691 w[j] = cos(delta * j);
692 w[j + 1] = sin(delta * j);
693 w[j + 2] = cos(3 * delta * j);
694 w[j + 3] = -sin(3 * delta * j);
708 } else if (nwh > 4) {
711 w[nw1 + 2] = 0.5 / wk1r;
712 w[nw1 + 3] = 0.5 / wk3r;
713 for (j = 4; j < nwh; j += 4) {
714 wk1r = w[nw0 + 2 * j];
715 wk1i = w[nw0 + 2 * j + 1];
716 wk3r = w[nw0 + 2 * j + 2];
717 wk3i = w[nw0 + 2 * j + 3];
719 w[nw1 + j + 1] = wk1i;
720 w[nw1 + j + 2] = wk3r;
721 w[nw1 + j + 3] = wk3i;
730 void makeipt(int nw, int *ip)
732 int j, l, m, m2, p, q;
737 for (l = nw; l > 32; l >>= 2) {
740 for (j = m; j < m2; j++) {
750 void makect(int nc, int *ip, double *c)
758 delta = atan(1.0) / nch;
759 c[0] = cos(delta * nch);
761 for (j = 1; j < nch; j++) {
762 c[j] = 0.5 * cos(delta * j);
763 c[nc - j] = 0.5 * sin(delta * j);
769 /* -------- child routines -------- */
772 #ifdef USE_CDFT_PTHREADS
773 #define USE_CDFT_THREADS
774 #ifndef CDFT_THREADS_BEGIN_N
775 #define CDFT_THREADS_BEGIN_N 8192
777 #ifndef CDFT_4THREADS_BEGIN_N
778 #define CDFT_4THREADS_BEGIN_N 65536
781 #define cdft_thread_t pthread_t
782 #define cdft_thread_create(thp,func,argp) { \
783 if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \
784 fprintf(stderr, "cdft thread error\n"); \
788 #define cdft_thread_wait(th) { \
789 if (pthread_join(th, NULL) != 0) { \
790 fprintf(stderr, "cdft thread error\n"); \
794 #endif /* USE_CDFT_PTHREADS */
797 #ifdef USE_CDFT_WINTHREADS
798 #define USE_CDFT_THREADS
799 #ifndef CDFT_THREADS_BEGIN_N
800 #define CDFT_THREADS_BEGIN_N 32768
802 #ifndef CDFT_4THREADS_BEGIN_N
803 #define CDFT_4THREADS_BEGIN_N 524288
806 #define cdft_thread_t HANDLE
807 #define cdft_thread_create(thp,func,argp) { \
809 *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \
811 fprintf(stderr, "cdft thread error\n"); \
815 #define cdft_thread_wait(th) { \
816 WaitForSingleObject(th, INFINITE); \
819 #endif /* USE_CDFT_WINTHREADS */
822 void cftfsub(int n, double *a, int *ip, int nw, double *w)
824 void bitrv2(int n, int *ip, double *a);
825 void bitrv216(double *a);
826 void bitrv208(double *a);
827 void cftf1st(int n, double *a, double *w);
828 void cftrec4(int n, double *a, int nw, double *w);
829 void cftleaf(int n, int isplt, double *a, int nw, double *w);
830 void cftfx41(int n, double *a, int nw, double *w);
831 void cftf161(double *a, double *w);
832 void cftf081(double *a, double *w);
833 void cftf040(double *a);
834 void cftx020(double *a);
835 #ifdef USE_CDFT_THREADS
836 void cftrec4_th(int n, double *a, int nw, double *w);
837 #endif /* USE_CDFT_THREADS */
841 cftf1st(n, a, &w[nw - (n >> 2)]);
842 #ifdef USE_CDFT_THREADS
843 if (n > CDFT_THREADS_BEGIN_N) {
844 cftrec4_th(n, a, nw, w);
846 #endif /* USE_CDFT_THREADS */
848 cftrec4(n, a, nw, w);
849 } else if (n > 128) {
850 cftleaf(n, 1, a, nw, w);
852 cftfx41(n, a, nw, w);
855 } else if (n == 32) {
856 cftf161(a, &w[nw - 8]);
870 void cftbsub(int n, double *a, int *ip, int nw, double *w)
872 void bitrv2conj(int n, int *ip, double *a);
873 void bitrv216neg(double *a);
874 void bitrv208neg(double *a);
875 void cftb1st(int n, double *a, double *w);
876 void cftrec4(int n, double *a, int nw, double *w);
877 void cftleaf(int n, int isplt, double *a, int nw, double *w);
878 void cftfx41(int n, double *a, int nw, double *w);
879 void cftf161(double *a, double *w);
880 void cftf081(double *a, double *w);
881 void cftb040(double *a);
882 void cftx020(double *a);
883 #ifdef USE_CDFT_THREADS
884 void cftrec4_th(int n, double *a, int nw, double *w);
885 #endif /* USE_CDFT_THREADS */
889 cftb1st(n, a, &w[nw - (n >> 2)]);
890 #ifdef USE_CDFT_THREADS
891 if (n > CDFT_THREADS_BEGIN_N) {
892 cftrec4_th(n, a, nw, w);
894 #endif /* USE_CDFT_THREADS */
896 cftrec4(n, a, nw, w);
897 } else if (n > 128) {
898 cftleaf(n, 1, a, nw, w);
900 cftfx41(n, a, nw, w);
902 bitrv2conj(n, ip, a);
903 } else if (n == 32) {
904 cftf161(a, &w[nw - 8]);
918 void bitrv2(int n, int *ip, double *a)
920 int j, j1, k, k1, l, m, nh, nm;
921 double xr, xi, yr, yi;
924 for (l = n >> 2; l > 8; l >>= 2) {
930 for (k = 0; k < m; k++) {
931 for (j = 0; j < k; j++) {
932 j1 = 4 * j + 2 * ip[m + k];
933 k1 = 4 * k + 2 * ip[m + j];
1093 k1 = 4 * k + 2 * ip[m + k];
1156 for (k = 0; k < m; k++) {
1157 for (j = 0; j < k; j++) {
1158 j1 = 4 * j + ip[m + k];
1159 k1 = 4 * k + ip[m + j];
1239 k1 = 4 * k + ip[m + k];
1265 void bitrv2conj(int n, int *ip, double *a)
1267 int j, j1, k, k1, l, m, nh, nm;
1268 double xr, xi, yr, yi;
1271 for (l = n >> 2; l > 8; l >>= 2) {
1277 for (k = 0; k < m; k++) {
1278 for (j = 0; j < k; j++) {
1279 j1 = 4 * j + 2 * ip[m + k];
1280 k1 = 4 * k + 2 * ip[m + j];
1440 k1 = 4 * k + 2 * ip[m + k];
1443 a[j1 - 1] = -a[j1 - 1];
1452 a[k1 + 3] = -a[k1 + 3];
1495 a[j1 - 1] = -a[j1 - 1];
1504 a[k1 + 3] = -a[k1 + 3];
1507 for (k = 0; k < m; k++) {
1508 for (j = 0; j < k; j++) {
1509 j1 = 4 * j + ip[m + k];
1510 k1 = 4 * k + ip[m + j];
1590 k1 = 4 * k + ip[m + k];
1593 a[j1 - 1] = -a[j1 - 1];
1602 a[k1 + 3] = -a[k1 + 3];
1605 a[j1 - 1] = -a[j1 - 1];
1614 a[k1 + 3] = -a[k1 + 3];
1620 void bitrv216(double *a)
1622 double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1623 x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i,
1624 x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i;
1677 void bitrv216neg(double *a)
1679 double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1680 x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i,
1681 x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i,
1682 x13r, x13i, x14r, x14i, x15r, x15i;
1747 void bitrv208(double *a)
1749 double x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i;
1770 void bitrv208neg(double *a)
1772 double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1773 x5r, x5i, x6r, x6i, x7r, x7i;
1806 void cftf1st(int n, double *a, double *w)
1808 int j, j0, j1, j2, j3, k, m, mh;
1809 double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i,
1810 wd1r, wd1i, wd3r, wd3i;
1811 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
1812 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i;
1820 x0i = a[1] + a[j2 + 1];
1822 x1i = a[1] - a[j2 + 1];
1823 x2r = a[j1] + a[j3];
1824 x2i = a[j1 + 1] + a[j3 + 1];
1825 x3r = a[j1] - a[j3];
1826 x3i = a[j1 + 1] - a[j3 + 1];
1830 a[j1 + 1] = x0i - x2i;
1832 a[j2 + 1] = x1i + x3r;
1834 a[j3 + 1] = x1i - x3r;
1843 for (j = 2; j < mh - 2; j += 4) {
1845 wk1r = csc1 * (wd1r + w[k]);
1846 wk1i = csc1 * (wd1i + w[k + 1]);
1847 wk3r = csc3 * (wd3r + w[k + 2]);
1848 wk3i = csc3 * (wd3i + w[k + 3]);
1857 x0i = a[j + 1] + a[j2 + 1];
1859 x1i = a[j + 1] - a[j2 + 1];
1860 y0r = a[j + 2] + a[j2 + 2];
1861 y0i = a[j + 3] + a[j2 + 3];
1862 y1r = a[j + 2] - a[j2 + 2];
1863 y1i = a[j + 3] - a[j2 + 3];
1864 x2r = a[j1] + a[j3];
1865 x2i = a[j1 + 1] + a[j3 + 1];
1866 x3r = a[j1] - a[j3];
1867 x3i = a[j1 + 1] - a[j3 + 1];
1868 y2r = a[j1 + 2] + a[j3 + 2];
1869 y2i = a[j1 + 3] + a[j3 + 3];
1870 y3r = a[j1 + 2] - a[j3 + 2];
1871 y3i = a[j1 + 3] - a[j3 + 3];
1873 a[j + 1] = x0i + x2i;
1874 a[j + 2] = y0r + y2r;
1875 a[j + 3] = y0i + y2i;
1877 a[j1 + 1] = x0i - x2i;
1878 a[j1 + 2] = y0r - y2r;
1879 a[j1 + 3] = y0i - y2i;
1882 a[j2] = wk1r * x0r - wk1i * x0i;
1883 a[j2 + 1] = wk1r * x0i + wk1i * x0r;
1886 a[j2 + 2] = wd1r * x0r - wd1i * x0i;
1887 a[j2 + 3] = wd1r * x0i + wd1i * x0r;
1890 a[j3] = wk3r * x0r + wk3i * x0i;
1891 a[j3 + 1] = wk3r * x0i - wk3i * x0r;
1894 a[j3 + 2] = wd3r * x0r + wd3i * x0i;
1895 a[j3 + 3] = wd3r * x0i - wd3i * x0r;
1900 x0r = a[j0] + a[j2];
1901 x0i = a[j0 + 1] + a[j2 + 1];
1902 x1r = a[j0] - a[j2];
1903 x1i = a[j0 + 1] - a[j2 + 1];
1904 y0r = a[j0 - 2] + a[j2 - 2];
1905 y0i = a[j0 - 1] + a[j2 - 1];
1906 y1r = a[j0 - 2] - a[j2 - 2];
1907 y1i = a[j0 - 1] - a[j2 - 1];
1908 x2r = a[j1] + a[j3];
1909 x2i = a[j1 + 1] + a[j3 + 1];
1910 x3r = a[j1] - a[j3];
1911 x3i = a[j1 + 1] - a[j3 + 1];
1912 y2r = a[j1 - 2] + a[j3 - 2];
1913 y2i = a[j1 - 1] + a[j3 - 1];
1914 y3r = a[j1 - 2] - a[j3 - 2];
1915 y3i = a[j1 - 1] - a[j3 - 1];
1917 a[j0 + 1] = x0i + x2i;
1918 a[j0 - 2] = y0r + y2r;
1919 a[j0 - 1] = y0i + y2i;
1921 a[j1 + 1] = x0i - x2i;
1922 a[j1 - 2] = y0r - y2r;
1923 a[j1 - 1] = y0i - y2i;
1926 a[j2] = wk1i * x0r - wk1r * x0i;
1927 a[j2 + 1] = wk1i * x0i + wk1r * x0r;
1930 a[j2 - 2] = wd1i * x0r - wd1r * x0i;
1931 a[j2 - 1] = wd1i * x0i + wd1r * x0r;
1934 a[j3] = wk3i * x0r + wk3r * x0i;
1935 a[j3 + 1] = wk3i * x0i - wk3r * x0r;
1938 a[j3 - 2] = wd3i * x0r + wd3r * x0i;
1939 a[j3 - 1] = wd3i * x0i - wd3r * x0r;
1941 wk1r = csc1 * (wd1r + wn4r);
1942 wk1i = csc1 * (wd1i + wn4r);
1943 wk3r = csc3 * (wd3r - wn4r);
1944 wk3i = csc3 * (wd3i - wn4r);
1949 x0r = a[j0 - 2] + a[j2 - 2];
1950 x0i = a[j0 - 1] + a[j2 - 1];
1951 x1r = a[j0 - 2] - a[j2 - 2];
1952 x1i = a[j0 - 1] - a[j2 - 1];
1953 x2r = a[j1 - 2] + a[j3 - 2];
1954 x2i = a[j1 - 1] + a[j3 - 1];
1955 x3r = a[j1 - 2] - a[j3 - 2];
1956 x3i = a[j1 - 1] - a[j3 - 1];
1957 a[j0 - 2] = x0r + x2r;
1958 a[j0 - 1] = x0i + x2i;
1959 a[j1 - 2] = x0r - x2r;
1960 a[j1 - 1] = x0i - x2i;
1963 a[j2 - 2] = wk1r * x0r - wk1i * x0i;
1964 a[j2 - 1] = wk1r * x0i + wk1i * x0r;
1967 a[j3 - 2] = wk3r * x0r + wk3i * x0i;
1968 a[j3 - 1] = wk3r * x0i - wk3i * x0r;
1969 x0r = a[j0] + a[j2];
1970 x0i = a[j0 + 1] + a[j2 + 1];
1971 x1r = a[j0] - a[j2];
1972 x1i = a[j0 + 1] - a[j2 + 1];
1973 x2r = a[j1] + a[j3];
1974 x2i = a[j1 + 1] + a[j3 + 1];
1975 x3r = a[j1] - a[j3];
1976 x3i = a[j1 + 1] - a[j3 + 1];
1978 a[j0 + 1] = x0i + x2i;
1980 a[j1 + 1] = x0i - x2i;
1983 a[j2] = wn4r * (x0r - x0i);
1984 a[j2 + 1] = wn4r * (x0i + x0r);
1987 a[j3] = -wn4r * (x0r + x0i);
1988 a[j3 + 1] = -wn4r * (x0i - x0r);
1989 x0r = a[j0 + 2] + a[j2 + 2];
1990 x0i = a[j0 + 3] + a[j2 + 3];
1991 x1r = a[j0 + 2] - a[j2 + 2];
1992 x1i = a[j0 + 3] - a[j2 + 3];
1993 x2r = a[j1 + 2] + a[j3 + 2];
1994 x2i = a[j1 + 3] + a[j3 + 3];
1995 x3r = a[j1 + 2] - a[j3 + 2];
1996 x3i = a[j1 + 3] - a[j3 + 3];
1997 a[j0 + 2] = x0r + x2r;
1998 a[j0 + 3] = x0i + x2i;
1999 a[j1 + 2] = x0r - x2r;
2000 a[j1 + 3] = x0i - x2i;
2003 a[j2 + 2] = wk1i * x0r - wk1r * x0i;
2004 a[j2 + 3] = wk1i * x0i + wk1r * x0r;
2007 a[j3 + 2] = wk3i * x0r + wk3r * x0i;
2008 a[j3 + 3] = wk3i * x0i - wk3r * x0r;
2012 void cftb1st(int n, double *a, double *w)
2014 int j, j0, j1, j2, j3, k, m, mh;
2015 double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i,
2016 wd1r, wd1i, wd3r, wd3i;
2017 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
2018 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i;
2026 x0i = -a[1] - a[j2 + 1];
2028 x1i = -a[1] + a[j2 + 1];
2029 x2r = a[j1] + a[j3];
2030 x2i = a[j1 + 1] + a[j3 + 1];
2031 x3r = a[j1] - a[j3];
2032 x3i = a[j1 + 1] - a[j3 + 1];
2036 a[j1 + 1] = x0i + x2i;
2038 a[j2 + 1] = x1i + x3r;
2040 a[j3 + 1] = x1i - x3r;
2049 for (j = 2; j < mh - 2; j += 4) {
2051 wk1r = csc1 * (wd1r + w[k]);
2052 wk1i = csc1 * (wd1i + w[k + 1]);
2053 wk3r = csc3 * (wd3r + w[k + 2]);
2054 wk3i = csc3 * (wd3i + w[k + 3]);
2063 x0i = -a[j + 1] - a[j2 + 1];
2065 x1i = -a[j + 1] + a[j2 + 1];
2066 y0r = a[j + 2] + a[j2 + 2];
2067 y0i = -a[j + 3] - a[j2 + 3];
2068 y1r = a[j + 2] - a[j2 + 2];
2069 y1i = -a[j + 3] + a[j2 + 3];
2070 x2r = a[j1] + a[j3];
2071 x2i = a[j1 + 1] + a[j3 + 1];
2072 x3r = a[j1] - a[j3];
2073 x3i = a[j1 + 1] - a[j3 + 1];
2074 y2r = a[j1 + 2] + a[j3 + 2];
2075 y2i = a[j1 + 3] + a[j3 + 3];
2076 y3r = a[j1 + 2] - a[j3 + 2];
2077 y3i = a[j1 + 3] - a[j3 + 3];
2079 a[j + 1] = x0i - x2i;
2080 a[j + 2] = y0r + y2r;
2081 a[j + 3] = y0i - y2i;
2083 a[j1 + 1] = x0i + x2i;
2084 a[j1 + 2] = y0r - y2r;
2085 a[j1 + 3] = y0i + y2i;
2088 a[j2] = wk1r * x0r - wk1i * x0i;
2089 a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2092 a[j2 + 2] = wd1r * x0r - wd1i * x0i;
2093 a[j2 + 3] = wd1r * x0i + wd1i * x0r;
2096 a[j3] = wk3r * x0r + wk3i * x0i;
2097 a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2100 a[j3 + 2] = wd3r * x0r + wd3i * x0i;
2101 a[j3 + 3] = wd3r * x0i - wd3i * x0r;
2106 x0r = a[j0] + a[j2];
2107 x0i = -a[j0 + 1] - a[j2 + 1];
2108 x1r = a[j0] - a[j2];
2109 x1i = -a[j0 + 1] + a[j2 + 1];
2110 y0r = a[j0 - 2] + a[j2 - 2];
2111 y0i = -a[j0 - 1] - a[j2 - 1];
2112 y1r = a[j0 - 2] - a[j2 - 2];
2113 y1i = -a[j0 - 1] + a[j2 - 1];
2114 x2r = a[j1] + a[j3];
2115 x2i = a[j1 + 1] + a[j3 + 1];
2116 x3r = a[j1] - a[j3];
2117 x3i = a[j1 + 1] - a[j3 + 1];
2118 y2r = a[j1 - 2] + a[j3 - 2];
2119 y2i = a[j1 - 1] + a[j3 - 1];
2120 y3r = a[j1 - 2] - a[j3 - 2];
2121 y3i = a[j1 - 1] - a[j3 - 1];
2123 a[j0 + 1] = x0i - x2i;
2124 a[j0 - 2] = y0r + y2r;
2125 a[j0 - 1] = y0i - y2i;
2127 a[j1 + 1] = x0i + x2i;
2128 a[j1 - 2] = y0r - y2r;
2129 a[j1 - 1] = y0i + y2i;
2132 a[j2] = wk1i * x0r - wk1r * x0i;
2133 a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2136 a[j2 - 2] = wd1i * x0r - wd1r * x0i;
2137 a[j2 - 1] = wd1i * x0i + wd1r * x0r;
2140 a[j3] = wk3i * x0r + wk3r * x0i;
2141 a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2144 a[j3 - 2] = wd3i * x0r + wd3r * x0i;
2145 a[j3 - 1] = wd3i * x0i - wd3r * x0r;
2147 wk1r = csc1 * (wd1r + wn4r);
2148 wk1i = csc1 * (wd1i + wn4r);
2149 wk3r = csc3 * (wd3r - wn4r);
2150 wk3i = csc3 * (wd3i - wn4r);
2155 x0r = a[j0 - 2] + a[j2 - 2];
2156 x0i = -a[j0 - 1] - a[j2 - 1];
2157 x1r = a[j0 - 2] - a[j2 - 2];
2158 x1i = -a[j0 - 1] + a[j2 - 1];
2159 x2r = a[j1 - 2] + a[j3 - 2];
2160 x2i = a[j1 - 1] + a[j3 - 1];
2161 x3r = a[j1 - 2] - a[j3 - 2];
2162 x3i = a[j1 - 1] - a[j3 - 1];
2163 a[j0 - 2] = x0r + x2r;
2164 a[j0 - 1] = x0i - x2i;
2165 a[j1 - 2] = x0r - x2r;
2166 a[j1 - 1] = x0i + x2i;
2169 a[j2 - 2] = wk1r * x0r - wk1i * x0i;
2170 a[j2 - 1] = wk1r * x0i + wk1i * x0r;
2173 a[j3 - 2] = wk3r * x0r + wk3i * x0i;
2174 a[j3 - 1] = wk3r * x0i - wk3i * x0r;
2175 x0r = a[j0] + a[j2];
2176 x0i = -a[j0 + 1] - a[j2 + 1];
2177 x1r = a[j0] - a[j2];
2178 x1i = -a[j0 + 1] + a[j2 + 1];
2179 x2r = a[j1] + a[j3];
2180 x2i = a[j1 + 1] + a[j3 + 1];
2181 x3r = a[j1] - a[j3];
2182 x3i = a[j1 + 1] - a[j3 + 1];
2184 a[j0 + 1] = x0i - x2i;
2186 a[j1 + 1] = x0i + x2i;
2189 a[j2] = wn4r * (x0r - x0i);
2190 a[j2 + 1] = wn4r * (x0i + x0r);
2193 a[j3] = -wn4r * (x0r + x0i);
2194 a[j3 + 1] = -wn4r * (x0i - x0r);
2195 x0r = a[j0 + 2] + a[j2 + 2];
2196 x0i = -a[j0 + 3] - a[j2 + 3];
2197 x1r = a[j0 + 2] - a[j2 + 2];
2198 x1i = -a[j0 + 3] + a[j2 + 3];
2199 x2r = a[j1 + 2] + a[j3 + 2];
2200 x2i = a[j1 + 3] + a[j3 + 3];
2201 x3r = a[j1 + 2] - a[j3 + 2];
2202 x3i = a[j1 + 3] - a[j3 + 3];
2203 a[j0 + 2] = x0r + x2r;
2204 a[j0 + 3] = x0i - x2i;
2205 a[j1 + 2] = x0r - x2r;
2206 a[j1 + 3] = x0i + x2i;
2209 a[j2 + 2] = wk1i * x0r - wk1r * x0i;
2210 a[j2 + 3] = wk1i * x0i + wk1r * x0r;
2213 a[j3 + 2] = wk3i * x0r + wk3r * x0i;
2214 a[j3 + 3] = wk3i * x0i - wk3r * x0r;
2218 #ifdef USE_CDFT_THREADS
2219 struct cdft_arg_st {
2226 typedef struct cdft_arg_st cdft_arg_t;
2229 void cftrec4_th(int n, double *a, int nw, double *w)
2231 void *cftrec1_th(void *p);
2232 void *cftrec2_th(void *p);
2233 int i, idiv4, m, nthread;
2234 cdft_thread_t th[4];
2240 if (n > CDFT_4THREADS_BEGIN_N) {
2245 for (i = 0; i < nthread; i++) {
2248 ag[i].a = &a[i * m];
2252 cdft_thread_create(&th[i], cftrec1_th, &ag[i]);
2254 cdft_thread_create(&th[i], cftrec2_th, &ag[i]);
2257 for (i = 0; i < nthread; i++) {
2258 cdft_thread_wait(th[i]);
2263 void *cftrec1_th(void *p)
2265 int cfttree(int n, int j, int k, double *a, int nw, double *w);
2266 void cftleaf(int n, int isplt, double *a, int nw, double *w);
2267 void cftmdl1(int n, double *a, double *w);
2268 int isplt, j, k, m, n, n0, nw;
2271 n0 = ((cdft_arg_t *) p)->n0;
2272 n = ((cdft_arg_t *) p)->n;
2273 a = ((cdft_arg_t *) p)->a;
2274 nw = ((cdft_arg_t *) p)->nw;
2275 w = ((cdft_arg_t *) p)->w;
2279 cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]);
2281 cftleaf(m, 1, &a[n - m], nw, w);
2283 for (j = n - m; j > 0; j -= m) {
2285 isplt = cfttree(m, j, k, a, nw, w);
2286 cftleaf(m, isplt, &a[j - m], nw, w);
2292 void *cftrec2_th(void *p)
2294 int cfttree(int n, int j, int k, double *a, int nw, double *w);
2295 void cftleaf(int n, int isplt, double *a, int nw, double *w);
2296 void cftmdl2(int n, double *a, double *w);
2297 int isplt, j, k, m, n, n0, nw;
2300 n0 = ((cdft_arg_t *) p)->n0;
2301 n = ((cdft_arg_t *) p)->n;
2302 a = ((cdft_arg_t *) p)->a;
2303 nw = ((cdft_arg_t *) p)->nw;
2304 w = ((cdft_arg_t *) p)->w;
2310 cftmdl2(m, &a[n - m], &w[nw - m]);
2312 cftleaf(m, 0, &a[n - m], nw, w);
2314 for (j = n - m; j > 0; j -= m) {
2316 isplt = cfttree(m, j, k, a, nw, w);
2317 cftleaf(m, isplt, &a[j - m], nw, w);
2321 #endif /* USE_CDFT_THREADS */
2324 void cftrec4(int n, double *a, int nw, double *w)
2326 int cfttree(int n, int j, int k, double *a, int nw, double *w);
2327 void cftleaf(int n, int isplt, double *a, int nw, double *w);
2328 void cftmdl1(int n, double *a, double *w);
2334 cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]);
2336 cftleaf(m, 1, &a[n - m], nw, w);
2338 for (j = n - m; j > 0; j -= m) {
2340 isplt = cfttree(m, j, k, a, nw, w);
2341 cftleaf(m, isplt, &a[j - m], nw, w);
2346 int cfttree(int n, int j, int k, double *a, int nw, double *w)
2348 void cftmdl1(int n, double *a, double *w);
2349 void cftmdl2(int n, double *a, double *w);
2355 cftmdl1(n, &a[j - n], &w[nw - (n >> 1)]);
2357 cftmdl2(n, &a[j - n], &w[nw - n]);
2361 for (i = k; (i & 3) == 0; i >>= 2) {
2367 cftmdl1(m, &a[j - m], &w[nw - (m >> 1)]);
2372 cftmdl2(m, &a[j - m], &w[nw - m]);
2381 void cftleaf(int n, int isplt, double *a, int nw, double *w)
2383 void cftmdl1(int n, double *a, double *w);
2384 void cftmdl2(int n, double *a, double *w);
2385 void cftf161(double *a, double *w);
2386 void cftf162(double *a, double *w);
2387 void cftf081(double *a, double *w);
2388 void cftf082(double *a, double *w);
2391 cftmdl1(128, a, &w[nw - 64]);
2392 cftf161(a, &w[nw - 8]);
2393 cftf162(&a[32], &w[nw - 32]);
2394 cftf161(&a[64], &w[nw - 8]);
2395 cftf161(&a[96], &w[nw - 8]);
2396 cftmdl2(128, &a[128], &w[nw - 128]);
2397 cftf161(&a[128], &w[nw - 8]);
2398 cftf162(&a[160], &w[nw - 32]);
2399 cftf161(&a[192], &w[nw - 8]);
2400 cftf162(&a[224], &w[nw - 32]);
2401 cftmdl1(128, &a[256], &w[nw - 64]);
2402 cftf161(&a[256], &w[nw - 8]);
2403 cftf162(&a[288], &w[nw - 32]);
2404 cftf161(&a[320], &w[nw - 8]);
2405 cftf161(&a[352], &w[nw - 8]);
2407 cftmdl1(128, &a[384], &w[nw - 64]);
2408 cftf161(&a[480], &w[nw - 8]);
2410 cftmdl2(128, &a[384], &w[nw - 128]);
2411 cftf162(&a[480], &w[nw - 32]);
2413 cftf161(&a[384], &w[nw - 8]);
2414 cftf162(&a[416], &w[nw - 32]);
2415 cftf161(&a[448], &w[nw - 8]);
2417 cftmdl1(64, a, &w[nw - 32]);
2418 cftf081(a, &w[nw - 8]);
2419 cftf082(&a[16], &w[nw - 8]);
2420 cftf081(&a[32], &w[nw - 8]);
2421 cftf081(&a[48], &w[nw - 8]);
2422 cftmdl2(64, &a[64], &w[nw - 64]);
2423 cftf081(&a[64], &w[nw - 8]);
2424 cftf082(&a[80], &w[nw - 8]);
2425 cftf081(&a[96], &w[nw - 8]);
2426 cftf082(&a[112], &w[nw - 8]);
2427 cftmdl1(64, &a[128], &w[nw - 32]);
2428 cftf081(&a[128], &w[nw - 8]);
2429 cftf082(&a[144], &w[nw - 8]);
2430 cftf081(&a[160], &w[nw - 8]);
2431 cftf081(&a[176], &w[nw - 8]);
2433 cftmdl1(64, &a[192], &w[nw - 32]);
2434 cftf081(&a[240], &w[nw - 8]);
2436 cftmdl2(64, &a[192], &w[nw - 64]);
2437 cftf082(&a[240], &w[nw - 8]);
2439 cftf081(&a[192], &w[nw - 8]);
2440 cftf082(&a[208], &w[nw - 8]);
2441 cftf081(&a[224], &w[nw - 8]);
2446 void cftmdl1(int n, double *a, double *w)
2448 int j, j0, j1, j2, j3, k, m, mh;
2449 double wn4r, wk1r, wk1i, wk3r, wk3i;
2450 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2458 x0i = a[1] + a[j2 + 1];
2460 x1i = a[1] - a[j2 + 1];
2461 x2r = a[j1] + a[j3];
2462 x2i = a[j1 + 1] + a[j3 + 1];
2463 x3r = a[j1] - a[j3];
2464 x3i = a[j1 + 1] - a[j3 + 1];
2468 a[j1 + 1] = x0i - x2i;
2470 a[j2 + 1] = x1i + x3r;
2472 a[j3 + 1] = x1i - x3r;
2475 for (j = 2; j < mh; j += 2) {
2485 x0i = a[j + 1] + a[j2 + 1];
2487 x1i = a[j + 1] - a[j2 + 1];
2488 x2r = a[j1] + a[j3];
2489 x2i = a[j1 + 1] + a[j3 + 1];
2490 x3r = a[j1] - a[j3];
2491 x3i = a[j1 + 1] - a[j3 + 1];
2493 a[j + 1] = x0i + x2i;
2495 a[j1 + 1] = x0i - x2i;
2498 a[j2] = wk1r * x0r - wk1i * x0i;
2499 a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2502 a[j3] = wk3r * x0r + wk3i * x0i;
2503 a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2508 x0r = a[j0] + a[j2];
2509 x0i = a[j0 + 1] + a[j2 + 1];
2510 x1r = a[j0] - a[j2];
2511 x1i = a[j0 + 1] - a[j2 + 1];
2512 x2r = a[j1] + a[j3];
2513 x2i = a[j1 + 1] + a[j3 + 1];
2514 x3r = a[j1] - a[j3];
2515 x3i = a[j1 + 1] - a[j3 + 1];
2517 a[j0 + 1] = x0i + x2i;
2519 a[j1 + 1] = x0i - x2i;
2522 a[j2] = wk1i * x0r - wk1r * x0i;
2523 a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2526 a[j3] = wk3i * x0r + wk3r * x0i;
2527 a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2533 x0r = a[j0] + a[j2];
2534 x0i = a[j0 + 1] + a[j2 + 1];
2535 x1r = a[j0] - a[j2];
2536 x1i = a[j0 + 1] - a[j2 + 1];
2537 x2r = a[j1] + a[j3];
2538 x2i = a[j1 + 1] + a[j3 + 1];
2539 x3r = a[j1] - a[j3];
2540 x3i = a[j1 + 1] - a[j3 + 1];
2542 a[j0 + 1] = x0i + x2i;
2544 a[j1 + 1] = x0i - x2i;
2547 a[j2] = wn4r * (x0r - x0i);
2548 a[j2 + 1] = wn4r * (x0i + x0r);
2551 a[j3] = -wn4r * (x0r + x0i);
2552 a[j3 + 1] = -wn4r * (x0i - x0r);
2556 void cftmdl2(int n, double *a, double *w)
2558 int j, j0, j1, j2, j3, k, kr, m, mh;
2559 double wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
2560 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;
2568 x0r = a[0] - a[j2 + 1];
2570 x1r = a[0] + a[j2 + 1];
2572 x2r = a[j1] - a[j3 + 1];
2573 x2i = a[j1 + 1] + a[j3];
2574 x3r = a[j1] + a[j3 + 1];
2575 x3i = a[j1 + 1] - a[j3];
2576 y0r = wn4r * (x2r - x2i);
2577 y0i = wn4r * (x2i + x2r);
2581 a[j1 + 1] = x0i - y0i;
2582 y0r = wn4r * (x3r - x3i);
2583 y0i = wn4r * (x3i + x3r);
2585 a[j2 + 1] = x1i + y0r;
2587 a[j3 + 1] = x1i - y0r;
2590 for (j = 2; j < mh; j += 2) {
2604 x0r = a[j] - a[j2 + 1];
2605 x0i = a[j + 1] + a[j2];
2606 x1r = a[j] + a[j2 + 1];
2607 x1i = a[j + 1] - a[j2];
2608 x2r = a[j1] - a[j3 + 1];
2609 x2i = a[j1 + 1] + a[j3];
2610 x3r = a[j1] + a[j3 + 1];
2611 x3i = a[j1 + 1] - a[j3];
2612 y0r = wk1r * x0r - wk1i * x0i;
2613 y0i = wk1r * x0i + wk1i * x0r;
2614 y2r = wd1r * x2r - wd1i * x2i;
2615 y2i = wd1r * x2i + wd1i * x2r;
2617 a[j + 1] = y0i + y2i;
2619 a[j1 + 1] = y0i - y2i;
2620 y0r = wk3r * x1r + wk3i * x1i;
2621 y0i = wk3r * x1i - wk3i * x1r;
2622 y2r = wd3r * x3r + wd3i * x3i;
2623 y2i = wd3r * x3i - wd3i * x3r;
2625 a[j2 + 1] = y0i + y2i;
2627 a[j3 + 1] = y0i - y2i;
2632 x0r = a[j0] - a[j2 + 1];
2633 x0i = a[j0 + 1] + a[j2];
2634 x1r = a[j0] + a[j2 + 1];
2635 x1i = a[j0 + 1] - a[j2];
2636 x2r = a[j1] - a[j3 + 1];
2637 x2i = a[j1 + 1] + a[j3];
2638 x3r = a[j1] + a[j3 + 1];
2639 x3i = a[j1 + 1] - a[j3];
2640 y0r = wd1i * x0r - wd1r * x0i;
2641 y0i = wd1i * x0i + wd1r * x0r;
2642 y2r = wk1i * x2r - wk1r * x2i;
2643 y2i = wk1i * x2i + wk1r * x2r;
2645 a[j0 + 1] = y0i + y2i;
2647 a[j1 + 1] = y0i - y2i;
2648 y0r = wd3i * x1r + wd3r * x1i;
2649 y0i = wd3i * x1i - wd3r * x1r;
2650 y2r = wk3i * x3r + wk3r * x3i;
2651 y2i = wk3i * x3i - wk3r * x3r;
2653 a[j2 + 1] = y0i + y2i;
2655 a[j3 + 1] = y0i - y2i;
2663 x0r = a[j0] - a[j2 + 1];
2664 x0i = a[j0 + 1] + a[j2];
2665 x1r = a[j0] + a[j2 + 1];
2666 x1i = a[j0 + 1] - a[j2];
2667 x2r = a[j1] - a[j3 + 1];
2668 x2i = a[j1 + 1] + a[j3];
2669 x3r = a[j1] + a[j3 + 1];
2670 x3i = a[j1 + 1] - a[j3];
2671 y0r = wk1r * x0r - wk1i * x0i;
2672 y0i = wk1r * x0i + wk1i * x0r;
2673 y2r = wk1i * x2r - wk1r * x2i;
2674 y2i = wk1i * x2i + wk1r * x2r;
2676 a[j0 + 1] = y0i + y2i;
2678 a[j1 + 1] = y0i - y2i;
2679 y0r = wk1i * x1r - wk1r * x1i;
2680 y0i = wk1i * x1i + wk1r * x1r;
2681 y2r = wk1r * x3r - wk1i * x3i;
2682 y2i = wk1r * x3i + wk1i * x3r;
2684 a[j2 + 1] = y0i - y2i;
2686 a[j3 + 1] = y0i + y2i;
2690 void cftfx41(int n, double *a, int nw, double *w)
2692 void cftf161(double *a, double *w);
2693 void cftf162(double *a, double *w);
2694 void cftf081(double *a, double *w);
2695 void cftf082(double *a, double *w);
2698 cftf161(a, &w[nw - 8]);
2699 cftf162(&a[32], &w[nw - 32]);
2700 cftf161(&a[64], &w[nw - 8]);
2701 cftf161(&a[96], &w[nw - 8]);
2703 cftf081(a, &w[nw - 8]);
2704 cftf082(&a[16], &w[nw - 8]);
2705 cftf081(&a[32], &w[nw - 8]);
2706 cftf081(&a[48], &w[nw - 8]);
2711 void cftf161(double *a, double *w)
2713 double wn4r, wk1r, wk1i,
2714 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
2715 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
2716 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
2717 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
2718 y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
2743 x2r = a[10] + a[26];
2744 x2i = a[11] + a[27];
2745 x3r = a[10] - a[26];
2746 x3i = a[11] - a[27];
2753 y9r = wk1r * x0r - wk1i * x0i;
2754 y9i = wk1r * x0i + wk1i * x0r;
2757 y13r = wk1i * x0r - wk1r * x0i;
2758 y13i = wk1i * x0i + wk1r * x0r;
2763 x2r = a[12] + a[28];
2764 x2i = a[13] + a[29];
2765 x3r = a[12] - a[28];
2766 x3i = a[13] - a[29];
2773 y10r = wn4r * (x0r - x0i);
2774 y10i = wn4r * (x0i + x0r);
2777 y14r = wn4r * (x0r + x0i);
2778 y14i = wn4r * (x0i - x0r);
2783 x2r = a[14] + a[30];
2784 x2i = a[15] + a[31];
2785 x3r = a[14] - a[30];
2786 x3i = a[15] - a[31];
2793 y11r = wk1i * x0r - wk1r * x0i;
2794 y11i = wk1i * x0i + wk1r * x0r;
2797 y15r = wk1r * x0r - wk1i * x0i;
2798 y15i = wk1r * x0i + wk1i * x0r;
2833 x2r = wn4r * (x0r - x0i);
2834 x2i = wn4r * (x0i + x0r);
2837 x3r = wn4r * (x0r - x0i);
2838 x3i = wn4r * (x0i + x0r);
2870 void cftf162(double *a, double *w)
2872 double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i,
2873 x0r, x0i, x1r, x1i, x2r, x2i,
2874 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
2875 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
2876 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
2877 y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
2890 x2r = wn4r * (x0r - x0i);
2891 x2i = wn4r * (x0i + x0r);
2900 x2r = wn4r * (x0r - x0i);
2901 x2i = wn4r * (x0i + x0r);
2908 x1r = wk1r * x0r - wk1i * x0i;
2909 x1i = wk1r * x0i + wk1i * x0r;
2910 x0r = a[10] - a[27];
2911 x0i = a[11] + a[26];
2912 x2r = wk3i * x0r - wk3r * x0i;
2913 x2i = wk3i * x0i + wk3r * x0r;
2920 x1r = wk3r * x0r - wk3i * x0i;
2921 x1i = wk3r * x0i + wk3i * x0r;
2922 x0r = a[10] + a[27];
2923 x0i = a[11] - a[26];
2924 x2r = wk1r * x0r + wk1i * x0i;
2925 x2i = wk1r * x0i - wk1i * x0r;
2932 x1r = wk2r * x0r - wk2i * x0i;
2933 x1i = wk2r * x0i + wk2i * x0r;
2934 x0r = a[12] - a[29];
2935 x0i = a[13] + a[28];
2936 x2r = wk2i * x0r - wk2r * x0i;
2937 x2i = wk2i * x0i + wk2r * x0r;
2944 x1r = wk2i * x0r - wk2r * x0i;
2945 x1i = wk2i * x0i + wk2r * x0r;
2946 x0r = a[12] + a[29];
2947 x0i = a[13] - a[28];
2948 x2r = wk2r * x0r - wk2i * x0i;
2949 x2i = wk2r * x0i + wk2i * x0r;
2956 x1r = wk3r * x0r - wk3i * x0i;
2957 x1i = wk3r * x0i + wk3i * x0r;
2958 x0r = a[14] - a[31];
2959 x0i = a[15] + a[30];
2960 x2r = wk1i * x0r - wk1r * x0i;
2961 x2i = wk1i * x0i + wk1r * x0r;
2968 x1r = wk1i * x0r + wk1r * x0i;
2969 x1i = wk1i * x0i - wk1r * x0r;
2970 x0r = a[14] + a[31];
2971 x0i = a[15] - a[30];
2972 x2r = wk3i * x0r - wk3r * x0i;
2973 x2i = wk3i * x0i + wk3r * x0r;
2998 x2r = wn4r * (x0r - x0i);
2999 x2i = wn4r * (x0i + x0r);
3008 x2r = wn4r * (x0r - x0i);
3009 x2i = wn4r * (x0i + x0r);
3034 x2r = wn4r * (x0r - x0i);
3035 x2i = wn4r * (x0i + x0r);
3044 x2r = wn4r * (x0r - x0i);
3045 x2i = wn4r * (x0i + x0r);
3053 void cftf081(double *a, double *w)
3055 double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
3056 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
3057 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3092 y5r = wn4r * (x0r - x0i);
3093 y5i = wn4r * (x0r + x0i);
3094 y7r = wn4r * (x2r - x2i);
3095 y7i = wn4r * (x2r + x2i);
3115 void cftf082(double *a, double *w)
3117 double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i,
3118 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
3119 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3130 y2r = wn4r * (x0r - x0i);
3131 y2i = wn4r * (x0i + x0r);
3134 y3r = wn4r * (x0r - x0i);
3135 y3i = wn4r * (x0i + x0r);
3138 y4r = wk1r * x0r - wk1i * x0i;
3139 y4i = wk1r * x0i + wk1i * x0r;
3142 y5r = wk1i * x0r - wk1r * x0i;
3143 y5i = wk1i * x0i + wk1r * x0r;
3146 y6r = wk1i * x0r - wk1r * x0i;
3147 y6i = wk1i * x0i + wk1r * x0r;
3150 y7r = wk1r * x0r - wk1i * x0i;
3151 y7i = wk1r * x0i + wk1i * x0r;
3187 void cftf040(double *a)
3189 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3210 void cftb040(double *a)
3212 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3233 void cftx020(double *a)
3246 void rftfsub(int n, double *a, int nc, double *c)
3248 int j, k, kk, ks, m;
3249 double wkr, wki, xr, xi, yr, yi;
3254 for (j = 2; j < m; j += 2) {
3257 wkr = 0.5 - c[nc - kk];
3260 xi = a[j + 1] + a[k + 1];
3261 yr = wkr * xr - wki * xi;
3262 yi = wkr * xi + wki * xr;
3271 void rftbsub(int n, double *a, int nc, double *c)
3273 int j, k, kk, ks, m;
3274 double wkr, wki, xr, xi, yr, yi;
3279 for (j = 2; j < m; j += 2) {
3282 wkr = 0.5 - c[nc - kk];
3285 xi = a[j + 1] + a[k + 1];
3286 yr = wkr * xr + wki * xi;
3287 yi = wkr * xi - wki * xr;
3296 void dctsub(int n, double *a, int nc, double *c)
3298 int j, k, kk, ks, m;
3299 double wkr, wki, xr;
3304 for (j = 1; j < m; j++) {
3307 wkr = c[kk] - c[nc - kk];
3308 wki = c[kk] + c[nc - kk];
3309 xr = wki * a[j] - wkr * a[k];
3310 a[j] = wkr * a[j] + wki * a[k];
3317 void dstsub(int n, double *a, int nc, double *c)
3319 int j, k, kk, ks, m;
3320 double wkr, wki, xr;
3325 for (j = 1; j < m; j++) {
3328 wkr = c[kk] - c[nc - kk];
3329 wki = c[kk] + c[nc - kk];
3330 xr = wki * a[k] - wkr * a[j];
3331 a[k] = wkr * a[k] + wki * a[j];