1 /*****************************************************************************
3 *****************************************************************************
4 * Copyright (C) 2004 the VideoLAN team
7 * Authors: Cyril Deguet <asmax@videolan.org>
8 * code from projectM http://xmms-projectm.sourceforge.net
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
28 Fast Fourier/Cosine/Sine Transform
30 data length :power of 2
36 cdft: Complex Discrete Fourier Transform
37 rdft: Real Discrete Fourier Transform
38 ddct: Discrete Cosine Transform
39 ddst: Discrete Sine Transform
40 dfct: Cosine Transform of RDFT (Real Symmetric DFT)
41 dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
43 void cdft(int, int, double *, int *, double *);
44 void rdft(int, int, double *, int *, double *);
45 void ddct(int, int, double *, int *, double *);
46 void ddst(int, int, double *, int *, double *);
47 void dfct(int, double *, double *, int *, double *);
48 void dfst(int, double *, double *, int *, double *);
50 USE_CDFT_PTHREADS : default=not defined
51 CDFT_THREADS_BEGIN_N : must be >= 512, default=8192
52 CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536
53 USE_CDFT_WINTHREADS : default=not defined
54 CDFT_THREADS_BEGIN_N : must be >= 512, default=32768
55 CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288
58 -------- Complex DFT (Discrete Fourier Transform) --------
61 X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
63 X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
64 (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
67 ip[0] = 0; // first time only
68 cdft(2*n, 1, a, ip, w);
70 ip[0] = 0; // first time only
71 cdft(2*n, -1, a, ip, w);
73 2*n :data length (int)
74 n >= 1, n = power of 2
75 a[0...2*n-1] :input/output data (double *)
78 a[2*j+1] = Im(x[j]), 0<=j<n
81 a[2*k+1] = Im(X[k]), 0<=k<n
82 ip[0...*] :work area for bit reversal (int *)
83 length of ip >= 2+sqrt(n)
86 2+(1<<(int)(log(n+0.5)/log(2))/2).
87 ip[0],ip[1] are pointers of the cos/sin table.
88 w[0...n/2-1] :cos/sin table (double *)
89 w[],ip[] are initialized if ip[0] == 0.
92 cdft(2*n, -1, a, ip, w);
94 cdft(2*n, 1, a, ip, w);
95 for (j = 0; j <= 2 * n - 1; j++) {
101 -------- Real DFT / Inverse of Real DFT --------
104 R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
105 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
106 <case2> IRDFT (excluding scale)
107 a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
108 sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
109 sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
112 ip[0] = 0; // first time only
113 rdft(n, 1, a, ip, w);
115 ip[0] = 0; // first time only
116 rdft(n, -1, a, ip, w);
119 n >= 2, n = power of 2
120 a[0...n-1] :input/output data (double *)
123 a[2*k] = R[k], 0<=k<n/2
124 a[2*k+1] = I[k], 0<k<n/2
128 a[2*j] = R[j], 0<=j<n/2
129 a[2*j+1] = I[j], 0<j<n/2
131 ip[0...*] :work area for bit reversal (int *)
132 length of ip >= 2+sqrt(n/2)
135 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
136 ip[0],ip[1] are pointers of the cos/sin table.
137 w[0...n/2-1] :cos/sin table (double *)
138 w[],ip[] are initialized if ip[0] == 0.
141 rdft(n, 1, a, ip, w);
143 rdft(n, -1, a, ip, w);
144 for (j = 0; j <= n - 1; j++) {
150 -------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
152 <case1> IDCT (excluding scale)
153 C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
155 C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
158 ip[0] = 0; // first time only
159 ddct(n, 1, a, ip, w);
161 ip[0] = 0; // first time only
162 ddct(n, -1, a, ip, w);
165 n >= 2, n = power of 2
166 a[0...n-1] :input/output data (double *)
169 ip[0...*] :work area for bit reversal (int *)
170 length of ip >= 2+sqrt(n/2)
173 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
174 ip[0],ip[1] are pointers of the cos/sin table.
175 w[0...n*5/4-1] :cos/sin table (double *)
176 w[],ip[] are initialized if ip[0] == 0.
179 ddct(n, -1, a, ip, w);
182 ddct(n, 1, a, ip, w);
183 for (j = 0; j <= n - 1; j++) {
189 -------- DST (Discrete Sine Transform) / Inverse of DST --------
191 <case1> IDST (excluding scale)
192 S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
194 S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
197 ip[0] = 0; // first time only
198 ddst(n, 1, a, ip, w);
200 ip[0] = 0; // first time only
201 ddst(n, -1, a, ip, w);
204 n >= 2, n = power of 2
205 a[0...n-1] :input/output data (double *)
216 ip[0...*] :work area for bit reversal (int *)
217 length of ip >= 2+sqrt(n/2)
220 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
221 ip[0],ip[1] are pointers of the cos/sin table.
222 w[0...n*5/4-1] :cos/sin table (double *)
223 w[],ip[] are initialized if ip[0] == 0.
226 ddst(n, -1, a, ip, w);
229 ddst(n, 1, a, ip, w);
230 for (j = 0; j <= n - 1; j++) {
236 -------- Cosine Transform of RDFT (Real Symmetric DFT) --------
238 C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
240 ip[0] = 0; // first time only
241 dfct(n, a, t, ip, w);
243 n :data length - 1 (int)
244 n >= 2, n = power of 2
245 a[0...n] :input/output data (double *)
248 t[0...n/2] :work area (double *)
249 ip[0...*] :work area for bit reversal (int *)
250 length of ip >= 2+sqrt(n/4)
253 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
254 ip[0],ip[1] are pointers of the cos/sin table.
255 w[0...n*5/8-1] :cos/sin table (double *)
256 w[],ip[] are initialized if ip[0] == 0.
261 dfct(n, a, t, ip, w);
265 dfct(n, a, t, ip, w);
266 for (j = 0; j <= n; j++) {
272 -------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
274 S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
276 ip[0] = 0; // first time only
277 dfst(n, a, t, ip, w);
279 n :data length + 1 (int)
280 n >= 2, n = power of 2
281 a[0...n-1] :input/output data (double *)
284 (a[0] is used for work area)
285 t[0...n/2-1] :work area (double *)
286 ip[0...*] :work area for bit reversal (int *)
287 length of ip >= 2+sqrt(n/4)
290 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
291 ip[0],ip[1] are pointers of the cos/sin table.
292 w[0...n*5/8-1] :cos/sin table (double *)
293 w[],ip[] are initialized if ip[0] == 0.
296 dfst(n, a, t, ip, w);
298 dfst(n, a, t, ip, w);
299 for (j = 1; j <= n - 1; j++) {
306 The cos/sin table is recalculated when the larger table required.
307 w[] and ip[] are compatible with all routines.
311 void cdft(int n, int isgn, double *a, int *ip, double *w)
313 void makewt(int nw, int *ip, double *w);
314 void cftfsub(int n, double *a, int *ip, int nw, double *w);
315 void cftbsub(int n, double *a, int *ip, int nw, double *w);
324 cftfsub(n, a, ip, nw, w);
326 cftbsub(n, a, ip, nw, w);
331 void rdft(int n, int isgn, double *a, int *ip, double *w)
333 void makewt(int nw, int *ip, double *w);
334 void makect(int nc, int *ip, double *c);
335 void cftfsub(int n, double *a, int *ip, int nw, double *w);
336 void cftbsub(int n, double *a, int *ip, int nw, double *w);
337 void rftfsub(int n, double *a, int nc, double *c);
338 void rftbsub(int n, double *a, int nc, double *c);
350 makect(nc, ip, w + nw);
354 cftfsub(n, a, ip, nw, w);
355 rftfsub(n, a, nc, w + nw);
357 cftfsub(n, a, ip, nw, w);
363 a[1] = 0.5 * (a[0] - a[1]);
366 rftbsub(n, a, nc, w + nw);
367 cftbsub(n, a, ip, nw, w);
369 cftbsub(n, a, ip, nw, w);
375 void ddct(int n, int isgn, double *a, int *ip, double *w)
377 void makewt(int nw, int *ip, double *w);
378 void makect(int nc, int *ip, double *c);
379 void cftfsub(int n, double *a, int *ip, int nw, double *w);
380 void cftbsub(int n, double *a, int *ip, int nw, double *w);
381 void rftfsub(int n, double *a, int nc, double *c);
382 void rftbsub(int n, double *a, int nc, double *c);
383 void dctsub(int n, double *a, int nc, double *c);
395 makect(nc, ip, w + nw);
399 for (j = n - 2; j >= 2; j -= 2) {
400 a[j + 1] = a[j] - a[j - 1];
406 rftbsub(n, a, nc, w + nw);
407 cftbsub(n, a, ip, nw, w);
409 cftbsub(n, a, ip, nw, w);
412 dctsub(n, a, nc, w + nw);
415 cftfsub(n, a, ip, nw, w);
416 rftfsub(n, a, nc, w + nw);
418 cftfsub(n, a, ip, nw, w);
422 for (j = 2; j < n; j += 2) {
423 a[j - 1] = a[j] - a[j + 1];
431 void ddst(int n, int isgn, double *a, int *ip, double *w)
433 void makewt(int nw, int *ip, double *w);
434 void makect(int nc, int *ip, double *c);
435 void cftfsub(int n, double *a, int *ip, int nw, double *w);
436 void cftbsub(int n, double *a, int *ip, int nw, double *w);
437 void rftfsub(int n, double *a, int nc, double *c);
438 void rftbsub(int n, double *a, int nc, double *c);
439 void dstsub(int n, double *a, int nc, double *c);
451 makect(nc, ip, w + nw);
455 for (j = n - 2; j >= 2; j -= 2) {
456 a[j + 1] = -a[j] - a[j - 1];
462 rftbsub(n, a, nc, w + nw);
463 cftbsub(n, a, ip, nw, w);
465 cftbsub(n, a, ip, nw, w);
468 dstsub(n, a, nc, w + nw);
471 cftfsub(n, a, ip, nw, w);
472 rftfsub(n, a, nc, w + nw);
474 cftfsub(n, a, ip, nw, w);
478 for (j = 2; j < n; j += 2) {
479 a[j - 1] = -a[j] - a[j + 1];
487 void dfct(int n, double *a, double *t, int *ip, double *w)
489 void makewt(int nw, int *ip, double *w);
490 void makect(int nc, int *ip, double *c);
491 void cftfsub(int n, double *a, int *ip, int nw, double *w);
492 void rftfsub(int n, double *a, int nc, double *c);
493 void dctsub(int n, double *a, int nc, double *c);
494 int j, k, l, m, mh, nw, nc;
495 double xr, xi, yr, yi;
505 makect(nc, ip, w + nw);
515 for (j = 1; j < mh; j++) {
517 xr = a[j] - a[n - j];
518 xi = a[j] + a[n - j];
519 yr = a[k] - a[n - k];
520 yi = a[k] + a[n - k];
526 t[mh] = a[mh] + a[n - mh];
528 dctsub(m, a, nc, w + nw);
530 cftfsub(m, a, ip, nw, w);
531 rftfsub(m, a, nc, w + nw);
533 cftfsub(m, a, ip, nw, w);
535 a[n - 1] = a[0] - a[1];
537 for (j = m - 2; j >= 2; j -= 2) {
538 a[2 * j + 1] = a[j] + a[j + 1];
539 a[2 * j - 1] = a[j] - a[j + 1];
544 dctsub(m, t, nc, w + nw);
546 cftfsub(m, t, ip, nw, w);
547 rftfsub(m, t, nc, w + nw);
549 cftfsub(m, t, ip, nw, w);
551 a[n - l] = t[0] - t[1];
554 for (j = 2; j < m; j += 2) {
556 a[k - l] = t[j] - t[j + 1];
557 a[k + l] = t[j] + t[j + 1];
561 for (j = 0; j < mh; j++) {
563 t[j] = t[m + k] - t[m + j];
564 t[k] = t[m + k] + t[m + j];
580 void dfst(int n, double *a, double *t, int *ip, double *w)
582 void makewt(int nw, int *ip, double *w);
583 void makect(int nc, int *ip, double *c);
584 void cftfsub(int n, double *a, int *ip, int nw, double *w);
585 void rftfsub(int n, double *a, int nc, double *c);
586 void dstsub(int n, double *a, int nc, double *c);
587 int j, k, l, m, mh, nw, nc;
588 double xr, xi, yr, yi;
598 makect(nc, ip, w + nw);
603 for (j = 1; j < mh; j++) {
605 xr = a[j] + a[n - j];
606 xi = a[j] - a[n - j];
607 yr = a[k] + a[n - k];
608 yi = a[k] - a[n - k];
614 t[0] = a[mh] - a[n - mh];
617 dstsub(m, a, nc, w + nw);
619 cftfsub(m, a, ip, nw, w);
620 rftfsub(m, a, nc, w + nw);
622 cftfsub(m, a, ip, nw, w);
624 a[n - 1] = a[1] - a[0];
626 for (j = m - 2; j >= 2; j -= 2) {
627 a[2 * j + 1] = a[j] - a[j + 1];
628 a[2 * j - 1] = -a[j] - a[j + 1];
633 dstsub(m, t, nc, w + nw);
635 cftfsub(m, t, ip, nw, w);
636 rftfsub(m, t, nc, w + nw);
638 cftfsub(m, t, ip, nw, w);
640 a[n - l] = t[1] - t[0];
643 for (j = 2; j < m; j += 2) {
645 a[k - l] = -t[j] - t[j + 1];
646 a[k + l] = t[j] - t[j + 1];
650 for (j = 1; j < mh; j++) {
652 t[j] = t[m + k] + t[m + j];
653 t[k] = t[m + k] - t[m + j];
664 /* -------- initializing routines -------- */
669 void makewt(int nw, int *ip, double *w)
671 void makeipt(int nw, int *ip);
672 int j, nwh, nw0, nw1;
673 double delta, wn4r, wk1r, wk1i, wk3r, wk3i;
679 delta = atan(1.0) / nwh;
680 wn4r = cos(delta * nwh);
684 w[2] = cos(delta * 2);
685 w[3] = sin(delta * 2);
686 } else if (nwh > 4) {
688 w[2] = 0.5 / cos(delta * 2);
689 w[3] = 0.5 / cos(delta * 6);
690 for (j = 4; j < nwh; j += 4) {
691 w[j] = cos(delta * j);
692 w[j + 1] = sin(delta * j);
693 w[j + 2] = cos(3 * delta * j);
694 w[j + 3] = -sin(3 * delta * j);
708 } else if (nwh > 4) {
711 w[nw1 + 2] = 0.5 / wk1r;
712 w[nw1 + 3] = 0.5 / wk3r;
713 for (j = 4; j < nwh; j += 4) {
714 wk1r = w[nw0 + 2 * j];
715 wk1i = w[nw0 + 2 * j + 1];
716 wk3r = w[nw0 + 2 * j + 2];
717 wk3i = w[nw0 + 2 * j + 3];
719 w[nw1 + j + 1] = wk1i;
720 w[nw1 + j + 2] = wk3r;
721 w[nw1 + j + 3] = wk3i;
730 void makeipt(int nw, int *ip)
732 int j, l, m, m2, p, q;
737 for (l = nw; l > 32; l >>= 2) {
740 for (j = m; j < m2; j++) {
750 void makect(int nc, int *ip, double *c)
758 delta = atan(1.0) / nch;
759 c[0] = cos(delta * nch);
761 for (j = 1; j < nch; j++) {
762 c[j] = 0.5 * cos(delta * j);
763 c[nc - j] = 0.5 * sin(delta * j);
769 /* -------- child routines -------- */
772 #ifdef USE_CDFT_PTHREADS
773 #define USE_CDFT_THREADS
774 #ifndef CDFT_THREADS_BEGIN_N
775 #define CDFT_THREADS_BEGIN_N 8192
777 #ifndef CDFT_4THREADS_BEGIN_N
778 #define CDFT_4THREADS_BEGIN_N 65536
782 #define cdft_thread_t pthread_t
783 #define cdft_thread_create(thp,func,argp) { \
784 if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \
785 fprintf(stderr, "cdft thread error\n"); \
789 #define cdft_thread_wait(th) { \
790 if (pthread_join(th, NULL) != 0) { \
791 fprintf(stderr, "cdft thread error\n"); \
795 #endif /* USE_CDFT_PTHREADS */
798 #ifdef USE_CDFT_WINTHREADS
799 #define USE_CDFT_THREADS
800 #ifndef CDFT_THREADS_BEGIN_N
801 #define CDFT_THREADS_BEGIN_N 32768
803 #ifndef CDFT_4THREADS_BEGIN_N
804 #define CDFT_4THREADS_BEGIN_N 524288
808 #define cdft_thread_t HANDLE
809 #define cdft_thread_create(thp,func,argp) { \
811 *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \
813 fprintf(stderr, "cdft thread error\n"); \
817 #define cdft_thread_wait(th) { \
818 WaitForSingleObject(th, INFINITE); \
821 #endif /* USE_CDFT_WINTHREADS */
824 void cftfsub(int n, double *a, int *ip, int nw, double *w)
826 void bitrv2(int n, int *ip, double *a);
827 void bitrv216(double *a);
828 void bitrv208(double *a);
829 void cftf1st(int n, double *a, double *w);
830 void cftrec4(int n, double *a, int nw, double *w);
831 void cftleaf(int n, int isplt, double *a, int nw, double *w);
832 void cftfx41(int n, double *a, int nw, double *w);
833 void cftf161(double *a, double *w);
834 void cftf081(double *a, double *w);
835 void cftf040(double *a);
836 void cftx020(double *a);
837 #ifdef USE_CDFT_THREADS
838 void cftrec4_th(int n, double *a, int nw, double *w);
839 #endif /* USE_CDFT_THREADS */
843 cftf1st(n, a, &w[nw - (n >> 2)]);
844 #ifdef USE_CDFT_THREADS
845 if (n > CDFT_THREADS_BEGIN_N) {
846 cftrec4_th(n, a, nw, w);
848 #endif /* USE_CDFT_THREADS */
850 cftrec4(n, a, nw, w);
851 } else if (n > 128) {
852 cftleaf(n, 1, a, nw, w);
854 cftfx41(n, a, nw, w);
857 } else if (n == 32) {
858 cftf161(a, &w[nw - 8]);
872 void cftbsub(int n, double *a, int *ip, int nw, double *w)
874 void bitrv2conj(int n, int *ip, double *a);
875 void bitrv216neg(double *a);
876 void bitrv208neg(double *a);
877 void cftb1st(int n, double *a, double *w);
878 void cftrec4(int n, double *a, int nw, double *w);
879 void cftleaf(int n, int isplt, double *a, int nw, double *w);
880 void cftfx41(int n, double *a, int nw, double *w);
881 void cftf161(double *a, double *w);
882 void cftf081(double *a, double *w);
883 void cftb040(double *a);
884 void cftx020(double *a);
885 #ifdef USE_CDFT_THREADS
886 void cftrec4_th(int n, double *a, int nw, double *w);
887 #endif /* USE_CDFT_THREADS */
891 cftb1st(n, a, &w[nw - (n >> 2)]);
892 #ifdef USE_CDFT_THREADS
893 if (n > CDFT_THREADS_BEGIN_N) {
894 cftrec4_th(n, a, nw, w);
896 #endif /* USE_CDFT_THREADS */
898 cftrec4(n, a, nw, w);
899 } else if (n > 128) {
900 cftleaf(n, 1, a, nw, w);
902 cftfx41(n, a, nw, w);
904 bitrv2conj(n, ip, a);
905 } else if (n == 32) {
906 cftf161(a, &w[nw - 8]);
920 void bitrv2(int n, int *ip, double *a)
922 int j, j1, k, k1, l, m, nh, nm;
923 double xr, xi, yr, yi;
926 for (l = n >> 2; l > 8; l >>= 2) {
932 for (k = 0; k < m; k++) {
933 for (j = 0; j < k; j++) {
934 j1 = 4 * j + 2 * ip[m + k];
935 k1 = 4 * k + 2 * ip[m + j];
1095 k1 = 4 * k + 2 * ip[m + k];
1158 for (k = 0; k < m; k++) {
1159 for (j = 0; j < k; j++) {
1160 j1 = 4 * j + ip[m + k];
1161 k1 = 4 * k + ip[m + j];
1241 k1 = 4 * k + ip[m + k];
1267 void bitrv2conj(int n, int *ip, double *a)
1269 int j, j1, k, k1, l, m, nh, nm;
1270 double xr, xi, yr, yi;
1273 for (l = n >> 2; l > 8; l >>= 2) {
1279 for (k = 0; k < m; k++) {
1280 for (j = 0; j < k; j++) {
1281 j1 = 4 * j + 2 * ip[m + k];
1282 k1 = 4 * k + 2 * ip[m + j];
1442 k1 = 4 * k + 2 * ip[m + k];
1445 a[j1 - 1] = -a[j1 - 1];
1454 a[k1 + 3] = -a[k1 + 3];
1497 a[j1 - 1] = -a[j1 - 1];
1506 a[k1 + 3] = -a[k1 + 3];
1509 for (k = 0; k < m; k++) {
1510 for (j = 0; j < k; j++) {
1511 j1 = 4 * j + ip[m + k];
1512 k1 = 4 * k + ip[m + j];
1592 k1 = 4 * k + ip[m + k];
1595 a[j1 - 1] = -a[j1 - 1];
1604 a[k1 + 3] = -a[k1 + 3];
1607 a[j1 - 1] = -a[j1 - 1];
1616 a[k1 + 3] = -a[k1 + 3];
1622 void bitrv216(double *a)
1624 double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1625 x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i,
1626 x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i;
1679 void bitrv216neg(double *a)
1681 double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1682 x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i,
1683 x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i,
1684 x13r, x13i, x14r, x14i, x15r, x15i;
1749 void bitrv208(double *a)
1751 double x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i;
1772 void bitrv208neg(double *a)
1774 double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1775 x5r, x5i, x6r, x6i, x7r, x7i;
1808 void cftf1st(int n, double *a, double *w)
1810 int j, j0, j1, j2, j3, k, m, mh;
1811 double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i,
1812 wd1r, wd1i, wd3r, wd3i;
1813 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
1814 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i;
1822 x0i = a[1] + a[j2 + 1];
1824 x1i = a[1] - a[j2 + 1];
1825 x2r = a[j1] + a[j3];
1826 x2i = a[j1 + 1] + a[j3 + 1];
1827 x3r = a[j1] - a[j3];
1828 x3i = a[j1 + 1] - a[j3 + 1];
1832 a[j1 + 1] = x0i - x2i;
1834 a[j2 + 1] = x1i + x3r;
1836 a[j3 + 1] = x1i - x3r;
1845 for (j = 2; j < mh - 2; j += 4) {
1847 wk1r = csc1 * (wd1r + w[k]);
1848 wk1i = csc1 * (wd1i + w[k + 1]);
1849 wk3r = csc3 * (wd3r + w[k + 2]);
1850 wk3i = csc3 * (wd3i + w[k + 3]);
1859 x0i = a[j + 1] + a[j2 + 1];
1861 x1i = a[j + 1] - a[j2 + 1];
1862 y0r = a[j + 2] + a[j2 + 2];
1863 y0i = a[j + 3] + a[j2 + 3];
1864 y1r = a[j + 2] - a[j2 + 2];
1865 y1i = a[j + 3] - a[j2 + 3];
1866 x2r = a[j1] + a[j3];
1867 x2i = a[j1 + 1] + a[j3 + 1];
1868 x3r = a[j1] - a[j3];
1869 x3i = a[j1 + 1] - a[j3 + 1];
1870 y2r = a[j1 + 2] + a[j3 + 2];
1871 y2i = a[j1 + 3] + a[j3 + 3];
1872 y3r = a[j1 + 2] - a[j3 + 2];
1873 y3i = a[j1 + 3] - a[j3 + 3];
1875 a[j + 1] = x0i + x2i;
1876 a[j + 2] = y0r + y2r;
1877 a[j + 3] = y0i + y2i;
1879 a[j1 + 1] = x0i - x2i;
1880 a[j1 + 2] = y0r - y2r;
1881 a[j1 + 3] = y0i - y2i;
1884 a[j2] = wk1r * x0r - wk1i * x0i;
1885 a[j2 + 1] = wk1r * x0i + wk1i * x0r;
1888 a[j2 + 2] = wd1r * x0r - wd1i * x0i;
1889 a[j2 + 3] = wd1r * x0i + wd1i * x0r;
1892 a[j3] = wk3r * x0r + wk3i * x0i;
1893 a[j3 + 1] = wk3r * x0i - wk3i * x0r;
1896 a[j3 + 2] = wd3r * x0r + wd3i * x0i;
1897 a[j3 + 3] = wd3r * x0i - wd3i * x0r;
1902 x0r = a[j0] + a[j2];
1903 x0i = a[j0 + 1] + a[j2 + 1];
1904 x1r = a[j0] - a[j2];
1905 x1i = a[j0 + 1] - a[j2 + 1];
1906 y0r = a[j0 - 2] + a[j2 - 2];
1907 y0i = a[j0 - 1] + a[j2 - 1];
1908 y1r = a[j0 - 2] - a[j2 - 2];
1909 y1i = a[j0 - 1] - a[j2 - 1];
1910 x2r = a[j1] + a[j3];
1911 x2i = a[j1 + 1] + a[j3 + 1];
1912 x3r = a[j1] - a[j3];
1913 x3i = a[j1 + 1] - a[j3 + 1];
1914 y2r = a[j1 - 2] + a[j3 - 2];
1915 y2i = a[j1 - 1] + a[j3 - 1];
1916 y3r = a[j1 - 2] - a[j3 - 2];
1917 y3i = a[j1 - 1] - a[j3 - 1];
1919 a[j0 + 1] = x0i + x2i;
1920 a[j0 - 2] = y0r + y2r;
1921 a[j0 - 1] = y0i + y2i;
1923 a[j1 + 1] = x0i - x2i;
1924 a[j1 - 2] = y0r - y2r;
1925 a[j1 - 1] = y0i - y2i;
1928 a[j2] = wk1i * x0r - wk1r * x0i;
1929 a[j2 + 1] = wk1i * x0i + wk1r * x0r;
1932 a[j2 - 2] = wd1i * x0r - wd1r * x0i;
1933 a[j2 - 1] = wd1i * x0i + wd1r * x0r;
1936 a[j3] = wk3i * x0r + wk3r * x0i;
1937 a[j3 + 1] = wk3i * x0i - wk3r * x0r;
1940 a[j3 - 2] = wd3i * x0r + wd3r * x0i;
1941 a[j3 - 1] = wd3i * x0i - wd3r * x0r;
1943 wk1r = csc1 * (wd1r + wn4r);
1944 wk1i = csc1 * (wd1i + wn4r);
1945 wk3r = csc3 * (wd3r - wn4r);
1946 wk3i = csc3 * (wd3i - wn4r);
1951 x0r = a[j0 - 2] + a[j2 - 2];
1952 x0i = a[j0 - 1] + a[j2 - 1];
1953 x1r = a[j0 - 2] - a[j2 - 2];
1954 x1i = a[j0 - 1] - a[j2 - 1];
1955 x2r = a[j1 - 2] + a[j3 - 2];
1956 x2i = a[j1 - 1] + a[j3 - 1];
1957 x3r = a[j1 - 2] - a[j3 - 2];
1958 x3i = a[j1 - 1] - a[j3 - 1];
1959 a[j0 - 2] = x0r + x2r;
1960 a[j0 - 1] = x0i + x2i;
1961 a[j1 - 2] = x0r - x2r;
1962 a[j1 - 1] = x0i - x2i;
1965 a[j2 - 2] = wk1r * x0r - wk1i * x0i;
1966 a[j2 - 1] = wk1r * x0i + wk1i * x0r;
1969 a[j3 - 2] = wk3r * x0r + wk3i * x0i;
1970 a[j3 - 1] = wk3r * x0i - wk3i * x0r;
1971 x0r = a[j0] + a[j2];
1972 x0i = a[j0 + 1] + a[j2 + 1];
1973 x1r = a[j0] - a[j2];
1974 x1i = a[j0 + 1] - a[j2 + 1];
1975 x2r = a[j1] + a[j3];
1976 x2i = a[j1 + 1] + a[j3 + 1];
1977 x3r = a[j1] - a[j3];
1978 x3i = a[j1 + 1] - a[j3 + 1];
1980 a[j0 + 1] = x0i + x2i;
1982 a[j1 + 1] = x0i - x2i;
1985 a[j2] = wn4r * (x0r - x0i);
1986 a[j2 + 1] = wn4r * (x0i + x0r);
1989 a[j3] = -wn4r * (x0r + x0i);
1990 a[j3 + 1] = -wn4r * (x0i - x0r);
1991 x0r = a[j0 + 2] + a[j2 + 2];
1992 x0i = a[j0 + 3] + a[j2 + 3];
1993 x1r = a[j0 + 2] - a[j2 + 2];
1994 x1i = a[j0 + 3] - a[j2 + 3];
1995 x2r = a[j1 + 2] + a[j3 + 2];
1996 x2i = a[j1 + 3] + a[j3 + 3];
1997 x3r = a[j1 + 2] - a[j3 + 2];
1998 x3i = a[j1 + 3] - a[j3 + 3];
1999 a[j0 + 2] = x0r + x2r;
2000 a[j0 + 3] = x0i + x2i;
2001 a[j1 + 2] = x0r - x2r;
2002 a[j1 + 3] = x0i - x2i;
2005 a[j2 + 2] = wk1i * x0r - wk1r * x0i;
2006 a[j2 + 3] = wk1i * x0i + wk1r * x0r;
2009 a[j3 + 2] = wk3i * x0r + wk3r * x0i;
2010 a[j3 + 3] = wk3i * x0i - wk3r * x0r;
2014 void cftb1st(int n, double *a, double *w)
2016 int j, j0, j1, j2, j3, k, m, mh;
2017 double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i,
2018 wd1r, wd1i, wd3r, wd3i;
2019 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
2020 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i;
2028 x0i = -a[1] - a[j2 + 1];
2030 x1i = -a[1] + a[j2 + 1];
2031 x2r = a[j1] + a[j3];
2032 x2i = a[j1 + 1] + a[j3 + 1];
2033 x3r = a[j1] - a[j3];
2034 x3i = a[j1 + 1] - a[j3 + 1];
2038 a[j1 + 1] = x0i + x2i;
2040 a[j2 + 1] = x1i + x3r;
2042 a[j3 + 1] = x1i - x3r;
2051 for (j = 2; j < mh - 2; j += 4) {
2053 wk1r = csc1 * (wd1r + w[k]);
2054 wk1i = csc1 * (wd1i + w[k + 1]);
2055 wk3r = csc3 * (wd3r + w[k + 2]);
2056 wk3i = csc3 * (wd3i + w[k + 3]);
2065 x0i = -a[j + 1] - a[j2 + 1];
2067 x1i = -a[j + 1] + a[j2 + 1];
2068 y0r = a[j + 2] + a[j2 + 2];
2069 y0i = -a[j + 3] - a[j2 + 3];
2070 y1r = a[j + 2] - a[j2 + 2];
2071 y1i = -a[j + 3] + a[j2 + 3];
2072 x2r = a[j1] + a[j3];
2073 x2i = a[j1 + 1] + a[j3 + 1];
2074 x3r = a[j1] - a[j3];
2075 x3i = a[j1 + 1] - a[j3 + 1];
2076 y2r = a[j1 + 2] + a[j3 + 2];
2077 y2i = a[j1 + 3] + a[j3 + 3];
2078 y3r = a[j1 + 2] - a[j3 + 2];
2079 y3i = a[j1 + 3] - a[j3 + 3];
2081 a[j + 1] = x0i - x2i;
2082 a[j + 2] = y0r + y2r;
2083 a[j + 3] = y0i - y2i;
2085 a[j1 + 1] = x0i + x2i;
2086 a[j1 + 2] = y0r - y2r;
2087 a[j1 + 3] = y0i + y2i;
2090 a[j2] = wk1r * x0r - wk1i * x0i;
2091 a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2094 a[j2 + 2] = wd1r * x0r - wd1i * x0i;
2095 a[j2 + 3] = wd1r * x0i + wd1i * x0r;
2098 a[j3] = wk3r * x0r + wk3i * x0i;
2099 a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2102 a[j3 + 2] = wd3r * x0r + wd3i * x0i;
2103 a[j3 + 3] = wd3r * x0i - wd3i * x0r;
2108 x0r = a[j0] + a[j2];
2109 x0i = -a[j0 + 1] - a[j2 + 1];
2110 x1r = a[j0] - a[j2];
2111 x1i = -a[j0 + 1] + a[j2 + 1];
2112 y0r = a[j0 - 2] + a[j2 - 2];
2113 y0i = -a[j0 - 1] - a[j2 - 1];
2114 y1r = a[j0 - 2] - a[j2 - 2];
2115 y1i = -a[j0 - 1] + a[j2 - 1];
2116 x2r = a[j1] + a[j3];
2117 x2i = a[j1 + 1] + a[j3 + 1];
2118 x3r = a[j1] - a[j3];
2119 x3i = a[j1 + 1] - a[j3 + 1];
2120 y2r = a[j1 - 2] + a[j3 - 2];
2121 y2i = a[j1 - 1] + a[j3 - 1];
2122 y3r = a[j1 - 2] - a[j3 - 2];
2123 y3i = a[j1 - 1] - a[j3 - 1];
2125 a[j0 + 1] = x0i - x2i;
2126 a[j0 - 2] = y0r + y2r;
2127 a[j0 - 1] = y0i - y2i;
2129 a[j1 + 1] = x0i + x2i;
2130 a[j1 - 2] = y0r - y2r;
2131 a[j1 - 1] = y0i + y2i;
2134 a[j2] = wk1i * x0r - wk1r * x0i;
2135 a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2138 a[j2 - 2] = wd1i * x0r - wd1r * x0i;
2139 a[j2 - 1] = wd1i * x0i + wd1r * x0r;
2142 a[j3] = wk3i * x0r + wk3r * x0i;
2143 a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2146 a[j3 - 2] = wd3i * x0r + wd3r * x0i;
2147 a[j3 - 1] = wd3i * x0i - wd3r * x0r;
2149 wk1r = csc1 * (wd1r + wn4r);
2150 wk1i = csc1 * (wd1i + wn4r);
2151 wk3r = csc3 * (wd3r - wn4r);
2152 wk3i = csc3 * (wd3i - wn4r);
2157 x0r = a[j0 - 2] + a[j2 - 2];
2158 x0i = -a[j0 - 1] - a[j2 - 1];
2159 x1r = a[j0 - 2] - a[j2 - 2];
2160 x1i = -a[j0 - 1] + a[j2 - 1];
2161 x2r = a[j1 - 2] + a[j3 - 2];
2162 x2i = a[j1 - 1] + a[j3 - 1];
2163 x3r = a[j1 - 2] - a[j3 - 2];
2164 x3i = a[j1 - 1] - a[j3 - 1];
2165 a[j0 - 2] = x0r + x2r;
2166 a[j0 - 1] = x0i - x2i;
2167 a[j1 - 2] = x0r - x2r;
2168 a[j1 - 1] = x0i + x2i;
2171 a[j2 - 2] = wk1r * x0r - wk1i * x0i;
2172 a[j2 - 1] = wk1r * x0i + wk1i * x0r;
2175 a[j3 - 2] = wk3r * x0r + wk3i * x0i;
2176 a[j3 - 1] = wk3r * x0i - wk3i * x0r;
2177 x0r = a[j0] + a[j2];
2178 x0i = -a[j0 + 1] - a[j2 + 1];
2179 x1r = a[j0] - a[j2];
2180 x1i = -a[j0 + 1] + a[j2 + 1];
2181 x2r = a[j1] + a[j3];
2182 x2i = a[j1 + 1] + a[j3 + 1];
2183 x3r = a[j1] - a[j3];
2184 x3i = a[j1 + 1] - a[j3 + 1];
2186 a[j0 + 1] = x0i - x2i;
2188 a[j1 + 1] = x0i + x2i;
2191 a[j2] = wn4r * (x0r - x0i);
2192 a[j2 + 1] = wn4r * (x0i + x0r);
2195 a[j3] = -wn4r * (x0r + x0i);
2196 a[j3 + 1] = -wn4r * (x0i - x0r);
2197 x0r = a[j0 + 2] + a[j2 + 2];
2198 x0i = -a[j0 + 3] - a[j2 + 3];
2199 x1r = a[j0 + 2] - a[j2 + 2];
2200 x1i = -a[j0 + 3] + a[j2 + 3];
2201 x2r = a[j1 + 2] + a[j3 + 2];
2202 x2i = a[j1 + 3] + a[j3 + 3];
2203 x3r = a[j1 + 2] - a[j3 + 2];
2204 x3i = a[j1 + 3] - a[j3 + 3];
2205 a[j0 + 2] = x0r + x2r;
2206 a[j0 + 3] = x0i - x2i;
2207 a[j1 + 2] = x0r - x2r;
2208 a[j1 + 3] = x0i + x2i;
2211 a[j2 + 2] = wk1i * x0r - wk1r * x0i;
2212 a[j2 + 3] = wk1i * x0i + wk1r * x0r;
2215 a[j3 + 2] = wk3i * x0r + wk3r * x0i;
2216 a[j3 + 3] = wk3i * x0i - wk3r * x0r;
2220 #ifdef USE_CDFT_THREADS
2221 struct cdft_arg_st {
2228 typedef struct cdft_arg_st cdft_arg_t;
2231 void cftrec4_th(int n, double *a, int nw, double *w)
2233 void *cftrec1_th(void *p);
2234 void *cftrec2_th(void *p);
2235 int i, idiv4, m, nthread;
2236 cdft_thread_t th[4];
2242 if (n > CDFT_4THREADS_BEGIN_N) {
2247 for (i = 0; i < nthread; i++) {
2250 ag[i].a = &a[i * m];
2254 cdft_thread_create(&th[i], cftrec1_th, &ag[i]);
2256 cdft_thread_create(&th[i], cftrec2_th, &ag[i]);
2259 for (i = 0; i < nthread; i++) {
2260 cdft_thread_wait(th[i]);
2265 void *cftrec1_th(void *p)
2267 int cfttree(int n, int j, int k, double *a, int nw, double *w);
2268 void cftleaf(int n, int isplt, double *a, int nw, double *w);
2269 void cftmdl1(int n, double *a, double *w);
2270 int isplt, j, k, m, n, n0, nw;
2273 n0 = ((cdft_arg_t *) p)->n0;
2274 n = ((cdft_arg_t *) p)->n;
2275 a = ((cdft_arg_t *) p)->a;
2276 nw = ((cdft_arg_t *) p)->nw;
2277 w = ((cdft_arg_t *) p)->w;
2281 cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]);
2283 cftleaf(m, 1, &a[n - m], nw, w);
2285 for (j = n - m; j > 0; j -= m) {
2287 isplt = cfttree(m, j, k, a, nw, w);
2288 cftleaf(m, isplt, &a[j - m], nw, w);
2294 void *cftrec2_th(void *p)
2296 int cfttree(int n, int j, int k, double *a, int nw, double *w);
2297 void cftleaf(int n, int isplt, double *a, int nw, double *w);
2298 void cftmdl2(int n, double *a, double *w);
2299 int isplt, j, k, m, n, n0, nw;
2302 n0 = ((cdft_arg_t *) p)->n0;
2303 n = ((cdft_arg_t *) p)->n;
2304 a = ((cdft_arg_t *) p)->a;
2305 nw = ((cdft_arg_t *) p)->nw;
2306 w = ((cdft_arg_t *) p)->w;
2312 cftmdl2(m, &a[n - m], &w[nw - m]);
2314 cftleaf(m, 0, &a[n - m], nw, w);
2316 for (j = n - m; j > 0; j -= m) {
2318 isplt = cfttree(m, j, k, a, nw, w);
2319 cftleaf(m, isplt, &a[j - m], nw, w);
2323 #endif /* USE_CDFT_THREADS */
2326 void cftrec4(int n, double *a, int nw, double *w)
2328 int cfttree(int n, int j, int k, double *a, int nw, double *w);
2329 void cftleaf(int n, int isplt, double *a, int nw, double *w);
2330 void cftmdl1(int n, double *a, double *w);
2336 cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]);
2338 cftleaf(m, 1, &a[n - m], nw, w);
2340 for (j = n - m; j > 0; j -= m) {
2342 isplt = cfttree(m, j, k, a, nw, w);
2343 cftleaf(m, isplt, &a[j - m], nw, w);
2348 int cfttree(int n, int j, int k, double *a, int nw, double *w)
2350 void cftmdl1(int n, double *a, double *w);
2351 void cftmdl2(int n, double *a, double *w);
2357 cftmdl1(n, &a[j - n], &w[nw - (n >> 1)]);
2359 cftmdl2(n, &a[j - n], &w[nw - n]);
2363 for (i = k; (i & 3) == 0; i >>= 2) {
2369 cftmdl1(m, &a[j - m], &w[nw - (m >> 1)]);
2374 cftmdl2(m, &a[j - m], &w[nw - m]);
2383 void cftleaf(int n, int isplt, double *a, int nw, double *w)
2385 void cftmdl1(int n, double *a, double *w);
2386 void cftmdl2(int n, double *a, double *w);
2387 void cftf161(double *a, double *w);
2388 void cftf162(double *a, double *w);
2389 void cftf081(double *a, double *w);
2390 void cftf082(double *a, double *w);
2393 cftmdl1(128, a, &w[nw - 64]);
2394 cftf161(a, &w[nw - 8]);
2395 cftf162(&a[32], &w[nw - 32]);
2396 cftf161(&a[64], &w[nw - 8]);
2397 cftf161(&a[96], &w[nw - 8]);
2398 cftmdl2(128, &a[128], &w[nw - 128]);
2399 cftf161(&a[128], &w[nw - 8]);
2400 cftf162(&a[160], &w[nw - 32]);
2401 cftf161(&a[192], &w[nw - 8]);
2402 cftf162(&a[224], &w[nw - 32]);
2403 cftmdl1(128, &a[256], &w[nw - 64]);
2404 cftf161(&a[256], &w[nw - 8]);
2405 cftf162(&a[288], &w[nw - 32]);
2406 cftf161(&a[320], &w[nw - 8]);
2407 cftf161(&a[352], &w[nw - 8]);
2409 cftmdl1(128, &a[384], &w[nw - 64]);
2410 cftf161(&a[480], &w[nw - 8]);
2412 cftmdl2(128, &a[384], &w[nw - 128]);
2413 cftf162(&a[480], &w[nw - 32]);
2415 cftf161(&a[384], &w[nw - 8]);
2416 cftf162(&a[416], &w[nw - 32]);
2417 cftf161(&a[448], &w[nw - 8]);
2419 cftmdl1(64, a, &w[nw - 32]);
2420 cftf081(a, &w[nw - 8]);
2421 cftf082(&a[16], &w[nw - 8]);
2422 cftf081(&a[32], &w[nw - 8]);
2423 cftf081(&a[48], &w[nw - 8]);
2424 cftmdl2(64, &a[64], &w[nw - 64]);
2425 cftf081(&a[64], &w[nw - 8]);
2426 cftf082(&a[80], &w[nw - 8]);
2427 cftf081(&a[96], &w[nw - 8]);
2428 cftf082(&a[112], &w[nw - 8]);
2429 cftmdl1(64, &a[128], &w[nw - 32]);
2430 cftf081(&a[128], &w[nw - 8]);
2431 cftf082(&a[144], &w[nw - 8]);
2432 cftf081(&a[160], &w[nw - 8]);
2433 cftf081(&a[176], &w[nw - 8]);
2435 cftmdl1(64, &a[192], &w[nw - 32]);
2436 cftf081(&a[240], &w[nw - 8]);
2438 cftmdl2(64, &a[192], &w[nw - 64]);
2439 cftf082(&a[240], &w[nw - 8]);
2441 cftf081(&a[192], &w[nw - 8]);
2442 cftf082(&a[208], &w[nw - 8]);
2443 cftf081(&a[224], &w[nw - 8]);
2448 void cftmdl1(int n, double *a, double *w)
2450 int j, j0, j1, j2, j3, k, m, mh;
2451 double wn4r, wk1r, wk1i, wk3r, wk3i;
2452 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2460 x0i = a[1] + a[j2 + 1];
2462 x1i = a[1] - a[j2 + 1];
2463 x2r = a[j1] + a[j3];
2464 x2i = a[j1 + 1] + a[j3 + 1];
2465 x3r = a[j1] - a[j3];
2466 x3i = a[j1 + 1] - a[j3 + 1];
2470 a[j1 + 1] = x0i - x2i;
2472 a[j2 + 1] = x1i + x3r;
2474 a[j3 + 1] = x1i - x3r;
2477 for (j = 2; j < mh; j += 2) {
2487 x0i = a[j + 1] + a[j2 + 1];
2489 x1i = a[j + 1] - a[j2 + 1];
2490 x2r = a[j1] + a[j3];
2491 x2i = a[j1 + 1] + a[j3 + 1];
2492 x3r = a[j1] - a[j3];
2493 x3i = a[j1 + 1] - a[j3 + 1];
2495 a[j + 1] = x0i + x2i;
2497 a[j1 + 1] = x0i - x2i;
2500 a[j2] = wk1r * x0r - wk1i * x0i;
2501 a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2504 a[j3] = wk3r * x0r + wk3i * x0i;
2505 a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2510 x0r = a[j0] + a[j2];
2511 x0i = a[j0 + 1] + a[j2 + 1];
2512 x1r = a[j0] - a[j2];
2513 x1i = a[j0 + 1] - a[j2 + 1];
2514 x2r = a[j1] + a[j3];
2515 x2i = a[j1 + 1] + a[j3 + 1];
2516 x3r = a[j1] - a[j3];
2517 x3i = a[j1 + 1] - a[j3 + 1];
2519 a[j0 + 1] = x0i + x2i;
2521 a[j1 + 1] = x0i - x2i;
2524 a[j2] = wk1i * x0r - wk1r * x0i;
2525 a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2528 a[j3] = wk3i * x0r + wk3r * x0i;
2529 a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2535 x0r = a[j0] + a[j2];
2536 x0i = a[j0 + 1] + a[j2 + 1];
2537 x1r = a[j0] - a[j2];
2538 x1i = a[j0 + 1] - a[j2 + 1];
2539 x2r = a[j1] + a[j3];
2540 x2i = a[j1 + 1] + a[j3 + 1];
2541 x3r = a[j1] - a[j3];
2542 x3i = a[j1 + 1] - a[j3 + 1];
2544 a[j0 + 1] = x0i + x2i;
2546 a[j1 + 1] = x0i - x2i;
2549 a[j2] = wn4r * (x0r - x0i);
2550 a[j2 + 1] = wn4r * (x0i + x0r);
2553 a[j3] = -wn4r * (x0r + x0i);
2554 a[j3 + 1] = -wn4r * (x0i - x0r);
2558 void cftmdl2(int n, double *a, double *w)
2560 int j, j0, j1, j2, j3, k, kr, m, mh;
2561 double wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
2562 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;
2570 x0r = a[0] - a[j2 + 1];
2572 x1r = a[0] + a[j2 + 1];
2574 x2r = a[j1] - a[j3 + 1];
2575 x2i = a[j1 + 1] + a[j3];
2576 x3r = a[j1] + a[j3 + 1];
2577 x3i = a[j1 + 1] - a[j3];
2578 y0r = wn4r * (x2r - x2i);
2579 y0i = wn4r * (x2i + x2r);
2583 a[j1 + 1] = x0i - y0i;
2584 y0r = wn4r * (x3r - x3i);
2585 y0i = wn4r * (x3i + x3r);
2587 a[j2 + 1] = x1i + y0r;
2589 a[j3 + 1] = x1i - y0r;
2592 for (j = 2; j < mh; j += 2) {
2606 x0r = a[j] - a[j2 + 1];
2607 x0i = a[j + 1] + a[j2];
2608 x1r = a[j] + a[j2 + 1];
2609 x1i = a[j + 1] - a[j2];
2610 x2r = a[j1] - a[j3 + 1];
2611 x2i = a[j1 + 1] + a[j3];
2612 x3r = a[j1] + a[j3 + 1];
2613 x3i = a[j1 + 1] - a[j3];
2614 y0r = wk1r * x0r - wk1i * x0i;
2615 y0i = wk1r * x0i + wk1i * x0r;
2616 y2r = wd1r * x2r - wd1i * x2i;
2617 y2i = wd1r * x2i + wd1i * x2r;
2619 a[j + 1] = y0i + y2i;
2621 a[j1 + 1] = y0i - y2i;
2622 y0r = wk3r * x1r + wk3i * x1i;
2623 y0i = wk3r * x1i - wk3i * x1r;
2624 y2r = wd3r * x3r + wd3i * x3i;
2625 y2i = wd3r * x3i - wd3i * x3r;
2627 a[j2 + 1] = y0i + y2i;
2629 a[j3 + 1] = y0i - y2i;
2634 x0r = a[j0] - a[j2 + 1];
2635 x0i = a[j0 + 1] + a[j2];
2636 x1r = a[j0] + a[j2 + 1];
2637 x1i = a[j0 + 1] - a[j2];
2638 x2r = a[j1] - a[j3 + 1];
2639 x2i = a[j1 + 1] + a[j3];
2640 x3r = a[j1] + a[j3 + 1];
2641 x3i = a[j1 + 1] - a[j3];
2642 y0r = wd1i * x0r - wd1r * x0i;
2643 y0i = wd1i * x0i + wd1r * x0r;
2644 y2r = wk1i * x2r - wk1r * x2i;
2645 y2i = wk1i * x2i + wk1r * x2r;
2647 a[j0 + 1] = y0i + y2i;
2649 a[j1 + 1] = y0i - y2i;
2650 y0r = wd3i * x1r + wd3r * x1i;
2651 y0i = wd3i * x1i - wd3r * x1r;
2652 y2r = wk3i * x3r + wk3r * x3i;
2653 y2i = wk3i * x3i - wk3r * x3r;
2655 a[j2 + 1] = y0i + y2i;
2657 a[j3 + 1] = y0i - y2i;
2665 x0r = a[j0] - a[j2 + 1];
2666 x0i = a[j0 + 1] + a[j2];
2667 x1r = a[j0] + a[j2 + 1];
2668 x1i = a[j0 + 1] - a[j2];
2669 x2r = a[j1] - a[j3 + 1];
2670 x2i = a[j1 + 1] + a[j3];
2671 x3r = a[j1] + a[j3 + 1];
2672 x3i = a[j1 + 1] - a[j3];
2673 y0r = wk1r * x0r - wk1i * x0i;
2674 y0i = wk1r * x0i + wk1i * x0r;
2675 y2r = wk1i * x2r - wk1r * x2i;
2676 y2i = wk1i * x2i + wk1r * x2r;
2678 a[j0 + 1] = y0i + y2i;
2680 a[j1 + 1] = y0i - y2i;
2681 y0r = wk1i * x1r - wk1r * x1i;
2682 y0i = wk1i * x1i + wk1r * x1r;
2683 y2r = wk1r * x3r - wk1i * x3i;
2684 y2i = wk1r * x3i + wk1i * x3r;
2686 a[j2 + 1] = y0i - y2i;
2688 a[j3 + 1] = y0i + y2i;
2692 void cftfx41(int n, double *a, int nw, double *w)
2694 void cftf161(double *a, double *w);
2695 void cftf162(double *a, double *w);
2696 void cftf081(double *a, double *w);
2697 void cftf082(double *a, double *w);
2700 cftf161(a, &w[nw - 8]);
2701 cftf162(&a[32], &w[nw - 32]);
2702 cftf161(&a[64], &w[nw - 8]);
2703 cftf161(&a[96], &w[nw - 8]);
2705 cftf081(a, &w[nw - 8]);
2706 cftf082(&a[16], &w[nw - 8]);
2707 cftf081(&a[32], &w[nw - 8]);
2708 cftf081(&a[48], &w[nw - 8]);
2713 void cftf161(double *a, double *w)
2715 double wn4r, wk1r, wk1i,
2716 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
2717 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
2718 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
2719 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
2720 y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
2745 x2r = a[10] + a[26];
2746 x2i = a[11] + a[27];
2747 x3r = a[10] - a[26];
2748 x3i = a[11] - a[27];
2755 y9r = wk1r * x0r - wk1i * x0i;
2756 y9i = wk1r * x0i + wk1i * x0r;
2759 y13r = wk1i * x0r - wk1r * x0i;
2760 y13i = wk1i * x0i + wk1r * x0r;
2765 x2r = a[12] + a[28];
2766 x2i = a[13] + a[29];
2767 x3r = a[12] - a[28];
2768 x3i = a[13] - a[29];
2775 y10r = wn4r * (x0r - x0i);
2776 y10i = wn4r * (x0i + x0r);
2779 y14r = wn4r * (x0r + x0i);
2780 y14i = wn4r * (x0i - x0r);
2785 x2r = a[14] + a[30];
2786 x2i = a[15] + a[31];
2787 x3r = a[14] - a[30];
2788 x3i = a[15] - a[31];
2795 y11r = wk1i * x0r - wk1r * x0i;
2796 y11i = wk1i * x0i + wk1r * x0r;
2799 y15r = wk1r * x0r - wk1i * x0i;
2800 y15i = wk1r * x0i + wk1i * x0r;
2835 x2r = wn4r * (x0r - x0i);
2836 x2i = wn4r * (x0i + x0r);
2839 x3r = wn4r * (x0r - x0i);
2840 x3i = wn4r * (x0i + x0r);
2872 void cftf162(double *a, double *w)
2874 double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i,
2875 x0r, x0i, x1r, x1i, x2r, x2i,
2876 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
2877 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
2878 y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
2879 y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
2892 x2r = wn4r * (x0r - x0i);
2893 x2i = wn4r * (x0i + x0r);
2902 x2r = wn4r * (x0r - x0i);
2903 x2i = wn4r * (x0i + x0r);
2910 x1r = wk1r * x0r - wk1i * x0i;
2911 x1i = wk1r * x0i + wk1i * x0r;
2912 x0r = a[10] - a[27];
2913 x0i = a[11] + a[26];
2914 x2r = wk3i * x0r - wk3r * x0i;
2915 x2i = wk3i * x0i + wk3r * x0r;
2922 x1r = wk3r * x0r - wk3i * x0i;
2923 x1i = wk3r * x0i + wk3i * x0r;
2924 x0r = a[10] + a[27];
2925 x0i = a[11] - a[26];
2926 x2r = wk1r * x0r + wk1i * x0i;
2927 x2i = wk1r * x0i - wk1i * x0r;
2934 x1r = wk2r * x0r - wk2i * x0i;
2935 x1i = wk2r * x0i + wk2i * x0r;
2936 x0r = a[12] - a[29];
2937 x0i = a[13] + a[28];
2938 x2r = wk2i * x0r - wk2r * x0i;
2939 x2i = wk2i * x0i + wk2r * x0r;
2946 x1r = wk2i * x0r - wk2r * x0i;
2947 x1i = wk2i * x0i + wk2r * x0r;
2948 x0r = a[12] + a[29];
2949 x0i = a[13] - a[28];
2950 x2r = wk2r * x0r - wk2i * x0i;
2951 x2i = wk2r * x0i + wk2i * x0r;
2958 x1r = wk3r * x0r - wk3i * x0i;
2959 x1i = wk3r * x0i + wk3i * x0r;
2960 x0r = a[14] - a[31];
2961 x0i = a[15] + a[30];
2962 x2r = wk1i * x0r - wk1r * x0i;
2963 x2i = wk1i * x0i + wk1r * x0r;
2970 x1r = wk1i * x0r + wk1r * x0i;
2971 x1i = wk1i * x0i - wk1r * x0r;
2972 x0r = a[14] + a[31];
2973 x0i = a[15] - a[30];
2974 x2r = wk3i * x0r - wk3r * x0i;
2975 x2i = wk3i * x0i + wk3r * x0r;
3000 x2r = wn4r * (x0r - x0i);
3001 x2i = wn4r * (x0i + x0r);
3010 x2r = wn4r * (x0r - x0i);
3011 x2i = wn4r * (x0i + x0r);
3036 x2r = wn4r * (x0r - x0i);
3037 x2i = wn4r * (x0i + x0r);
3046 x2r = wn4r * (x0r - x0i);
3047 x2i = wn4r * (x0i + x0r);
3055 void cftf081(double *a, double *w)
3057 double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
3058 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
3059 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3094 y5r = wn4r * (x0r - x0i);
3095 y5i = wn4r * (x0r + x0i);
3096 y7r = wn4r * (x2r - x2i);
3097 y7i = wn4r * (x2r + x2i);
3117 void cftf082(double *a, double *w)
3119 double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i,
3120 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
3121 y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3132 y2r = wn4r * (x0r - x0i);
3133 y2i = wn4r * (x0i + x0r);
3136 y3r = wn4r * (x0r - x0i);
3137 y3i = wn4r * (x0i + x0r);
3140 y4r = wk1r * x0r - wk1i * x0i;
3141 y4i = wk1r * x0i + wk1i * x0r;
3144 y5r = wk1i * x0r - wk1r * x0i;
3145 y5i = wk1i * x0i + wk1r * x0r;
3148 y6r = wk1i * x0r - wk1r * x0i;
3149 y6i = wk1i * x0i + wk1r * x0r;
3152 y7r = wk1r * x0r - wk1i * x0i;
3153 y7i = wk1r * x0i + wk1i * x0r;
3189 void cftf040(double *a)
3191 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3212 void cftb040(double *a)
3214 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3235 void cftx020(double *a)
3248 void rftfsub(int n, double *a, int nc, double *c)
3250 int j, k, kk, ks, m;
3251 double wkr, wki, xr, xi, yr, yi;
3256 for (j = 2; j < m; j += 2) {
3259 wkr = 0.5 - c[nc - kk];
3262 xi = a[j + 1] + a[k + 1];
3263 yr = wkr * xr - wki * xi;
3264 yi = wkr * xi + wki * xr;
3273 void rftbsub(int n, double *a, int nc, double *c)
3275 int j, k, kk, ks, m;
3276 double wkr, wki, xr, xi, yr, yi;
3281 for (j = 2; j < m; j += 2) {
3284 wkr = 0.5 - c[nc - kk];
3287 xi = a[j + 1] + a[k + 1];
3288 yr = wkr * xr + wki * xi;
3289 yi = wkr * xi - wki * xr;
3298 void dctsub(int n, double *a, int nc, double *c)
3300 int j, k, kk, ks, m;
3301 double wkr, wki, xr;
3306 for (j = 1; j < m; j++) {
3309 wkr = c[kk] - c[nc - kk];
3310 wki = c[kk] + c[nc - kk];
3311 xr = wki * a[j] - wkr * a[k];
3312 a[j] = wkr * a[j] + wki * a[k];
3319 void dstsub(int n, double *a, int nc, double *c)
3321 int j, k, kk, ks, m;
3322 double wkr, wki, xr;
3327 for (j = 1; j < m; j++) {
3330 wkr = c[kk] - c[nc - kk];
3331 wki = c[kk] + c[nc - kk];
3332 xr = wki * a[k] - wkr * a[j];
3333 a[k] = wkr * a[k] + wki * a[j];