1 # Lossy compression algorithms very often make use of DCT or DFT calculations,
2 # or variations of these calculations. This file is intended to be a short
3 # reference about classical DCT and DFT algorithms.
15 return cmath.exp (alpha*1j)
19 return c.real-1j*c.imag
25 # Let us start with the canonical definition of the unscaled DCT algorithm :
26 # (I can not draw sigmas in text mode so I'll use python code instead) :)
28 def unscaled_DCT (N, input, output):
29 for o in range(N): # o is output index
31 for i in range(N): # i is input index
32 output[o] = output[o] + input[i] * cos (((2*i+1)*o*pi)/(2*N))
34 # This trivial algorithm uses N*N multiplications and N*(N-1) additions.
37 # And the unscaled DFT algorithm :
40 return exp_j ((-2*pi*k)/N)
42 def unscaled_DFT (N, input, output):
43 for o in range(N): # o is output index
46 output[o] = output[o] + input[i] * W(i*o,N)
48 # This algorithm takes complex input and output. There are N*N complex
49 # multiplications and N*(N-1) complex additions. One complex addition can be
50 # implemented with 2 real additions, and one complex multiplication by a
51 # constant can be implemented with either 4 real multiplications and 2 real
52 # additions, or 3 real multiplications and 3 real additions.
55 # Of course these algorithms are extremely naive implementations and there are
56 # some ways to use the trigonometric properties of the coefficients to find
57 # some decompositions that can accelerate the calculations by several orders
61 # The Lee algorithm splits a DCT calculation of size N into two DCT
62 # calculations of size N/2
64 def unscaled_DCT_Lee (N, input, output):
65 even_input = vector(N/2)
66 odd_input = vector(N/2)
67 even_output = vector(N/2)
68 odd_output = vector(N/2)
71 even_input[i] = input[i] + input[N-1-i]
72 odd_input[i] = input[i] - input[N-1-i]
75 odd_input[i] = odd_input[i] * (0.5 / cos (((2*i+1)*pi)/(2*N)))
77 unscaled_DCT (N/2, even_input, even_output)
78 unscaled_DCT (N/2, odd_input, odd_output)
80 for i in range(N/2-1):
81 odd_output[i] = odd_output[i] + odd_output[i+1]
84 output[2*i] = even_output[i]
85 output[2*i+1] = odd_output[i];
87 # Notes about this algorithm :
89 # The algorithm can be easily inverted to calculate the IDCT instead :
90 # each of the basic stages are separately inversible...
92 # This function does N adds, then N/2 muls, then 2 recursive calls with
93 # size N/2, then N/2-1 adds again. The total number of operations will be
94 # N*log2(N)/2 multiplies and less than 3*N*log2(N)/2 additions.
95 # (exactly N*(3*log2(N)/2-1) + 1 additions). So this is much
96 # faster than the canonical algorithm.
98 # Some of the multiplication coefficient, 0.5/cos(...) can get quite large.
99 # This means that a small error in the input will give a large error on the
100 # output... For a DCT of size N the biggest coefficient will be at i=N/2-1
101 # and it will be slighly more than N/pi which can be large for large N's.
103 # In the IDCT however, the multiplication coefficients for the reverse
104 # transformation are of the form 2*cos(...) so they can not get big and there
105 # is no accuracy problem.
107 # You can find another description of this algorithm at
108 # http://www.intel.com/drg/mmx/appnotes/ap533.htm
111 # The AAN algorithm uses another approach, transforming a DCT calculation into
112 # a DFT calculation of size 2N:
114 def unscaled_DCT_AAN (N, input, output):
115 DFT_input = vector (2*N)
116 DFT_output = vector (2*N)
119 DFT_input[i] = input[i]
120 DFT_input[2*N-1-i] = input[i]
122 unscaled_DFT (2*N, DFT_input, DFT_output)
125 output[i] = DFT_output[i].real * (0.5 / cos ((i*pi)/(2*N)))
127 # Notes about the AAN algorithm :
129 # The cost of this function is N real multiplies and a DFT of size 2*N. The
130 # DFT to calculate has special properties : the inputs are real and symmetric.
131 # Also, we only need to calculate the real parts of the N first DFT outputs.
132 # We will see how we can take advantage of that later.
134 # We can invert this algorithm to calculate the IDCT. The final multiply
135 # stage is trivially invertible. The DFT stage is invertible too, but we have
136 # to take into account the special properties of this particular DFT for that.
138 # Once again we have to take care of numerical precision for the DFT : the
139 # output coefficients can get large, so that a small error in the input will
140 # give a large error on the output... For a DCT of size N the biggest
141 # coefficient will be at i=N/2-1 and it will be slightly more than N/pi
143 # You can find another description of this algorithm at this url :
144 # www.cmlab.csie.ntu.edu.tw/cml/dsp/training/coding/transform/fastdct.html
147 # The DFT calculation can be decomposed into smaller DFT calculations just like
148 # the Lee algorithm does for DCT calculations. This is a well known and studied
149 # problem. One of the available explanations of this process is at this url :
150 # www.cmlab.csie.ntu.edu.tw/cml/dsp/training/coding/transform/fft.html
151 # (This is on the same server as the AAN algorithm description !)
154 # Let's start with the radix-2 decimation-in-time algorithm :
156 def unscaled_DFT_radix2_time (N, input, output):
157 even_input = vector(N/2)
158 odd_input = vector(N/2)
159 even_output = vector(N/2)
160 odd_output = vector(N/2)
163 even_input[i] = input[2*i]
164 odd_input[i] = input[2*i+1]
166 unscaled_DFT (N/2, even_input, even_output)
167 unscaled_DFT (N/2, odd_input, odd_output)
170 odd_output[i] = odd_output[i] * W(i,N)
173 output[i] = even_output[i] + odd_output[i]
174 output[i+N/2] = even_output[i] - odd_output[i]
176 # This algorithm takes complex input and output.
178 # We divide the DFT calculation into 2 DFT calculations of size N/2
179 # We then do N/2 complex multiplies followed by N complex additions.
180 # (actually W(0,N) = 1 and W(N/4,N) = -j so we can skip a few of these complex
181 # multiplies... we will skip 1 for i=0 and 1 for i=N/4. Also for i=N/8 and for
182 # i=3*N/8 the W(i,N) values can be special-cased to implement the complex
183 # multiplication using only 2 real additions and 2 real multiplies)
185 # Also note that all the basic stages of this DFT algorithm are easily
186 # reversible, so we can calculate the IDFT with the same complexity.
189 # A varient of this is the radix-2 decimation-in-frequency algorithm :
191 def unscaled_DFT_radix2_freq (N, input, output):
192 even_input = vector(N/2)
193 odd_input = vector(N/2)
194 even_output = vector(N/2)
195 odd_output = vector(N/2)
198 even_input[i] = input[i] + input[i+N/2]
199 odd_input[i] = input[i] - input[i+N/2]
202 odd_input[i] = odd_input[i] * W(i,N)
204 unscaled_DFT (N/2, even_input, even_output)
205 unscaled_DFT (N/2, odd_input, odd_output)
208 output[2*i] = even_output[i]
209 output[2*i+1] = odd_output[i]
211 # Note that the decimation-in-time and the decimation-in-frequency varients
212 # have exactly the same complexity, they only do the operations in a different
215 # Actually, if you look at the decimation-in-time varient of the DFT, and
216 # reverse it to calculate an IDFT, you get something that is extremely close
217 # to the decimation-in-frequency DFT algorithm...
220 # The radix-4 algorithms are slightly more efficient : they take into account
221 # the fact that with complex numbers, multiplications by j and -j are also
224 # Let's start with the radix-4 decimation-in-time algorithm :
226 def unscaled_DFT_radix4_time (N, input, output):
227 input_0 = vector(N/4)
228 input_1 = vector(N/4)
229 input_2 = vector(N/4)
230 input_3 = vector(N/4)
231 output_0 = vector(N/4)
232 output_1 = vector(N/4)
233 output_2 = vector(N/4)
234 output_3 = vector(N/4)
241 input_0[i] = input[4*i]
242 input_1[i] = input[4*i+1]
243 input_2[i] = input[4*i+2]
244 input_3[i] = input[4*i+3]
246 unscaled_DFT (N/4, input_0, output_0)
247 unscaled_DFT (N/4, input_1, output_1)
248 unscaled_DFT (N/4, input_2, output_2)
249 unscaled_DFT (N/4, input_3, output_3)
252 output_1[i] = output_1[i] * W(i,N)
253 output_2[i] = output_2[i] * W(2*i,N)
254 output_3[i] = output_3[i] * W(3*i,N)
257 tmp_0[i] = output_0[i] + output_2[i]
258 tmp_1[i] = output_0[i] - output_2[i]
259 tmp_2[i] = output_1[i] + output_3[i]
260 tmp_3[i] = output_1[i] - output_3[i]
263 output[i] = tmp_0[i] + tmp_2[i]
264 output[i+N/4] = tmp_1[i] - 1j * tmp_3[i]
265 output[i+N/2] = tmp_0[i] - tmp_2[i]
266 output[i+3*N/4] = tmp_1[i] + 1j * tmp_3[i]
268 # This algorithm takes complex input and output.
270 # We divide the DFT calculation into 4 DFT calculations of size N/4
271 # We then do 3*N/4 complex multiplies followed by 2*N complex additions.
272 # (actually W(0,N) = 1 and W(N/4,N) = -j so we can skip a few of these complex
273 # multiplies... we will skip 3 for i=0 and 1 for i=N/8. Also for i=N/8
274 # the remaining W(i,N) and W(3*i,N) multiplies can be implemented using only
275 # 2 real additions and 2 real multiplies. For i=N/16 and i=3*N/16 we can also
276 # optimise the W(2*i/N) multiply this way.
278 # If we wanted to do the same decomposition with one radix-2 decomposition
279 # of size N and 2 radix-2 decompositions of size N/2, the total cost would be
280 # N complex multiplies and 2*N complex additions. Thus we see that the
281 # decomposition of one DFT calculation of size N into 4 calculations of size
282 # N/4 using the radix-4 algorithm instead of the radix-2 algorithm saved N/4
283 # complex multiplies...
286 # The radix-4 decimation-in-frequency algorithm is similar :
288 def unscaled_DFT_radix4_freq (N, input, output):
289 input_0 = vector(N/4)
290 input_1 = vector(N/4)
291 input_2 = vector(N/4)
292 input_3 = vector(N/4)
293 output_0 = vector(N/4)
294 output_1 = vector(N/4)
295 output_2 = vector(N/4)
296 output_3 = vector(N/4)
303 tmp_0[i] = input[i] + input[i+N/2]
304 tmp_1[i] = input[i+N/4] + input[i+3*N/4]
305 tmp_2[i] = input[i] - input[i+N/2]
306 tmp_3[i] = input[i+N/4] - input[i+3*N/4]
309 input_0[i] = tmp_0[i] + tmp_1[i]
310 input_1[i] = tmp_2[i] - 1j * tmp_3[i]
311 input_2[i] = tmp_0[i] - tmp_1[i]
312 input_3[i] = tmp_2[i] + 1j * tmp_3[i]
315 input_1[i] = input_1[i] * W(i,N)
316 input_2[i] = input_2[i] * W(2*i,N)
317 input_3[i] = input_3[i] * W(3*i,N)
319 unscaled_DFT (N/4, input_0, output_0)
320 unscaled_DFT (N/4, input_1, output_1)
321 unscaled_DFT (N/4, input_2, output_2)
322 unscaled_DFT (N/4, input_3, output_3)
325 output[4*i] = output_0[i]
326 output[4*i+1] = output_1[i]
327 output[4*i+2] = output_2[i]
328 output[4*i+3] = output_3[i]
330 # Once again the complexity is exactly the same as for the radix-4
331 # decimation-in-time DFT algorithm, only the order of the operations is
335 # Now let us reorder the radix-4 algorithms in a different way :
337 #def unscaled_DFT_radix4_time (N, input, output):
338 # input_0 = vector(N/4)
339 # input_1 = vector(N/4)
340 # input_2 = vector(N/4)
341 # input_3 = vector(N/4)
342 # output_0 = vector(N/4)
343 # output_1 = vector(N/4)
344 # output_2 = vector(N/4)
345 # output_3 = vector(N/4)
346 # tmp_0 = vector(N/4)
347 # tmp_1 = vector(N/4)
348 # tmp_2 = vector(N/4)
349 # tmp_3 = vector(N/4)
351 # for i in range(N/4):
352 # input_0[i] = input[4*i]
353 # input_2[i] = input[4*i+2]
355 # unscaled_DFT (N/4, input_0, output_0)
356 # unscaled_DFT (N/4, input_2, output_2)
358 # for i in range(N/4):
359 # output_2[i] = output_2[i] * W(2*i,N)
361 # for i in range(N/4):
362 # tmp_0[i] = output_0[i] + output_2[i]
363 # tmp_1[i] = output_0[i] - output_2[i]
365 # for i in range(N/4):
366 # input_1[i] = input[4*i+1]
367 # input_3[i] = input[4*i+3]
369 # unscaled_DFT (N/4, input_1, output_1)
370 # unscaled_DFT (N/4, input_3, output_3)
372 # for i in range(N/4):
373 # output_1[i] = output_1[i] * W(i,N)
374 # output_3[i] = output_3[i] * W(3*i,N)
376 # for i in range(N/4):
377 # tmp_2[i] = output_1[i] + output_3[i]
378 # tmp_3[i] = output_1[i] - output_3[i]
380 # for i in range(N/4):
381 # output[i] = tmp_0[i] + tmp_2[i]
382 # output[i+N/4] = tmp_1[i] - 1j * tmp_3[i]
383 # output[i+N/2] = tmp_0[i] - tmp_2[i]
384 # output[i+3*N/4] = tmp_1[i] + 1j * tmp_3[i]
386 # We didnt do anything here, only reorder the operations. But now, look at the
387 # first part of this function, up to the calculations of tmp0 and tmp1 : this
388 # is extremely similar to the radix-2 decimation-in-time algorithm ! or more
389 # precisely, it IS the radix-2 decimation-in-time algorithm, with size N/2,
390 # applied on a vector representing the even input coefficients, and giving
391 # an output vector that is the concatenation of tmp0 and tmp1.
392 # This is important to notice, because this means we can now choose to
393 # calculate tmp0 and tmp1 using any DFT algorithm that we want, and we know
394 # that some of them are more efficient than radix-2...
396 # This leads us directly to the split-radix decimation-in-time algorithm :
398 def unscaled_DFT_split_radix_time (N, input, output):
399 even_input = vector(N/2)
400 input_1 = vector(N/4)
401 input_3 = vector(N/4)
402 even_output = vector(N/2)
403 output_1 = vector(N/4)
404 output_3 = vector(N/4)
409 even_input[i] = input[2*i]
412 input_1[i] = input[4*i+1]
413 input_3[i] = input[4*i+3]
415 unscaled_DFT (N/2, even_input, even_output)
416 unscaled_DFT (N/4, input_1, output_1)
417 unscaled_DFT (N/4, input_3, output_3)
420 output_1[i] = output_1[i] * W(i,N)
421 output_3[i] = output_3[i] * W(3*i,N)
424 tmp_0[i] = output_1[i] + output_3[i]
425 tmp_1[i] = output_1[i] - output_3[i]
428 output[i] = even_output[i] + tmp_0[i]
429 output[i+N/4] = even_output[i+N/4] - 1j * tmp_1[i]
430 output[i+N/2] = even_output[i] - tmp_0[i]
431 output[i+3*N/4] = even_output[i+N/4] + 1j * tmp_1[i]
433 # This function performs one DFT of size N/2 and two of size N/4, followed by
434 # N/2 complex multiplies and 3*N/2 complex additions.
435 # (actually W(0,N) = 1 and W(N/4,N) = -j so we can skip a few of these complex
436 # multiplies... we will skip 2 for i=0. Also for i=N/8 the W(i,N) and W(3*i,N)
437 # multiplies can be implemented using only 2 real additions and 2 real
441 # We can similarly define the split-radix decimation-in-frequency DFT :
443 def unscaled_DFT_split_radix_freq (N, input, output):
444 even_input = vector(N/2)
445 input_1 = vector(N/4)
446 input_3 = vector(N/4)
447 even_output = vector(N/2)
448 output_1 = vector(N/4)
449 output_3 = vector(N/4)
454 even_input[i] = input[i] + input[i+N/2]
457 tmp_0[i] = input[i] - input[i+N/2]
458 tmp_1[i] = input[i+N/4] - input[i+3*N/4]
461 input_1[i] = tmp_0[i] - 1j * tmp_1[i]
462 input_3[i] = tmp_0[i] + 1j * tmp_1[i]
465 input_1[i] = input_1[i] * W(i,N)
466 input_3[i] = input_3[i] * W(3*i,N)
468 unscaled_DFT (N/2, even_input, even_output)
469 unscaled_DFT (N/4, input_1, output_1)
470 unscaled_DFT (N/4, input_3, output_3)
473 output[2*i] = even_output[i]
476 output[4*i+1] = output_1[i]
477 output[4*i+3] = output_3[i]
479 # The complexity is again the same as for the decimation-in-time varient.
482 # Now let us now summarize our various algorithms for DFT decomposition :
484 # radix-2 : DFT(N) -> 2*DFT(N/2) using N/2 multiplies and N additions
485 # radix-4 : DFT(N) -> 4*DFT(N/2) using 3*N/4 multiplies and 2*N additions
486 # split-radix : DFT(N) -> DFT(N/2) + 2*DFT(N/4) using N/2 muls and 3*N/2 adds
488 # (we are always speaking of complex multiplies and complex additions...
489 # remember than a complex addition is implemented with 2 real additions, and
490 # a complex multiply is implemented with)
492 # If we want to take into account the special values of W(i,N), we can remove
493 # a few complex multiplies. Supposing N>=16 we can remove :
494 # radix-2 : remove 2 complex multiplies, simplify 2 others
495 # radix-4 : remove 4 complex multiplies, simplify 4 others
496 # split-radix : remove 2 complex multiplies, simplify 2 others
498 # The best performance using these methods is thus :
499 # N complex muls simple muls complex adds method
504 # 16 4 4 64 split radix
505 # 32 16 10 160 split radix
506 # 64 52 20 384 split radix
507 # 128 144 42 896 split radix
508 # 256 372 84 2048 split radix
509 # 512 912 170 4608 split radix
510 # 1024 2164 340 10240 split radix
511 # 2048 5008 682 22528 split radix
512 # 4096 11380 1364 49152 split radix
513 # 8192 25488 2730 106496 split radix
514 # 16384 56436 5460 229376 split radix
515 # 32768 123792 10922 491520 split radix
516 # 65536 269428 21844 1048576 split radix
518 # Now a complex addition is implemented with 2 real additions, a "simple"
519 # complex multiply is implemented with 2 real multiplies and 2 real additions,
520 # and complex multiplies can be implemented with either 2 real additions and
521 # 4 real multiplies, or 3 real additions and 3 real multiplies, so we will
522 # keep them in a separate column. Which gives...
524 # N real additions real multiplies complex multiplies
535 # 1024 21160 680 2164
536 # 2048 46420 1364 5008
537 # 4096 101032 2728 11380
538 # 8192 218452 5460 25488
539 # 16384 469672 10920 56436
540 # 32768 1004884 21844 123792
541 # 65536 2140840 43688 269428
543 # If a complex multiply is implemented with 3 real muls + 3 real adds,
544 # a complex "simple" multiply is implemented with 2 real muls + 2 real adds,
545 # and a complex addition is implemented with 2 real adds, then these results
546 # are consistent with the table at the end of the www.cmlab.csie.ntu.edu.tw
547 # DFT tutorial that I mentionned earlier.
550 # Now another important case for the DFT is the one where the inputs are
551 # real numbers instead of complex ones. We have to find ways to optimize for
552 # this important case.
554 # If the DFT inputs are real-valued, then the DFT outputs have nice properties
555 # too : output[0] and output[N/2] will be real numbers, and output[N-i] will
556 # be the conjugate of output[i] for i in 0...N/2-1
558 # Likewise, if the DFT inputs are purely imaginary numbers, then the DFT
559 # outputs will have special properties too : output[0] and output[N/2] will be
560 # purely imaginary, and output[N-i] will be the opposite of the conjugate of
561 # output[i] for i in 0...N/2-1
563 # We can use these properties to calculate two real-valued DFT at once :
565 def two_real_unscaled_DFT (N, input1, input2, output1, output2):
570 input[i] = input1[i] + 1j * input2[i]
572 unscaled_DFT (N, input, output)
574 output1[0] = output[0].real + 0j
575 output2[0] = output[0].imag + 0j
577 for i in range(N/2)[1:]:
578 output1[i] = 0.5 * (output[i] + conjugate(output[N-i]))
579 output2[i] = -0.5j * (output[i] - conjugate(output[N-i]))
581 output1[N-i] = conjugate(output1[i])
582 output2[N-i] = conjugate(output2[i])
584 output1[N/2] = output[N/2].real + 0j
585 output2[N/2] = output[N/2].imag + 0j
587 # This routine does a total of N-2 complex additions and N-2 complex
590 # This routine can also be inverted to calculate the IDFT of two vectors at
591 # once if we know that the outputs will be real-valued.
594 # If we have only one real-valued DFT calculation to do, we can still cut this
595 # calculation in several parts using one of the decimate-in-time methods
596 # (so that the different parts are still real-valued)
598 # As with complex DFT calculations, the best method is to use a split radix.
599 # There are a lot of symetries in the DFT outputs that we can exploit to
600 # reduce the number of operations...
602 def real_unscaled_DFT_split_radix_1 (N, input, output):
603 even_input = vector(N/2)
604 even_output = vector(N/2)
605 input_1 = vector(N/4)
606 output_1 = vector(N/4)
607 input_3 = vector(N/4)
608 output_3 = vector(N/4)
613 even_input[i] = input[2*i]
616 input_1[i] = input[4*i+1]
617 input_3[i] = input[4*i+3]
619 unscaled_DFT (N/2, even_input, even_output)
620 # this is again a real DFT !
621 # we will only use even_output[i] for i in 0 ... N/4 included. we know that
622 # even_output[N/2-i] is the conjugate of even_output[i]... also we know
623 # that even_output[0] and even_output[N/4] are purely real.
625 unscaled_DFT (N/4, input_1, output_1)
626 unscaled_DFT (N/4, input_3, output_3)
627 # these are real DFTs too... with symetries in the outputs... once again
629 tmp_0[0] = output_1[0] + output_3[0] # real numbers
630 tmp_1[0] = output_1[0] - output_3[0] # real numbers
632 tmp__0 = (output_1[N/8] + output_3[N/8]) * sqrt(0.5) # real numbers
633 tmp__1 = (output_1[N/8] - output_3[N/8]) * sqrt(0.5) # real numbers
634 tmp_0[N/8] = tmp__1 - 1j * tmp__0 # real + 1j * real
635 tmp_1[N/8] = tmp__0 - 1j * tmp__1 # real + 1j * real
637 for i in range(N/8)[1:]:
638 output_1[i] = output_1[i] * W(i,N)
639 output_3[i] = output_3[i] * W(3*i,N)
641 tmp_0[i] = output_1[i] + output_3[i]
642 tmp_1[i] = output_1[i] - output_3[i]
644 tmp_0[N/4-i] = -1j * conjugate(tmp_1[i])
645 tmp_1[N/4-i] = -1j * conjugate(tmp_0[i])
647 output[0] = even_output[0] + tmp_0[0] # real numbers
648 output[N/4] = even_output[N/4] - 1j * tmp_1[0] # real + 1j * real
649 output[N/2] = even_output[0] - tmp_0[0] # real numbers
650 output[3*N/4] = even_output[N/4] + 1j * tmp_1[0] # real + 1j * real
652 for i in range(N/4)[1:]:
653 output[i] = even_output[i] + tmp_0[i]
654 output[i+N/4] = conjugate(even_output[N/4-i]) - 1j * tmp_1[i]
656 output[N-i] = conjugate(output[i])
657 output[3*N/4-i] = conjugate(output[i+N/4])
659 # This function performs one real DFT of size N/2 and two real DFT of size
660 # N/4, followed by 6 real additions, 2 real multiplies, 3*N/4-4 complex
661 # additions and N/4-2 complex multiplies.
664 # We can also try to combine the two real DFT of size N/4 into a single complex
667 def real_unscaled_DFT_split_radix_2 (N, input, output):
668 even_input = vector(N/2)
669 even_output = vector(N/2)
670 odd_input = vector(N/4)
671 odd_output = vector(N/4)
676 even_input[i] = input[2*i]
679 odd_input[i] = input[4*i+1] + 1j * input[4*i+3]
681 unscaled_DFT (N/2, even_input, even_output)
682 # this is again a real DFT !
683 # we will only use even_output[i] for i in 0 ... N/4 included. we know that
684 # even_output[N/2-i] is the conjugate of even_output[i]... also we know
685 # that even_output[0] and even_output[N/4] are purely real.
687 unscaled_DFT (N/4, odd_input, odd_output)
688 # but this one is a complex DFT so no special properties here
690 output_1 = odd_output[0].real
691 output_3 = odd_output[0].imag
692 tmp_0[0] = output_1 + output_3 # real numbers
693 tmp_1[0] = output_1 - output_3 # real numbers
695 output_1 = odd_output[N/8].real
696 output_3 = odd_output[N/8].imag
697 tmp__0 = (output_1 + output_3) * sqrt(0.5) # real numbers
698 tmp__1 = (output_1 - output_3) * sqrt(0.5) # real numbers
699 tmp_0[N/8] = tmp__1 - 1j * tmp__0 # real + 1j * real
700 tmp_1[N/8] = tmp__0 - 1j * tmp__1 # real + 1j * real
702 for i in range(N/8)[1:]:
703 output_1 = odd_output[i] + conjugate(odd_output[N/4-i])
704 output_3 = odd_output[i] - conjugate(odd_output[N/4-i])
706 output_1 = output_1 * 0.5 * W(i,N)
707 output_3 = output_3 * -0.5j * W(3*i,N)
709 tmp_0[i] = output_1 + output_3
710 tmp_1[i] = output_1 - output_3
712 tmp_0[N/4-i] = -1j * conjugate(tmp_1[i])
713 tmp_1[N/4-i] = -1j * conjugate(tmp_0[i])
715 output[0] = even_output[0] + tmp_0[0] # real numbers
716 output[N/4] = even_output[N/4] - 1j * tmp_1[0] # real + 1j * real
717 output[N/2] = even_output[0] - tmp_0[0] # real numbers
718 output[3*N/4] = even_output[N/4] + 1j * tmp_1[0] # real + 1j * real
720 for i in range(N/4)[1:]:
721 output[i] = even_output[i] + tmp_0[i]
722 output[i+N/4] = conjugate(even_output[N/4-i]) - 1j * tmp_1[i]
724 output[N-i] = conjugate(output[i])
725 output[3*N/4-i] = conjugate(output[i+N/4])
727 # This function performs one real DFT of size N/2 and one complex DFT of size
728 # N/4, followed by 6 real additions, 2 real multiplies, N-6 complex additions
729 # and N/4-2 complex multiplies.
732 # After comparing the performance, it turns out that for real-valued DFT, the
733 # version of the algorithm that subdivides the calculation into one real
734 # DFT of size N/2 and two real DFT of size N/4 is the most efficient one.
735 # The other version gives exactly the same number of multiplies and a few more
738 # The performance we get for real-valued DFT is as follows :
740 # N real additions real multiplies complex multiplies
751 # 2048 21164 682 2504
752 # 4096 46422 1364 5690
753 # 8192 101036 2730 12744
754 # 16384 218454 5460 28218
755 # 32768 469676 10922 61896
756 # 65536 1004886 21844 134714
759 # As an example, this is an implementation of a real-valued DFT8, using the
760 # above-mentionned algorithm :
762 def DFT8 (input, output):
763 tmp_0 = input[0] + input[4]
764 tmp_1 = input[0] - input[4]
765 tmp_2 = input[2] + input[6]
766 tmp_3 = input[2] - input[6]
768 even_0 = tmp_0 + tmp_2 # real + real
769 even_1 = tmp_1 - 1j * tmp_3 # real + 1j * real
770 even_2 = tmp_0 - tmp_2 # real + real
771 even_3 = tmp_1 + 1j * tmp_3 # real + 1j * real
773 tmp__0 = input[1] + input[5]
774 tmp__1 = input[1] - input[5]
775 tmp__2 = input[3] + input[7]
776 tmp__3 = input[3] - input[7]
778 tmp_0 = tmp__0 + tmp__2 # real numbers
779 tmp_2 = tmp__0 - tmp__2 # real numbers
781 tmp__0 = (tmp__1 + tmp__3) * sqrt(0.5) # real numbers
782 tmp__1 = (tmp__1 - tmp__3) * sqrt(0.5) # real numbers
783 tmp_1 = tmp__1 - 1j * tmp__0 # real + 1j * real
784 tmp_3 = tmp__0 - 1j * tmp__1 # real + 1j * real
786 output[0] = even_0 + tmp_0 # real numbers
787 output[2] = even_2 - 1j * tmp_2 # real + 1j * real
788 output[4] = even_0 - tmp_0 # real numbers
789 output[6] = even_2 + 1j * tmp_2 # real + 1j * real
791 output[1] = even_1 + tmp_1 # complex numbers
792 output[3] = conjugate(even_1) - 1j * tmp_3 # complex numbers
793 output[5] = conjugate(output[3])
794 output[7] = conjugate(output[1])
797 # Also a basic implementation of the real-valued DFT4 :
799 def DFT4 (input, output):
800 tmp_0 = input[0] + input[2]
801 tmp_1 = input[0] - input[2]
802 tmp_2 = input[1] + input[3]
803 tmp_3 = input[1] - input[3]
805 output[0] = tmp_0 + tmp_2 # real + real
806 output[1] = tmp_1 - 1j * tmp_3 # real + 1j * real
807 output[2] = tmp_0 - tmp_2 # real + real
808 output[3] = tmp_1 + 1j * tmp_3 # real + 1j * real
811 # Now the last piece of the puzzle is the implementation of real-valued DFT
812 # with a symetrical input. If you remember about the AAN DCT algorithm, this
815 # The best method I have found is to use a modification of the radix2
816 # decimate-in-time algorithm here. The trick is that odd_input will be the
817 # symetric of even_input... so we can deduce the value of odd_output from
818 # the value of even_output :
819 # odd_output[i] = conjugate(even_output[i]) * W(-i,N/2)
820 # if we then merge this multiply with the one that is just after it in the
821 # radix-2 decimate-in-time algorithm, and then we take all the symetries into
822 # account to remove the corresponding code, we get the following function :
824 def real_symetric_unscaled_DFT (N, input, output):
825 even_input = vector(N/2)
826 even_output = vector(N/2)
827 odd_output = vector(N/2)
830 even_input[i] = input[2*i]
832 unscaled_DFT (N/2, even_input, even_output)
833 # This is once again a real-valued DFT
835 output[0] = 2 * even_output[0] # real number
838 output[N/4] = (1 + 1j) * even_output[N/4] # complex * real
839 output[3*N/4] = conjugate(output[N/4])
841 for i in range(N/4)[1:]:
842 #odd_output = conjugate(even_output[i]) * W(-i,N)
843 #output[i] = even_output[i] + odd_output
844 #odd_output = even_output[i] * W(N/2+i,N)
845 #output[N/2-i] = conjugate(even_output[i]) + odd_output
850 real = even_output[i].real * (1+cr) + even_output[i].imag * ci
851 imag = even_output[i].real * ci + even_output[i].imag * (1-cr)
852 output[i] = real + 1j * imag
854 real = even_output[i].real * (1-cr) - even_output[i].imag * ci
855 imag = even_output[i].real * ci - even_output[i].imag * (1+cr)
856 output[N/2-i] = real + 1j * imag
858 output[N-i] = conjugate(output[i])
859 output[N/2+i] = conjugate(output[N/2-i])
861 # This function does one real unscaled DFT of size N/2, one multiply by 2, and
862 # N/4-1 times something that can be written with either 6 real muls and 4 real
863 # adds (as I did), or 1 complex mul and 2 complex adds (giving 4 real muls and
864 # 6 adds, or 3 real muls and 7 adds).
867 # Now we can use this new knowledge to write a new optimized version of the
868 # AAN algorithm for the DCT calculation :
870 def unscaled_DCT_AAN_optim (N, input, output):
871 DFT_input = vector (N)
872 DFT_output = vector (N)
875 DFT_input[i] = input[2*i]
876 DFT_input[N-1-i] = input[2*i+1]
878 unscaled_DFT (N, DFT_input, DFT_output)
879 # This is another real-valued DFT
881 output[0] = DFT_output[0]
882 output[N/2] = DFT_output[N/2] * sqrt(0.5)
884 for i in range(N/2)[1:]:
885 tmp = (conjugate(DFT_output[i]) *
886 (1+W(-i,2*N)) * 0.5 / cos ((i*pi)/(2*N)))
888 output[N-i] = tmp.imag
890 # Now the DCT calculation can be reduced to one real-valued DFT calculation of
891 # size N, followed by 1 real multiply and N/2-1 complex multiplies
893 # One funny result is that if we calculate the number of real operations needed
894 # to implement this AAN DCT algorithm, and supposing that we choose to
895 # implement complex multiplies with 3 real adds and 3 real muls, then the
896 # number of operations is *exactly* the same as for the original Lee DCT
905 for i in range(len(vector)):
908 vector[i] = vector[i] + 0j
909 realstr = "%+.4f" % vector[i].real
910 imagstr = "%+.4fj" % vector[i].imag
911 if (realstr == "-0.0000"):
913 if (imagstr == "-0.0000j"):
915 str = str + realstr + imagstr
926 input[i] = whrandom.random()
928 unscaled_DCT_AAN_optim (N, input, output)
929 unscaled_DCT (N, input, verify)
931 if (dump(verify) != dump(output)):