]> git.sesse.net Git - vlc/blob - src/video_decoder/vdec_idct.c
Pour la plus grande joie de tous, le client compile correctement. Attention
[vlc] / src / video_decoder / vdec_idct.c
1 /*****************************************************************************
2  * vdec_idct.c : IDCT functions
3  * (c)1999 VideoLAN
4  *****************************************************************************/
5
6 /*****************************************************************************
7  * Preamble
8  *****************************************************************************/
9
10 #include <errno.h>
11 #include <stdlib.h>
12 #include <stdio.h>
13 #include <unistd.h>
14 #include <string.h>
15 #include <sys/uio.h>
16 #include <X11/Xlib.h>
17 #include <X11/extensions/XShm.h>
18
19 #include "config.h"
20 #include "common.h"
21 #include "mtime.h"
22 #include "vlc_thread.h"
23
24 #include "intf_msg.h"
25 #include "debug.h"                 /* ?? temporary, required by netlist.h */
26
27 #include "input.h"
28 #include "input_netlist.h"
29 #include "decoder_fifo.h"
30 #include "video.h"
31 #include "video_output.h"
32
33 #include "vdec_idct.h"
34 #include "video_decoder.h"
35 #include "vdec_motion.h"
36
37 #include "vpar_blocks.h"
38 #include "vpar_headers.h"
39 #include "video_fifo.h"
40 #include "vpar_synchro.h"
41 #include "video_parser.h"
42
43 /*
44  * Local prototypes
45  */
46
47 /* Our current implementation is a fast DCT, we might move to a fast DFT or
48  * an MMX DCT in the future. */
49
50 /*****************************************************************************
51  * vdec_DummyIDCT : dummy function that does nothing
52  *****************************************************************************/
53 void vdec_DummyIDCT( vdec_thread_t * p_vdec, elem_t * p_block, 
54                      int i_idontcare )
55 {
56 }
57
58 /*****************************************************************************
59  * vdec_InitIDCT   : initialize data for vdec_SparseIDCT
60  * vdec_SparseIDCT : IDCT function for sparse matrices
61  *****************************************************************************/
62
63 void vdec_InitIDCT (vdec_thread_t * p_vdec) 
64 {         
65     int i;
66     
67     elem_t * p_pre = p_vdec->p_pre_idct;
68     memset( p_pre, 0, 64*64*sizeof(elem_t) );
69     
70     for( i=0 ; i < 64 ; i++ ) 
71     {
72         p_pre[i*64+i] = 1 << SPARSE_SCALE_FACTOR;
73         vdec_IDCT( p_vdec, &p_pre[i*64], 0) ;
74     }
75 }
76
77 void vdec_SparseIDCT (vdec_thread_t * p_vdec, elem_t * p_block, 
78                       int i_sparse_pos)
79 {
80     short int val;
81     int * dp;
82     int v;
83     short int * p_dest;
84     short int * p_source;
85     int coeff, rr;
86
87     /* If DC Coefficient. */
88
89     if ( i_sparse_pos == 0 ) 
90     {
91             dp=(int *)p_block;
92             val= *p_block >> 6;
93         /* Compute int to assign.  This speeds things up a bit */
94         v = ((val & 0xffff) | (val << 16));
95         dp[0] = v;     dp[1] = v;     dp[2] = v;     dp[3] = v;
96         dp[4] = v;     dp[5] = v;     dp[6] = v;     dp[7] = v;
97         dp[8] = v;     dp[9] = v;     dp[10] = v;    dp[11] = v;
98         dp[12] = v;    dp[13] = v;    dp[14] = v;    dp[15] = v;
99         dp[16] = v;    dp[17] = v;    dp[18] = v;    dp[19] = v;
100         dp[20] = v;    dp[21] = v;    dp[22] = v;    dp[23] = v;
101         dp[24] = v;    dp[25] = v;    dp[26] = v;    dp[27] = v;
102         dp[28] = v;    dp[29] = v;    dp[30] = v;    dp[31] = v;
103         return;
104     }
105     /* Some other coefficient. */
106     p_dest = (s16*)p_block;
107     p_source = (s16*)&p_vdec->p_pre_idct[i_sparse_pos];
108     coeff = (int)p_dest[i_sparse_pos];
109     for( rr=0 ; rr < 4 ; rr++ )
110     {
111         p_dest[0] = (p_source[0] * coeff) >> SPARSE_SCALE_FACTOR;
112         p_dest[1] = (p_source[1] * coeff) >> SPARSE_SCALE_FACTOR;
113         p_dest[2] = (p_source[2] * coeff) >> SPARSE_SCALE_FACTOR;
114         p_dest[3] = (p_source[3] * coeff) >> SPARSE_SCALE_FACTOR;
115         p_dest[4] = (p_source[4] * coeff) >> SPARSE_SCALE_FACTOR;
116         p_dest[5] = (p_source[5] * coeff) >> SPARSE_SCALE_FACTOR;
117         p_dest[6] = (p_source[6] * coeff) >> SPARSE_SCALE_FACTOR;
118         p_dest[7] = (p_source[7] * coeff) >> SPARSE_SCALE_FACTOR;
119         p_dest[8] = (p_source[8] * coeff) >> SPARSE_SCALE_FACTOR;
120         p_dest[9] = (p_source[9] * coeff) >> SPARSE_SCALE_FACTOR;
121         p_dest[10] = (p_source[10] * coeff) >> SPARSE_SCALE_FACTOR;
122         p_dest[11] = (p_source[11] * coeff) >> SPARSE_SCALE_FACTOR;
123         p_dest[12] = (p_source[12] * coeff) >> SPARSE_SCALE_FACTOR;
124         p_dest[13] = (p_source[13] * coeff) >> SPARSE_SCALE_FACTOR;
125         p_dest[14] = (p_source[14] * coeff) >> SPARSE_SCALE_FACTOR;
126         p_dest[15] = (p_source[15] * coeff) >> SPARSE_SCALE_FACTOR;
127         p_dest += 16;
128         p_source += 16;
129     }
130     return;
131 }
132       
133
134 /*****************************************************************************
135  * vdec_IDCT : IDCT function for normal matrices
136  *****************************************************************************/
137 void vdec_IDCT( vdec_thread_t * p_vdec, elem_t * p_block, int i_idontcare )
138 {
139     s32 tmp0, tmp1, tmp2, tmp3;
140     s32 tmp10, tmp11, tmp12, tmp13;
141     s32 z1, z2, z3, z4, z5;
142     s32 d0, d1, d2, d3, d4, d5, d6, d7;
143     elem_t * dataptr;
144     int rowctr;
145     
146     SHIFT_TEMPS
147    
148     /* Pass 1: process rows. */
149     /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
150     /* furthermore, we scale the results by 2**PASS1_BITS. */
151
152     dataptr = p_block;
153
154     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) 
155     {
156         /* Due to quantization, we will usually find that many of the input
157          * coefficients are zero, especially the AC terms.  We can exploit this
158          * by short-circuiting the IDCT calculation for any row in which all
159          * the AC terms are zero.  In that case each output is equal to the
160          * DC coefficient (with scale factor as needed).
161          * With typical images and quantization tables, half or more of the
162          * row DCT calculations can be simplified this way.
163          */
164
165         register int * idataptr = (int*)dataptr;
166         d0 = dataptr[0];
167         d1 = dataptr[1];
168         if ( (d1 == 0) && ((idataptr[1] | idataptr[2] | idataptr[3]) == 0) ) 
169         {
170       /* AC terms all zero */
171             if (d0) 
172             {
173       /* Compute a 32 bit value to assign. */
174                 elem_t dcval = (elem_t) (d0 << PASS1_BITS);
175                 register int v = (dcval & 0xffff) | (dcval << 16);
176   
177                 idataptr[0] = v;
178                 idataptr[1] = v;
179                 idataptr[2] = v;
180                 idataptr[3] = v;
181             }
182       
183             dataptr += DCTSIZE; /* advance pointer to next row */
184             continue;
185         }
186         d2 = dataptr[2];
187         d3 = dataptr[3];
188         d4 = dataptr[4];
189         d5 = dataptr[5];
190         d6 = dataptr[6];
191         d7 = dataptr[7];
192
193     /* Even part: reverse the even part of the forward DCT. */
194     /* The rotator is sqrt(2)*c(-6). */
195         if (d6) 
196         {
197             if (d4) 
198             {
199                 if (d2) 
200                 {
201                             if (d0) 
202                     {
203             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
204                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
205                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
206                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
207
208                         tmp0 = (d0 + d4) << CONST_BITS;
209                         tmp1 = (d0 - d4) << CONST_BITS;
210
211                         tmp10 = tmp0 + tmp3;
212                         tmp13 = tmp0 - tmp3;
213                         tmp11 = tmp1 + tmp2;
214                         tmp12 = tmp1 - tmp2;
215                     } 
216                     else 
217                     {
218                     /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
219                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
220                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
221                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
222
223                         tmp0 = d4 << CONST_BITS;
224     
225                         tmp10 = tmp0 + tmp3;
226                         tmp13 = tmp0 - tmp3;
227                         tmp11 = tmp2 - tmp0;
228                         tmp12 = -(tmp0 + tmp2);
229                         }
230                 } 
231                 else 
232                 {
233                             if (d0) 
234                     {
235             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
236                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
237                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
238     
239                         tmp0 = (d0 + d4) << CONST_BITS;
240                         tmp1 = (d0 - d4) << CONST_BITS;
241
242                         tmp10 = tmp0 + tmp3;
243                         tmp13 = tmp0 - tmp3;
244                         tmp11 = tmp1 + tmp2;
245                         tmp12 = tmp1 - tmp2;
246                         }
247                     else 
248                     {
249                     /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
250                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
251                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
252     
253                         tmp0 = d4 << CONST_BITS;
254  
255                         tmp10 = tmp0 + tmp3;
256                         tmp13 = tmp0 - tmp3;
257                         tmp11 = tmp2 - tmp0;
258                         tmp12 = -(tmp0 + tmp2);
259                         }
260                 }
261             } 
262             else 
263             {
264                 if (d2) 
265                 {
266                     if (d0) 
267                     {
268             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
269                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
270                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
271                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
272
273                         tmp0 = d0 << CONST_BITS;
274     
275                         tmp10 = tmp0 + tmp3;
276                         tmp13 = tmp0 - tmp3;
277                         tmp11 = tmp0 + tmp2;
278                         tmp12 = tmp0 - tmp2;
279                     } 
280                     else 
281                     {
282                     /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
283                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
284                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
285                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
286
287                         tmp10 = tmp3;
288                         tmp13 = -tmp3;
289                         tmp11 = tmp2;
290                         tmp12 = -tmp2;
291                             }
292                 }
293                 else 
294                 {
295                     if (d0) 
296                     {
297             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
298                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
299                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
300     
301                         tmp0 = d0 << CONST_BITS;
302     
303                         tmp10 = tmp0 + tmp3;
304                         tmp13 = tmp0 - tmp3;
305                         tmp11 = tmp0 + tmp2;
306                         tmp12 = tmp0 - tmp2;
307                     }
308                     else 
309                     {
310             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
311                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
312                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
313
314                         tmp10 = tmp3;
315                         tmp13 = -tmp3;
316                         tmp11 = tmp2;
317                         tmp12 = -tmp2;
318                     }
319                 }
320             }
321         }
322         else
323         {
324             if (d4) 
325             {
326                 if (d2) 
327                 {
328                     if (d0) 
329                     {
330                     /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
331                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
332                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
333     
334                         tmp0 = (d0 + d4) << CONST_BITS;
335                         tmp1 = (d0 - d4) << CONST_BITS;
336     
337                         tmp10 = tmp0 + tmp3;
338                         tmp13 = tmp0 - tmp3;
339                         tmp11 = tmp1 + tmp2;
340                         tmp12 = tmp1 - tmp2;
341                     }
342                     else 
343                     {
344             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
345                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
346                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
347     
348                         tmp0 = d4 << CONST_BITS;
349
350                         tmp10 = tmp0 + tmp3;
351                         tmp13 = tmp0 - tmp3;
352                         tmp11 = tmp2 - tmp0;
353                         tmp12 = -(tmp0 + tmp2);
354                     }
355                 }
356                 else 
357                 {
358                     if (d0)
359                     {
360             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
361                         tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
362                         tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
363                     } 
364                     else 
365                     {
366             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
367                         tmp10 = tmp13 = d4 << CONST_BITS;
368                         tmp11 = tmp12 = -tmp10;
369                     }
370                 }
371             }
372             else 
373             {
374                 if (d2) 
375                 {
376                     if (d0) 
377                     {
378             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
379                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
380                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
381
382                         tmp0 = d0 << CONST_BITS;
383
384                         tmp10 = tmp0 + tmp3;
385                         tmp13 = tmp0 - tmp3;
386                         tmp11 = tmp0 + tmp2;
387                         tmp12 = tmp0 - tmp2;
388                     }
389                     else
390                     {
391             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
392                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
393                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
394
395                         tmp10 = tmp3;
396                         tmp13 = -tmp3;
397                         tmp11 = tmp2;
398                         tmp12 = -tmp2;
399                     }
400                 }
401                 else
402                 {
403                     if (d0)
404                     {
405             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
406                         tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
407                     } 
408                     else
409                     {
410             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
411                         tmp10 = tmp13 = tmp11 = tmp12 = 0;
412                     }
413                 }    
414             }
415         }
416
417
418     /* Odd part per figure 8; the matrix is unitary and hence its
419      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
420      */
421
422         if (d7) 
423             {
424                 if (d5)
425             {
426                 if (d3) 
427                 {
428                     if (d1) 
429                     {
430             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
431                         z1 = d7 + d1;
432                         z2 = d5 + d3;
433                         z3 = d7 + d3;
434                         z4 = d5 + d1;
435                         z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
436                     
437                         tmp0 = MULTIPLY(d7, FIX(0.298631336)); 
438                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
439                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
440                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
441                         z1 = MULTIPLY(z1, - FIX(0.899976223));
442                         z2 = MULTIPLY(z2, - FIX(2.562915447));
443                         z3 = MULTIPLY(z3, - FIX(1.961570560));
444                         z4 = MULTIPLY(z4, - FIX(0.390180644));
445                     
446                         z3 += z5;
447                         z4 += z5;
448                     
449                         tmp0 += z1 + z3;
450                         tmp1 += z2 + z4;
451                         tmp2 += z2 + z3;
452                         tmp3 += z1 + z4;
453                     } 
454                     else 
455                     {
456             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
457                         z2 = d5 + d3;
458                         z3 = d7 + d3;
459                         z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
460                     
461                         tmp0 = MULTIPLY(d7, FIX(0.298631336)); 
462                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
463                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
464                         z1 = MULTIPLY(d7, - FIX(0.899976223));
465                         z2 = MULTIPLY(z2, - FIX(2.562915447));
466                         z3 = MULTIPLY(z3, - FIX(1.961570560));
467                         z4 = MULTIPLY(d5, - FIX(0.390180644));
468                     
469                         z3 += z5;
470                         z4 += z5;
471                     
472                         tmp0 += z1 + z3;
473                         tmp1 += z2 + z4;
474                         tmp2 += z2 + z3;
475                         tmp3 = z1 + z4;
476                         }
477                     }
478                 else 
479                 {
480                     if (d1)
481                     {
482             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
483                         z1 = d7 + d1;
484                         z4 = d5 + d1;
485                         z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
486                     
487                         tmp0 = MULTIPLY(d7, FIX(0.298631336)); 
488                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
489                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
490                         z1 = MULTIPLY(z1, - FIX(0.899976223));
491                         z2 = MULTIPLY(d5, - FIX(2.562915447));
492                         z3 = MULTIPLY(d7, - FIX(1.961570560));
493                         z4 = MULTIPLY(z4, - FIX(0.390180644));
494                     
495                         z3 += z5;
496                         z4 += z5;
497                     
498                         tmp0 += z1 + z3;
499                         tmp1 += z2 + z4;
500                         tmp2 = z2 + z3;
501                         tmp3 += z1 + z4;
502                     }
503                     else
504                     {
505             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
506                         z5 = MULTIPLY(d7 + d5, FIX(1.175875602));
507
508                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
509                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
510                         z1 = MULTIPLY(d7, - FIX(0.899976223));
511                         z3 = MULTIPLY(d7, - FIX(1.961570560));
512                         z2 = MULTIPLY(d5, - FIX(2.562915447));
513                         z4 = MULTIPLY(d5, - FIX(0.390180644));
514                     
515                         z3 += z5;
516                         z4 += z5;
517                         
518                         tmp0 += z3;
519                         tmp1 += z4;
520                         tmp2 = z2 + z3;
521                         tmp3 = z1 + z4;
522                     }
523                 }
524             }
525             else 
526             {
527                 if (d3) 
528                 {
529                     if (d1) 
530                     {
531             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
532                         z1 = d7 + d1;
533                         z3 = d7 + d3;
534                         z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
535                     
536                         tmp0 = MULTIPLY(d7, FIX(0.298631336)); 
537                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
538                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
539                         z1 = MULTIPLY(z1, - FIX(0.899976223));
540                         z2 = MULTIPLY(d3, - FIX(2.562915447));
541                         z3 = MULTIPLY(z3, - FIX(1.961570560));
542                         z4 = MULTIPLY(d1, - FIX(0.390180644));
543                     
544                         z3 += z5;
545                         z4 += z5;
546                     
547                         tmp0 += z1 + z3;
548                         tmp1 = z2 + z4;
549                         tmp2 += z2 + z3;
550                         tmp3 += z1 + z4;
551                     }
552                     else
553                     {
554             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
555                         z3 = d7 + d3;
556                         z5 = MULTIPLY(z3, FIX(1.175875602));
557                     
558                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
559                         tmp2 = MULTIPLY(d3, FIX(0.509795579));
560                         z1 = MULTIPLY(d7, - FIX(0.899976223));
561                         z2 = MULTIPLY(d3, - FIX(2.562915447));
562                         z3 = MULTIPLY(z3, - FIX2(0.785694958));
563     
564                         tmp0 += z3;
565                         tmp1 = z2 + z5;
566                         tmp2 += z3;
567                         tmp3 = z1 + z5;
568                     }
569                 } 
570                 else 
571                 {
572                     if (d1) 
573                     {
574             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
575                         z1 = d7 + d1;
576                         z5 = MULTIPLY(z1, FIX(1.175875602));
577
578                         tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
579                         tmp3 = MULTIPLY(d1, FIX2(1.111140466));
580                         z1 = MULTIPLY(z1, FIX2(0.275899379));
581                         z3 = MULTIPLY(d7, - FIX(1.961570560));
582                         z4 = MULTIPLY(d1, - FIX(0.390180644));
583     
584                         tmp0 += z1;
585                         tmp1 = z4 + z5;
586                         tmp2 = z3 + z5;
587                         tmp3 += z1;
588                     }   
589                 else 
590                     {
591             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
592                         tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
593                         tmp1 = MULTIPLY(d7, FIX(1.175875602));
594                         tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
595                         tmp3 = MULTIPLY(d7, FIX2(0.275899379));
596                     }
597                 }
598             }
599         }
600         else
601         { 
602             if (d5)
603             {
604                 if (d3) 
605                 {
606                     if (d1)
607                     {
608             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
609                         z2 = d5 + d3;
610                         z4 = d5 + d1;
611                         z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
612                     
613                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
614                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
615                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
616                         z1 = MULTIPLY(d1, - FIX(0.899976223));
617                         z2 = MULTIPLY(z2, - FIX(2.562915447));
618                         z3 = MULTIPLY(d3, - FIX(1.961570560));
619                         z4 = MULTIPLY(z4, - FIX(0.390180644));
620                     
621                         z3 += z5;
622                         z4 += z5;
623                     
624                         tmp0 = z1 + z3;
625                         tmp1 += z2 + z4;
626                         tmp2 += z2 + z3;
627                         tmp3 += z1 + z4;
628                     } 
629                     else 
630                     {
631             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
632                         z2 = d5 + d3;
633                         z5 = MULTIPLY(z2, FIX(1.175875602));
634                     
635                         tmp1 = MULTIPLY(d5, FIX2(1.662939225));
636                         tmp2 = MULTIPLY(d3, FIX2(1.111140466));
637                         z2 = MULTIPLY(z2, - FIX2(1.387039845));
638                         z3 = MULTIPLY(d3, - FIX(1.961570560));
639                         z4 = MULTIPLY(d5, - FIX(0.390180644));
640                     
641                         tmp0 = z3 + z5;
642                         tmp1 += z2;
643                         tmp2 += z2;
644                         tmp3 = z4 + z5;
645                     }
646                 }
647                 else 
648                 {
649                     if (d1) 
650                     {
651             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
652                         z4 = d5 + d1;
653                         z5 = MULTIPLY(z4, FIX(1.175875602));
654                     
655                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
656                         tmp3 = MULTIPLY(d1, FIX2(0.601344887));
657                         z1 = MULTIPLY(d1, - FIX(0.899976223));
658                         z2 = MULTIPLY(d5, - FIX(2.562915447));
659                         z4 = MULTIPLY(z4, FIX2(0.785694958));
660                     
661                         tmp0 = z1 + z5;
662                         tmp1 += z4;
663                         tmp2 = z2 + z5;
664                         tmp3 += z4;
665                     }
666                     else 
667                     {
668             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
669                         tmp0 = MULTIPLY(d5, FIX(1.175875602));
670                         tmp1 = MULTIPLY(d5, FIX2(0.275899380));
671                         tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
672                         tmp3 = MULTIPLY(d5, FIX2(0.785694958));
673                     }
674                 }
675             }
676             else 
677             {
678                 if (d3)
679                 {
680                     if (d1) 
681                     {
682             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
683                         z5 = d3 + d1;
684
685                         tmp2 = MULTIPLY(d3, - FIX(1.451774981));
686                         tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
687                         z1 = MULTIPLY(d1, FIX(1.061594337));
688                         z2 = MULTIPLY(d3, - FIX(2.172734803));
689                         z4 = MULTIPLY(z5, FIX(0.785694958));
690                         z5 = MULTIPLY(z5, FIX(1.175875602));
691                     
692                         tmp0 = z1 - z4;
693                         tmp1 = z2 + z4;
694                         tmp2 += z5;
695                         tmp3 += z5;
696                     }
697                     else 
698                     {
699             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
700                         tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
701                         tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
702                         tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
703                         tmp3 = MULTIPLY(d3, FIX(1.175875602));
704                     }
705                 }
706                 else 
707                 {
708                     if (d1) 
709                     {
710             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
711                         tmp0 = MULTIPLY(d1, FIX2(0.275899379));
712                         tmp1 = MULTIPLY(d1, FIX2(0.785694958));
713                         tmp2 = MULTIPLY(d1, FIX(1.175875602));
714                         tmp3 = MULTIPLY(d1, FIX2(1.387039845));
715                     }
716                     else 
717                     {
718             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
719                         tmp0 = tmp1 = tmp2 = tmp3 = 0;
720                     }
721                 }
722             }
723         }
724
725     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
726
727         dataptr[0] = (elem_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
728         dataptr[7] = (elem_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
729         dataptr[1] = (elem_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
730         dataptr[6] = (elem_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
731         dataptr[2] = (elem_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
732         dataptr[5] = (elem_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
733         dataptr[3] = (elem_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
734         dataptr[4] = (elem_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
735
736         dataptr += DCTSIZE;             /* advance pointer to next row */
737     }
738
739   /* Pass 2: process columns. */
740   /* Note that we must descale the results by a factor of 8 == 2**3, */
741   /* and also undo the PASS1_BITS scaling. */
742
743     dataptr = p_block;
744     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) 
745     {
746     /* Columns of zeroes can be exploited in the same way as we did with rows.
747      * However, the row calculation has created many nonzero AC terms, so the
748      * simplification applies less often (typically 5% to 10% of the time).
749      * On machines with very fast multiplication, it's possible that the
750      * test takes more time than it's worth.  In that case this section
751      * may be commented out.
752      */
753
754         d0 = dataptr[DCTSIZE*0];
755         d1 = dataptr[DCTSIZE*1];
756         d2 = dataptr[DCTSIZE*2];
757         d3 = dataptr[DCTSIZE*3];
758         d4 = dataptr[DCTSIZE*4];
759         d5 = dataptr[DCTSIZE*5];
760         d6 = dataptr[DCTSIZE*6];
761         d7 = dataptr[DCTSIZE*7];
762
763     /* Even part: reverse the even part of the forward DCT. */
764     /* The rotator is sqrt(2)*c(-6). */
765         if (d6) 
766         {
767             if (d4) 
768             {
769                 if (d2)
770                 {
771                     if (d0)
772                     {
773             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
774                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
775                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
776                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
777
778                         tmp0 = (d0 + d4) << CONST_BITS;
779                         tmp1 = (d0 - d4) << CONST_BITS;
780
781                         tmp10 = tmp0 + tmp3;
782                         tmp13 = tmp0 - tmp3;
783                         tmp11 = tmp1 + tmp2;
784                         tmp12 = tmp1 - tmp2;
785                     }
786                     else 
787                     {
788             /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
789                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
790                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
791                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
792
793                         tmp0 = d4 << CONST_BITS;
794
795                         tmp10 = tmp0 + tmp3;
796                         tmp13 = tmp0 - tmp3;
797                         tmp11 = tmp2 - tmp0;
798                         tmp12 = -(tmp0 + tmp2);
799                     }
800                 }
801                 else 
802                 {
803                     if (d0)
804                     {
805             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
806                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
807                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
808
809                         tmp0 = (d0 + d4) << CONST_BITS;
810                         tmp1 = (d0 - d4) << CONST_BITS;
811
812                         tmp10 = tmp0 + tmp3;
813                         tmp13 = tmp0 - tmp3;
814                         tmp11 = tmp1 + tmp2;
815                         tmp12 = tmp1 - tmp2;
816                     }
817                     else
818                     {
819             /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
820                         tmp2 = MULTIPLY(d6, -FIX2(1.306562965));
821                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
822
823                         tmp0 = d4 << CONST_BITS;
824
825                         tmp10 = tmp0 + tmp3;
826                         tmp13 = tmp0 - tmp3;
827                         tmp11 = tmp2 - tmp0;
828                         tmp12 = -(tmp0 + tmp2);
829                     }
830                 }
831             }
832             else
833             {
834                 if (d2) 
835                 {
836                     if (d0) 
837                     {
838             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
839                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
840                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
841                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
842
843                         tmp0 = d0 << CONST_BITS;
844
845                         tmp10 = tmp0 + tmp3;
846                         tmp13 = tmp0 - tmp3;
847                         tmp11 = tmp0 + tmp2;
848                         tmp12 = tmp0 - tmp2;
849                     }
850                     else
851                     {
852             /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
853                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
854                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
855                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
856
857                         tmp10 = tmp3;
858                         tmp13 = -tmp3;
859                         tmp11 = tmp2;
860                         tmp12 = -tmp2;
861                     }
862                 } 
863                 else 
864                 {
865                     if (d0)
866                     {
867             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
868                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
869                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
870
871                     tmp0 = d0 << CONST_BITS;
872
873                     tmp10 = tmp0 + tmp3;
874                     tmp13 = tmp0 - tmp3;
875                     tmp11 = tmp0 + tmp2;
876                     tmp12 = tmp0 - tmp2;
877                 } 
878                 else 
879                 {
880             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
881                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
882                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
883                     tmp10 = tmp3;
884                     tmp13 = -tmp3;
885                     tmp11 = tmp2;
886                     tmp12 = -tmp2;
887                 }
888             }
889         }
890     }
891     else
892     {
893         if (d4) 
894         {
895             if (d2) 
896             {
897                     if (d0) 
898                 {
899             /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
900                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
901                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
902
903                     tmp0 = (d0 + d4) << CONST_BITS;
904                     tmp1 = (d0 - d4) << CONST_BITS;
905
906                     tmp10 = tmp0 + tmp3;
907                     tmp13 = tmp0 - tmp3;
908                     tmp11 = tmp1 + tmp2;
909                     tmp12 = tmp1 - tmp2;
910                 }
911                 else 
912                 {
913             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
914                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
915                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
916
917                     tmp0 = d4 << CONST_BITS;
918
919                     tmp10 = tmp0 + tmp3;
920                     tmp13 = tmp0 - tmp3;
921                     tmp11 = tmp2 - tmp0;
922                     tmp12 = -(tmp0 + tmp2);
923                 }
924             }
925             else 
926             {
927                 if (d0)
928                 {
929             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
930                     tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
931                     tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
932                 }
933                 else 
934                 {
935             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
936                     tmp10 = tmp13 = d4 << CONST_BITS;
937                     tmp11 = tmp12 = -tmp10;
938                 }
939             }
940         } 
941         else 
942         {
943         if (d2) 
944         {
945             if (d0) 
946             {
947             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
948                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
949                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
950
951                     tmp0 = d0 << CONST_BITS;
952
953                     tmp10 = tmp0 + tmp3;
954                     tmp13 = tmp0 - tmp3;
955                     tmp11 = tmp0 + tmp2;
956                     tmp12 = tmp0 - tmp2;
957             }
958             else 
959             {
960             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
961                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
962                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
963
964                     tmp10 = tmp3;
965                     tmp13 = -tmp3;
966                     tmp11 = tmp2;
967                     tmp12 = -tmp2;
968             }
969         }
970         else 
971         {
972             if (d0) 
973                 {
974             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
975                     tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
976                 }
977                 else 
978                 {
979             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
980                     tmp10 = tmp13 = tmp11 = tmp12 = 0;
981                 }
982             }
983         }
984     }
985
986     /* Odd part per figure 8; the matrix is unitary and hence its
987      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
988      */
989     if (d7) 
990     {
991             if (d5) 
992         {
993             if (d3) 
994             {
995                 if (d1) 
996                 {
997             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
998                     z1 = d7 + d1;
999                     z2 = d5 + d3;
1000                     z3 = d7 + d3;
1001                     z4 = d5 + d1;
1002                     z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
1003                     
1004                     tmp0 = MULTIPLY(d7, FIX(0.298631336)); 
1005                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1006                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1007                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1008                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1009                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1010                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1011                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1012                     
1013                     z3 += z5;
1014                     z4 += z5;
1015                     
1016                     tmp0 += z1 + z3;
1017                     tmp1 += z2 + z4;
1018                     tmp2 += z2 + z3;
1019                     tmp3 += z1 + z4;
1020                 }
1021                 else 
1022                 {
1023             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
1024                     z2 = d5 + d3;
1025                     z3 = d7 + d3;
1026                     z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
1027                     
1028                     tmp0 = MULTIPLY(d7, FIX(0.298631336)); 
1029                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1030                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1031                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1032                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1033                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1034                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1035                     
1036                     z3 += z5;
1037                     z4 += z5;
1038                     
1039                     tmp0 += z1 + z3;
1040                     tmp1 += z2 + z4;
1041                     tmp2 += z2 + z3;
1042                     tmp3 = z1 + z4;
1043                 }
1044             } 
1045             else
1046             {
1047                 if (d1) 
1048                 {
1049             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
1050                     z1 = d7 + d1;
1051                     z4 = d5 + d1;
1052                     z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
1053                     
1054                     tmp0 = MULTIPLY(d7, FIX(0.298631336)); 
1055                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1056                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1057                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1058                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1059                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1060                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1061                     
1062                     z3 += z5;
1063                     z4 += z5;
1064                     
1065                     tmp0 += z1 + z3;
1066                     tmp1 += z2 + z4;
1067                     tmp2 = z2 + z3;
1068                     tmp3 += z1 + z4;
1069                 }
1070                 else 
1071                 {
1072             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
1073                     z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
1074
1075                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887)); 
1076                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1077                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1078                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1079                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1080                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1081                     
1082                     z3 += z5;
1083                     z4 += z5;
1084                     
1085                     tmp0 += z3;
1086                     tmp1 += z4;
1087                     tmp2 = z2 + z3;
1088                     tmp3 = z1 + z4;
1089                 }
1090             }
1091         }
1092         else 
1093         {
1094             if (d3)
1095             {
1096                 if (d1) 
1097                 {
1098             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
1099                     z1 = d7 + d1;
1100                     z3 = d7 + d3;
1101                     z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
1102                     
1103                     tmp0 = MULTIPLY(d7, FIX(0.298631336)); 
1104                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1105                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1106                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1107                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1108                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1109                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1110                     
1111                     z3 += z5;
1112                     z4 += z5;
1113                     
1114                     tmp0 += z1 + z3;
1115                     tmp1 = z2 + z4;
1116                     tmp2 += z2 + z3;
1117                     tmp3 += z1 + z4;
1118                 } 
1119                 else 
1120                 {
1121             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1122                     z3 = d7 + d3;
1123                     z5 = MULTIPLY(z3, FIX(1.175875602));
1124                     
1125                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887)); 
1126                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1127                     tmp2 = MULTIPLY(d3, FIX(0.509795579));
1128                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1129                     z3 = MULTIPLY(z3, - FIX2(0.785694958));
1130                     
1131                     tmp0 += z3;
1132                     tmp1 = z2 + z5;
1133                     tmp2 += z3;
1134                     tmp3 = z1 + z5;
1135                 }
1136             } 
1137             else
1138             {
1139                 if (d1) 
1140                 {
1141             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1142                     z1 = d7 + d1;
1143                     z5 = MULTIPLY(z1, FIX(1.175875602));
1144
1145                     tmp0 = MULTIPLY(d7, - FIX2(1.662939224)); 
1146                     tmp3 = MULTIPLY(d1, FIX2(1.111140466));
1147                     z1 = MULTIPLY(z1, FIX2(0.275899379));
1148                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1149                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1150
1151                     tmp0 += z1;
1152                     tmp1 = z4 + z5;
1153                     tmp2 = z3 + z5;
1154                     tmp3 += z1;
1155                 }
1156                 else 
1157                 {
1158             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1159                     tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
1160                     tmp1 = MULTIPLY(d7, FIX(1.175875602));
1161                     tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
1162                     tmp3 = MULTIPLY(d7, FIX2(0.275899379));
1163                 }
1164             }
1165         }
1166     } 
1167     else 
1168     {
1169         if (d5) 
1170         {
1171             if (d3) 
1172             {
1173                 if (d1) 
1174                 {
1175             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1176                     z2 = d5 + d3;
1177                     z4 = d5 + d1;
1178                     z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
1179                     
1180                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1181                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1182                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1183                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1184                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1185                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1186                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1187                     
1188                     z3 += z5;
1189                     z4 += z5;
1190                     
1191                     tmp0 = z1 + z3;
1192                     tmp1 += z2 + z4;
1193                     tmp2 += z2 + z3;
1194                     tmp3 += z1 + z4;
1195                 }
1196                 else 
1197                 {
1198             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1199                     z2 = d5 + d3;
1200                     z5 = MULTIPLY(z2, FIX(1.175875602));
1201
1202                     tmp1 = MULTIPLY(d5, FIX2(1.662939225));
1203                     tmp2 = MULTIPLY(d3, FIX2(1.111140466));
1204                     z2 = MULTIPLY(z2, - FIX2(1.387039845));
1205                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1206                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1207                     
1208                     tmp0 = z3 + z5;
1209                     tmp1 += z2;
1210                     tmp2 += z2;
1211                     tmp3 = z4 + z5;
1212                 }
1213             } 
1214             else 
1215             {
1216                 if (d1) 
1217                 {
1218             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1219                     z4 = d5 + d1;
1220                     z5 = MULTIPLY(z4, FIX(1.175875602));
1221                     
1222                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1223                     tmp3 = MULTIPLY(d1, FIX2(0.601344887));
1224                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1225                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1226                     z4 = MULTIPLY(z4, FIX2(0.785694958));
1227                     
1228                     tmp0 = z1 + z5;
1229                     tmp1 += z4;
1230                     tmp2 = z2 + z5;
1231                     tmp3 += z4;
1232                 }
1233                 else
1234                 {
1235             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1236                     tmp0 = MULTIPLY(d5, FIX(1.175875602));
1237                     tmp1 = MULTIPLY(d5, FIX2(0.275899380));
1238                     tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
1239                     tmp3 = MULTIPLY(d5, FIX2(0.785694958));
1240                 }
1241             }
1242         }
1243         else
1244         {
1245             if (d3)
1246             {
1247                 if (d1) 
1248                 {
1249             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1250                     z5 = d3 + d1;
1251
1252                     tmp2 = MULTIPLY(d3, - FIX(1.451774981));
1253                     tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
1254                     z1 = MULTIPLY(d1, FIX(1.061594337));
1255                     z2 = MULTIPLY(d3, - FIX(2.172734803));
1256                     z4 = MULTIPLY(z5, FIX(0.785694958));
1257                     z5 = MULTIPLY(z5, FIX(1.175875602));
1258                     
1259                     tmp0 = z1 - z4;
1260                     tmp1 = z2 + z4;
1261                     tmp2 += z5;
1262                     tmp3 += z5;
1263                 }
1264                 else
1265                 {
1266             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1267                     tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
1268                     tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
1269                     tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
1270                     tmp3 = MULTIPLY(d3, FIX(1.175875602));
1271                 }
1272             }
1273             else
1274             {
1275                 if (d1)
1276                 {
1277             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1278                     tmp0 = MULTIPLY(d1, FIX2(0.275899379));
1279                     tmp1 = MULTIPLY(d1, FIX2(0.785694958));
1280                     tmp2 = MULTIPLY(d1, FIX(1.175875602));
1281                     tmp3 = MULTIPLY(d1, FIX2(1.387039845));
1282                 }
1283                 else 
1284                 {
1285             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1286                     tmp0 = tmp1 = tmp2 = tmp3 = 0;
1287                 }
1288             }
1289         }
1290     }
1291
1292     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1293
1294     dataptr[DCTSIZE*0] = (elem_t) DESCALE(tmp10 + tmp3,
1295                        CONST_BITS+PASS1_BITS+3);
1296     dataptr[DCTSIZE*7] = (elem_t) DESCALE(tmp10 - tmp3,
1297                        CONST_BITS+PASS1_BITS+3);
1298     dataptr[DCTSIZE*1] = (elem_t) DESCALE(tmp11 + tmp2,
1299                        CONST_BITS+PASS1_BITS+3);
1300     dataptr[DCTSIZE*6] = (elem_t) DESCALE(tmp11 - tmp2,
1301                        CONST_BITS+PASS1_BITS+3);
1302     dataptr[DCTSIZE*2] = (elem_t) DESCALE(tmp12 + tmp1,
1303                        CONST_BITS+PASS1_BITS+3);
1304     dataptr[DCTSIZE*5] = (elem_t) DESCALE(tmp12 - tmp1,
1305                        CONST_BITS+PASS1_BITS+3);
1306     dataptr[DCTSIZE*3] = (elem_t) DESCALE(tmp13 + tmp0,
1307                        CONST_BITS+PASS1_BITS+3);
1308     dataptr[DCTSIZE*4] = (elem_t) DESCALE(tmp13 - tmp0,
1309                        CONST_BITS+PASS1_BITS+3);
1310     
1311     dataptr++;                  /* advance pointer to next column */
1312     }
1313 }