]> git.sesse.net Git - vlc/blob - plugins/idct/idct.c
eb51703da6883d4a355f484f5d18ed5c25b4f30e
[vlc] / plugins / idct / idct.c
1 /*****************************************************************************
2  * idct.c : IDCT module
3  *****************************************************************************
4  * Copyright (C) 1999, 2000 VideoLAN
5  * $Id: idct.c,v 1.9 2001/05/06 04:32:02 sam Exp $
6  *
7  * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  * 
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 #define MODULE_NAME idct
25 #include "modules_inner.h"
26
27 /*****************************************************************************
28  * Preamble
29  *****************************************************************************/
30 #include "defs.h"
31
32 #include <stdlib.h>
33
34 #include "config.h"
35 #include "common.h"
36 #include "threads.h"
37 #include "mtime.h"
38 #include "tests.h"
39
40 #include "video.h"
41 #include "video_output.h"
42
43 #include "video_decoder.h"
44
45 #include "modules.h"
46
47 #include "vdec_block.h"
48 #include "vdec_idct.h"
49
50 /*****************************************************************************
51  * Local and extern prototypes.
52  *****************************************************************************/
53 static void idct_getfunctions( function_list_t * p_function_list );
54 static int  idct_Probe      ( probedata_t *p_data );
55 static void vdec_NormScan   ( u8 ppi_scan[2][64] );
56
57
58 /*****************************************************************************
59  * Build configuration tree: one ADD_WINDOW entry, one ADD_COMMENT, no options.
60  *****************************************************************************/
61 MODULE_CONFIG_START
62 ADD_WINDOW( "Configuration for IDCT module" )
63     ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
64 MODULE_CONFIG_END
65
66 /*****************************************************************************
67  * InitModule: describe this module to the main application.
68  *****************************************************************************
69  * Sets the capability mask and the psz_name, psz_longname and psz_version
70  * strings, then returns 0. The main application strdup()s these strings
71  * later on, because the module can be unloaded to save memory and the data
72  * must remain accessible even after that.
73  *****************************************************************************/
74 MODULE_INIT
75 {
76     /* What this module is able to do */
77     p_module->i_capabilities = MODULE_CAPABILITY_NULL | MODULE_CAPABILITY_IDCT;
78
79     /* Identification strings */
80     p_module->psz_version  = VERSION;
81     p_module->psz_name     = MODULE_STRING;
82     p_module->psz_longname = "IDCT module";
83     return 0;
84 }
85
86 /*****************************************************************************
87  * ActivateModule: bring the module into a usable state.
88  *****************************************************************************
89  * Allocates the function table, fills its IDCT entries and attaches the
90  * configuration tree. Returns 0 on success, -1 if the allocation failed.
91  * Afterwards i_usage can be set to 0 and raised through NeedModule(); to
92  * unload, wait until i_usage == 0 and then call DeactivateModule().
93  *****************************************************************************/
94 MODULE_ACTIVATE
95 {
96     p_module->p_functions = malloc( sizeof( module_functions_t ) );
97
98     if( p_module->p_functions != NULL )
99     {
100         /* Publish our entry points, then the configuration tree */
101         idct_getfunctions( &p_module->p_functions->idct );
102         p_module->p_config = p_config;
103         return 0;
104     }
105
106     return -1;  /* the function table could not be allocated */
107 }
108
109 /*****************************************************************************
110  * DeactivateModule: make sure the module can be unloaded.
111  *****************************************************************************
112  * Frees the function table allocated by ActivateModule() and clears the
113  * pointer. Call it only when i_usage == 0 and with usage_lock held; once it
114  * has returned 0, i_usage can be set to -1 and the module unloaded.
115  *****************************************************************************/
116 MODULE_DEACTIVATE
117 {
118     free( p_module->p_functions );
119     p_module->p_functions = NULL;  /* never leave a dangling pointer behind */
120     return( 0 );
121 }
122
123 /* Following functions are local */
124
125 /*****************************************************************************
126  * idct_getfunctions: store the probe callback and the IDCT entry points in
127  * p_function_list. Kept static so that we don't pollute the namespace.
128  *****************************************************************************/
129 static void idct_getfunctions( function_list_t * p_function_list )
130 {
131     /* Preference-score callback */
132     p_function_list->pf_probe = idct_Probe;
133
134     p_function_list->functions.idct.pf_idct_init    = _M( vdec_InitIDCT );
135     p_function_list->functions.idct.pf_sparse_idct  = _M( vdec_SparseIDCT );
136     p_function_list->functions.idct.pf_idct         = _M( vdec_IDCT );
137     p_function_list->functions.idct.pf_norm_scan    = vdec_NormScan;
138     p_function_list->functions.idct.pf_vdec_init    = _M( vdec_Init );
139     p_function_list->functions.idct.pf_decode_mb_c  = _M( vdec_DecodeMacroblockC );
140     p_function_list->functions.idct.pf_decode_mb_bw = _M( vdec_DecodeMacroblockBW );
141 }
142
143 /*****************************************************************************
144  * idct_Probe: return 999 if TestMethod() accepts "idct", 50 otherwise
145  *****************************************************************************/
146 static int idct_Probe( probedata_t *p_data )
147 {
148     /* 999 when TestMethod() reports a match for "idct" on IDCT_METHOD_VAR;
149      * otherwise a modest default, because this plugin always works.
150      * p_data is not consulted. */
151     int i_score;
152
153     i_score = TestMethod( IDCT_METHOD_VAR, "idct" ) ? 999 : 50;
154     return i_score;
155 }
156
157 /*****************************************************************************
158  * vdec_NormScan : empty stub, unused in this IDCT (ppi_scan is not touched)
159  *****************************************************************************/
160 static void vdec_NormScan( u8 ppi_scan[2][64] )
161 {
162 }
163
164 /*****************************************************************************
165  * vdec_IDCT : IDCT function for normal matrices
166  *****************************************************************************/
167 void _M( vdec_IDCT )( vdec_thread_t * p_vdec, dctelem_t * p_block,
168                 int i_idontcare )
169 {
170     s32 tmp0, tmp1, tmp2, tmp3;
171     s32 tmp10, tmp11, tmp12, tmp13;
172     s32 z1, z2, z3, z4, z5;
173     s32 d0, d1, d2, d3, d4, d5, d6, d7;
174     dctelem_t * dataptr;
175     int rowctr;
176
177     SHIFT_TEMPS
178
179     /* Pass 1: process rows. */
180     /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
181     /* furthermore, we scale the results by 2**PASS1_BITS. */
182
183     dataptr = p_block;
184
185     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
186     {
187         /* Due to quantization, we will usually find that many of the input
188          * coefficients are zero, especially the AC terms.  We can exploit this
189          * by short-circuiting the IDCT calculation for any row in which all
190          * the AC terms are zero.  In that case each output is equal to the
191          * DC coefficient (with scale factor as needed).
192          * With typical images and quantization tables, half or more of the
193          * row DCT calculations can be simplified this way.
194          */
195
196         register int * idataptr = (int*)dataptr;
197         d0 = dataptr[0];
198         d1 = dataptr[1];
199         if ( (d1 == 0) && ((idataptr[1] | idataptr[2] | idataptr[3]) == 0) )
200         {
201       /* AC terms all zero */
202             if (d0)
203             {
204       /* Compute a 32 bit value to assign. */
205                 dctelem_t dcval = (dctelem_t) (d0 << PASS1_BITS);
206                 register int v = (dcval & 0xffff) | (dcval << 16);
207
208                 idataptr[0] = v;
209                 idataptr[1] = v;
210                 idataptr[2] = v;
211                 idataptr[3] = v;
212             }
213
214             dataptr += DCTSIZE; /* advance pointer to next row */
215             continue;
216         }
217         d2 = dataptr[2];
218         d3 = dataptr[3];
219         d4 = dataptr[4];
220         d5 = dataptr[5];
221         d6 = dataptr[6];
222         d7 = dataptr[7];
223
224     /* Even part: reverse the even part of the forward DCT. */
225     /* The rotator is sqrt(2)*c(-6). */
226         if (d6)
227         {
228             if (d4)
229             {
230                 if (d2)
231                 {
232                     if (d0)
233                     {
234             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
235                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
236                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
237                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
238
239                         tmp0 = (d0 + d4) << CONST_BITS;
240                         tmp1 = (d0 - d4) << CONST_BITS;
241
242                         tmp10 = tmp0 + tmp3;
243                         tmp13 = tmp0 - tmp3;
244                         tmp11 = tmp1 + tmp2;
245                         tmp12 = tmp1 - tmp2;
246                     }
247                     else
248                     {
249                     /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
250                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
251                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
252                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
253
254                         tmp0 = d4 << CONST_BITS;
255
256                         tmp10 = tmp0 + tmp3;
257                         tmp13 = tmp0 - tmp3;
258                         tmp11 = tmp2 - tmp0;
259                         tmp12 = -(tmp0 + tmp2);
260                         }
261                 }
262                 else
263                 {
264                     if (d0)
265                     {
266             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
267                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
268                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
269
270                         tmp0 = (d0 + d4) << CONST_BITS;
271                         tmp1 = (d0 - d4) << CONST_BITS;
272
273                         tmp10 = tmp0 + tmp3;
274                         tmp13 = tmp0 - tmp3;
275                         tmp11 = tmp1 + tmp2;
276                         tmp12 = tmp1 - tmp2;
277                         }
278                     else
279                     {
280                     /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
281                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
282                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
283
284                         tmp0 = d4 << CONST_BITS;
285
286                         tmp10 = tmp0 + tmp3;
287                         tmp13 = tmp0 - tmp3;
288                         tmp11 = tmp2 - tmp0;
289                         tmp12 = -(tmp0 + tmp2);
290                         }
291                 }
292             }
293             else
294             {
295                 if (d2)
296                 {
297                     if (d0)
298                     {
299             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
300                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
301                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
302                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
303
304                         tmp0 = d0 << CONST_BITS;
305
306                         tmp10 = tmp0 + tmp3;
307                         tmp13 = tmp0 - tmp3;
308                         tmp11 = tmp0 + tmp2;
309                         tmp12 = tmp0 - tmp2;
310                     }
311                     else
312                     {
313                     /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
314                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
315                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
316                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
317
318                         tmp10 = tmp3;
319                         tmp13 = -tmp3;
320                         tmp11 = tmp2;
321                         tmp12 = -tmp2;
322                             }
323                 }
324                 else
325                 {
326                     if (d0)
327                     {
328             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
329                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
330                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
331
332                         tmp0 = d0 << CONST_BITS;
333
334                         tmp10 = tmp0 + tmp3;
335                         tmp13 = tmp0 - tmp3;
336                         tmp11 = tmp0 + tmp2;
337                         tmp12 = tmp0 - tmp2;
338                     }
339                     else
340                     {
341             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
342                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
343                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
344
345                         tmp10 = tmp3;
346                         tmp13 = -tmp3;
347                         tmp11 = tmp2;
348                         tmp12 = -tmp2;
349                     }
350                 }
351             }
352         }
353         else
354         {
355             if (d4)
356             {
357                 if (d2)
358                 {
359                     if (d0)
360                     {
361                     /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
362                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
363                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
364
365                         tmp0 = (d0 + d4) << CONST_BITS;
366                         tmp1 = (d0 - d4) << CONST_BITS;
367
368                         tmp10 = tmp0 + tmp3;
369                         tmp13 = tmp0 - tmp3;
370                         tmp11 = tmp1 + tmp2;
371                         tmp12 = tmp1 - tmp2;
372                     }
373                     else
374                     {
375             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
376                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
377                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
378
379                         tmp0 = d4 << CONST_BITS;
380
381                         tmp10 = tmp0 + tmp3;
382                         tmp13 = tmp0 - tmp3;
383                         tmp11 = tmp2 - tmp0;
384                         tmp12 = -(tmp0 + tmp2);
385                     }
386                 }
387                 else
388                 {
389                     if (d0)
390                     {
391             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
392                         tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
393                         tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
394                     }
395                     else
396                     {
397             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
398                         tmp10 = tmp13 = d4 << CONST_BITS;
399                         tmp11 = tmp12 = -tmp10;
400                     }
401                 }
402             }
403             else
404             {
405                 if (d2)
406                 {
407                     if (d0)
408                     {
409             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
410                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
411                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
412
413                         tmp0 = d0 << CONST_BITS;
414
415                         tmp10 = tmp0 + tmp3;
416                         tmp13 = tmp0 - tmp3;
417                         tmp11 = tmp0 + tmp2;
418                         tmp12 = tmp0 - tmp2;
419                     }
420                     else
421                     {
422             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
423                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
424                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
425
426                         tmp10 = tmp3;
427                         tmp13 = -tmp3;
428                         tmp11 = tmp2;
429                         tmp12 = -tmp2;
430                     }
431                 }
432                 else
433                 {
434                     if (d0)
435                     {
436             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
437                         tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
438                     }
439                     else
440                     {
441             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
442                         tmp10 = tmp13 = tmp11 = tmp12 = 0;
443                     }
444                 }
445             }
446         }
447
448
449     /* Odd part per figure 8; the matrix is unitary and hence its
450      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
451      */
452
453         if (d7)
454             {
455             if (d5)
456             {
457                 if (d3)
458                 {
459                     if (d1)
460                     {
461             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
462                         z1 = d7 + d1;
463                         z2 = d5 + d3;
464                         z3 = d7 + d3;
465                         z4 = d5 + d1;
466                         z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
467
468                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
469                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
470                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
471                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
472                         z1 = MULTIPLY(z1, - FIX(0.899976223));
473                         z2 = MULTIPLY(z2, - FIX(2.562915447));
474                         z3 = MULTIPLY(z3, - FIX(1.961570560));
475                         z4 = MULTIPLY(z4, - FIX(0.390180644));
476
477                         z3 += z5;
478                         z4 += z5;
479
480                         tmp0 += z1 + z3;
481                         tmp1 += z2 + z4;
482                         tmp2 += z2 + z3;
483                         tmp3 += z1 + z4;
484                     }
485                     else
486                     {
487             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
488                         z2 = d5 + d3;
489                         z3 = d7 + d3;
490                         z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
491
492                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
493                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
494                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
495                         z1 = MULTIPLY(d7, - FIX(0.899976223));
496                         z2 = MULTIPLY(z2, - FIX(2.562915447));
497                         z3 = MULTIPLY(z3, - FIX(1.961570560));
498                         z4 = MULTIPLY(d5, - FIX(0.390180644));
499
500                         z3 += z5;
501                         z4 += z5;
502
503                         tmp0 += z1 + z3;
504                         tmp1 += z2 + z4;
505                         tmp2 += z2 + z3;
506                         tmp3 = z1 + z4;
507                         }
508                     }
509                 else
510                 {
511                     if (d1)
512                     {
513             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
514                         z1 = d7 + d1;
515                         z4 = d5 + d1;
516                         z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
517
518                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
519                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
520                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
521                         z1 = MULTIPLY(z1, - FIX(0.899976223));
522                         z2 = MULTIPLY(d5, - FIX(2.562915447));
523                         z3 = MULTIPLY(d7, - FIX(1.961570560));
524                         z4 = MULTIPLY(z4, - FIX(0.390180644));
525
526                         z3 += z5;
527                         z4 += z5;
528
529                         tmp0 += z1 + z3;
530                         tmp1 += z2 + z4;
531                         tmp2 = z2 + z3;
532                         tmp3 += z1 + z4;
533                     }
534                     else
535                     {
536             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
537                         z5 = MULTIPLY(d7 + d5, FIX(1.175875602));
538
539                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
540                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
541                         z1 = MULTIPLY(d7, - FIX(0.899976223));
542                         z3 = MULTIPLY(d7, - FIX(1.961570560));
543                         z2 = MULTIPLY(d5, - FIX(2.562915447));
544                         z4 = MULTIPLY(d5, - FIX(0.390180644));
545
546                         z3 += z5;
547                         z4 += z5;
548
549                         tmp0 += z3;
550                         tmp1 += z4;
551                         tmp2 = z2 + z3;
552                         tmp3 = z1 + z4;
553                     }
554                 }
555             }
556             else
557             {
558                 if (d3)
559                 {
560                     if (d1)
561                     {
562             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
563                         z1 = d7 + d1;
564                         z3 = d7 + d3;
565                         z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
566
567                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
568                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
569                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
570                         z1 = MULTIPLY(z1, - FIX(0.899976223));
571                         z2 = MULTIPLY(d3, - FIX(2.562915447));
572                         z3 = MULTIPLY(z3, - FIX(1.961570560));
573                         z4 = MULTIPLY(d1, - FIX(0.390180644));
574
575                         z3 += z5;
576                         z4 += z5;
577
578                         tmp0 += z1 + z3;
579                         tmp1 = z2 + z4;
580                         tmp2 += z2 + z3;
581                         tmp3 += z1 + z4;
582                     }
583                     else
584                     {
585             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
586                         z3 = d7 + d3;
587                         z5 = MULTIPLY(z3, FIX(1.175875602));
588
589                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
590                         tmp2 = MULTIPLY(d3, FIX(0.509795579));
591                         z1 = MULTIPLY(d7, - FIX(0.899976223));
592                         z2 = MULTIPLY(d3, - FIX(2.562915447));
593                         z3 = MULTIPLY(z3, - FIX2(0.785694958));
594
595                         tmp0 += z3;
596                         tmp1 = z2 + z5;
597                         tmp2 += z3;
598                         tmp3 = z1 + z5;
599                     }
600                 }
601                 else
602                 {
603                     if (d1)
604                     {
605             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
606                         z1 = d7 + d1;
607                         z5 = MULTIPLY(z1, FIX(1.175875602));
608
609                         tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
610                         tmp3 = MULTIPLY(d1, FIX2(1.111140466));
611                         z1 = MULTIPLY(z1, FIX2(0.275899379));
612                         z3 = MULTIPLY(d7, - FIX(1.961570560));
613                         z4 = MULTIPLY(d1, - FIX(0.390180644));
614
615                         tmp0 += z1;
616                         tmp1 = z4 + z5;
617                         tmp2 = z3 + z5;
618                         tmp3 += z1;
619                     }
620                 else
621                     {
622             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
623                         tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
624                         tmp1 = MULTIPLY(d7, FIX(1.175875602));
625                         tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
626                         tmp3 = MULTIPLY(d7, FIX2(0.275899379));
627                     }
628                 }
629             }
630         }
631         else
632         {
633             if (d5)
634             {
635                 if (d3)
636                 {
637                     if (d1)
638                     {
639             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
640                         z2 = d5 + d3;
641                         z4 = d5 + d1;
642                         z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
643
644                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
645                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
646                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
647                         z1 = MULTIPLY(d1, - FIX(0.899976223));
648                         z2 = MULTIPLY(z2, - FIX(2.562915447));
649                         z3 = MULTIPLY(d3, - FIX(1.961570560));
650                         z4 = MULTIPLY(z4, - FIX(0.390180644));
651
652                         z3 += z5;
653                         z4 += z5;
654
655                         tmp0 = z1 + z3;
656                         tmp1 += z2 + z4;
657                         tmp2 += z2 + z3;
658                         tmp3 += z1 + z4;
659                     }
660                     else
661                     {
662             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
663                         z2 = d5 + d3;
664                         z5 = MULTIPLY(z2, FIX(1.175875602));
665
666                         tmp1 = MULTIPLY(d5, FIX2(1.662939225));
667                         tmp2 = MULTIPLY(d3, FIX2(1.111140466));
668                         z2 = MULTIPLY(z2, - FIX2(1.387039845));
669                         z3 = MULTIPLY(d3, - FIX(1.961570560));
670                         z4 = MULTIPLY(d5, - FIX(0.390180644));
671
672                         tmp0 = z3 + z5;
673                         tmp1 += z2;
674                         tmp2 += z2;
675                         tmp3 = z4 + z5;
676                     }
677                 }
678                 else
679                 {
680                     if (d1)
681                     {
682             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
683                         z4 = d5 + d1;
684                         z5 = MULTIPLY(z4, FIX(1.175875602));
685
686                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
687                         tmp3 = MULTIPLY(d1, FIX2(0.601344887));
688                         z1 = MULTIPLY(d1, - FIX(0.899976223));
689                         z2 = MULTIPLY(d5, - FIX(2.562915447));
690                         z4 = MULTIPLY(z4, FIX2(0.785694958));
691
692                         tmp0 = z1 + z5;
693                         tmp1 += z4;
694                         tmp2 = z2 + z5;
695                         tmp3 += z4;
696                     }
697                     else
698                     {
699             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
700                         tmp0 = MULTIPLY(d5, FIX(1.175875602));
701                         tmp1 = MULTIPLY(d5, FIX2(0.275899380));
702                         tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
703                         tmp3 = MULTIPLY(d5, FIX2(0.785694958));
704                     }
705                 }
706             }
707             else
708             {
709                 if (d3)
710                 {
711                     if (d1)
712                     {
713             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
714                         z5 = d3 + d1;
715
716                         tmp2 = MULTIPLY(d3, - FIX(1.451774981));
717                         tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
718                         z1 = MULTIPLY(d1, FIX(1.061594337));
719                         z2 = MULTIPLY(d3, - FIX(2.172734803));
720                         z4 = MULTIPLY(z5, FIX(0.785694958));
721                         z5 = MULTIPLY(z5, FIX(1.175875602));
722
723                         tmp0 = z1 - z4;
724                         tmp1 = z2 + z4;
725                         tmp2 += z5;
726                         tmp3 += z5;
727                     }
728                     else
729                     {
730             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
731                         tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
732                         tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
733                         tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
734                         tmp3 = MULTIPLY(d3, FIX(1.175875602));
735                     }
736                 }
737                 else
738                 {
739                     if (d1)
740                     {
741             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
742                         tmp0 = MULTIPLY(d1, FIX2(0.275899379));
743                         tmp1 = MULTIPLY(d1, FIX2(0.785694958));
744                         tmp2 = MULTIPLY(d1, FIX(1.175875602));
745                         tmp3 = MULTIPLY(d1, FIX2(1.387039845));
746                     }
747                     else
748                     {
749             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
750                         tmp0 = tmp1 = tmp2 = tmp3 = 0;
751                     }
752                 }
753             }
754         }
755
756     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
757
758         dataptr[0] = (dctelem_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
759         dataptr[7] = (dctelem_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
760         dataptr[1] = (dctelem_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
761         dataptr[6] = (dctelem_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
762         dataptr[2] = (dctelem_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
763         dataptr[5] = (dctelem_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
764         dataptr[3] = (dctelem_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
765         dataptr[4] = (dctelem_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
766
767         dataptr += DCTSIZE;              /* advance pointer to next row */
768     }
769
770   /* Pass 2: process columns. */
771   /* Note that we must descale the results by a factor of 8 == 2**3, */
772   /* and also undo the PASS1_BITS scaling. */
773
774     dataptr = p_block;
775     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
776     {
777     /* Columns of zeroes can be exploited in the same way as we did with rows.
778      * However, the row calculation has created many nonzero AC terms, so the
779      * simplification applies less often (typically 5% to 10% of the time).
780      * On machines with very fast multiplication, it's possible that the
781      * test takes more time than it's worth.  In that case this section
782      * may be commented out.
783      */
784
785         d0 = dataptr[DCTSIZE*0];
786         d1 = dataptr[DCTSIZE*1];
787         d2 = dataptr[DCTSIZE*2];
788         d3 = dataptr[DCTSIZE*3];
789         d4 = dataptr[DCTSIZE*4];
790         d5 = dataptr[DCTSIZE*5];
791         d6 = dataptr[DCTSIZE*6];
792         d7 = dataptr[DCTSIZE*7];
793
794     /* Even part: reverse the even part of the forward DCT. */
795     /* The rotator is sqrt(2)*c(-6). */
796         if (d6)
797         {
798             if (d4)
799             {
800                 if (d2)
801                 {
802                     if (d0)
803                     {
804             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
805                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
806                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
807                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
808
809                         tmp0 = (d0 + d4) << CONST_BITS;
810                         tmp1 = (d0 - d4) << CONST_BITS;
811
812                         tmp10 = tmp0 + tmp3;
813                         tmp13 = tmp0 - tmp3;
814                         tmp11 = tmp1 + tmp2;
815                         tmp12 = tmp1 - tmp2;
816                     }
817                     else
818                     {
819             /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
820                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
821                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
822                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
823
824                         tmp0 = d4 << CONST_BITS;
825
826                         tmp10 = tmp0 + tmp3;
827                         tmp13 = tmp0 - tmp3;
828                         tmp11 = tmp2 - tmp0;
829                         tmp12 = -(tmp0 + tmp2);
830                     }
831                 }
832                 else
833                 {
834                     if (d0)
835                     {
836             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
837                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
838                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
839
840                         tmp0 = (d0 + d4) << CONST_BITS;
841                         tmp1 = (d0 - d4) << CONST_BITS;
842
843                         tmp10 = tmp0 + tmp3;
844                         tmp13 = tmp0 - tmp3;
845                         tmp11 = tmp1 + tmp2;
846                         tmp12 = tmp1 - tmp2;
847                     }
848                     else
849                     {
850             /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
851                         tmp2 = MULTIPLY(d6, -FIX2(1.306562965));
852                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
853
854                         tmp0 = d4 << CONST_BITS;
855
856                         tmp10 = tmp0 + tmp3;
857                         tmp13 = tmp0 - tmp3;
858                         tmp11 = tmp2 - tmp0;
859                         tmp12 = -(tmp0 + tmp2);
860                     }
861                 }
862             }
863             else
864             {
865                 if (d2)
866                 {
867                     if (d0)
868                     {
869             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
870                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
871                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
872                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
873
874                         tmp0 = d0 << CONST_BITS;
875
876                         tmp10 = tmp0 + tmp3;
877                         tmp13 = tmp0 - tmp3;
878                         tmp11 = tmp0 + tmp2;
879                         tmp12 = tmp0 - tmp2;
880                     }
881                     else
882                     {
883             /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
884                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
885                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
886                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
887
888                         tmp10 = tmp3;
889                         tmp13 = -tmp3;
890                         tmp11 = tmp2;
891                         tmp12 = -tmp2;
892                     }
893                 }
894                 else
895                 {
896                     if (d0)
897                     {
898             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
899                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
900                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
901
902                     tmp0 = d0 << CONST_BITS;
903
904                     tmp10 = tmp0 + tmp3;
905                     tmp13 = tmp0 - tmp3;
906                     tmp11 = tmp0 + tmp2;
907                     tmp12 = tmp0 - tmp2;
908                 }
909                 else
910                 {
911             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
912                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
913                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
914                     tmp10 = tmp3;
915                     tmp13 = -tmp3;
916                     tmp11 = tmp2;
917                     tmp12 = -tmp2;
918                 }
919             }
920         }
921     }
922     else
923     {
924         if (d4)
925         {
926             if (d2)
927             {
928                 if (d0)
929                 {
930             /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
931                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
932                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
933
934                     tmp0 = (d0 + d4) << CONST_BITS;
935                     tmp1 = (d0 - d4) << CONST_BITS;
936
937                     tmp10 = tmp0 + tmp3;
938                     tmp13 = tmp0 - tmp3;
939                     tmp11 = tmp1 + tmp2;
940                     tmp12 = tmp1 - tmp2;
941                 }
942                 else
943                 {
944             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
945                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
946                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
947
948                     tmp0 = d4 << CONST_BITS;
949
950                     tmp10 = tmp0 + tmp3;
951                     tmp13 = tmp0 - tmp3;
952                     tmp11 = tmp2 - tmp0;
953                     tmp12 = -(tmp0 + tmp2);
954                 }
955             }
956             else
957             {
958                 if (d0)
959                 {
960             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
961                     tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
962                     tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
963                 }
964                 else
965                 {
966             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
967                     tmp10 = tmp13 = d4 << CONST_BITS;
968                     tmp11 = tmp12 = -tmp10;
969                 }
970             }
971         }
972         else
973         {
974         if (d2)
975         {
976             if (d0)
977             {
978             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
979                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
980                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
981
982                     tmp0 = d0 << CONST_BITS;
983
984                     tmp10 = tmp0 + tmp3;
985                     tmp13 = tmp0 - tmp3;
986                     tmp11 = tmp0 + tmp2;
987                     tmp12 = tmp0 - tmp2;
988             }
989             else
990             {
991             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
992                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
993                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
994
995                     tmp10 = tmp3;
996                     tmp13 = -tmp3;
997                     tmp11 = tmp2;
998                     tmp12 = -tmp2;
999             }
1000         }
1001         else
1002         {
1003             if (d0)
1004                 {
1005             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
1006                     tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
1007                 }
1008                 else
1009                 {
1010             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
1011                     tmp10 = tmp13 = tmp11 = tmp12 = 0;
1012                 }
1013             }
1014         }
1015     }
1016
1017     /* Odd part per figure 8; the matrix is unitary and hence its
1018      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
1019      */
1020     if (d7)
1021     {
1022         if (d5)
1023         {
1024             if (d3)
1025             {
1026                 if (d1)
1027                 {
1028             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
1029                     z1 = d7 + d1;
1030                     z2 = d5 + d3;
1031                     z3 = d7 + d3;
1032                     z4 = d5 + d1;
1033                     z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
1034
1035                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1036                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1037                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1038                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1039                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1040                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1041                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1042                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1043
1044                     z3 += z5;
1045                     z4 += z5;
1046
1047                     tmp0 += z1 + z3;
1048                     tmp1 += z2 + z4;
1049                     tmp2 += z2 + z3;
1050                     tmp3 += z1 + z4;
1051                 }
1052                 else
1053                 {
1054             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
1055                     z2 = d5 + d3;
1056                     z3 = d7 + d3;
1057                     z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
1058
1059                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1060                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1061                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1062                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1063                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1064                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1065                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1066
1067                     z3 += z5;
1068                     z4 += z5;
1069
1070                     tmp0 += z1 + z3;
1071                     tmp1 += z2 + z4;
1072                     tmp2 += z2 + z3;
1073                     tmp3 = z1 + z4;
1074                 }
1075             }
1076             else
1077             {
1078                 if (d1)
1079                 {
1080             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
1081                     z1 = d7 + d1;
1082                     z4 = d5 + d1;
1083                     z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
1084
1085                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1086                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1087                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1088                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1089                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1090                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1091                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1092
1093                     z3 += z5;
1094                     z4 += z5;
1095
1096                     tmp0 += z1 + z3;
1097                     tmp1 += z2 + z4;
1098                     tmp2 = z2 + z3;
1099                     tmp3 += z1 + z4;
1100                 }
1101                 else
1102                 {
1103             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
1104                     z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
1105
1106                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1107                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1108                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1109                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1110                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1111                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1112
1113                     z3 += z5;
1114                     z4 += z5;
1115
1116                     tmp0 += z3;
1117                     tmp1 += z4;
1118                     tmp2 = z2 + z3;
1119                     tmp3 = z1 + z4;
1120                 }
1121             }
1122         }
1123         else
1124         {
1125             if (d3)
1126             {
1127                 if (d1)
1128                 {
1129             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
1130                     z1 = d7 + d1;
1131                     z3 = d7 + d3;
1132                     z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
1133
1134                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1135                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1136                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1137                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1138                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1139                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1140                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1141
1142                     z3 += z5;
1143                     z4 += z5;
1144
1145                     tmp0 += z1 + z3;
1146                     tmp1 = z2 + z4;
1147                     tmp2 += z2 + z3;
1148                     tmp3 += z1 + z4;
1149                 }
1150                 else
1151                 {
1152             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1153                     z3 = d7 + d3;
1154                     z5 = MULTIPLY(z3, FIX(1.175875602));
1155
1156                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1157                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1158                     tmp2 = MULTIPLY(d3, FIX(0.509795579));
1159                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1160                     z3 = MULTIPLY(z3, - FIX2(0.785694958));
1161
1162                     tmp0 += z3;
1163                     tmp1 = z2 + z5;
1164                     tmp2 += z3;
1165                     tmp3 = z1 + z5;
1166                 }
1167             }
1168             else
1169             {
1170                 if (d1)
1171                 {
1172             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1173                     z1 = d7 + d1;
1174                     z5 = MULTIPLY(z1, FIX(1.175875602));
1175
1176                     tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
1177                     tmp3 = MULTIPLY(d1, FIX2(1.111140466));
1178                     z1 = MULTIPLY(z1, FIX2(0.275899379));
1179                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1180                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1181
1182                     tmp0 += z1;
1183                     tmp1 = z4 + z5;
1184                     tmp2 = z3 + z5;
1185                     tmp3 += z1;
1186                 }
1187                 else
1188                 {
1189             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1190                     tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
1191                     tmp1 = MULTIPLY(d7, FIX(1.175875602));
1192                     tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
1193                     tmp3 = MULTIPLY(d7, FIX2(0.275899379));
1194                 }
1195             }
1196         }
1197     }
1198     else
1199     {
1200         if (d5)
1201         {
1202             if (d3)
1203             {
1204                 if (d1)
1205                 {
1206             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1207                     z2 = d5 + d3;
1208                     z4 = d5 + d1;
1209                     z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
1210
1211                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1212                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1213                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1214                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1215                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1216                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1217                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1218
1219                     z3 += z5;
1220                     z4 += z5;
1221
1222                     tmp0 = z1 + z3;
1223                     tmp1 += z2 + z4;
1224                     tmp2 += z2 + z3;
1225                     tmp3 += z1 + z4;
1226                 }
1227                 else
1228                 {
1229             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1230                     z2 = d5 + d3;
1231                     z5 = MULTIPLY(z2, FIX(1.175875602));
1232
1233                     tmp1 = MULTIPLY(d5, FIX2(1.662939225));
1234                     tmp2 = MULTIPLY(d3, FIX2(1.111140466));
1235                     z2 = MULTIPLY(z2, - FIX2(1.387039845));
1236                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1237                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1238
1239                     tmp0 = z3 + z5;
1240                     tmp1 += z2;
1241                     tmp2 += z2;
1242                     tmp3 = z4 + z5;
1243                 }
1244             }
1245             else
1246             {
1247                 if (d1)
1248                 {
1249             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1250                     z4 = d5 + d1;
1251                     z5 = MULTIPLY(z4, FIX(1.175875602));
1252
1253                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1254                     tmp3 = MULTIPLY(d1, FIX2(0.601344887));
1255                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1256                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1257                     z4 = MULTIPLY(z4, FIX2(0.785694958));
1258
1259                     tmp0 = z1 + z5;
1260                     tmp1 += z4;
1261                     tmp2 = z2 + z5;
1262                     tmp3 += z4;
1263                 }
1264                 else
1265                 {
1266             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1267                     tmp0 = MULTIPLY(d5, FIX(1.175875602));
1268                     tmp1 = MULTIPLY(d5, FIX2(0.275899380));
1269                     tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
1270                     tmp3 = MULTIPLY(d5, FIX2(0.785694958));
1271                 }
1272             }
1273         }
1274         else
1275         {
1276             if (d3)
1277             {
1278                 if (d1)
1279                 {
1280             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1281                     z5 = d3 + d1;
1282
1283                     tmp2 = MULTIPLY(d3, - FIX(1.451774981));
1284                     tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
1285                     z1 = MULTIPLY(d1, FIX(1.061594337));
1286                     z2 = MULTIPLY(d3, - FIX(2.172734803));
1287                     z4 = MULTIPLY(z5, FIX(0.785694958));
1288                     z5 = MULTIPLY(z5, FIX(1.175875602));
1289
1290                     tmp0 = z1 - z4;
1291                     tmp1 = z2 + z4;
1292                     tmp2 += z5;
1293                     tmp3 += z5;
1294                 }
1295                 else
1296                 {
1297             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1298                     tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
1299                     tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
1300                     tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
1301                     tmp3 = MULTIPLY(d3, FIX(1.175875602));
1302                 }
1303             }
1304             else
1305             {
1306                 if (d1)
1307                 {
1308             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1309                     tmp0 = MULTIPLY(d1, FIX2(0.275899379));
1310                     tmp1 = MULTIPLY(d1, FIX2(0.785694958));
1311                     tmp2 = MULTIPLY(d1, FIX(1.175875602));
1312                     tmp3 = MULTIPLY(d1, FIX2(1.387039845));
1313                 }
1314                 else
1315                 {
1316             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1317                     tmp0 = tmp1 = tmp2 = tmp3 = 0;
1318                 }
1319             }
1320         }
1321     }
1322
1323     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1324
1325     dataptr[DCTSIZE*0] = (dctelem_t) DESCALE(tmp10 + tmp3,
1326                        CONST_BITS+PASS1_BITS+3);
1327     dataptr[DCTSIZE*7] = (dctelem_t) DESCALE(tmp10 - tmp3,
1328                        CONST_BITS+PASS1_BITS+3);
1329     dataptr[DCTSIZE*1] = (dctelem_t) DESCALE(tmp11 + tmp2,
1330                        CONST_BITS+PASS1_BITS+3);
1331     dataptr[DCTSIZE*6] = (dctelem_t) DESCALE(tmp11 - tmp2,
1332                        CONST_BITS+PASS1_BITS+3);
1333     dataptr[DCTSIZE*2] = (dctelem_t) DESCALE(tmp12 + tmp1,
1334                        CONST_BITS+PASS1_BITS+3);
1335     dataptr[DCTSIZE*5] = (dctelem_t) DESCALE(tmp12 - tmp1,
1336                        CONST_BITS+PASS1_BITS+3);
1337     dataptr[DCTSIZE*3] = (dctelem_t) DESCALE(tmp13 + tmp0,
1338                        CONST_BITS+PASS1_BITS+3);
1339     dataptr[DCTSIZE*4] = (dctelem_t) DESCALE(tmp13 - tmp0,
1340                        CONST_BITS+PASS1_BITS+3);
1341
1342     dataptr++;             /* advance pointer to next column */
1343     }
1344 }
1345