]> git.sesse.net Git - vlc/blob - plugins/idct/idct.c
* Ported Glide and MGA plugins to the new module API. MGA never worked,
[vlc] / plugins / idct / idct.c
1 /*****************************************************************************
2  * idct.c : IDCT module
3  *****************************************************************************
4  * Copyright (C) 1999, 2000 VideoLAN
5  * $Id: idct.c,v 1.7 2001/02/20 07:49:13 sam Exp $
6  *
7  * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  * 
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 #define MODULE_NAME idct
25 #include "modules_inner.h"
26
27 /*****************************************************************************
28  * Preamble
29  *****************************************************************************/
30 #include "defs.h"
31
32 #include <stdlib.h>
33
34 #include "config.h"
35 #include "common.h"
36 #include "threads.h"
37 #include "mtime.h"
38 #include "tests.h"
39
40 #include "video.h"
41 #include "video_output.h"
42
43 #include "video_decoder.h"
44
45 #include "modules.h"
46
47 #include "idct.h"
48
49 /*****************************************************************************
50  * Local and extern prototypes.
51  *****************************************************************************/
52 static void idct_getfunctions( function_list_t * p_function_list );
53 static int  idct_Probe      ( probedata_t *p_data );
54 static void vdec_NormScan   ( u8 ppi_scan[2][64] );
55
56
57 /*****************************************************************************
58  * Build configuration tree.
59  *****************************************************************************/
60 MODULE_CONFIG_START
61 ADD_WINDOW( "Configuration for IDCT module" )
62     ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
63 MODULE_CONFIG_END
64
65 /*****************************************************************************
66  * InitModule: get the module structure and configuration.
67  *****************************************************************************
68  * We have to fill psz_name, psz_longname and psz_version. These variables
69  * will be strdup()ed later by the main application because the module can
70  * be unloaded later to save memory, and we want to be able to access this
71  * data even after the module has been unloaded.
72  *****************************************************************************/
73 MODULE_INIT
74 {
75     p_module->psz_name = MODULE_STRING;
76     p_module->psz_longname = "IDCT module";
77     p_module->psz_version = VERSION;
78
79     p_module->i_capabilities = MODULE_CAPABILITY_NULL
80                                 | MODULE_CAPABILITY_IDCT;
81
82     return( 0 );
83 }
84
85 /*****************************************************************************
86  * ActivateModule: set the module to a usable state.
87  *****************************************************************************
88  * This function fills the capability functions and the configuration
89  * structure. Once ActivateModule() has been called, the i_usage can
90  * be set to 0 and calls to NeedModule() be made to increment it. To unload
91  * the module, one has to wait until i_usage == 0 and call DeactivateModule().
92  *****************************************************************************/
93 MODULE_ACTIVATE
94 {
95     p_module->p_functions = malloc( sizeof( module_functions_t ) );
96     if( p_module->p_functions == NULL )
97     {
98         return( -1 );
99     }
100
101     idct_getfunctions( &p_module->p_functions->idct );
102
103     p_module->p_config = p_config;
104
105     return( 0 );
106 }
107
108 /*****************************************************************************
109  * DeactivateModule: make sure the module can be unloaded.
110  *****************************************************************************
111  * This function must only be called when i_usage == 0. If it successfully
112  * returns, i_usage can be set to -1 and the module unloaded. Be careful to
113  * lock usage_lock during the whole process.
114  *****************************************************************************/
115 MODULE_DEACTIVATE
116 {
117     free( p_module->p_functions );
118
119     return( 0 );
120 }
121
122 /* Following functions are local */
123
124 /*****************************************************************************
125  * Functions exported as capabilities. They are declared as static so that
126  * we don't pollute the namespace too much.
127  *****************************************************************************/
128 static void idct_getfunctions( function_list_t * p_function_list )
129 {
130     p_function_list->pf_probe = idct_Probe;
131     p_function_list->functions.idct.pf_init = vdec_InitIDCT;
132     p_function_list->functions.idct.pf_sparse_idct = vdec_SparseIDCT;
133     p_function_list->functions.idct.pf_idct = vdec_IDCT;
134     p_function_list->functions.idct.pf_norm_scan = vdec_NormScan;
135 }
136
137 /*****************************************************************************
138  * idct_Probe: returns a preference score
139  *****************************************************************************/
140 static int idct_Probe( probedata_t *p_data )
141 {
142     if( TestMethod( IDCT_METHOD_VAR, "idct" ) )
143     {
144         return( 999 );
145     }
146
147     /* This plugin always works */
148     return( 50 );
149 }
150
151 /*****************************************************************************
152  * vdec_NormScan : Unused in this IDCT
153  *****************************************************************************/
154 static void vdec_NormScan( u8 ppi_scan[2][64] )
155 {
156 }
157
158 /*****************************************************************************
159  * vdec_IDCT : IDCT function for normal matrices
160  *****************************************************************************/
161 void vdec_IDCT( vdec_thread_t * p_vdec, dctelem_t * p_block,
162                 int i_idontcare )
163 {
164     s32 tmp0, tmp1, tmp2, tmp3;
165     s32 tmp10, tmp11, tmp12, tmp13;
166     s32 z1, z2, z3, z4, z5;
167     s32 d0, d1, d2, d3, d4, d5, d6, d7;
168     dctelem_t * dataptr;
169     int rowctr;
170
171     SHIFT_TEMPS
172
173     /* Pass 1: process rows. */
174     /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
175     /* furthermore, we scale the results by 2**PASS1_BITS. */
176
177     dataptr = p_block;
178
179     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
180     {
181         /* Due to quantization, we will usually find that many of the input
182          * coefficients are zero, especially the AC terms.  We can exploit this
183          * by short-circuiting the IDCT calculation for any row in which all
184          * the AC terms are zero.  In that case each output is equal to the
185          * DC coefficient (with scale factor as needed).
186          * With typical images and quantization tables, half or more of the
187          * row DCT calculations can be simplified this way.
188          */
189
190         register int * idataptr = (int*)dataptr;
191         d0 = dataptr[0];
192         d1 = dataptr[1];
193         if ( (d1 == 0) && ((idataptr[1] | idataptr[2] | idataptr[3]) == 0) )
194         {
195       /* AC terms all zero */
196             if (d0)
197             {
198       /* Compute a 32 bit value to assign. */
199                 dctelem_t dcval = (dctelem_t) (d0 << PASS1_BITS);
200                 register int v = (dcval & 0xffff) | (dcval << 16);
201
202                 idataptr[0] = v;
203                 idataptr[1] = v;
204                 idataptr[2] = v;
205                 idataptr[3] = v;
206             }
207
208             dataptr += DCTSIZE; /* advance pointer to next row */
209             continue;
210         }
211         d2 = dataptr[2];
212         d3 = dataptr[3];
213         d4 = dataptr[4];
214         d5 = dataptr[5];
215         d6 = dataptr[6];
216         d7 = dataptr[7];
217
218     /* Even part: reverse the even part of the forward DCT. */
219     /* The rotator is sqrt(2)*c(-6). */
220         if (d6)
221         {
222             if (d4)
223             {
224                 if (d2)
225                 {
226                     if (d0)
227                     {
228             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
229                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
230                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
231                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
232
233                         tmp0 = (d0 + d4) << CONST_BITS;
234                         tmp1 = (d0 - d4) << CONST_BITS;
235
236                         tmp10 = tmp0 + tmp3;
237                         tmp13 = tmp0 - tmp3;
238                         tmp11 = tmp1 + tmp2;
239                         tmp12 = tmp1 - tmp2;
240                     }
241                     else
242                     {
243                     /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
244                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
245                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
246                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
247
248                         tmp0 = d4 << CONST_BITS;
249
250                         tmp10 = tmp0 + tmp3;
251                         tmp13 = tmp0 - tmp3;
252                         tmp11 = tmp2 - tmp0;
253                         tmp12 = -(tmp0 + tmp2);
254                         }
255                 }
256                 else
257                 {
258                     if (d0)
259                     {
260             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
261                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
262                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
263
264                         tmp0 = (d0 + d4) << CONST_BITS;
265                         tmp1 = (d0 - d4) << CONST_BITS;
266
267                         tmp10 = tmp0 + tmp3;
268                         tmp13 = tmp0 - tmp3;
269                         tmp11 = tmp1 + tmp2;
270                         tmp12 = tmp1 - tmp2;
271                         }
272                     else
273                     {
274                     /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
275                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
276                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
277
278                         tmp0 = d4 << CONST_BITS;
279
280                         tmp10 = tmp0 + tmp3;
281                         tmp13 = tmp0 - tmp3;
282                         tmp11 = tmp2 - tmp0;
283                         tmp12 = -(tmp0 + tmp2);
284                         }
285                 }
286             }
287             else
288             {
289                 if (d2)
290                 {
291                     if (d0)
292                     {
293             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
294                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
295                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
296                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
297
298                         tmp0 = d0 << CONST_BITS;
299
300                         tmp10 = tmp0 + tmp3;
301                         tmp13 = tmp0 - tmp3;
302                         tmp11 = tmp0 + tmp2;
303                         tmp12 = tmp0 - tmp2;
304                     }
305                     else
306                     {
307                     /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
308                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
309                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
310                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
311
312                         tmp10 = tmp3;
313                         tmp13 = -tmp3;
314                         tmp11 = tmp2;
315                         tmp12 = -tmp2;
316                             }
317                 }
318                 else
319                 {
320                     if (d0)
321                     {
322             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
323                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
324                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
325
326                         tmp0 = d0 << CONST_BITS;
327
328                         tmp10 = tmp0 + tmp3;
329                         tmp13 = tmp0 - tmp3;
330                         tmp11 = tmp0 + tmp2;
331                         tmp12 = tmp0 - tmp2;
332                     }
333                     else
334                     {
335             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
336                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
337                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
338
339                         tmp10 = tmp3;
340                         tmp13 = -tmp3;
341                         tmp11 = tmp2;
342                         tmp12 = -tmp2;
343                     }
344                 }
345             }
346         }
347         else
348         {
349             if (d4)
350             {
351                 if (d2)
352                 {
353                     if (d0)
354                     {
355                     /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
356                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
357                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
358
359                         tmp0 = (d0 + d4) << CONST_BITS;
360                         tmp1 = (d0 - d4) << CONST_BITS;
361
362                         tmp10 = tmp0 + tmp3;
363                         tmp13 = tmp0 - tmp3;
364                         tmp11 = tmp1 + tmp2;
365                         tmp12 = tmp1 - tmp2;
366                     }
367                     else
368                     {
369             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
370                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
371                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
372
373                         tmp0 = d4 << CONST_BITS;
374
375                         tmp10 = tmp0 + tmp3;
376                         tmp13 = tmp0 - tmp3;
377                         tmp11 = tmp2 - tmp0;
378                         tmp12 = -(tmp0 + tmp2);
379                     }
380                 }
381                 else
382                 {
383                     if (d0)
384                     {
385             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
386                         tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
387                         tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
388                     }
389                     else
390                     {
391             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
392                         tmp10 = tmp13 = d4 << CONST_BITS;
393                         tmp11 = tmp12 = -tmp10;
394                     }
395                 }
396             }
397             else
398             {
399                 if (d2)
400                 {
401                     if (d0)
402                     {
403             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
404                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
405                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
406
407                         tmp0 = d0 << CONST_BITS;
408
409                         tmp10 = tmp0 + tmp3;
410                         tmp13 = tmp0 - tmp3;
411                         tmp11 = tmp0 + tmp2;
412                         tmp12 = tmp0 - tmp2;
413                     }
414                     else
415                     {
416             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
417                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
418                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
419
420                         tmp10 = tmp3;
421                         tmp13 = -tmp3;
422                         tmp11 = tmp2;
423                         tmp12 = -tmp2;
424                     }
425                 }
426                 else
427                 {
428                     if (d0)
429                     {
430             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
431                         tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
432                     }
433                     else
434                     {
435             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
436                         tmp10 = tmp13 = tmp11 = tmp12 = 0;
437                     }
438                 }
439             }
440         }
441
442
443     /* Odd part per figure 8; the matrix is unitary and hence its
444      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
445      */
446
447         if (d7)
448             {
449             if (d5)
450             {
451                 if (d3)
452                 {
453                     if (d1)
454                     {
455             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
456                         z1 = d7 + d1;
457                         z2 = d5 + d3;
458                         z3 = d7 + d3;
459                         z4 = d5 + d1;
460                         z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
461
462                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
463                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
464                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
465                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
466                         z1 = MULTIPLY(z1, - FIX(0.899976223));
467                         z2 = MULTIPLY(z2, - FIX(2.562915447));
468                         z3 = MULTIPLY(z3, - FIX(1.961570560));
469                         z4 = MULTIPLY(z4, - FIX(0.390180644));
470
471                         z3 += z5;
472                         z4 += z5;
473
474                         tmp0 += z1 + z3;
475                         tmp1 += z2 + z4;
476                         tmp2 += z2 + z3;
477                         tmp3 += z1 + z4;
478                     }
479                     else
480                     {
481             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
482                         z2 = d5 + d3;
483                         z3 = d7 + d3;
484                         z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
485
486                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
487                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
488                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
489                         z1 = MULTIPLY(d7, - FIX(0.899976223));
490                         z2 = MULTIPLY(z2, - FIX(2.562915447));
491                         z3 = MULTIPLY(z3, - FIX(1.961570560));
492                         z4 = MULTIPLY(d5, - FIX(0.390180644));
493
494                         z3 += z5;
495                         z4 += z5;
496
497                         tmp0 += z1 + z3;
498                         tmp1 += z2 + z4;
499                         tmp2 += z2 + z3;
500                         tmp3 = z1 + z4;
501                         }
502                     }
503                 else
504                 {
505                     if (d1)
506                     {
507             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
508                         z1 = d7 + d1;
509                         z4 = d5 + d1;
510                         z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
511
512                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
513                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
514                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
515                         z1 = MULTIPLY(z1, - FIX(0.899976223));
516                         z2 = MULTIPLY(d5, - FIX(2.562915447));
517                         z3 = MULTIPLY(d7, - FIX(1.961570560));
518                         z4 = MULTIPLY(z4, - FIX(0.390180644));
519
520                         z3 += z5;
521                         z4 += z5;
522
523                         tmp0 += z1 + z3;
524                         tmp1 += z2 + z4;
525                         tmp2 = z2 + z3;
526                         tmp3 += z1 + z4;
527                     }
528                     else
529                     {
530             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
531                         z5 = MULTIPLY(d7 + d5, FIX(1.175875602));
532
533                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
534                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
535                         z1 = MULTIPLY(d7, - FIX(0.899976223));
536                         z3 = MULTIPLY(d7, - FIX(1.961570560));
537                         z2 = MULTIPLY(d5, - FIX(2.562915447));
538                         z4 = MULTIPLY(d5, - FIX(0.390180644));
539
540                         z3 += z5;
541                         z4 += z5;
542
543                         tmp0 += z3;
544                         tmp1 += z4;
545                         tmp2 = z2 + z3;
546                         tmp3 = z1 + z4;
547                     }
548                 }
549             }
550             else
551             {
552                 if (d3)
553                 {
554                     if (d1)
555                     {
556             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
557                         z1 = d7 + d1;
558                         z3 = d7 + d3;
559                         z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
560
561                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
562                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
563                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
564                         z1 = MULTIPLY(z1, - FIX(0.899976223));
565                         z2 = MULTIPLY(d3, - FIX(2.562915447));
566                         z3 = MULTIPLY(z3, - FIX(1.961570560));
567                         z4 = MULTIPLY(d1, - FIX(0.390180644));
568
569                         z3 += z5;
570                         z4 += z5;
571
572                         tmp0 += z1 + z3;
573                         tmp1 = z2 + z4;
574                         tmp2 += z2 + z3;
575                         tmp3 += z1 + z4;
576                     }
577                     else
578                     {
579             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
580                         z3 = d7 + d3;
581                         z5 = MULTIPLY(z3, FIX(1.175875602));
582
583                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
584                         tmp2 = MULTIPLY(d3, FIX(0.509795579));
585                         z1 = MULTIPLY(d7, - FIX(0.899976223));
586                         z2 = MULTIPLY(d3, - FIX(2.562915447));
587                         z3 = MULTIPLY(z3, - FIX2(0.785694958));
588
589                         tmp0 += z3;
590                         tmp1 = z2 + z5;
591                         tmp2 += z3;
592                         tmp3 = z1 + z5;
593                     }
594                 }
595                 else
596                 {
597                     if (d1)
598                     {
599             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
600                         z1 = d7 + d1;
601                         z5 = MULTIPLY(z1, FIX(1.175875602));
602
603                         tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
604                         tmp3 = MULTIPLY(d1, FIX2(1.111140466));
605                         z1 = MULTIPLY(z1, FIX2(0.275899379));
606                         z3 = MULTIPLY(d7, - FIX(1.961570560));
607                         z4 = MULTIPLY(d1, - FIX(0.390180644));
608
609                         tmp0 += z1;
610                         tmp1 = z4 + z5;
611                         tmp2 = z3 + z5;
612                         tmp3 += z1;
613                     }
614                 else
615                     {
616             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
617                         tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
618                         tmp1 = MULTIPLY(d7, FIX(1.175875602));
619                         tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
620                         tmp3 = MULTIPLY(d7, FIX2(0.275899379));
621                     }
622                 }
623             }
624         }
625         else
626         {
627             if (d5)
628             {
629                 if (d3)
630                 {
631                     if (d1)
632                     {
633             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
634                         z2 = d5 + d3;
635                         z4 = d5 + d1;
636                         z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
637
638                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
639                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
640                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
641                         z1 = MULTIPLY(d1, - FIX(0.899976223));
642                         z2 = MULTIPLY(z2, - FIX(2.562915447));
643                         z3 = MULTIPLY(d3, - FIX(1.961570560));
644                         z4 = MULTIPLY(z4, - FIX(0.390180644));
645
646                         z3 += z5;
647                         z4 += z5;
648
649                         tmp0 = z1 + z3;
650                         tmp1 += z2 + z4;
651                         tmp2 += z2 + z3;
652                         tmp3 += z1 + z4;
653                     }
654                     else
655                     {
656             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
657                         z2 = d5 + d3;
658                         z5 = MULTIPLY(z2, FIX(1.175875602));
659
660                         tmp1 = MULTIPLY(d5, FIX2(1.662939225));
661                         tmp2 = MULTIPLY(d3, FIX2(1.111140466));
662                         z2 = MULTIPLY(z2, - FIX2(1.387039845));
663                         z3 = MULTIPLY(d3, - FIX(1.961570560));
664                         z4 = MULTIPLY(d5, - FIX(0.390180644));
665
666                         tmp0 = z3 + z5;
667                         tmp1 += z2;
668                         tmp2 += z2;
669                         tmp3 = z4 + z5;
670                     }
671                 }
672                 else
673                 {
674                     if (d1)
675                     {
676             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
677                         z4 = d5 + d1;
678                         z5 = MULTIPLY(z4, FIX(1.175875602));
679
680                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
681                         tmp3 = MULTIPLY(d1, FIX2(0.601344887));
682                         z1 = MULTIPLY(d1, - FIX(0.899976223));
683                         z2 = MULTIPLY(d5, - FIX(2.562915447));
684                         z4 = MULTIPLY(z4, FIX2(0.785694958));
685
686                         tmp0 = z1 + z5;
687                         tmp1 += z4;
688                         tmp2 = z2 + z5;
689                         tmp3 += z4;
690                     }
691                     else
692                     {
693             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
694                         tmp0 = MULTIPLY(d5, FIX(1.175875602));
695                         tmp1 = MULTIPLY(d5, FIX2(0.275899380));
696                         tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
697                         tmp3 = MULTIPLY(d5, FIX2(0.785694958));
698                     }
699                 }
700             }
701             else
702             {
703                 if (d3)
704                 {
705                     if (d1)
706                     {
707             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
708                         z5 = d3 + d1;
709
710                         tmp2 = MULTIPLY(d3, - FIX(1.451774981));
711                         tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
712                         z1 = MULTIPLY(d1, FIX(1.061594337));
713                         z2 = MULTIPLY(d3, - FIX(2.172734803));
714                         z4 = MULTIPLY(z5, FIX(0.785694958));
715                         z5 = MULTIPLY(z5, FIX(1.175875602));
716
717                         tmp0 = z1 - z4;
718                         tmp1 = z2 + z4;
719                         tmp2 += z5;
720                         tmp3 += z5;
721                     }
722                     else
723                     {
724             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
725                         tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
726                         tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
727                         tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
728                         tmp3 = MULTIPLY(d3, FIX(1.175875602));
729                     }
730                 }
731                 else
732                 {
733                     if (d1)
734                     {
735             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
736                         tmp0 = MULTIPLY(d1, FIX2(0.275899379));
737                         tmp1 = MULTIPLY(d1, FIX2(0.785694958));
738                         tmp2 = MULTIPLY(d1, FIX(1.175875602));
739                         tmp3 = MULTIPLY(d1, FIX2(1.387039845));
740                     }
741                     else
742                     {
743             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
744                         tmp0 = tmp1 = tmp2 = tmp3 = 0;
745                     }
746                 }
747             }
748         }
749
750     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
751
752         dataptr[0] = (dctelem_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
753         dataptr[7] = (dctelem_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
754         dataptr[1] = (dctelem_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
755         dataptr[6] = (dctelem_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
756         dataptr[2] = (dctelem_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
757         dataptr[5] = (dctelem_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
758         dataptr[3] = (dctelem_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
759         dataptr[4] = (dctelem_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
760
761         dataptr += DCTSIZE;              /* advance pointer to next row */
762     }
763
764   /* Pass 2: process columns. */
765   /* Note that we must descale the results by a factor of 8 == 2**3, */
766   /* and also undo the PASS1_BITS scaling. */
767
768     dataptr = p_block;
769     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
770     {
771     /* Columns of zeroes can be exploited in the same way as we did with rows.
772      * However, the row calculation has created many nonzero AC terms, so the
773      * simplification applies less often (typically 5% to 10% of the time).
774      * On machines with very fast multiplication, it's possible that the
775      * test takes more time than it's worth.  In that case this section
776      * may be commented out.
777      */
778
779         d0 = dataptr[DCTSIZE*0];
780         d1 = dataptr[DCTSIZE*1];
781         d2 = dataptr[DCTSIZE*2];
782         d3 = dataptr[DCTSIZE*3];
783         d4 = dataptr[DCTSIZE*4];
784         d5 = dataptr[DCTSIZE*5];
785         d6 = dataptr[DCTSIZE*6];
786         d7 = dataptr[DCTSIZE*7];
787
788     /* Even part: reverse the even part of the forward DCT. */
789     /* The rotator is sqrt(2)*c(-6). */
790         if (d6)
791         {
792             if (d4)
793             {
794                 if (d2)
795                 {
796                     if (d0)
797                     {
798             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
799                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
800                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
801                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
802
803                         tmp0 = (d0 + d4) << CONST_BITS;
804                         tmp1 = (d0 - d4) << CONST_BITS;
805
806                         tmp10 = tmp0 + tmp3;
807                         tmp13 = tmp0 - tmp3;
808                         tmp11 = tmp1 + tmp2;
809                         tmp12 = tmp1 - tmp2;
810                     }
811                     else
812                     {
813             /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
814                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
815                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
816                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
817
818                         tmp0 = d4 << CONST_BITS;
819
820                         tmp10 = tmp0 + tmp3;
821                         tmp13 = tmp0 - tmp3;
822                         tmp11 = tmp2 - tmp0;
823                         tmp12 = -(tmp0 + tmp2);
824                     }
825                 }
826                 else
827                 {
828                     if (d0)
829                     {
830             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
831                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
832                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
833
834                         tmp0 = (d0 + d4) << CONST_BITS;
835                         tmp1 = (d0 - d4) << CONST_BITS;
836
837                         tmp10 = tmp0 + tmp3;
838                         tmp13 = tmp0 - tmp3;
839                         tmp11 = tmp1 + tmp2;
840                         tmp12 = tmp1 - tmp2;
841                     }
842                     else
843                     {
844             /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
845                         tmp2 = MULTIPLY(d6, -FIX2(1.306562965));
846                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
847
848                         tmp0 = d4 << CONST_BITS;
849
850                         tmp10 = tmp0 + tmp3;
851                         tmp13 = tmp0 - tmp3;
852                         tmp11 = tmp2 - tmp0;
853                         tmp12 = -(tmp0 + tmp2);
854                     }
855                 }
856             }
857             else
858             {
859                 if (d2)
860                 {
861                     if (d0)
862                     {
863             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
864                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
865                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
866                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
867
868                         tmp0 = d0 << CONST_BITS;
869
870                         tmp10 = tmp0 + tmp3;
871                         tmp13 = tmp0 - tmp3;
872                         tmp11 = tmp0 + tmp2;
873                         tmp12 = tmp0 - tmp2;
874                     }
875                     else
876                     {
877             /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
878                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
879                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
880                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
881
882                         tmp10 = tmp3;
883                         tmp13 = -tmp3;
884                         tmp11 = tmp2;
885                         tmp12 = -tmp2;
886                     }
887                 }
888                 else
889                 {
890                     if (d0)
891                     {
892             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
893                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
894                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
895
896                     tmp0 = d0 << CONST_BITS;
897
898                     tmp10 = tmp0 + tmp3;
899                     tmp13 = tmp0 - tmp3;
900                     tmp11 = tmp0 + tmp2;
901                     tmp12 = tmp0 - tmp2;
902                 }
903                 else
904                 {
905             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
906                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
907                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
908                     tmp10 = tmp3;
909                     tmp13 = -tmp3;
910                     tmp11 = tmp2;
911                     tmp12 = -tmp2;
912                 }
913             }
914         }
915     }
916     else
917     {
918         if (d4)
919         {
920             if (d2)
921             {
922                 if (d0)
923                 {
924             /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
925                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
926                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
927
928                     tmp0 = (d0 + d4) << CONST_BITS;
929                     tmp1 = (d0 - d4) << CONST_BITS;
930
931                     tmp10 = tmp0 + tmp3;
932                     tmp13 = tmp0 - tmp3;
933                     tmp11 = tmp1 + tmp2;
934                     tmp12 = tmp1 - tmp2;
935                 }
936                 else
937                 {
938             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
939                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
940                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
941
942                     tmp0 = d4 << CONST_BITS;
943
944                     tmp10 = tmp0 + tmp3;
945                     tmp13 = tmp0 - tmp3;
946                     tmp11 = tmp2 - tmp0;
947                     tmp12 = -(tmp0 + tmp2);
948                 }
949             }
950             else
951             {
952                 if (d0)
953                 {
954             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
955                     tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
956                     tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
957                 }
958                 else
959                 {
960             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
961                     tmp10 = tmp13 = d4 << CONST_BITS;
962                     tmp11 = tmp12 = -tmp10;
963                 }
964             }
965         }
966         else
967         {
968         if (d2)
969         {
970             if (d0)
971             {
972             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
973                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
974                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
975
976                     tmp0 = d0 << CONST_BITS;
977
978                     tmp10 = tmp0 + tmp3;
979                     tmp13 = tmp0 - tmp3;
980                     tmp11 = tmp0 + tmp2;
981                     tmp12 = tmp0 - tmp2;
982             }
983             else
984             {
985             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
986                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
987                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
988
989                     tmp10 = tmp3;
990                     tmp13 = -tmp3;
991                     tmp11 = tmp2;
992                     tmp12 = -tmp2;
993             }
994         }
995         else
996         {
997             if (d0)
998                 {
999             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
1000                     tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
1001                 }
1002                 else
1003                 {
1004             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
1005                     tmp10 = tmp13 = tmp11 = tmp12 = 0;
1006                 }
1007             }
1008         }
1009     }
1010
1011     /* Odd part per figure 8; the matrix is unitary and hence its
1012      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
1013      */
1014     if (d7)
1015     {
1016         if (d5)
1017         {
1018             if (d3)
1019             {
1020                 if (d1)
1021                 {
1022             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
1023                     z1 = d7 + d1;
1024                     z2 = d5 + d3;
1025                     z3 = d7 + d3;
1026                     z4 = d5 + d1;
1027                     z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
1028
1029                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1030                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1031                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1032                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1033                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1034                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1035                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1036                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1037
1038                     z3 += z5;
1039                     z4 += z5;
1040
1041                     tmp0 += z1 + z3;
1042                     tmp1 += z2 + z4;
1043                     tmp2 += z2 + z3;
1044                     tmp3 += z1 + z4;
1045                 }
1046                 else
1047                 {
1048             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
1049                     z2 = d5 + d3;
1050                     z3 = d7 + d3;
1051                     z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
1052
1053                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1054                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1055                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1056                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1057                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1058                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1059                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1060
1061                     z3 += z5;
1062                     z4 += z5;
1063
1064                     tmp0 += z1 + z3;
1065                     tmp1 += z2 + z4;
1066                     tmp2 += z2 + z3;
1067                     tmp3 = z1 + z4;
1068                 }
1069             }
1070             else
1071             {
1072                 if (d1)
1073                 {
1074             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
1075                     z1 = d7 + d1;
1076                     z4 = d5 + d1;
1077                     z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
1078
1079                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1080                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1081                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1082                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1083                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1084                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1085                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1086
1087                     z3 += z5;
1088                     z4 += z5;
1089
1090                     tmp0 += z1 + z3;
1091                     tmp1 += z2 + z4;
1092                     tmp2 = z2 + z3;
1093                     tmp3 += z1 + z4;
1094                 }
1095                 else
1096                 {
1097             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
1098                     z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
1099
1100                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1101                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1102                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1103                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1104                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1105                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1106
1107                     z3 += z5;
1108                     z4 += z5;
1109
1110                     tmp0 += z3;
1111                     tmp1 += z4;
1112                     tmp2 = z2 + z3;
1113                     tmp3 = z1 + z4;
1114                 }
1115             }
1116         }
1117         else
1118         {
1119             if (d3)
1120             {
1121                 if (d1)
1122                 {
1123             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
1124                     z1 = d7 + d1;
1125                     z3 = d7 + d3;
1126                     z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
1127
1128                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1129                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1130                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1131                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1132                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1133                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1134                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1135
1136                     z3 += z5;
1137                     z4 += z5;
1138
1139                     tmp0 += z1 + z3;
1140                     tmp1 = z2 + z4;
1141                     tmp2 += z2 + z3;
1142                     tmp3 += z1 + z4;
1143                 }
1144                 else
1145                 {
1146             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1147                     z3 = d7 + d3;
1148                     z5 = MULTIPLY(z3, FIX(1.175875602));
1149
1150                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1151                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1152                     tmp2 = MULTIPLY(d3, FIX(0.509795579));
1153                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1154                     z3 = MULTIPLY(z3, - FIX2(0.785694958));
1155
1156                     tmp0 += z3;
1157                     tmp1 = z2 + z5;
1158                     tmp2 += z3;
1159                     tmp3 = z1 + z5;
1160                 }
1161             }
1162             else
1163             {
1164                 if (d1)
1165                 {
1166             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1167                     z1 = d7 + d1;
1168                     z5 = MULTIPLY(z1, FIX(1.175875602));
1169
1170                     tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
1171                     tmp3 = MULTIPLY(d1, FIX2(1.111140466));
1172                     z1 = MULTIPLY(z1, FIX2(0.275899379));
1173                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1174                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1175
1176                     tmp0 += z1;
1177                     tmp1 = z4 + z5;
1178                     tmp2 = z3 + z5;
1179                     tmp3 += z1;
1180                 }
1181                 else
1182                 {
1183             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1184                     tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
1185                     tmp1 = MULTIPLY(d7, FIX(1.175875602));
1186                     tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
1187                     tmp3 = MULTIPLY(d7, FIX2(0.275899379));
1188                 }
1189             }
1190         }
1191     }
1192     else
1193     {
1194         if (d5)
1195         {
1196             if (d3)
1197             {
1198                 if (d1)
1199                 {
1200             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1201                     z2 = d5 + d3;
1202                     z4 = d5 + d1;
1203                     z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
1204
1205                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1206                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1207                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1208                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1209                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1210                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1211                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1212
1213                     z3 += z5;
1214                     z4 += z5;
1215
1216                     tmp0 = z1 + z3;
1217                     tmp1 += z2 + z4;
1218                     tmp2 += z2 + z3;
1219                     tmp3 += z1 + z4;
1220                 }
1221                 else
1222                 {
1223             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1224                     z2 = d5 + d3;
1225                     z5 = MULTIPLY(z2, FIX(1.175875602));
1226
1227                     tmp1 = MULTIPLY(d5, FIX2(1.662939225));
1228                     tmp2 = MULTIPLY(d3, FIX2(1.111140466));
1229                     z2 = MULTIPLY(z2, - FIX2(1.387039845));
1230                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1231                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1232
1233                     tmp0 = z3 + z5;
1234                     tmp1 += z2;
1235                     tmp2 += z2;
1236                     tmp3 = z4 + z5;
1237                 }
1238             }
1239             else
1240             {
1241                 if (d1)
1242                 {
1243             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1244                     z4 = d5 + d1;
1245                     z5 = MULTIPLY(z4, FIX(1.175875602));
1246
1247                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1248                     tmp3 = MULTIPLY(d1, FIX2(0.601344887));
1249                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1250                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1251                     z4 = MULTIPLY(z4, FIX2(0.785694958));
1252
1253                     tmp0 = z1 + z5;
1254                     tmp1 += z4;
1255                     tmp2 = z2 + z5;
1256                     tmp3 += z4;
1257                 }
1258                 else
1259                 {
1260             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1261                     tmp0 = MULTIPLY(d5, FIX(1.175875602));
1262                     tmp1 = MULTIPLY(d5, FIX2(0.275899380));
1263                     tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
1264                     tmp3 = MULTIPLY(d5, FIX2(0.785694958));
1265                 }
1266             }
1267         }
1268         else
1269         {
1270             if (d3)
1271             {
1272                 if (d1)
1273                 {
1274             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1275                     z5 = d3 + d1;
1276
1277                     tmp2 = MULTIPLY(d3, - FIX(1.451774981));
1278                     tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
1279                     z1 = MULTIPLY(d1, FIX(1.061594337));
1280                     z2 = MULTIPLY(d3, - FIX(2.172734803));
1281                     z4 = MULTIPLY(z5, FIX(0.785694958));
1282                     z5 = MULTIPLY(z5, FIX(1.175875602));
1283
1284                     tmp0 = z1 - z4;
1285                     tmp1 = z2 + z4;
1286                     tmp2 += z5;
1287                     tmp3 += z5;
1288                 }
1289                 else
1290                 {
1291             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1292                     tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
1293                     tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
1294                     tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
1295                     tmp3 = MULTIPLY(d3, FIX(1.175875602));
1296                 }
1297             }
1298             else
1299             {
1300                 if (d1)
1301                 {
1302             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1303                     tmp0 = MULTIPLY(d1, FIX2(0.275899379));
1304                     tmp1 = MULTIPLY(d1, FIX2(0.785694958));
1305                     tmp2 = MULTIPLY(d1, FIX(1.175875602));
1306                     tmp3 = MULTIPLY(d1, FIX2(1.387039845));
1307                 }
1308                 else
1309                 {
1310             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1311                     tmp0 = tmp1 = tmp2 = tmp3 = 0;
1312                 }
1313             }
1314         }
1315     }
1316
1317     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1318
1319     dataptr[DCTSIZE*0] = (dctelem_t) DESCALE(tmp10 + tmp3,
1320                        CONST_BITS+PASS1_BITS+3);
1321     dataptr[DCTSIZE*7] = (dctelem_t) DESCALE(tmp10 - tmp3,
1322                        CONST_BITS+PASS1_BITS+3);
1323     dataptr[DCTSIZE*1] = (dctelem_t) DESCALE(tmp11 + tmp2,
1324                        CONST_BITS+PASS1_BITS+3);
1325     dataptr[DCTSIZE*6] = (dctelem_t) DESCALE(tmp11 - tmp2,
1326                        CONST_BITS+PASS1_BITS+3);
1327     dataptr[DCTSIZE*2] = (dctelem_t) DESCALE(tmp12 + tmp1,
1328                        CONST_BITS+PASS1_BITS+3);
1329     dataptr[DCTSIZE*5] = (dctelem_t) DESCALE(tmp12 - tmp1,
1330                        CONST_BITS+PASS1_BITS+3);
1331     dataptr[DCTSIZE*3] = (dctelem_t) DESCALE(tmp13 + tmp0,
1332                        CONST_BITS+PASS1_BITS+3);
1333     dataptr[DCTSIZE*4] = (dctelem_t) DESCALE(tmp13 - tmp0,
1334                        CONST_BITS+PASS1_BITS+3);
1335
1336     dataptr++;             /* advance pointer to next column */
1337     }
1338 }
1339