]> git.sesse.net Git - vlc/blob - plugins/idct/idct.c
. fixed the bug-that-made-the-vlc-segfault-on-exit, which means that
[vlc] / plugins / idct / idct.c
1 /*****************************************************************************
2  * idct.c : IDCT module
3  *****************************************************************************
4  * Copyright (C) 1999, 2000 VideoLAN
5  * $Id: idct.c,v 1.2 2001/01/15 06:18:23 sam Exp $
6  *
7  * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  * 
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 #define MODULE_NAME idct
25
26 /*****************************************************************************
27  * Preamble
28  *****************************************************************************/
29 #include "defs.h"
30
31 #include <stdlib.h>
32
33 #include "config.h"
34 #include "common.h"
35 #include "threads.h"
36 #include "mtime.h"
37
38 #include "video.h"
39 #include "video_output.h"
40
41 #include "video_decoder.h"
42
43 #include "modules.h"
44 #include "modules_inner.h"
45
46 #include "idct.h"
47
48 /*****************************************************************************
49  * Local and extern prototypes.
50  *****************************************************************************/
51 static void idct_getfunctions( function_list_t * p_function_list );
52
53 static int  idct_Probe      ( probedata_t *p_data );
54 static void vdec_InitIDCT   ( vdec_thread_t * p_vdec);
55        void vdec_SparseIDCT ( vdec_thread_t * p_vdec, dctelem_t * p_block,
56                               int i_sparse_pos);
57 static void vdec_IDCT       ( vdec_thread_t * p_vdec, dctelem_t * p_block,
58                               int i_idontcare );
59
60
61 /*****************************************************************************
62  * Build configuration tree.
63  *****************************************************************************/
64 MODULE_CONFIG_START
65 ADD_WINDOW( "Configuration for IDCT module" )
66     ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
67 MODULE_CONFIG_END
68
69 /*****************************************************************************
70  * InitModule: get the module structure and configuration.
71  *****************************************************************************
72  * We have to fill psz_name, psz_longname and psz_version. These variables
73  * will be strdup()ed later by the main application because the module can
74  * be unloaded later to save memory, and we want to be able to access this
75  * data even after the module has been unloaded.
76  *****************************************************************************/
77 int InitModule( module_t * p_module )
78 {
79     p_module->psz_name = MODULE_STRING;
80     p_module->psz_longname = "C IDCT module";
81     p_module->psz_version = VERSION;
82
83     p_module->i_capabilities = MODULE_CAPABILITY_NULL
84                                 | MODULE_CAPABILITY_IDCT;
85
86     return( 0 );
87 }
88
89 /*****************************************************************************
90  * ActivateModule: set the module to an usable state.
91  *****************************************************************************
92  * This function fills the capability functions and the configuration
93  * structure. Once ActivateModule() has been called, the i_usage can
94  * be set to 0 and calls to NeedModule() be made to increment it. To unload
95  * the module, one has to wait until i_usage == 0 and call DeactivateModule().
96  *****************************************************************************/
97 int ActivateModule( module_t * p_module )
98 {
99     p_module->p_functions = malloc( sizeof( module_functions_t ) );
100     if( p_module->p_functions == NULL )
101     {
102         return( -1 );
103     }
104
105     idct_getfunctions( &p_module->p_functions->idct );
106
107     p_module->p_config = p_config;
108
109     return( 0 );
110 }
111
112 /*****************************************************************************
113  * DeactivateModule: make sure the module can be unloaded.
114  *****************************************************************************
115  * This function must only be called when i_usage == 0. If it successfully
116  * returns, i_usage can be set to -1 and the module unloaded. Be careful to
117  * lock usage_lock during the whole process.
118  *****************************************************************************/
119 int DeactivateModule( module_t * p_module )
120 {
121     free( p_module->p_functions );
122
123     return( 0 );
124 }
125
126 /* Following functions are local */
127
128 /*****************************************************************************
129  * Functions exported as capabilities. They are declared as static so that
130  * we don't pollute the namespace too much.
131  *****************************************************************************/
132 static void idct_getfunctions( function_list_t * p_function_list )
133 {
134     p_function_list->pf_probe = idct_Probe;
135     p_function_list->functions.idct.pf_init = vdec_InitIDCT;
136     p_function_list->functions.idct.pf_sparse_idct = vdec_SparseIDCT;
137     p_function_list->functions.idct.pf_idct = vdec_IDCT;
138 }
139
140 /*****************************************************************************
141  * idct_Probe: returns a preference score
142  *****************************************************************************/
143 static int idct_Probe( probedata_t *p_data )
144 {
145     /* This plugin always works */
146     return( 100 );
147 }
148
149 /*****************************************************************************
150  * vdec_InitIDCT : initialize datas for vdec_SparseIDCT
151  *****************************************************************************/
152 static void vdec_InitIDCT (vdec_thread_t * p_vdec)
153 {
154     int i;
155
156     dctelem_t * p_pre = p_vdec->p_pre_idct;
157     memset( p_pre, 0, 64*64*sizeof(dctelem_t) );
158
159     for( i=0 ; i < 64 ; i++ )
160     {
161         p_pre[i*64+i] = 1 << SPARSE_SCALE_FACTOR;
162         vdec_IDCT( p_vdec, &p_pre[i*64], 0) ;
163     }
164     return;
165 }
166
167 /*****************************************************************************
168  * vdec_IDCT : IDCT function for normal matrices
169  *****************************************************************************/
170 static void vdec_IDCT( vdec_thread_t * p_vdec, dctelem_t * p_block,
171                        int i_idontcare )
172 {
173     s32 tmp0, tmp1, tmp2, tmp3;
174     s32 tmp10, tmp11, tmp12, tmp13;
175     s32 z1, z2, z3, z4, z5;
176     s32 d0, d1, d2, d3, d4, d5, d6, d7;
177     dctelem_t * dataptr;
178     int rowctr;
179
180     SHIFT_TEMPS
181
182     /* Pass 1: process rows. */
183     /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
184     /* furthermore, we scale the results by 2**PASS1_BITS. */
185
186     dataptr = p_block;
187
188     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
189     {
190         /* Due to quantization, we will usually find that many of the input
191          * coefficients are zero, especially the AC terms.  We can exploit this
192          * by short-circuiting the IDCT calculation for any row in which all
193          * the AC terms are zero.  In that case each output is equal to the
194          * DC coefficient (with scale factor as needed).
195          * With typical images and quantization tables, half or more of the
196          * row DCT calculations can be simplified this way.
197          */
198
199         register int * idataptr = (int*)dataptr;
200         d0 = dataptr[0];
201         d1 = dataptr[1];
202         if ( (d1 == 0) && ((idataptr[1] | idataptr[2] | idataptr[3]) == 0) )
203         {
204       /* AC terms all zero */
205             if (d0)
206             {
207       /* Compute a 32 bit value to assign. */
208                 dctelem_t dcval = (dctelem_t) (d0 << PASS1_BITS);
209                 register int v = (dcval & 0xffff) | (dcval << 16);
210
211                 idataptr[0] = v;
212                 idataptr[1] = v;
213                 idataptr[2] = v;
214                 idataptr[3] = v;
215             }
216
217             dataptr += DCTSIZE; /* advance pointer to next row */
218             continue;
219         }
220         d2 = dataptr[2];
221         d3 = dataptr[3];
222         d4 = dataptr[4];
223         d5 = dataptr[5];
224         d6 = dataptr[6];
225         d7 = dataptr[7];
226
227     /* Even part: reverse the even part of the forward DCT. */
228     /* The rotator is sqrt(2)*c(-6). */
229         if (d6)
230         {
231             if (d4)
232             {
233                 if (d2)
234                 {
235                     if (d0)
236                     {
237             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
238                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
239                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
240                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
241
242                         tmp0 = (d0 + d4) << CONST_BITS;
243                         tmp1 = (d0 - d4) << CONST_BITS;
244
245                         tmp10 = tmp0 + tmp3;
246                         tmp13 = tmp0 - tmp3;
247                         tmp11 = tmp1 + tmp2;
248                         tmp12 = tmp1 - tmp2;
249                     }
250                     else
251                     {
252                     /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
253                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
254                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
255                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
256
257                         tmp0 = d4 << CONST_BITS;
258
259                         tmp10 = tmp0 + tmp3;
260                         tmp13 = tmp0 - tmp3;
261                         tmp11 = tmp2 - tmp0;
262                         tmp12 = -(tmp0 + tmp2);
263                         }
264                 }
265                 else
266                 {
267                     if (d0)
268                     {
269             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
270                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
271                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
272
273                         tmp0 = (d0 + d4) << CONST_BITS;
274                         tmp1 = (d0 - d4) << CONST_BITS;
275
276                         tmp10 = tmp0 + tmp3;
277                         tmp13 = tmp0 - tmp3;
278                         tmp11 = tmp1 + tmp2;
279                         tmp12 = tmp1 - tmp2;
280                         }
281                     else
282                     {
283                     /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
284                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
285                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
286
287                         tmp0 = d4 << CONST_BITS;
288
289                         tmp10 = tmp0 + tmp3;
290                         tmp13 = tmp0 - tmp3;
291                         tmp11 = tmp2 - tmp0;
292                         tmp12 = -(tmp0 + tmp2);
293                         }
294                 }
295             }
296             else
297             {
298                 if (d2)
299                 {
300                     if (d0)
301                     {
302             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
303                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
304                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
305                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
306
307                         tmp0 = d0 << CONST_BITS;
308
309                         tmp10 = tmp0 + tmp3;
310                         tmp13 = tmp0 - tmp3;
311                         tmp11 = tmp0 + tmp2;
312                         tmp12 = tmp0 - tmp2;
313                     }
314                     else
315                     {
316                     /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
317                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
318                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
319                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
320
321                         tmp10 = tmp3;
322                         tmp13 = -tmp3;
323                         tmp11 = tmp2;
324                         tmp12 = -tmp2;
325                             }
326                 }
327                 else
328                 {
329                     if (d0)
330                     {
331             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
332                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
333                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
334
335                         tmp0 = d0 << CONST_BITS;
336
337                         tmp10 = tmp0 + tmp3;
338                         tmp13 = tmp0 - tmp3;
339                         tmp11 = tmp0 + tmp2;
340                         tmp12 = tmp0 - tmp2;
341                     }
342                     else
343                     {
344             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
345                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
346                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
347
348                         tmp10 = tmp3;
349                         tmp13 = -tmp3;
350                         tmp11 = tmp2;
351                         tmp12 = -tmp2;
352                     }
353                 }
354             }
355         }
356         else
357         {
358             if (d4)
359             {
360                 if (d2)
361                 {
362                     if (d0)
363                     {
364                     /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
365                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
366                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
367
368                         tmp0 = (d0 + d4) << CONST_BITS;
369                         tmp1 = (d0 - d4) << CONST_BITS;
370
371                         tmp10 = tmp0 + tmp3;
372                         tmp13 = tmp0 - tmp3;
373                         tmp11 = tmp1 + tmp2;
374                         tmp12 = tmp1 - tmp2;
375                     }
376                     else
377                     {
378             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
379                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
380                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
381
382                         tmp0 = d4 << CONST_BITS;
383
384                         tmp10 = tmp0 + tmp3;
385                         tmp13 = tmp0 - tmp3;
386                         tmp11 = tmp2 - tmp0;
387                         tmp12 = -(tmp0 + tmp2);
388                     }
389                 }
390                 else
391                 {
392                     if (d0)
393                     {
394             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
395                         tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
396                         tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
397                     }
398                     else
399                     {
400             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
401                         tmp10 = tmp13 = d4 << CONST_BITS;
402                         tmp11 = tmp12 = -tmp10;
403                     }
404                 }
405             }
406             else
407             {
408                 if (d2)
409                 {
410                     if (d0)
411                     {
412             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
413                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
414                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
415
416                         tmp0 = d0 << CONST_BITS;
417
418                         tmp10 = tmp0 + tmp3;
419                         tmp13 = tmp0 - tmp3;
420                         tmp11 = tmp0 + tmp2;
421                         tmp12 = tmp0 - tmp2;
422                     }
423                     else
424                     {
425             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
426                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
427                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
428
429                         tmp10 = tmp3;
430                         tmp13 = -tmp3;
431                         tmp11 = tmp2;
432                         tmp12 = -tmp2;
433                     }
434                 }
435                 else
436                 {
437                     if (d0)
438                     {
439             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
440                         tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
441                     }
442                     else
443                     {
444             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
445                         tmp10 = tmp13 = tmp11 = tmp12 = 0;
446                     }
447                 }
448             }
449         }
450
451
452     /* Odd part per figure 8; the matrix is unitary and hence its
453      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
454      */
455
456         if (d7)
457             {
458             if (d5)
459             {
460                 if (d3)
461                 {
462                     if (d1)
463                     {
464             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
465                         z1 = d7 + d1;
466                         z2 = d5 + d3;
467                         z3 = d7 + d3;
468                         z4 = d5 + d1;
469                         z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
470
471                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
472                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
473                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
474                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
475                         z1 = MULTIPLY(z1, - FIX(0.899976223));
476                         z2 = MULTIPLY(z2, - FIX(2.562915447));
477                         z3 = MULTIPLY(z3, - FIX(1.961570560));
478                         z4 = MULTIPLY(z4, - FIX(0.390180644));
479
480                         z3 += z5;
481                         z4 += z5;
482
483                         tmp0 += z1 + z3;
484                         tmp1 += z2 + z4;
485                         tmp2 += z2 + z3;
486                         tmp3 += z1 + z4;
487                     }
488                     else
489                     {
490             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
491                         z2 = d5 + d3;
492                         z3 = d7 + d3;
493                         z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
494
495                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
496                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
497                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
498                         z1 = MULTIPLY(d7, - FIX(0.899976223));
499                         z2 = MULTIPLY(z2, - FIX(2.562915447));
500                         z3 = MULTIPLY(z3, - FIX(1.961570560));
501                         z4 = MULTIPLY(d5, - FIX(0.390180644));
502
503                         z3 += z5;
504                         z4 += z5;
505
506                         tmp0 += z1 + z3;
507                         tmp1 += z2 + z4;
508                         tmp2 += z2 + z3;
509                         tmp3 = z1 + z4;
510                         }
511                     }
512                 else
513                 {
514                     if (d1)
515                     {
516             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
517                         z1 = d7 + d1;
518                         z4 = d5 + d1;
519                         z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
520
521                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
522                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
523                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
524                         z1 = MULTIPLY(z1, - FIX(0.899976223));
525                         z2 = MULTIPLY(d5, - FIX(2.562915447));
526                         z3 = MULTIPLY(d7, - FIX(1.961570560));
527                         z4 = MULTIPLY(z4, - FIX(0.390180644));
528
529                         z3 += z5;
530                         z4 += z5;
531
532                         tmp0 += z1 + z3;
533                         tmp1 += z2 + z4;
534                         tmp2 = z2 + z3;
535                         tmp3 += z1 + z4;
536                     }
537                     else
538                     {
539             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
540                         z5 = MULTIPLY(d7 + d5, FIX(1.175875602));
541
542                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
543                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
544                         z1 = MULTIPLY(d7, - FIX(0.899976223));
545                         z3 = MULTIPLY(d7, - FIX(1.961570560));
546                         z2 = MULTIPLY(d5, - FIX(2.562915447));
547                         z4 = MULTIPLY(d5, - FIX(0.390180644));
548
549                         z3 += z5;
550                         z4 += z5;
551
552                         tmp0 += z3;
553                         tmp1 += z4;
554                         tmp2 = z2 + z3;
555                         tmp3 = z1 + z4;
556                     }
557                 }
558             }
559             else
560             {
561                 if (d3)
562                 {
563                     if (d1)
564                     {
565             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
566                         z1 = d7 + d1;
567                         z3 = d7 + d3;
568                         z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
569
570                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
571                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
572                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
573                         z1 = MULTIPLY(z1, - FIX(0.899976223));
574                         z2 = MULTIPLY(d3, - FIX(2.562915447));
575                         z3 = MULTIPLY(z3, - FIX(1.961570560));
576                         z4 = MULTIPLY(d1, - FIX(0.390180644));
577
578                         z3 += z5;
579                         z4 += z5;
580
581                         tmp0 += z1 + z3;
582                         tmp1 = z2 + z4;
583                         tmp2 += z2 + z3;
584                         tmp3 += z1 + z4;
585                     }
586                     else
587                     {
588             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
589                         z3 = d7 + d3;
590                         z5 = MULTIPLY(z3, FIX(1.175875602));
591
592                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
593                         tmp2 = MULTIPLY(d3, FIX(0.509795579));
594                         z1 = MULTIPLY(d7, - FIX(0.899976223));
595                         z2 = MULTIPLY(d3, - FIX(2.562915447));
596                         z3 = MULTIPLY(z3, - FIX2(0.785694958));
597
598                         tmp0 += z3;
599                         tmp1 = z2 + z5;
600                         tmp2 += z3;
601                         tmp3 = z1 + z5;
602                     }
603                 }
604                 else
605                 {
606                     if (d1)
607                     {
608             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
609                         z1 = d7 + d1;
610                         z5 = MULTIPLY(z1, FIX(1.175875602));
611
612                         tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
613                         tmp3 = MULTIPLY(d1, FIX2(1.111140466));
614                         z1 = MULTIPLY(z1, FIX2(0.275899379));
615                         z3 = MULTIPLY(d7, - FIX(1.961570560));
616                         z4 = MULTIPLY(d1, - FIX(0.390180644));
617
618                         tmp0 += z1;
619                         tmp1 = z4 + z5;
620                         tmp2 = z3 + z5;
621                         tmp3 += z1;
622                     }
623                 else
624                     {
625             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
626                         tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
627                         tmp1 = MULTIPLY(d7, FIX(1.175875602));
628                         tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
629                         tmp3 = MULTIPLY(d7, FIX2(0.275899379));
630                     }
631                 }
632             }
633         }
634         else
635         {
636             if (d5)
637             {
638                 if (d3)
639                 {
640                     if (d1)
641                     {
642             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
643                         z2 = d5 + d3;
644                         z4 = d5 + d1;
645                         z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
646
647                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
648                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
649                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
650                         z1 = MULTIPLY(d1, - FIX(0.899976223));
651                         z2 = MULTIPLY(z2, - FIX(2.562915447));
652                         z3 = MULTIPLY(d3, - FIX(1.961570560));
653                         z4 = MULTIPLY(z4, - FIX(0.390180644));
654
655                         z3 += z5;
656                         z4 += z5;
657
658                         tmp0 = z1 + z3;
659                         tmp1 += z2 + z4;
660                         tmp2 += z2 + z3;
661                         tmp3 += z1 + z4;
662                     }
663                     else
664                     {
665             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
666                         z2 = d5 + d3;
667                         z5 = MULTIPLY(z2, FIX(1.175875602));
668
669                         tmp1 = MULTIPLY(d5, FIX2(1.662939225));
670                         tmp2 = MULTIPLY(d3, FIX2(1.111140466));
671                         z2 = MULTIPLY(z2, - FIX2(1.387039845));
672                         z3 = MULTIPLY(d3, - FIX(1.961570560));
673                         z4 = MULTIPLY(d5, - FIX(0.390180644));
674
675                         tmp0 = z3 + z5;
676                         tmp1 += z2;
677                         tmp2 += z2;
678                         tmp3 = z4 + z5;
679                     }
680                 }
681                 else
682                 {
683                     if (d1)
684                     {
685             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
686                         z4 = d5 + d1;
687                         z5 = MULTIPLY(z4, FIX(1.175875602));
688
689                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
690                         tmp3 = MULTIPLY(d1, FIX2(0.601344887));
691                         z1 = MULTIPLY(d1, - FIX(0.899976223));
692                         z2 = MULTIPLY(d5, - FIX(2.562915447));
693                         z4 = MULTIPLY(z4, FIX2(0.785694958));
694
695                         tmp0 = z1 + z5;
696                         tmp1 += z4;
697                         tmp2 = z2 + z5;
698                         tmp3 += z4;
699                     }
700                     else
701                     {
702             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
703                         tmp0 = MULTIPLY(d5, FIX(1.175875602));
704                         tmp1 = MULTIPLY(d5, FIX2(0.275899380));
705                         tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
706                         tmp3 = MULTIPLY(d5, FIX2(0.785694958));
707                     }
708                 }
709             }
710             else
711             {
712                 if (d3)
713                 {
714                     if (d1)
715                     {
716             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
717                         z5 = d3 + d1;
718
719                         tmp2 = MULTIPLY(d3, - FIX(1.451774981));
720                         tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
721                         z1 = MULTIPLY(d1, FIX(1.061594337));
722                         z2 = MULTIPLY(d3, - FIX(2.172734803));
723                         z4 = MULTIPLY(z5, FIX(0.785694958));
724                         z5 = MULTIPLY(z5, FIX(1.175875602));
725
726                         tmp0 = z1 - z4;
727                         tmp1 = z2 + z4;
728                         tmp2 += z5;
729                         tmp3 += z5;
730                     }
731                     else
732                     {
733             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
734                         tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
735                         tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
736                         tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
737                         tmp3 = MULTIPLY(d3, FIX(1.175875602));
738                     }
739                 }
740                 else
741                 {
742                     if (d1)
743                     {
744             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
745                         tmp0 = MULTIPLY(d1, FIX2(0.275899379));
746                         tmp1 = MULTIPLY(d1, FIX2(0.785694958));
747                         tmp2 = MULTIPLY(d1, FIX(1.175875602));
748                         tmp3 = MULTIPLY(d1, FIX2(1.387039845));
749                     }
750                     else
751                     {
752             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
753                         tmp0 = tmp1 = tmp2 = tmp3 = 0;
754                     }
755                 }
756             }
757         }
758
759     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
760
761         dataptr[0] = (dctelem_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
762         dataptr[7] = (dctelem_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
763         dataptr[1] = (dctelem_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
764         dataptr[6] = (dctelem_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
765         dataptr[2] = (dctelem_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
766         dataptr[5] = (dctelem_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
767         dataptr[3] = (dctelem_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
768         dataptr[4] = (dctelem_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
769
770         dataptr += DCTSIZE;              /* advance pointer to next row */
771     }
772
773   /* Pass 2: process columns. */
774   /* Note that we must descale the results by a factor of 8 == 2**3, */
775   /* and also undo the PASS1_BITS scaling. */
776
777     dataptr = p_block;
778     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
779     {
780     /* Columns of zeroes can be exploited in the same way as we did with rows.
781      * However, the row calculation has created many nonzero AC terms, so the
782      * simplification applies less often (typically 5% to 10% of the time).
783      * On machines with very fast multiplication, it's possible that the
784      * test takes more time than it's worth.  In that case this section
785      * may be commented out.
786      */
787
788         d0 = dataptr[DCTSIZE*0];
789         d1 = dataptr[DCTSIZE*1];
790         d2 = dataptr[DCTSIZE*2];
791         d3 = dataptr[DCTSIZE*3];
792         d4 = dataptr[DCTSIZE*4];
793         d5 = dataptr[DCTSIZE*5];
794         d6 = dataptr[DCTSIZE*6];
795         d7 = dataptr[DCTSIZE*7];
796
797     /* Even part: reverse the even part of the forward DCT. */
798     /* The rotator is sqrt(2)*c(-6). */
799         if (d6)
800         {
801             if (d4)
802             {
803                 if (d2)
804                 {
805                     if (d0)
806                     {
807             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
808                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
809                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
810                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
811
812                         tmp0 = (d0 + d4) << CONST_BITS;
813                         tmp1 = (d0 - d4) << CONST_BITS;
814
815                         tmp10 = tmp0 + tmp3;
816                         tmp13 = tmp0 - tmp3;
817                         tmp11 = tmp1 + tmp2;
818                         tmp12 = tmp1 - tmp2;
819                     }
820                     else
821                     {
822             /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
823                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
824                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
825                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
826
827                         tmp0 = d4 << CONST_BITS;
828
829                         tmp10 = tmp0 + tmp3;
830                         tmp13 = tmp0 - tmp3;
831                         tmp11 = tmp2 - tmp0;
832                         tmp12 = -(tmp0 + tmp2);
833                     }
834                 }
835                 else
836                 {
837                     if (d0)
838                     {
839             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
840                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
841                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
842
843                         tmp0 = (d0 + d4) << CONST_BITS;
844                         tmp1 = (d0 - d4) << CONST_BITS;
845
846                         tmp10 = tmp0 + tmp3;
847                         tmp13 = tmp0 - tmp3;
848                         tmp11 = tmp1 + tmp2;
849                         tmp12 = tmp1 - tmp2;
850                     }
851                     else
852                     {
853             /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
854                         tmp2 = MULTIPLY(d6, -FIX2(1.306562965));
855                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
856
857                         tmp0 = d4 << CONST_BITS;
858
859                         tmp10 = tmp0 + tmp3;
860                         tmp13 = tmp0 - tmp3;
861                         tmp11 = tmp2 - tmp0;
862                         tmp12 = -(tmp0 + tmp2);
863                     }
864                 }
865             }
866             else
867             {
868                 if (d2)
869                 {
870                     if (d0)
871                     {
872             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
873                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
874                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
875                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
876
877                         tmp0 = d0 << CONST_BITS;
878
879                         tmp10 = tmp0 + tmp3;
880                         tmp13 = tmp0 - tmp3;
881                         tmp11 = tmp0 + tmp2;
882                         tmp12 = tmp0 - tmp2;
883                     }
884                     else
885                     {
886             /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
887                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
888                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
889                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
890
891                         tmp10 = tmp3;
892                         tmp13 = -tmp3;
893                         tmp11 = tmp2;
894                         tmp12 = -tmp2;
895                     }
896                 }
897                 else
898                 {
899                     if (d0)
900                     {
901             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
902                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
903                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
904
905                     tmp0 = d0 << CONST_BITS;
906
907                     tmp10 = tmp0 + tmp3;
908                     tmp13 = tmp0 - tmp3;
909                     tmp11 = tmp0 + tmp2;
910                     tmp12 = tmp0 - tmp2;
911                 }
912                 else
913                 {
914             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
915                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
916                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
917                     tmp10 = tmp3;
918                     tmp13 = -tmp3;
919                     tmp11 = tmp2;
920                     tmp12 = -tmp2;
921                 }
922             }
923         }
924     }
925     else
926     {
927         if (d4)
928         {
929             if (d2)
930             {
931                 if (d0)
932                 {
933             /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
934                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
935                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
936
937                     tmp0 = (d0 + d4) << CONST_BITS;
938                     tmp1 = (d0 - d4) << CONST_BITS;
939
940                     tmp10 = tmp0 + tmp3;
941                     tmp13 = tmp0 - tmp3;
942                     tmp11 = tmp1 + tmp2;
943                     tmp12 = tmp1 - tmp2;
944                 }
945                 else
946                 {
947             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
948                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
949                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
950
951                     tmp0 = d4 << CONST_BITS;
952
953                     tmp10 = tmp0 + tmp3;
954                     tmp13 = tmp0 - tmp3;
955                     tmp11 = tmp2 - tmp0;
956                     tmp12 = -(tmp0 + tmp2);
957                 }
958             }
959             else
960             {
961                 if (d0)
962                 {
963             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
964                     tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
965                     tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
966                 }
967                 else
968                 {
969             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
970                     tmp10 = tmp13 = d4 << CONST_BITS;
971                     tmp11 = tmp12 = -tmp10;
972                 }
973             }
974         }
975         else
976         {
977         if (d2)
978         {
979             if (d0)
980             {
981             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
982                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
983                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
984
985                     tmp0 = d0 << CONST_BITS;
986
987                     tmp10 = tmp0 + tmp3;
988                     tmp13 = tmp0 - tmp3;
989                     tmp11 = tmp0 + tmp2;
990                     tmp12 = tmp0 - tmp2;
991             }
992             else
993             {
994             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
995                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
996                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
997
998                     tmp10 = tmp3;
999                     tmp13 = -tmp3;
1000                     tmp11 = tmp2;
1001                     tmp12 = -tmp2;
1002             }
1003         }
1004         else
1005         {
1006             if (d0)
1007                 {
1008             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
1009                     tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
1010                 }
1011                 else
1012                 {
1013             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
1014                     tmp10 = tmp13 = tmp11 = tmp12 = 0;
1015                 }
1016             }
1017         }
1018     }
1019
1020     /* Odd part per figure 8; the matrix is unitary and hence its
1021      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
1022      */
1023     if (d7)
1024     {
1025         if (d5)
1026         {
1027             if (d3)
1028             {
1029                 if (d1)
1030                 {
1031             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
1032                     z1 = d7 + d1;
1033                     z2 = d5 + d3;
1034                     z3 = d7 + d3;
1035                     z4 = d5 + d1;
1036                     z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
1037
1038                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1039                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1040                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1041                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1042                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1043                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1044                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1045                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1046
1047                     z3 += z5;
1048                     z4 += z5;
1049
1050                     tmp0 += z1 + z3;
1051                     tmp1 += z2 + z4;
1052                     tmp2 += z2 + z3;
1053                     tmp3 += z1 + z4;
1054                 }
1055                 else
1056                 {
1057             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
1058                     z2 = d5 + d3;
1059                     z3 = d7 + d3;
1060                     z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
1061
1062                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1063                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1064                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1065                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1066                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1067                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1068                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1069
1070                     z3 += z5;
1071                     z4 += z5;
1072
1073                     tmp0 += z1 + z3;
1074                     tmp1 += z2 + z4;
1075                     tmp2 += z2 + z3;
1076                     tmp3 = z1 + z4;
1077                 }
1078             }
1079             else
1080             {
1081                 if (d1)
1082                 {
1083             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
1084                     z1 = d7 + d1;
1085                     z4 = d5 + d1;
1086                     z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
1087
1088                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1089                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1090                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1091                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1092                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1093                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1094                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1095
1096                     z3 += z5;
1097                     z4 += z5;
1098
1099                     tmp0 += z1 + z3;
1100                     tmp1 += z2 + z4;
1101                     tmp2 = z2 + z3;
1102                     tmp3 += z1 + z4;
1103                 }
1104                 else
1105                 {
1106             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
1107                     z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
1108
1109                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1110                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1111                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1112                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1113                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1114                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1115
1116                     z3 += z5;
1117                     z4 += z5;
1118
1119                     tmp0 += z3;
1120                     tmp1 += z4;
1121                     tmp2 = z2 + z3;
1122                     tmp3 = z1 + z4;
1123                 }
1124             }
1125         }
1126         else
1127         {
1128             if (d3)
1129             {
1130                 if (d1)
1131                 {
1132             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
1133                     z1 = d7 + d1;
1134                     z3 = d7 + d3;
1135                     z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
1136
1137                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1138                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1139                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1140                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1141                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1142                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1143                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1144
1145                     z3 += z5;
1146                     z4 += z5;
1147
1148                     tmp0 += z1 + z3;
1149                     tmp1 = z2 + z4;
1150                     tmp2 += z2 + z3;
1151                     tmp3 += z1 + z4;
1152                 }
1153                 else
1154                 {
1155             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1156                     z3 = d7 + d3;
1157                     z5 = MULTIPLY(z3, FIX(1.175875602));
1158
1159                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1160                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1161                     tmp2 = MULTIPLY(d3, FIX(0.509795579));
1162                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1163                     z3 = MULTIPLY(z3, - FIX2(0.785694958));
1164
1165                     tmp0 += z3;
1166                     tmp1 = z2 + z5;
1167                     tmp2 += z3;
1168                     tmp3 = z1 + z5;
1169                 }
1170             }
1171             else
1172             {
1173                 if (d1)
1174                 {
1175             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1176                     z1 = d7 + d1;
1177                     z5 = MULTIPLY(z1, FIX(1.175875602));
1178
1179                     tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
1180                     tmp3 = MULTIPLY(d1, FIX2(1.111140466));
1181                     z1 = MULTIPLY(z1, FIX2(0.275899379));
1182                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1183                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1184
1185                     tmp0 += z1;
1186                     tmp1 = z4 + z5;
1187                     tmp2 = z3 + z5;
1188                     tmp3 += z1;
1189                 }
1190                 else
1191                 {
1192             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1193                     tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
1194                     tmp1 = MULTIPLY(d7, FIX(1.175875602));
1195                     tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
1196                     tmp3 = MULTIPLY(d7, FIX2(0.275899379));
1197                 }
1198             }
1199         }
1200     }
1201     else
1202     {
1203         if (d5)
1204         {
1205             if (d3)
1206             {
1207                 if (d1)
1208                 {
1209             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1210                     z2 = d5 + d3;
1211                     z4 = d5 + d1;
1212                     z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
1213
1214                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1215                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1216                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1217                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1218                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1219                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1220                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1221
1222                     z3 += z5;
1223                     z4 += z5;
1224
1225                     tmp0 = z1 + z3;
1226                     tmp1 += z2 + z4;
1227                     tmp2 += z2 + z3;
1228                     tmp3 += z1 + z4;
1229                 }
1230                 else
1231                 {
1232             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1233                     z2 = d5 + d3;
1234                     z5 = MULTIPLY(z2, FIX(1.175875602));
1235
1236                     tmp1 = MULTIPLY(d5, FIX2(1.662939225));
1237                     tmp2 = MULTIPLY(d3, FIX2(1.111140466));
1238                     z2 = MULTIPLY(z2, - FIX2(1.387039845));
1239                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1240                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1241
1242                     tmp0 = z3 + z5;
1243                     tmp1 += z2;
1244                     tmp2 += z2;
1245                     tmp3 = z4 + z5;
1246                 }
1247             }
1248             else
1249             {
1250                 if (d1)
1251                 {
1252             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1253                     z4 = d5 + d1;
1254                     z5 = MULTIPLY(z4, FIX(1.175875602));
1255
1256                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1257                     tmp3 = MULTIPLY(d1, FIX2(0.601344887));
1258                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1259                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1260                     z4 = MULTIPLY(z4, FIX2(0.785694958));
1261
1262                     tmp0 = z1 + z5;
1263                     tmp1 += z4;
1264                     tmp2 = z2 + z5;
1265                     tmp3 += z4;
1266                 }
1267                 else
1268                 {
1269             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1270                     tmp0 = MULTIPLY(d5, FIX(1.175875602));
1271                     tmp1 = MULTIPLY(d5, FIX2(0.275899380));
1272                     tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
1273                     tmp3 = MULTIPLY(d5, FIX2(0.785694958));
1274                 }
1275             }
1276         }
1277         else
1278         {
1279             if (d3)
1280             {
1281                 if (d1)
1282                 {
1283             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1284                     z5 = d3 + d1;
1285
1286                     tmp2 = MULTIPLY(d3, - FIX(1.451774981));
1287                     tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
1288                     z1 = MULTIPLY(d1, FIX(1.061594337));
1289                     z2 = MULTIPLY(d3, - FIX(2.172734803));
1290                     z4 = MULTIPLY(z5, FIX(0.785694958));
1291                     z5 = MULTIPLY(z5, FIX(1.175875602));
1292
1293                     tmp0 = z1 - z4;
1294                     tmp1 = z2 + z4;
1295                     tmp2 += z5;
1296                     tmp3 += z5;
1297                 }
1298                 else
1299                 {
1300             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1301                     tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
1302                     tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
1303                     tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
1304                     tmp3 = MULTIPLY(d3, FIX(1.175875602));
1305                 }
1306             }
1307             else
1308             {
1309                 if (d1)
1310                 {
1311             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1312                     tmp0 = MULTIPLY(d1, FIX2(0.275899379));
1313                     tmp1 = MULTIPLY(d1, FIX2(0.785694958));
1314                     tmp2 = MULTIPLY(d1, FIX(1.175875602));
1315                     tmp3 = MULTIPLY(d1, FIX2(1.387039845));
1316                 }
1317                 else
1318                 {
1319             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1320                     tmp0 = tmp1 = tmp2 = tmp3 = 0;
1321                 }
1322             }
1323         }
1324     }
1325
1326     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1327
1328     dataptr[DCTSIZE*0] = (dctelem_t) DESCALE(tmp10 + tmp3,
1329                        CONST_BITS+PASS1_BITS+3);
1330     dataptr[DCTSIZE*7] = (dctelem_t) DESCALE(tmp10 - tmp3,
1331                        CONST_BITS+PASS1_BITS+3);
1332     dataptr[DCTSIZE*1] = (dctelem_t) DESCALE(tmp11 + tmp2,
1333                        CONST_BITS+PASS1_BITS+3);
1334     dataptr[DCTSIZE*6] = (dctelem_t) DESCALE(tmp11 - tmp2,
1335                        CONST_BITS+PASS1_BITS+3);
1336     dataptr[DCTSIZE*2] = (dctelem_t) DESCALE(tmp12 + tmp1,
1337                        CONST_BITS+PASS1_BITS+3);
1338     dataptr[DCTSIZE*5] = (dctelem_t) DESCALE(tmp12 - tmp1,
1339                        CONST_BITS+PASS1_BITS+3);
1340     dataptr[DCTSIZE*3] = (dctelem_t) DESCALE(tmp13 + tmp0,
1341                        CONST_BITS+PASS1_BITS+3);
1342     dataptr[DCTSIZE*4] = (dctelem_t) DESCALE(tmp13 - tmp0,
1343                        CONST_BITS+PASS1_BITS+3);
1344
1345     dataptr++;             /* advance pointer to next column */
1346     }
1347 }
1348