]> git.sesse.net Git - vlc/blob - plugins/idct/idct.c
b6927cedb250bf7f346fa94ea816675254b375c4
[vlc] / plugins / idct / idct.c
1 /*****************************************************************************
2  * idct.c : IDCT module
3  *****************************************************************************
4  * Copyright (C) 1999, 2000 VideoLAN
5  * $Id: idct.c,v 1.4 2001/01/16 05:04:25 sam Exp $
6  *
7  * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  * 
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 #define MODULE_NAME idct
25
26 /*****************************************************************************
27  * Preamble
28  *****************************************************************************/
#include "defs.h"

#include <stdlib.h>                                      /* malloc(), free() */
#include <string.h>                                              /* memset() */

#include "config.h"
#include "common.h"
#include "threads.h"
#include "mtime.h"
#include "tests.h"

#include "video.h"
#include "video_output.h"

#include "video_decoder.h"

#include "modules.h"
#include "modules_inner.h"

#include "idct.h"
48
49 /*****************************************************************************
50  * Local and extern prototypes.
51  *****************************************************************************/
52 static void idct_getfunctions( function_list_t * p_function_list );
53
54 static int  idct_Probe      ( probedata_t *p_data );
55 static void vdec_InitIDCT   ( vdec_thread_t * p_vdec);
56        void vdec_SparseIDCT ( vdec_thread_t * p_vdec, dctelem_t * p_block,
57                               int i_sparse_pos);
58 static void vdec_IDCT       ( vdec_thread_t * p_vdec, dctelem_t * p_block,
59                               int i_idontcare );
60
61
62 /*****************************************************************************
63  * Build configuration tree.
64  *****************************************************************************/
65 MODULE_CONFIG_START
66 ADD_WINDOW( "Configuration for IDCT module" )
67     ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
68 MODULE_CONFIG_END
69
70 /*****************************************************************************
71  * InitModule: get the module structure and configuration.
72  *****************************************************************************
73  * We have to fill psz_name, psz_longname and psz_version. These variables
74  * will be strdup()ed later by the main application because the module can
75  * be unloaded later to save memory, and we want to be able to access this
76  * data even after the module has been unloaded.
77  *****************************************************************************/
78 int InitModule( module_t * p_module )
79 {
80     p_module->psz_name = MODULE_STRING;
81     p_module->psz_longname = "C IDCT module";
82     p_module->psz_version = VERSION;
83
84     p_module->i_capabilities = MODULE_CAPABILITY_NULL
85                                 | MODULE_CAPABILITY_IDCT;
86
87     return( 0 );
88 }
89
90 /*****************************************************************************
91  * ActivateModule: set the module to an usable state.
92  *****************************************************************************
93  * This function fills the capability functions and the configuration
94  * structure. Once ActivateModule() has been called, the i_usage can
95  * be set to 0 and calls to NeedModule() be made to increment it. To unload
96  * the module, one has to wait until i_usage == 0 and call DeactivateModule().
97  *****************************************************************************/
98 int ActivateModule( module_t * p_module )
99 {
100     p_module->p_functions = malloc( sizeof( module_functions_t ) );
101     if( p_module->p_functions == NULL )
102     {
103         return( -1 );
104     }
105
106     idct_getfunctions( &p_module->p_functions->idct );
107
108     p_module->p_config = p_config;
109
110     return( 0 );
111 }
112
113 /*****************************************************************************
114  * DeactivateModule: make sure the module can be unloaded.
115  *****************************************************************************
116  * This function must only be called when i_usage == 0. If it successfully
117  * returns, i_usage can be set to -1 and the module unloaded. Be careful to
118  * lock usage_lock during the whole process.
119  *****************************************************************************/
120 int DeactivateModule( module_t * p_module )
121 {
122     free( p_module->p_functions );
123
124     return( 0 );
125 }
126
127 /* Following functions are local */
128
129 /*****************************************************************************
130  * Functions exported as capabilities. They are declared as static so that
131  * we don't pollute the namespace too much.
132  *****************************************************************************/
133 static void idct_getfunctions( function_list_t * p_function_list )
134 {
135     p_function_list->pf_probe = idct_Probe;
136     p_function_list->functions.idct.pf_init = vdec_InitIDCT;
137     p_function_list->functions.idct.pf_sparse_idct = vdec_SparseIDCT;
138     p_function_list->functions.idct.pf_idct = vdec_IDCT;
139 }
140
141 /*****************************************************************************
142  * idct_Probe: returns a preference score
143  *****************************************************************************/
144 static int idct_Probe( probedata_t *p_data )
145 {
146     if( TestMethod( IDCT_METHOD_VAR, "idct" ) )
147     {
148         return( 999 );
149     }
150
151     /* This plugin always works */
152     return( 50 );
153 }
154
155 /*****************************************************************************
156  * vdec_InitIDCT : initialize datas for vdec_SparseIDCT
157  *****************************************************************************/
158 static void vdec_InitIDCT (vdec_thread_t * p_vdec)
159 {
160     int i;
161
162     dctelem_t * p_pre = p_vdec->p_pre_idct;
163     memset( p_pre, 0, 64*64*sizeof(dctelem_t) );
164
165     for( i=0 ; i < 64 ; i++ )
166     {
167         p_pre[i*64+i] = 1 << SPARSE_SCALE_FACTOR;
168         vdec_IDCT( p_vdec, &p_pre[i*64], 0) ;
169     }
170     return;
171 }
172
173 /*****************************************************************************
174  * vdec_IDCT : IDCT function for normal matrices
175  *****************************************************************************/
176 static void vdec_IDCT( vdec_thread_t * p_vdec, dctelem_t * p_block,
177                        int i_idontcare )
178 {
179     s32 tmp0, tmp1, tmp2, tmp3;
180     s32 tmp10, tmp11, tmp12, tmp13;
181     s32 z1, z2, z3, z4, z5;
182     s32 d0, d1, d2, d3, d4, d5, d6, d7;
183     dctelem_t * dataptr;
184     int rowctr;
185
186     SHIFT_TEMPS
187
188     /* Pass 1: process rows. */
189     /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
190     /* furthermore, we scale the results by 2**PASS1_BITS. */
191
192     dataptr = p_block;
193
194     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
195     {
196         /* Due to quantization, we will usually find that many of the input
197          * coefficients are zero, especially the AC terms.  We can exploit this
198          * by short-circuiting the IDCT calculation for any row in which all
199          * the AC terms are zero.  In that case each output is equal to the
200          * DC coefficient (with scale factor as needed).
201          * With typical images and quantization tables, half or more of the
202          * row DCT calculations can be simplified this way.
203          */
204
205         register int * idataptr = (int*)dataptr;
206         d0 = dataptr[0];
207         d1 = dataptr[1];
208         if ( (d1 == 0) && ((idataptr[1] | idataptr[2] | idataptr[3]) == 0) )
209         {
210       /* AC terms all zero */
211             if (d0)
212             {
213       /* Compute a 32 bit value to assign. */
214                 dctelem_t dcval = (dctelem_t) (d0 << PASS1_BITS);
215                 register int v = (dcval & 0xffff) | (dcval << 16);
216
217                 idataptr[0] = v;
218                 idataptr[1] = v;
219                 idataptr[2] = v;
220                 idataptr[3] = v;
221             }
222
223             dataptr += DCTSIZE; /* advance pointer to next row */
224             continue;
225         }
226         d2 = dataptr[2];
227         d3 = dataptr[3];
228         d4 = dataptr[4];
229         d5 = dataptr[5];
230         d6 = dataptr[6];
231         d7 = dataptr[7];
232
233     /* Even part: reverse the even part of the forward DCT. */
234     /* The rotator is sqrt(2)*c(-6). */
235         if (d6)
236         {
237             if (d4)
238             {
239                 if (d2)
240                 {
241                     if (d0)
242                     {
243             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
244                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
245                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
246                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
247
248                         tmp0 = (d0 + d4) << CONST_BITS;
249                         tmp1 = (d0 - d4) << CONST_BITS;
250
251                         tmp10 = tmp0 + tmp3;
252                         tmp13 = tmp0 - tmp3;
253                         tmp11 = tmp1 + tmp2;
254                         tmp12 = tmp1 - tmp2;
255                     }
256                     else
257                     {
258                     /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
259                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
260                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
261                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
262
263                         tmp0 = d4 << CONST_BITS;
264
265                         tmp10 = tmp0 + tmp3;
266                         tmp13 = tmp0 - tmp3;
267                         tmp11 = tmp2 - tmp0;
268                         tmp12 = -(tmp0 + tmp2);
269                         }
270                 }
271                 else
272                 {
273                     if (d0)
274                     {
275             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
276                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
277                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
278
279                         tmp0 = (d0 + d4) << CONST_BITS;
280                         tmp1 = (d0 - d4) << CONST_BITS;
281
282                         tmp10 = tmp0 + tmp3;
283                         tmp13 = tmp0 - tmp3;
284                         tmp11 = tmp1 + tmp2;
285                         tmp12 = tmp1 - tmp2;
286                         }
287                     else
288                     {
289                     /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
290                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
291                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
292
293                         tmp0 = d4 << CONST_BITS;
294
295                         tmp10 = tmp0 + tmp3;
296                         tmp13 = tmp0 - tmp3;
297                         tmp11 = tmp2 - tmp0;
298                         tmp12 = -(tmp0 + tmp2);
299                         }
300                 }
301             }
302             else
303             {
304                 if (d2)
305                 {
306                     if (d0)
307                     {
308             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
309                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
310                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
311                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
312
313                         tmp0 = d0 << CONST_BITS;
314
315                         tmp10 = tmp0 + tmp3;
316                         tmp13 = tmp0 - tmp3;
317                         tmp11 = tmp0 + tmp2;
318                         tmp12 = tmp0 - tmp2;
319                     }
320                     else
321                     {
322                     /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
323                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
324                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
325                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
326
327                         tmp10 = tmp3;
328                         tmp13 = -tmp3;
329                         tmp11 = tmp2;
330                         tmp12 = -tmp2;
331                             }
332                 }
333                 else
334                 {
335                     if (d0)
336                     {
337             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
338                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
339                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
340
341                         tmp0 = d0 << CONST_BITS;
342
343                         tmp10 = tmp0 + tmp3;
344                         tmp13 = tmp0 - tmp3;
345                         tmp11 = tmp0 + tmp2;
346                         tmp12 = tmp0 - tmp2;
347                     }
348                     else
349                     {
350             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
351                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
352                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
353
354                         tmp10 = tmp3;
355                         tmp13 = -tmp3;
356                         tmp11 = tmp2;
357                         tmp12 = -tmp2;
358                     }
359                 }
360             }
361         }
362         else
363         {
364             if (d4)
365             {
366                 if (d2)
367                 {
368                     if (d0)
369                     {
370                     /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
371                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
372                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
373
374                         tmp0 = (d0 + d4) << CONST_BITS;
375                         tmp1 = (d0 - d4) << CONST_BITS;
376
377                         tmp10 = tmp0 + tmp3;
378                         tmp13 = tmp0 - tmp3;
379                         tmp11 = tmp1 + tmp2;
380                         tmp12 = tmp1 - tmp2;
381                     }
382                     else
383                     {
384             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
385                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
386                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
387
388                         tmp0 = d4 << CONST_BITS;
389
390                         tmp10 = tmp0 + tmp3;
391                         tmp13 = tmp0 - tmp3;
392                         tmp11 = tmp2 - tmp0;
393                         tmp12 = -(tmp0 + tmp2);
394                     }
395                 }
396                 else
397                 {
398                     if (d0)
399                     {
400             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
401                         tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
402                         tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
403                     }
404                     else
405                     {
406             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
407                         tmp10 = tmp13 = d4 << CONST_BITS;
408                         tmp11 = tmp12 = -tmp10;
409                     }
410                 }
411             }
412             else
413             {
414                 if (d2)
415                 {
416                     if (d0)
417                     {
418             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
419                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
420                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
421
422                         tmp0 = d0 << CONST_BITS;
423
424                         tmp10 = tmp0 + tmp3;
425                         tmp13 = tmp0 - tmp3;
426                         tmp11 = tmp0 + tmp2;
427                         tmp12 = tmp0 - tmp2;
428                     }
429                     else
430                     {
431             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
432                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
433                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
434
435                         tmp10 = tmp3;
436                         tmp13 = -tmp3;
437                         tmp11 = tmp2;
438                         tmp12 = -tmp2;
439                     }
440                 }
441                 else
442                 {
443                     if (d0)
444                     {
445             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
446                         tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
447                     }
448                     else
449                     {
450             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
451                         tmp10 = tmp13 = tmp11 = tmp12 = 0;
452                     }
453                 }
454             }
455         }
456
457
458     /* Odd part per figure 8; the matrix is unitary and hence its
459      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
460      */
461
462         if (d7)
463             {
464             if (d5)
465             {
466                 if (d3)
467                 {
468                     if (d1)
469                     {
470             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
471                         z1 = d7 + d1;
472                         z2 = d5 + d3;
473                         z3 = d7 + d3;
474                         z4 = d5 + d1;
475                         z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
476
477                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
478                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
479                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
480                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
481                         z1 = MULTIPLY(z1, - FIX(0.899976223));
482                         z2 = MULTIPLY(z2, - FIX(2.562915447));
483                         z3 = MULTIPLY(z3, - FIX(1.961570560));
484                         z4 = MULTIPLY(z4, - FIX(0.390180644));
485
486                         z3 += z5;
487                         z4 += z5;
488
489                         tmp0 += z1 + z3;
490                         tmp1 += z2 + z4;
491                         tmp2 += z2 + z3;
492                         tmp3 += z1 + z4;
493                     }
494                     else
495                     {
496             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
497                         z2 = d5 + d3;
498                         z3 = d7 + d3;
499                         z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
500
501                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
502                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
503                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
504                         z1 = MULTIPLY(d7, - FIX(0.899976223));
505                         z2 = MULTIPLY(z2, - FIX(2.562915447));
506                         z3 = MULTIPLY(z3, - FIX(1.961570560));
507                         z4 = MULTIPLY(d5, - FIX(0.390180644));
508
509                         z3 += z5;
510                         z4 += z5;
511
512                         tmp0 += z1 + z3;
513                         tmp1 += z2 + z4;
514                         tmp2 += z2 + z3;
515                         tmp3 = z1 + z4;
516                         }
517                     }
518                 else
519                 {
520                     if (d1)
521                     {
522             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
523                         z1 = d7 + d1;
524                         z4 = d5 + d1;
525                         z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
526
527                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
528                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
529                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
530                         z1 = MULTIPLY(z1, - FIX(0.899976223));
531                         z2 = MULTIPLY(d5, - FIX(2.562915447));
532                         z3 = MULTIPLY(d7, - FIX(1.961570560));
533                         z4 = MULTIPLY(z4, - FIX(0.390180644));
534
535                         z3 += z5;
536                         z4 += z5;
537
538                         tmp0 += z1 + z3;
539                         tmp1 += z2 + z4;
540                         tmp2 = z2 + z3;
541                         tmp3 += z1 + z4;
542                     }
543                     else
544                     {
545             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
546                         z5 = MULTIPLY(d7 + d5, FIX(1.175875602));
547
548                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
549                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
550                         z1 = MULTIPLY(d7, - FIX(0.899976223));
551                         z3 = MULTIPLY(d7, - FIX(1.961570560));
552                         z2 = MULTIPLY(d5, - FIX(2.562915447));
553                         z4 = MULTIPLY(d5, - FIX(0.390180644));
554
555                         z3 += z5;
556                         z4 += z5;
557
558                         tmp0 += z3;
559                         tmp1 += z4;
560                         tmp2 = z2 + z3;
561                         tmp3 = z1 + z4;
562                     }
563                 }
564             }
565             else
566             {
567                 if (d3)
568                 {
569                     if (d1)
570                     {
571             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
572                         z1 = d7 + d1;
573                         z3 = d7 + d3;
574                         z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
575
576                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
577                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
578                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
579                         z1 = MULTIPLY(z1, - FIX(0.899976223));
580                         z2 = MULTIPLY(d3, - FIX(2.562915447));
581                         z3 = MULTIPLY(z3, - FIX(1.961570560));
582                         z4 = MULTIPLY(d1, - FIX(0.390180644));
583
584                         z3 += z5;
585                         z4 += z5;
586
587                         tmp0 += z1 + z3;
588                         tmp1 = z2 + z4;
589                         tmp2 += z2 + z3;
590                         tmp3 += z1 + z4;
591                     }
592                     else
593                     {
594             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
595                         z3 = d7 + d3;
596                         z5 = MULTIPLY(z3, FIX(1.175875602));
597
598                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
599                         tmp2 = MULTIPLY(d3, FIX(0.509795579));
600                         z1 = MULTIPLY(d7, - FIX(0.899976223));
601                         z2 = MULTIPLY(d3, - FIX(2.562915447));
602                         z3 = MULTIPLY(z3, - FIX2(0.785694958));
603
604                         tmp0 += z3;
605                         tmp1 = z2 + z5;
606                         tmp2 += z3;
607                         tmp3 = z1 + z5;
608                     }
609                 }
610                 else
611                 {
612                     if (d1)
613                     {
614             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
615                         z1 = d7 + d1;
616                         z5 = MULTIPLY(z1, FIX(1.175875602));
617
618                         tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
619                         tmp3 = MULTIPLY(d1, FIX2(1.111140466));
620                         z1 = MULTIPLY(z1, FIX2(0.275899379));
621                         z3 = MULTIPLY(d7, - FIX(1.961570560));
622                         z4 = MULTIPLY(d1, - FIX(0.390180644));
623
624                         tmp0 += z1;
625                         tmp1 = z4 + z5;
626                         tmp2 = z3 + z5;
627                         tmp3 += z1;
628                     }
629                 else
630                     {
631             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
632                         tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
633                         tmp1 = MULTIPLY(d7, FIX(1.175875602));
634                         tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
635                         tmp3 = MULTIPLY(d7, FIX2(0.275899379));
636                     }
637                 }
638             }
639         }
640         else
641         {
642             if (d5)
643             {
644                 if (d3)
645                 {
646                     if (d1)
647                     {
648             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
649                         z2 = d5 + d3;
650                         z4 = d5 + d1;
651                         z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
652
653                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
654                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
655                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
656                         z1 = MULTIPLY(d1, - FIX(0.899976223));
657                         z2 = MULTIPLY(z2, - FIX(2.562915447));
658                         z3 = MULTIPLY(d3, - FIX(1.961570560));
659                         z4 = MULTIPLY(z4, - FIX(0.390180644));
660
661                         z3 += z5;
662                         z4 += z5;
663
664                         tmp0 = z1 + z3;
665                         tmp1 += z2 + z4;
666                         tmp2 += z2 + z3;
667                         tmp3 += z1 + z4;
668                     }
669                     else
670                     {
671             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
672                         z2 = d5 + d3;
673                         z5 = MULTIPLY(z2, FIX(1.175875602));
674
675                         tmp1 = MULTIPLY(d5, FIX2(1.662939225));
676                         tmp2 = MULTIPLY(d3, FIX2(1.111140466));
677                         z2 = MULTIPLY(z2, - FIX2(1.387039845));
678                         z3 = MULTIPLY(d3, - FIX(1.961570560));
679                         z4 = MULTIPLY(d5, - FIX(0.390180644));
680
681                         tmp0 = z3 + z5;
682                         tmp1 += z2;
683                         tmp2 += z2;
684                         tmp3 = z4 + z5;
685                     }
686                 }
687                 else
688                 {
689                     if (d1)
690                     {
691             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
692                         z4 = d5 + d1;
693                         z5 = MULTIPLY(z4, FIX(1.175875602));
694
695                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
696                         tmp3 = MULTIPLY(d1, FIX2(0.601344887));
697                         z1 = MULTIPLY(d1, - FIX(0.899976223));
698                         z2 = MULTIPLY(d5, - FIX(2.562915447));
699                         z4 = MULTIPLY(z4, FIX2(0.785694958));
700
701                         tmp0 = z1 + z5;
702                         tmp1 += z4;
703                         tmp2 = z2 + z5;
704                         tmp3 += z4;
705                     }
706                     else
707                     {
708             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
709                         tmp0 = MULTIPLY(d5, FIX(1.175875602));
710                         tmp1 = MULTIPLY(d5, FIX2(0.275899380));
711                         tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
712                         tmp3 = MULTIPLY(d5, FIX2(0.785694958));
713                     }
714                 }
715             }
716             else
717             {
718                 if (d3)
719                 {
720                     if (d1)
721                     {
722             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
723                         z5 = d3 + d1;
724
725                         tmp2 = MULTIPLY(d3, - FIX(1.451774981));
726                         tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
727                         z1 = MULTIPLY(d1, FIX(1.061594337));
728                         z2 = MULTIPLY(d3, - FIX(2.172734803));
729                         z4 = MULTIPLY(z5, FIX(0.785694958));
730                         z5 = MULTIPLY(z5, FIX(1.175875602));
731
732                         tmp0 = z1 - z4;
733                         tmp1 = z2 + z4;
734                         tmp2 += z5;
735                         tmp3 += z5;
736                     }
737                     else
738                     {
739             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
740                         tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
741                         tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
742                         tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
743                         tmp3 = MULTIPLY(d3, FIX(1.175875602));
744                     }
745                 }
746                 else
747                 {
748                     if (d1)
749                     {
750             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
751                         tmp0 = MULTIPLY(d1, FIX2(0.275899379));
752                         tmp1 = MULTIPLY(d1, FIX2(0.785694958));
753                         tmp2 = MULTIPLY(d1, FIX(1.175875602));
754                         tmp3 = MULTIPLY(d1, FIX2(1.387039845));
755                     }
756                     else
757                     {
758             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
759                         tmp0 = tmp1 = tmp2 = tmp3 = 0;
760                     }
761                 }
762             }
763         }
764
765     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
766
767         dataptr[0] = (dctelem_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
768         dataptr[7] = (dctelem_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
769         dataptr[1] = (dctelem_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
770         dataptr[6] = (dctelem_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
771         dataptr[2] = (dctelem_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
772         dataptr[5] = (dctelem_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
773         dataptr[3] = (dctelem_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
774         dataptr[4] = (dctelem_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
775
776         dataptr += DCTSIZE;              /* advance pointer to next row */
777     }
778
779   /* Pass 2: process columns. */
780   /* Note that we must descale the results by a factor of 8 == 2**3, */
781   /* and also undo the PASS1_BITS scaling. */
782
783     dataptr = p_block;
784     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
785     {
786     /* Columns of zeroes can be exploited in the same way as we did with rows.
787      * However, the row calculation has created many nonzero AC terms, so the
788      * simplification applies less often (typically 5% to 10% of the time).
789      * On machines with very fast multiplication, the zero tests may cost
790      * more than they save.  They are woven into the nested conditionals
791      * below, so they cannot be disabled by commenting out a single section.
792      */
793
794         d0 = dataptr[DCTSIZE*0];
795         d1 = dataptr[DCTSIZE*1];
796         d2 = dataptr[DCTSIZE*2];
797         d3 = dataptr[DCTSIZE*3];
798         d4 = dataptr[DCTSIZE*4];
799         d5 = dataptr[DCTSIZE*5];
800         d6 = dataptr[DCTSIZE*6];
801         d7 = dataptr[DCTSIZE*7];
802
803     /* Even part: reverse the even part of the forward DCT. */
804     /* The rotator is sqrt(2)*c(-6). */
805         if (d6)
806         {
807             if (d4)
808             {
809                 if (d2)
810                 {
811                     if (d0)
812                     {
813             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
814                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
815                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
816                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
817
818                         tmp0 = (d0 + d4) << CONST_BITS;
819                         tmp1 = (d0 - d4) << CONST_BITS;
820
821                         tmp10 = tmp0 + tmp3;
822                         tmp13 = tmp0 - tmp3;
823                         tmp11 = tmp1 + tmp2;
824                         tmp12 = tmp1 - tmp2;
825                     }
826                     else
827                     {
828             /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
829                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
830                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
831                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
832
833                         tmp0 = d4 << CONST_BITS;
834
835                         tmp10 = tmp0 + tmp3;
836                         tmp13 = tmp0 - tmp3;
837                         tmp11 = tmp2 - tmp0;
838                         tmp12 = -(tmp0 + tmp2);
839                     }
840                 }
841                 else
842                 {
843                     if (d0)
844                     {
845             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
846                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
847                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
848
849                         tmp0 = (d0 + d4) << CONST_BITS;
850                         tmp1 = (d0 - d4) << CONST_BITS;
851
852                         tmp10 = tmp0 + tmp3;
853                         tmp13 = tmp0 - tmp3;
854                         tmp11 = tmp1 + tmp2;
855                         tmp12 = tmp1 - tmp2;
856                     }
857                     else
858                     {
859             /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
860                         tmp2 = MULTIPLY(d6, -FIX2(1.306562965));
861                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
862
863                         tmp0 = d4 << CONST_BITS;
864
865                         tmp10 = tmp0 + tmp3;
866                         tmp13 = tmp0 - tmp3;
867                         tmp11 = tmp2 - tmp0;
868                         tmp12 = -(tmp0 + tmp2);
869                     }
870                 }
871             }
872             else
873             {
874                 if (d2)
875                 {
876                     if (d0)
877                     {
878             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
879                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
880                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
881                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
882
883                         tmp0 = d0 << CONST_BITS;
884
885                         tmp10 = tmp0 + tmp3;
886                         tmp13 = tmp0 - tmp3;
887                         tmp11 = tmp0 + tmp2;
888                         tmp12 = tmp0 - tmp2;
889                     }
890                     else
891                     {
892             /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
893                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
894                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
895                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
896
897                         tmp10 = tmp3;
898                         tmp13 = -tmp3;
899                         tmp11 = tmp2;
900                         tmp12 = -tmp2;
901                     }
902                 }
903                 else
904                 {
905                     if (d0)
906                     {
907             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
908                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
909                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
910
911                     tmp0 = d0 << CONST_BITS;
912
913                     tmp10 = tmp0 + tmp3;
914                     tmp13 = tmp0 - tmp3;
915                     tmp11 = tmp0 + tmp2;
916                     tmp12 = tmp0 - tmp2;
917                 }
918                 else
919                 {
920             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
921                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
922                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
923                     tmp10 = tmp3;
924                     tmp13 = -tmp3;
925                     tmp11 = tmp2;
926                     tmp12 = -tmp2;
927                 }
928             }
929         }
930     }
931     else
932     {
933         if (d4)
934         {
935             if (d2)
936             {
937                 if (d0)
938                 {
939             /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
940                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
941                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
942
943                     tmp0 = (d0 + d4) << CONST_BITS;
944                     tmp1 = (d0 - d4) << CONST_BITS;
945
946                     tmp10 = tmp0 + tmp3;
947                     tmp13 = tmp0 - tmp3;
948                     tmp11 = tmp1 + tmp2;
949                     tmp12 = tmp1 - tmp2;
950                 }
951                 else
952                 {
953             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
954                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
955                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
956
957                     tmp0 = d4 << CONST_BITS;
958
959                     tmp10 = tmp0 + tmp3;
960                     tmp13 = tmp0 - tmp3;
961                     tmp11 = tmp2 - tmp0;
962                     tmp12 = -(tmp0 + tmp2);
963                 }
964             }
965             else
966             {
967                 if (d0)
968                 {
969             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
970                     tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
971                     tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
972                 }
973                 else
974                 {
975             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
976                     tmp10 = tmp13 = d4 << CONST_BITS;
977                     tmp11 = tmp12 = -tmp10;
978                 }
979             }
980         }
981         else
982         {
983         if (d2)
984         {
985             if (d0)
986             {
987             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
988                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
989                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
990
991                     tmp0 = d0 << CONST_BITS;
992
993                     tmp10 = tmp0 + tmp3;
994                     tmp13 = tmp0 - tmp3;
995                     tmp11 = tmp0 + tmp2;
996                     tmp12 = tmp0 - tmp2;
997             }
998             else
999             {
1000             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
1001                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
1002                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
1003
1004                     tmp10 = tmp3;
1005                     tmp13 = -tmp3;
1006                     tmp11 = tmp2;
1007                     tmp12 = -tmp2;
1008             }
1009         }
1010         else
1011         {
1012             if (d0)
1013                 {
1014             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
1015                     tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
1016                 }
1017                 else
1018                 {
1019             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
1020                     tmp10 = tmp13 = tmp11 = tmp12 = 0;
1021                 }
1022             }
1023         }
1024     }
1025
1026     /* Odd part per figure 8; the matrix is unitary and hence its
1027      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
1028      */
1029     if (d7)
1030     {
1031         if (d5)
1032         {
1033             if (d3)
1034             {
1035                 if (d1)
1036                 {
1037             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
1038                     z1 = d7 + d1;
1039                     z2 = d5 + d3;
1040                     z3 = d7 + d3;
1041                     z4 = d5 + d1;
1042                     z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
1043
1044                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1045                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1046                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1047                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1048                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1049                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1050                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1051                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1052
1053                     z3 += z5;
1054                     z4 += z5;
1055
1056                     tmp0 += z1 + z3;
1057                     tmp1 += z2 + z4;
1058                     tmp2 += z2 + z3;
1059                     tmp3 += z1 + z4;
1060                 }
1061                 else
1062                 {
1063             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
1064                     z2 = d5 + d3;
1065                     z3 = d7 + d3;
1066                     z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
1067
1068                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1069                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1070                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1071                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1072                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1073                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1074                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1075
1076                     z3 += z5;
1077                     z4 += z5;
1078
1079                     tmp0 += z1 + z3;
1080                     tmp1 += z2 + z4;
1081                     tmp2 += z2 + z3;
1082                     tmp3 = z1 + z4;
1083                 }
1084             }
1085             else
1086             {
1087                 if (d1)
1088                 {
1089             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
1090                     z1 = d7 + d1;
1091                     z4 = d5 + d1;
1092                     z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
1093
1094                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1095                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1096                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1097                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1098                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1099                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1100                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1101
1102                     z3 += z5;
1103                     z4 += z5;
1104
1105                     tmp0 += z1 + z3;
1106                     tmp1 += z2 + z4;
1107                     tmp2 = z2 + z3;
1108                     tmp3 += z1 + z4;
1109                 }
1110                 else
1111                 {
1112             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
1113                     z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
1114
1115                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1116                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1117                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1118                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1119                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1120                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1121
1122                     z3 += z5;
1123                     z4 += z5;
1124
1125                     tmp0 += z3;
1126                     tmp1 += z4;
1127                     tmp2 = z2 + z3;
1128                     tmp3 = z1 + z4;
1129                 }
1130             }
1131         }
1132         else
1133         {
1134             if (d3)
1135             {
1136                 if (d1)
1137                 {
1138             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
1139                     z1 = d7 + d1;
1140                     z3 = d7 + d3;
1141                     z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
1142
1143                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1144                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1145                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1146                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1147                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1148                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1149                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1150
1151                     z3 += z5;
1152                     z4 += z5;
1153
1154                     tmp0 += z1 + z3;
1155                     tmp1 = z2 + z4;
1156                     tmp2 += z2 + z3;
1157                     tmp3 += z1 + z4;
1158                 }
1159                 else
1160                 {
1161             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1162                     z3 = d7 + d3;
1163                     z5 = MULTIPLY(z3, FIX(1.175875602));
1164
1165                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1166                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1167                     tmp2 = MULTIPLY(d3, FIX(0.509795579));
1168                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1169                     z3 = MULTIPLY(z3, - FIX2(0.785694958));
1170
1171                     tmp0 += z3;
1172                     tmp1 = z2 + z5;
1173                     tmp2 += z3;
1174                     tmp3 = z1 + z5;
1175                 }
1176             }
1177             else
1178             {
1179                 if (d1)
1180                 {
1181             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1182                     z1 = d7 + d1;
1183                     z5 = MULTIPLY(z1, FIX(1.175875602));
1184
1185                     tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
1186                     tmp3 = MULTIPLY(d1, FIX2(1.111140466));
1187                     z1 = MULTIPLY(z1, FIX2(0.275899379));
1188                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1189                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1190
1191                     tmp0 += z1;
1192                     tmp1 = z4 + z5;
1193                     tmp2 = z3 + z5;
1194                     tmp3 += z1;
1195                 }
1196                 else
1197                 {
1198             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1199                     tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
1200                     tmp1 = MULTIPLY(d7, FIX(1.175875602));
1201                     tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
1202                     tmp3 = MULTIPLY(d7, FIX2(0.275899379));
1203                 }
1204             }
1205         }
1206     }
1207     else
1208     {
1209         if (d5)
1210         {
1211             if (d3)
1212             {
1213                 if (d1)
1214                 {
1215             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1216                     z2 = d5 + d3;
1217                     z4 = d5 + d1;
1218                     z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
1219
1220                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1221                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1222                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1223                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1224                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1225                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1226                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1227
1228                     z3 += z5;
1229                     z4 += z5;
1230
1231                     tmp0 = z1 + z3;
1232                     tmp1 += z2 + z4;
1233                     tmp2 += z2 + z3;
1234                     tmp3 += z1 + z4;
1235                 }
1236                 else
1237                 {
1238             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1239                     z2 = d5 + d3;
1240                     z5 = MULTIPLY(z2, FIX(1.175875602));
1241
1242                     tmp1 = MULTIPLY(d5, FIX2(1.662939225));
1243                     tmp2 = MULTIPLY(d3, FIX2(1.111140466));
1244                     z2 = MULTIPLY(z2, - FIX2(1.387039845));
1245                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1246                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1247
1248                     tmp0 = z3 + z5;
1249                     tmp1 += z2;
1250                     tmp2 += z2;
1251                     tmp3 = z4 + z5;
1252                 }
1253             }
1254             else
1255             {
1256                 if (d1)
1257                 {
1258             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1259                     z4 = d5 + d1;
1260                     z5 = MULTIPLY(z4, FIX(1.175875602));
1261
1262                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1263                     tmp3 = MULTIPLY(d1, FIX2(0.601344887));
1264                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1265                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1266                     z4 = MULTIPLY(z4, FIX2(0.785694958));
1267
1268                     tmp0 = z1 + z5;
1269                     tmp1 += z4;
1270                     tmp2 = z2 + z5;
1271                     tmp3 += z4;
1272                 }
1273                 else
1274                 {
1275             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1276                     tmp0 = MULTIPLY(d5, FIX(1.175875602));
1277                     tmp1 = MULTIPLY(d5, FIX2(0.275899380));
1278                     tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
1279                     tmp3 = MULTIPLY(d5, FIX2(0.785694958));
1280                 }
1281             }
1282         }
1283         else
1284         {
1285             if (d3)
1286             {
1287                 if (d1)
1288                 {
1289             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1290                     z5 = d3 + d1;
1291
1292                     tmp2 = MULTIPLY(d3, - FIX(1.451774981));
1293                     tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
1294                     z1 = MULTIPLY(d1, FIX(1.061594337));
1295                     z2 = MULTIPLY(d3, - FIX(2.172734803));
1296                     z4 = MULTIPLY(z5, FIX(0.785694958));
1297                     z5 = MULTIPLY(z5, FIX(1.175875602));
1298
1299                     tmp0 = z1 - z4;
1300                     tmp1 = z2 + z4;
1301                     tmp2 += z5;
1302                     tmp3 += z5;
1303                 }
1304                 else
1305                 {
1306             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1307                     tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
1308                     tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
1309                     tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
1310                     tmp3 = MULTIPLY(d3, FIX(1.175875602));
1311                 }
1312             }
1313             else
1314             {
1315                 if (d1)
1316                 {
1317             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1318                     tmp0 = MULTIPLY(d1, FIX2(0.275899379));
1319                     tmp1 = MULTIPLY(d1, FIX2(0.785694958));
1320                     tmp2 = MULTIPLY(d1, FIX(1.175875602));
1321                     tmp3 = MULTIPLY(d1, FIX2(1.387039845));
1322                 }
1323                 else
1324                 {
1325             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1326                     tmp0 = tmp1 = tmp2 = tmp3 = 0;
1327                 }
1328             }
1329         }
1330     }
1331
1332     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1333
1334     dataptr[DCTSIZE*0] = (dctelem_t) DESCALE(tmp10 + tmp3,
1335                        CONST_BITS+PASS1_BITS+3);
1336     dataptr[DCTSIZE*7] = (dctelem_t) DESCALE(tmp10 - tmp3,
1337                        CONST_BITS+PASS1_BITS+3);
1338     dataptr[DCTSIZE*1] = (dctelem_t) DESCALE(tmp11 + tmp2,
1339                        CONST_BITS+PASS1_BITS+3);
1340     dataptr[DCTSIZE*6] = (dctelem_t) DESCALE(tmp11 - tmp2,
1341                        CONST_BITS+PASS1_BITS+3);
1342     dataptr[DCTSIZE*2] = (dctelem_t) DESCALE(tmp12 + tmp1,
1343                        CONST_BITS+PASS1_BITS+3);
1344     dataptr[DCTSIZE*5] = (dctelem_t) DESCALE(tmp12 - tmp1,
1345                        CONST_BITS+PASS1_BITS+3);
1346     dataptr[DCTSIZE*3] = (dctelem_t) DESCALE(tmp13 + tmp0,
1347                        CONST_BITS+PASS1_BITS+3);
1348     dataptr[DCTSIZE*4] = (dctelem_t) DESCALE(tmp13 - tmp0,
1349                        CONST_BITS+PASS1_BITS+3);
1350
1351     dataptr++;             /* advance pointer to next column */
1352     }
1353 }
1354