]> git.sesse.net Git - vlc/blob - plugins/idct/idct.c
* Fixed the BeOS compile typo.
[vlc] / plugins / idct / idct.c
1 /*****************************************************************************
2  * idct.c : IDCT module
3  *****************************************************************************
4  * Copyright (C) 1999, 2000 VideoLAN
5  * $Id: idct.c,v 1.10 2001/05/30 17:03:12 sam Exp $
6  *
7  * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  * 
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 #define MODULE_NAME idct
25 #include "modules_inner.h"
26
27 /*****************************************************************************
28  * Preamble
29  *****************************************************************************/
30 #include "defs.h"
31
32 #include <stdlib.h>
33
34 #include "config.h"
35 #include "common.h"
36 #include "threads.h"
37 #include "mtime.h"
38 #include "tests.h"
39
40 #include "video.h"
41 #include "video_output.h"
42
43 #include "video_decoder.h"
44
45 #include "modules.h"
46
47 #include "vdec_block.h"
48 #include "vdec_idct.h"
49
50 /*****************************************************************************
51  * Local and extern prototypes.
52  *****************************************************************************/
/* Fills the IDCT function table; called from the module activation block. */
53 static void idct_getfunctions( function_list_t * p_function_list );
/* Returns this plugin's preference score (999 or 50, see the definition). */
54 static int  idct_Probe      ( probedata_t *p_data );
/* Scan-table hook; the definition in this file has an empty body. */
55 static void vdec_NormScan   ( u8 ppi_scan[2][64] );
56
57 /*****************************************************************************
58  * Build configuration tree.
59  *****************************************************************************/
/* Configuration description of the module: a window title followed by a
 * single placeholder comment; there are no real settings yet.
 * NOTE(review): MODULE_CONFIG_START/STOP, ADD_WINDOW and ADD_COMMENT are
 * macros whose expansion is not visible in this file (presumably they come
 * from "modules_inner.h") -- confirm their contract before adding entries. */
60 MODULE_CONFIG_START
61 ADD_WINDOW( "Configuration for IDCT module" )
62     ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
63 MODULE_CONFIG_STOP
64
/* Module initialisation: stores the capability flags and the long name in
 * the module descriptor reachable through p_module.
 * NOTE(review): p_module is not declared here; presumably it is provided by
 * the MODULE_INIT_START macro -- confirm. */
65 MODULE_INIT_START
    /* Advertise the IDCT capability on top of the "null" capability value. */
66     p_module->i_capabilities = MODULE_CAPABILITY_NULL
67                                 | MODULE_CAPABILITY_IDCT;
68     p_module->psz_longname = "IDCT module";
69 MODULE_INIT_STOP
70
/* Module activation: hands the idct entry of the module's function list to
 * idct_getfunctions(), which fills in the probe callback and the IDCT
 * function table. */
71 MODULE_ACTIVATE_START
72     idct_getfunctions( &p_module->p_functions->idct );
73 MODULE_ACTIVATE_STOP
74
/* Module deactivation: there are no statements between START and STOP, so
 * this module adds no deactivation work of its own. */
75 MODULE_DEACTIVATE_START
76 MODULE_DEACTIVATE_STOP
77
78 /* Following functions are local */
79
80 /*****************************************************************************
81  * Functions exported as capabilities. They are declared as static so that
82  * we don't pollute the namespace too much.
83  *****************************************************************************/
84 static void idct_getfunctions( function_list_t * p_function_list )
85 {
86     p_function_list->pf_probe = idct_Probe;
87 #define F p_function_list->functions.idct
88     F.pf_idct_init = _M( vdec_InitIDCT );
89     F.pf_sparse_idct = _M( vdec_SparseIDCT );
90     F.pf_idct = _M( vdec_IDCT );
91     F.pf_norm_scan = vdec_NormScan;
92     F.pf_decode_init = _M( vdec_InitDecode );
93     F.pf_decode_mb_c = _M( vdec_DecodeMacroblockC );
94     F.pf_decode_mb_bw = _M( vdec_DecodeMacroblockBW );
95 #undef F
96 }
97
98 /*****************************************************************************
99  * idct_Probe: returns a preference score
100  *****************************************************************************/
101 static int idct_Probe( probedata_t *p_data )
102 {
103     if( TestMethod( IDCT_METHOD_VAR, "idct" ) )
104     {
105         return( 999 );
106     }
107
108     /* This plugin always works */
109     return( 50 );
110 }
111
112 /*****************************************************************************
113  * vdec_NormScan : Unused in this IDCT
114  *****************************************************************************/
115 static void vdec_NormScan( u8 ppi_scan[2][64] )
116 {
117 }
118
119 /*****************************************************************************
120  * vdec_IDCT : IDCT function for normal matrices
121  *****************************************************************************/
122 void _M( vdec_IDCT )( vdec_thread_t * p_vdec, dctelem_t * p_block,
123                 int i_idontcare )
124 {
125     s32 tmp0, tmp1, tmp2, tmp3;
126     s32 tmp10, tmp11, tmp12, tmp13;
127     s32 z1, z2, z3, z4, z5;
128     s32 d0, d1, d2, d3, d4, d5, d6, d7;
129     dctelem_t * dataptr;
130     int rowctr;
131
132     SHIFT_TEMPS
133
134     /* Pass 1: process rows. */
135     /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
136     /* furthermore, we scale the results by 2**PASS1_BITS. */
137
138     dataptr = p_block;
139
140     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
141     {
142         /* Due to quantization, we will usually find that many of the input
143          * coefficients are zero, especially the AC terms.  We can exploit this
144          * by short-circuiting the IDCT calculation for any row in which all
145          * the AC terms are zero.  In that case each output is equal to the
146          * DC coefficient (with scale factor as needed).
147          * With typical images and quantization tables, half or more of the
148          * row DCT calculations can be simplified this way.
149          */
150
151         register int * idataptr = (int*)dataptr;
152         d0 = dataptr[0];
153         d1 = dataptr[1];
154         if ( (d1 == 0) && ((idataptr[1] | idataptr[2] | idataptr[3]) == 0) )
155         {
156       /* AC terms all zero */
157             if (d0)
158             {
159       /* Compute a 32 bit value to assign. */
160                 dctelem_t dcval = (dctelem_t) (d0 << PASS1_BITS);
161                 register int v = (dcval & 0xffff) | (dcval << 16);
162
163                 idataptr[0] = v;
164                 idataptr[1] = v;
165                 idataptr[2] = v;
166                 idataptr[3] = v;
167             }
168
169             dataptr += DCTSIZE; /* advance pointer to next row */
170             continue;
171         }
172         d2 = dataptr[2];
173         d3 = dataptr[3];
174         d4 = dataptr[4];
175         d5 = dataptr[5];
176         d6 = dataptr[6];
177         d7 = dataptr[7];
178
179     /* Even part: reverse the even part of the forward DCT. */
180     /* The rotator is sqrt(2)*c(-6). */
181         if (d6)
182         {
183             if (d4)
184             {
185                 if (d2)
186                 {
187                     if (d0)
188                     {
189             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
190                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
191                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
192                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
193
194                         tmp0 = (d0 + d4) << CONST_BITS;
195                         tmp1 = (d0 - d4) << CONST_BITS;
196
197                         tmp10 = tmp0 + tmp3;
198                         tmp13 = tmp0 - tmp3;
199                         tmp11 = tmp1 + tmp2;
200                         tmp12 = tmp1 - tmp2;
201                     }
202                     else
203                     {
204                     /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
205                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
206                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
207                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
208
209                         tmp0 = d4 << CONST_BITS;
210
211                         tmp10 = tmp0 + tmp3;
212                         tmp13 = tmp0 - tmp3;
213                         tmp11 = tmp2 - tmp0;
214                         tmp12 = -(tmp0 + tmp2);
215                         }
216                 }
217                 else
218                 {
219                     if (d0)
220                     {
221             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
222                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
223                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
224
225                         tmp0 = (d0 + d4) << CONST_BITS;
226                         tmp1 = (d0 - d4) << CONST_BITS;
227
228                         tmp10 = tmp0 + tmp3;
229                         tmp13 = tmp0 - tmp3;
230                         tmp11 = tmp1 + tmp2;
231                         tmp12 = tmp1 - tmp2;
232                         }
233                     else
234                     {
235                     /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
236                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
237                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
238
239                         tmp0 = d4 << CONST_BITS;
240
241                         tmp10 = tmp0 + tmp3;
242                         tmp13 = tmp0 - tmp3;
243                         tmp11 = tmp2 - tmp0;
244                         tmp12 = -(tmp0 + tmp2);
245                         }
246                 }
247             }
248             else
249             {
250                 if (d2)
251                 {
252                     if (d0)
253                     {
254             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
255                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
256                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
257                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
258
259                         tmp0 = d0 << CONST_BITS;
260
261                         tmp10 = tmp0 + tmp3;
262                         tmp13 = tmp0 - tmp3;
263                         tmp11 = tmp0 + tmp2;
264                         tmp12 = tmp0 - tmp2;
265                     }
266                     else
267                     {
268                     /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
269                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
270                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
271                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
272
273                         tmp10 = tmp3;
274                         tmp13 = -tmp3;
275                         tmp11 = tmp2;
276                         tmp12 = -tmp2;
277                             }
278                 }
279                 else
280                 {
281                     if (d0)
282                     {
283             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
284                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
285                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
286
287                         tmp0 = d0 << CONST_BITS;
288
289                         tmp10 = tmp0 + tmp3;
290                         tmp13 = tmp0 - tmp3;
291                         tmp11 = tmp0 + tmp2;
292                         tmp12 = tmp0 - tmp2;
293                     }
294                     else
295                     {
296             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
297                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
298                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
299
300                         tmp10 = tmp3;
301                         tmp13 = -tmp3;
302                         tmp11 = tmp2;
303                         tmp12 = -tmp2;
304                     }
305                 }
306             }
307         }
308         else
309         {
310             if (d4)
311             {
312                 if (d2)
313                 {
314                     if (d0)
315                     {
316                     /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
317                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
318                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
319
320                         tmp0 = (d0 + d4) << CONST_BITS;
321                         tmp1 = (d0 - d4) << CONST_BITS;
322
323                         tmp10 = tmp0 + tmp3;
324                         tmp13 = tmp0 - tmp3;
325                         tmp11 = tmp1 + tmp2;
326                         tmp12 = tmp1 - tmp2;
327                     }
328                     else
329                     {
330             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
331                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
332                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
333
334                         tmp0 = d4 << CONST_BITS;
335
336                         tmp10 = tmp0 + tmp3;
337                         tmp13 = tmp0 - tmp3;
338                         tmp11 = tmp2 - tmp0;
339                         tmp12 = -(tmp0 + tmp2);
340                     }
341                 }
342                 else
343                 {
344                     if (d0)
345                     {
346             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
347                         tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
348                         tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
349                     }
350                     else
351                     {
352             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
353                         tmp10 = tmp13 = d4 << CONST_BITS;
354                         tmp11 = tmp12 = -tmp10;
355                     }
356                 }
357             }
358             else
359             {
360                 if (d2)
361                 {
362                     if (d0)
363                     {
364             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
365                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
366                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
367
368                         tmp0 = d0 << CONST_BITS;
369
370                         tmp10 = tmp0 + tmp3;
371                         tmp13 = tmp0 - tmp3;
372                         tmp11 = tmp0 + tmp2;
373                         tmp12 = tmp0 - tmp2;
374                     }
375                     else
376                     {
377             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
378                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
379                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
380
381                         tmp10 = tmp3;
382                         tmp13 = -tmp3;
383                         tmp11 = tmp2;
384                         tmp12 = -tmp2;
385                     }
386                 }
387                 else
388                 {
389                     if (d0)
390                     {
391             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
392                         tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
393                     }
394                     else
395                     {
396             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
397                         tmp10 = tmp13 = tmp11 = tmp12 = 0;
398                     }
399                 }
400             }
401         }
402
403
404     /* Odd part per figure 8; the matrix is unitary and hence its
405      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
406      */
407
408         if (d7)
409             {
410             if (d5)
411             {
412                 if (d3)
413                 {
414                     if (d1)
415                     {
416             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
417                         z1 = d7 + d1;
418                         z2 = d5 + d3;
419                         z3 = d7 + d3;
420                         z4 = d5 + d1;
421                         z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
422
423                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
424                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
425                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
426                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
427                         z1 = MULTIPLY(z1, - FIX(0.899976223));
428                         z2 = MULTIPLY(z2, - FIX(2.562915447));
429                         z3 = MULTIPLY(z3, - FIX(1.961570560));
430                         z4 = MULTIPLY(z4, - FIX(0.390180644));
431
432                         z3 += z5;
433                         z4 += z5;
434
435                         tmp0 += z1 + z3;
436                         tmp1 += z2 + z4;
437                         tmp2 += z2 + z3;
438                         tmp3 += z1 + z4;
439                     }
440                     else
441                     {
442             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
443                         z2 = d5 + d3;
444                         z3 = d7 + d3;
445                         z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
446
447                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
448                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
449                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
450                         z1 = MULTIPLY(d7, - FIX(0.899976223));
451                         z2 = MULTIPLY(z2, - FIX(2.562915447));
452                         z3 = MULTIPLY(z3, - FIX(1.961570560));
453                         z4 = MULTIPLY(d5, - FIX(0.390180644));
454
455                         z3 += z5;
456                         z4 += z5;
457
458                         tmp0 += z1 + z3;
459                         tmp1 += z2 + z4;
460                         tmp2 += z2 + z3;
461                         tmp3 = z1 + z4;
462                         }
463                     }
464                 else
465                 {
466                     if (d1)
467                     {
468             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
469                         z1 = d7 + d1;
470                         z4 = d5 + d1;
471                         z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
472
473                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
474                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
475                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
476                         z1 = MULTIPLY(z1, - FIX(0.899976223));
477                         z2 = MULTIPLY(d5, - FIX(2.562915447));
478                         z3 = MULTIPLY(d7, - FIX(1.961570560));
479                         z4 = MULTIPLY(z4, - FIX(0.390180644));
480
481                         z3 += z5;
482                         z4 += z5;
483
484                         tmp0 += z1 + z3;
485                         tmp1 += z2 + z4;
486                         tmp2 = z2 + z3;
487                         tmp3 += z1 + z4;
488                     }
489                     else
490                     {
491             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
492                         z5 = MULTIPLY(d7 + d5, FIX(1.175875602));
493
494                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
495                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
496                         z1 = MULTIPLY(d7, - FIX(0.899976223));
497                         z3 = MULTIPLY(d7, - FIX(1.961570560));
498                         z2 = MULTIPLY(d5, - FIX(2.562915447));
499                         z4 = MULTIPLY(d5, - FIX(0.390180644));
500
501                         z3 += z5;
502                         z4 += z5;
503
504                         tmp0 += z3;
505                         tmp1 += z4;
506                         tmp2 = z2 + z3;
507                         tmp3 = z1 + z4;
508                     }
509                 }
510             }
511             else
512             {
513                 if (d3)
514                 {
515                     if (d1)
516                     {
517             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
518                         z1 = d7 + d1;
519                         z3 = d7 + d3;
520                         z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
521
522                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
523                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
524                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
525                         z1 = MULTIPLY(z1, - FIX(0.899976223));
526                         z2 = MULTIPLY(d3, - FIX(2.562915447));
527                         z3 = MULTIPLY(z3, - FIX(1.961570560));
528                         z4 = MULTIPLY(d1, - FIX(0.390180644));
529
530                         z3 += z5;
531                         z4 += z5;
532
533                         tmp0 += z1 + z3;
534                         tmp1 = z2 + z4;
535                         tmp2 += z2 + z3;
536                         tmp3 += z1 + z4;
537                     }
538                     else
539                     {
540             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
541                         z3 = d7 + d3;
542                         z5 = MULTIPLY(z3, FIX(1.175875602));
543
544                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
545                         tmp2 = MULTIPLY(d3, FIX(0.509795579));
546                         z1 = MULTIPLY(d7, - FIX(0.899976223));
547                         z2 = MULTIPLY(d3, - FIX(2.562915447));
548                         z3 = MULTIPLY(z3, - FIX2(0.785694958));
549
550                         tmp0 += z3;
551                         tmp1 = z2 + z5;
552                         tmp2 += z3;
553                         tmp3 = z1 + z5;
554                     }
555                 }
556                 else
557                 {
558                     if (d1)
559                     {
560             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
561                         z1 = d7 + d1;
562                         z5 = MULTIPLY(z1, FIX(1.175875602));
563
564                         tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
565                         tmp3 = MULTIPLY(d1, FIX2(1.111140466));
566                         z1 = MULTIPLY(z1, FIX2(0.275899379));
567                         z3 = MULTIPLY(d7, - FIX(1.961570560));
568                         z4 = MULTIPLY(d1, - FIX(0.390180644));
569
570                         tmp0 += z1;
571                         tmp1 = z4 + z5;
572                         tmp2 = z3 + z5;
573                         tmp3 += z1;
574                     }
575                 else
576                     {
577             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
578                         tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
579                         tmp1 = MULTIPLY(d7, FIX(1.175875602));
580                         tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
581                         tmp3 = MULTIPLY(d7, FIX2(0.275899379));
582                     }
583                 }
584             }
585         }
586         else
587         {
588             if (d5)
589             {
590                 if (d3)
591                 {
592                     if (d1)
593                     {
594             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
595                         z2 = d5 + d3;
596                         z4 = d5 + d1;
597                         z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
598
599                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
600                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
601                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
602                         z1 = MULTIPLY(d1, - FIX(0.899976223));
603                         z2 = MULTIPLY(z2, - FIX(2.562915447));
604                         z3 = MULTIPLY(d3, - FIX(1.961570560));
605                         z4 = MULTIPLY(z4, - FIX(0.390180644));
606
607                         z3 += z5;
608                         z4 += z5;
609
610                         tmp0 = z1 + z3;
611                         tmp1 += z2 + z4;
612                         tmp2 += z2 + z3;
613                         tmp3 += z1 + z4;
614                     }
615                     else
616                     {
617             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
618                         z2 = d5 + d3;
619                         z5 = MULTIPLY(z2, FIX(1.175875602));
620
621                         tmp1 = MULTIPLY(d5, FIX2(1.662939225));
622                         tmp2 = MULTIPLY(d3, FIX2(1.111140466));
623                         z2 = MULTIPLY(z2, - FIX2(1.387039845));
624                         z3 = MULTIPLY(d3, - FIX(1.961570560));
625                         z4 = MULTIPLY(d5, - FIX(0.390180644));
626
627                         tmp0 = z3 + z5;
628                         tmp1 += z2;
629                         tmp2 += z2;
630                         tmp3 = z4 + z5;
631                     }
632                 }
633                 else
634                 {
635                     if (d1)
636                     {
637             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
638                         z4 = d5 + d1;
639                         z5 = MULTIPLY(z4, FIX(1.175875602));
640
641                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
642                         tmp3 = MULTIPLY(d1, FIX2(0.601344887));
643                         z1 = MULTIPLY(d1, - FIX(0.899976223));
644                         z2 = MULTIPLY(d5, - FIX(2.562915447));
645                         z4 = MULTIPLY(z4, FIX2(0.785694958));
646
647                         tmp0 = z1 + z5;
648                         tmp1 += z4;
649                         tmp2 = z2 + z5;
650                         tmp3 += z4;
651                     }
652                     else
653                     {
654             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
655                         tmp0 = MULTIPLY(d5, FIX(1.175875602));
656                         tmp1 = MULTIPLY(d5, FIX2(0.275899380));
657                         tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
658                         tmp3 = MULTIPLY(d5, FIX2(0.785694958));
659                     }
660                 }
661             }
662             else
663             {
664                 if (d3)
665                 {
666                     if (d1)
667                     {
668             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
669                         z5 = d3 + d1;
670
671                         tmp2 = MULTIPLY(d3, - FIX(1.451774981));
672                         tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
673                         z1 = MULTIPLY(d1, FIX(1.061594337));
674                         z2 = MULTIPLY(d3, - FIX(2.172734803));
675                         z4 = MULTIPLY(z5, FIX(0.785694958));
676                         z5 = MULTIPLY(z5, FIX(1.175875602));
677
678                         tmp0 = z1 - z4;
679                         tmp1 = z2 + z4;
680                         tmp2 += z5;
681                         tmp3 += z5;
682                     }
683                     else
684                     {
685             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
686                         tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
687                         tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
688                         tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
689                         tmp3 = MULTIPLY(d3, FIX(1.175875602));
690                     }
691                 }
692                 else
693                 {
694                     if (d1)
695                     {
696             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
697                         tmp0 = MULTIPLY(d1, FIX2(0.275899379));
698                         tmp1 = MULTIPLY(d1, FIX2(0.785694958));
699                         tmp2 = MULTIPLY(d1, FIX(1.175875602));
700                         tmp3 = MULTIPLY(d1, FIX2(1.387039845));
701                     }
702                     else
703                     {
704             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
705                         tmp0 = tmp1 = tmp2 = tmp3 = 0;
706                     }
707                 }
708             }
709         }
710
711     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
712
713         dataptr[0] = (dctelem_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
714         dataptr[7] = (dctelem_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
715         dataptr[1] = (dctelem_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
716         dataptr[6] = (dctelem_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
717         dataptr[2] = (dctelem_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
718         dataptr[5] = (dctelem_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
719         dataptr[3] = (dctelem_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
720         dataptr[4] = (dctelem_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
721
722         dataptr += DCTSIZE;              /* advance pointer to next row */
723     }
724
725   /* Pass 2: process columns. */
726   /* Note that we must descale the results by a factor of 8 == 2**3, */
727   /* and also undo the PASS1_BITS scaling. */
728
729     dataptr = p_block;
730     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
731     {
732     /* Columns of zeroes can be exploited in the same way as we did with rows.
733      * However, the row calculation has created many nonzero AC terms, so the
734      * simplification applies less often (typically 5% to 10% of the time).
735      * On machines with very fast multiplication, it's possible that the
736      * test takes more time than it's worth.  In that case this section
737      * may be commented out.
738      */
739
740         d0 = dataptr[DCTSIZE*0];
741         d1 = dataptr[DCTSIZE*1];
742         d2 = dataptr[DCTSIZE*2];
743         d3 = dataptr[DCTSIZE*3];
744         d4 = dataptr[DCTSIZE*4];
745         d5 = dataptr[DCTSIZE*5];
746         d6 = dataptr[DCTSIZE*6];
747         d7 = dataptr[DCTSIZE*7];
748
749     /* Even part: reverse the even part of the forward DCT. */
750     /* The rotator is sqrt(2)*c(-6). */
751         if (d6)
752         {
753             if (d4)
754             {
755                 if (d2)
756                 {
757                     if (d0)
758                     {
759             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
760                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
761                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
762                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
763
764                         tmp0 = (d0 + d4) << CONST_BITS;
765                         tmp1 = (d0 - d4) << CONST_BITS;
766
767                         tmp10 = tmp0 + tmp3;
768                         tmp13 = tmp0 - tmp3;
769                         tmp11 = tmp1 + tmp2;
770                         tmp12 = tmp1 - tmp2;
771                     }
772                     else
773                     {
774             /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
775                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
776                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
777                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
778
779                         tmp0 = d4 << CONST_BITS;
780
781                         tmp10 = tmp0 + tmp3;
782                         tmp13 = tmp0 - tmp3;
783                         tmp11 = tmp2 - tmp0;
784                         tmp12 = -(tmp0 + tmp2);
785                     }
786                 }
787                 else
788                 {
789                     if (d0)
790                     {
791             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
792                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
793                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
794
795                         tmp0 = (d0 + d4) << CONST_BITS;
796                         tmp1 = (d0 - d4) << CONST_BITS;
797
798                         tmp10 = tmp0 + tmp3;
799                         tmp13 = tmp0 - tmp3;
800                         tmp11 = tmp1 + tmp2;
801                         tmp12 = tmp1 - tmp2;
802                     }
803                     else
804                     {
805             /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
806                         tmp2 = MULTIPLY(d6, -FIX2(1.306562965));
807                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
808
809                         tmp0 = d4 << CONST_BITS;
810
811                         tmp10 = tmp0 + tmp3;
812                         tmp13 = tmp0 - tmp3;
813                         tmp11 = tmp2 - tmp0;
814                         tmp12 = -(tmp0 + tmp2);
815                     }
816                 }
817             }
818             else
819             {
820                 if (d2)
821                 {
822                     if (d0)
823                     {
824             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
825                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
826                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
827                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
828
829                         tmp0 = d0 << CONST_BITS;
830
831                         tmp10 = tmp0 + tmp3;
832                         tmp13 = tmp0 - tmp3;
833                         tmp11 = tmp0 + tmp2;
834                         tmp12 = tmp0 - tmp2;
835                     }
836                     else
837                     {
838             /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
839                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
840                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
841                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
842
843                         tmp10 = tmp3;
844                         tmp13 = -tmp3;
845                         tmp11 = tmp2;
846                         tmp12 = -tmp2;
847                     }
848                 }
849                 else
850                 {
851                     if (d0)
852                     {
853             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
854                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
855                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
856
857                     tmp0 = d0 << CONST_BITS;
858
859                     tmp10 = tmp0 + tmp3;
860                     tmp13 = tmp0 - tmp3;
861                     tmp11 = tmp0 + tmp2;
862                     tmp12 = tmp0 - tmp2;
863                 }
864                 else
865                 {
866             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
867                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
868                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
869                     tmp10 = tmp3;
870                     tmp13 = -tmp3;
871                     tmp11 = tmp2;
872                     tmp12 = -tmp2;
873                 }
874             }
875         }
876     }
877     else
878     {
879         if (d4)
880         {
881             if (d2)
882             {
883                 if (d0)
884                 {
885             /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
886                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
887                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
888
889                     tmp0 = (d0 + d4) << CONST_BITS;
890                     tmp1 = (d0 - d4) << CONST_BITS;
891
892                     tmp10 = tmp0 + tmp3;
893                     tmp13 = tmp0 - tmp3;
894                     tmp11 = tmp1 + tmp2;
895                     tmp12 = tmp1 - tmp2;
896                 }
897                 else
898                 {
899             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
900                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
901                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
902
903                     tmp0 = d4 << CONST_BITS;
904
905                     tmp10 = tmp0 + tmp3;
906                     tmp13 = tmp0 - tmp3;
907                     tmp11 = tmp2 - tmp0;
908                     tmp12 = -(tmp0 + tmp2);
909                 }
910             }
911             else
912             {
913                 if (d0)
914                 {
915             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
916                     tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
917                     tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
918                 }
919                 else
920                 {
921             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
922                     tmp10 = tmp13 = d4 << CONST_BITS;
923                     tmp11 = tmp12 = -tmp10;
924                 }
925             }
926         }
927         else
928         {
929         if (d2)
930         {
931             if (d0)
932             {
933             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
934                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
935                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
936
937                     tmp0 = d0 << CONST_BITS;
938
939                     tmp10 = tmp0 + tmp3;
940                     tmp13 = tmp0 - tmp3;
941                     tmp11 = tmp0 + tmp2;
942                     tmp12 = tmp0 - tmp2;
943             }
944             else
945             {
946             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
947                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
948                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
949
950                     tmp10 = tmp3;
951                     tmp13 = -tmp3;
952                     tmp11 = tmp2;
953                     tmp12 = -tmp2;
954             }
955         }
956         else
957         {
958             if (d0)
959                 {
960             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
961                     tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
962                 }
963                 else
964                 {
965             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
966                     tmp10 = tmp13 = tmp11 = tmp12 = 0;
967                 }
968             }
969         }
970     }
971
972     /* Odd part per figure 8; the matrix is unitary and hence its
973      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
974      */
975     if (d7)
976     {
977         if (d5)
978         {
979             if (d3)
980             {
981                 if (d1)
982                 {
983             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
984                     z1 = d7 + d1;
985                     z2 = d5 + d3;
986                     z3 = d7 + d3;
987                     z4 = d5 + d1;
988                     z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
989
990                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
991                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
992                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
993                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
994                     z1 = MULTIPLY(z1, - FIX(0.899976223));
995                     z2 = MULTIPLY(z2, - FIX(2.562915447));
996                     z3 = MULTIPLY(z3, - FIX(1.961570560));
997                     z4 = MULTIPLY(z4, - FIX(0.390180644));
998
999                     z3 += z5;
1000                     z4 += z5;
1001
1002                     tmp0 += z1 + z3;
1003                     tmp1 += z2 + z4;
1004                     tmp2 += z2 + z3;
1005                     tmp3 += z1 + z4;
1006                 }
1007                 else
1008                 {
1009             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
1010                     z2 = d5 + d3;
1011                     z3 = d7 + d3;
1012                     z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
1013
1014                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1015                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1016                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1017                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1018                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1019                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1020                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1021
1022                     z3 += z5;
1023                     z4 += z5;
1024
1025                     tmp0 += z1 + z3;
1026                     tmp1 += z2 + z4;
1027                     tmp2 += z2 + z3;
1028                     tmp3 = z1 + z4;
1029                 }
1030             }
1031             else
1032             {
1033                 if (d1)
1034                 {
1035             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
1036                     z1 = d7 + d1;
1037                     z4 = d5 + d1;
1038                     z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
1039
1040                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1041                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1042                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1043                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1044                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1045                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1046                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1047
1048                     z3 += z5;
1049                     z4 += z5;
1050
1051                     tmp0 += z1 + z3;
1052                     tmp1 += z2 + z4;
1053                     tmp2 = z2 + z3;
1054                     tmp3 += z1 + z4;
1055                 }
1056                 else
1057                 {
1058             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
1059                     z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
1060
1061                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1062                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1063                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1064                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1065                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1066                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1067
1068                     z3 += z5;
1069                     z4 += z5;
1070
1071                     tmp0 += z3;
1072                     tmp1 += z4;
1073                     tmp2 = z2 + z3;
1074                     tmp3 = z1 + z4;
1075                 }
1076             }
1077         }
1078         else
1079         {
1080             if (d3)
1081             {
1082                 if (d1)
1083                 {
1084             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
1085                     z1 = d7 + d1;
1086                     z3 = d7 + d3;
1087                     z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
1088
1089                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1090                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1091                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1092                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1093                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1094                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1095                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1096
1097                     z3 += z5;
1098                     z4 += z5;
1099
1100                     tmp0 += z1 + z3;
1101                     tmp1 = z2 + z4;
1102                     tmp2 += z2 + z3;
1103                     tmp3 += z1 + z4;
1104                 }
1105                 else
1106                 {
1107             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1108                     z3 = d7 + d3;
1109                     z5 = MULTIPLY(z3, FIX(1.175875602));
1110
1111                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1112                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1113                     tmp2 = MULTIPLY(d3, FIX(0.509795579));
1114                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1115                     z3 = MULTIPLY(z3, - FIX2(0.785694958));
1116
1117                     tmp0 += z3;
1118                     tmp1 = z2 + z5;
1119                     tmp2 += z3;
1120                     tmp3 = z1 + z5;
1121                 }
1122             }
1123             else
1124             {
1125                 if (d1)
1126                 {
1127             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1128                     z1 = d7 + d1;
1129                     z5 = MULTIPLY(z1, FIX(1.175875602));
1130
1131                     tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
1132                     tmp3 = MULTIPLY(d1, FIX2(1.111140466));
1133                     z1 = MULTIPLY(z1, FIX2(0.275899379));
1134                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1135                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1136
1137                     tmp0 += z1;
1138                     tmp1 = z4 + z5;
1139                     tmp2 = z3 + z5;
1140                     tmp3 += z1;
1141                 }
1142                 else
1143                 {
1144             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1145                     tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
1146                     tmp1 = MULTIPLY(d7, FIX(1.175875602));
1147                     tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
1148                     tmp3 = MULTIPLY(d7, FIX2(0.275899379));
1149                 }
1150             }
1151         }
1152     }
1153     else
1154     {
1155         if (d5)
1156         {
1157             if (d3)
1158             {
1159                 if (d1)
1160                 {
1161             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1162                     z2 = d5 + d3;
1163                     z4 = d5 + d1;
1164                     z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
1165
1166                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1167                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1168                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1169                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1170                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1171                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1172                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1173
1174                     z3 += z5;
1175                     z4 += z5;
1176
1177                     tmp0 = z1 + z3;
1178                     tmp1 += z2 + z4;
1179                     tmp2 += z2 + z3;
1180                     tmp3 += z1 + z4;
1181                 }
1182                 else
1183                 {
1184             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1185                     z2 = d5 + d3;
1186                     z5 = MULTIPLY(z2, FIX(1.175875602));
1187
1188                     tmp1 = MULTIPLY(d5, FIX2(1.662939225));
1189                     tmp2 = MULTIPLY(d3, FIX2(1.111140466));
1190                     z2 = MULTIPLY(z2, - FIX2(1.387039845));
1191                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1192                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1193
1194                     tmp0 = z3 + z5;
1195                     tmp1 += z2;
1196                     tmp2 += z2;
1197                     tmp3 = z4 + z5;
1198                 }
1199             }
1200             else
1201             {
1202                 if (d1)
1203                 {
1204             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1205                     z4 = d5 + d1;
1206                     z5 = MULTIPLY(z4, FIX(1.175875602));
1207
1208                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1209                     tmp3 = MULTIPLY(d1, FIX2(0.601344887));
1210                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1211                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1212                     z4 = MULTIPLY(z4, FIX2(0.785694958));
1213
1214                     tmp0 = z1 + z5;
1215                     tmp1 += z4;
1216                     tmp2 = z2 + z5;
1217                     tmp3 += z4;
1218                 }
1219                 else
1220                 {
1221             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1222                     tmp0 = MULTIPLY(d5, FIX(1.175875602));
1223                     tmp1 = MULTIPLY(d5, FIX2(0.275899380));
1224                     tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
1225                     tmp3 = MULTIPLY(d5, FIX2(0.785694958));
1226                 }
1227             }
1228         }
1229         else
1230         {
1231             if (d3)
1232             {
1233                 if (d1)
1234                 {
1235             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1236                     z5 = d3 + d1;
1237
1238                     tmp2 = MULTIPLY(d3, - FIX(1.451774981));
1239                     tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
1240                     z1 = MULTIPLY(d1, FIX(1.061594337));
1241                     z2 = MULTIPLY(d3, - FIX(2.172734803));
1242                     z4 = MULTIPLY(z5, FIX(0.785694958));
1243                     z5 = MULTIPLY(z5, FIX(1.175875602));
1244
1245                     tmp0 = z1 - z4;
1246                     tmp1 = z2 + z4;
1247                     tmp2 += z5;
1248                     tmp3 += z5;
1249                 }
1250                 else
1251                 {
1252             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1253                     tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
1254                     tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
1255                     tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
1256                     tmp3 = MULTIPLY(d3, FIX(1.175875602));
1257                 }
1258             }
1259             else
1260             {
1261                 if (d1)
1262                 {
1263             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1264                     tmp0 = MULTIPLY(d1, FIX2(0.275899379));
1265                     tmp1 = MULTIPLY(d1, FIX2(0.785694958));
1266                     tmp2 = MULTIPLY(d1, FIX(1.175875602));
1267                     tmp3 = MULTIPLY(d1, FIX2(1.387039845));
1268                 }
1269                 else
1270                 {
1271             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1272                     tmp0 = tmp1 = tmp2 = tmp3 = 0;
1273                 }
1274             }
1275         }
1276     }
1277
1278     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1279
1280     dataptr[DCTSIZE*0] = (dctelem_t) DESCALE(tmp10 + tmp3,
1281                        CONST_BITS+PASS1_BITS+3);
1282     dataptr[DCTSIZE*7] = (dctelem_t) DESCALE(tmp10 - tmp3,
1283                        CONST_BITS+PASS1_BITS+3);
1284     dataptr[DCTSIZE*1] = (dctelem_t) DESCALE(tmp11 + tmp2,
1285                        CONST_BITS+PASS1_BITS+3);
1286     dataptr[DCTSIZE*6] = (dctelem_t) DESCALE(tmp11 - tmp2,
1287                        CONST_BITS+PASS1_BITS+3);
1288     dataptr[DCTSIZE*2] = (dctelem_t) DESCALE(tmp12 + tmp1,
1289                        CONST_BITS+PASS1_BITS+3);
1290     dataptr[DCTSIZE*5] = (dctelem_t) DESCALE(tmp12 - tmp1,
1291                        CONST_BITS+PASS1_BITS+3);
1292     dataptr[DCTSIZE*3] = (dctelem_t) DESCALE(tmp13 + tmp0,
1293                        CONST_BITS+PASS1_BITS+3);
1294     dataptr[DCTSIZE*4] = (dctelem_t) DESCALE(tmp13 - tmp0,
1295                        CONST_BITS+PASS1_BITS+3);
1296
1297     dataptr++;             /* advance pointer to next column */
1298     }
1299 }
1300