git.sesse.net Git - vlc/blob - plugins/idct/idct.c
ab1f57c93ed4f07cfac3cfa2339b79f28d84486c
[vlc] / plugins / idct / idct.c
1 /*****************************************************************************
2  * idct.c : IDCT module
3  *****************************************************************************
4  * Copyright (C) 1999, 2000 VideoLAN
5  * $Id: idct.c,v 1.11 2001/06/03 12:47:21 sam Exp $
6  *
7  * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  * 
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 #define MODULE_NAME idct
25 #include "modules_inner.h"
26
27 /*****************************************************************************
28  * Preamble
29  *****************************************************************************/
30 #include "defs.h"
31
32 #include <stdlib.h>
33
34 #include "config.h"
35 #include "common.h"
36 #include "threads.h"
37 #include "mtime.h"
38 #include "tests.h"
39
40 #include "video.h"
41 #include "video_output.h"
42
43 #include "video_decoder.h"
44
45 #include "vdec_block.h"
46 #include "vdec_idct.h"
47
48 #include "modules.h"
49 #include "modules_export.h"
50
51 /*****************************************************************************
52  * Local prototypes: file-private (static) helpers defined later in this file.
53  *****************************************************************************/
54 static void idct_getfunctions( function_list_t * p_function_list ); /* fills the function list */
55 static int  idct_Probe      ( probedata_t *p_data );   /* returns a preference score */
56 static void vdec_NormScan   ( u8 ppi_scan[2][64] );    /* empty stub in this module */
57
58 /*****************************************************************************
59  * Build configuration tree and the module init/activate/deactivate hooks.
60  *****************************************************************************/
61 MODULE_CONFIG_START
62 ADD_WINDOW( "Configuration for IDCT module" )
63     ADD_COMMENT( "Ha, ha -- nothing to configure yet" ) /* placeholder: no real options */
64 MODULE_CONFIG_STOP
65
66 MODULE_INIT_START /* describe the module: capability flags and display name */
67     p_module->i_capabilities = MODULE_CAPABILITY_NULL
68                                 | MODULE_CAPABILITY_IDCT; /* only the IDCT flag is OR-ed in */
69     p_module->psz_longname = "IDCT module";
70 MODULE_INIT_STOP
71
72 MODULE_ACTIVATE_START /* let idct_getfunctions fill the module's idct function list */
73     idct_getfunctions( &p_module->p_functions->idct );
74 MODULE_ACTIVATE_STOP
75
76 MODULE_DEACTIVATE_START /* no statements between START and STOP */
77 MODULE_DEACTIVATE_STOP
78
79 /* Following functions are local */
80
81 /*****************************************************************************
82  * idct_getfunctions: fill p_function_list with the probe callback and the
83  * IDCT entry points. It is static so that we don't pollute the namespace.
84  *****************************************************************************/
85 static void idct_getfunctions( function_list_t * p_function_list )
86 {
87     p_function_list->pf_probe = idct_Probe; /* preference-score callback */
88 #define F p_function_list->functions.idct /* local shorthand, undefined again below */
89     F.pf_idct_init = _M( vdec_InitIDCT ); /* _M() presumably adds a per-module prefix -- confirm */
90     F.pf_sparse_idct = _M( vdec_SparseIDCT );
91     F.pf_idct = _M( vdec_IDCT ); /* defined further down in this file */
92     F.pf_norm_scan = vdec_NormScan; /* the local empty stub, not an _M() symbol */
93     F.pf_decode_init = _M( vdec_InitDecode );
94     F.pf_decode_mb_c = _M( vdec_DecodeMacroblockC );
95     F.pf_decode_mb_bw = _M( vdec_DecodeMacroblockBW );
96 #undef F
97 }
98
99 /*****************************************************************************
100  * idct_Probe: returns a preference score (999 or 50); p_data is not read
101  *****************************************************************************/
102 static int idct_Probe( probedata_t *p_data )
103 {
104     if( TestMethod( IDCT_METHOD_VAR, "idct" ) ) /* presumably: "idct" explicitly requested -- confirm */
105     {
106         return( 999 ); /* the higher of the two scores returned here */
107     }
108
109     /* This plugin always works, so fall back to the lower default score */
110     return( 50 );
111 }
112
113 /*****************************************************************************
114  * vdec_NormScan : Unused in this IDCT -- empty stub, ppi_scan is left untouched
115  *****************************************************************************/
116 static void vdec_NormScan( u8 ppi_scan[2][64] )
117 { /* intentionally empty */
118 }
119
120 /*****************************************************************************
121  * vdec_IDCT : IDCT function for normal matrices
122  *****************************************************************************/
123 void _M( vdec_IDCT )( vdec_thread_t * p_vdec, dctelem_t * p_block,
124                 int i_idontcare )
125 {
126     s32 tmp0, tmp1, tmp2, tmp3;
127     s32 tmp10, tmp11, tmp12, tmp13;
128     s32 z1, z2, z3, z4, z5;
129     s32 d0, d1, d2, d3, d4, d5, d6, d7;
130     dctelem_t * dataptr;
131     int rowctr;
132
133     SHIFT_TEMPS
134
135     /* Pass 1: process rows. */
136     /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
137     /* furthermore, we scale the results by 2**PASS1_BITS. */
138
139     dataptr = p_block;
140
141     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
142     {
143         /* Due to quantization, we will usually find that many of the input
144          * coefficients are zero, especially the AC terms.  We can exploit this
145          * by short-circuiting the IDCT calculation for any row in which all
146          * the AC terms are zero.  In that case each output is equal to the
147          * DC coefficient (with scale factor as needed).
148          * With typical images and quantization tables, half or more of the
149          * row DCT calculations can be simplified this way.
150          */
151
152         register int * idataptr = (int*)dataptr;
153         d0 = dataptr[0];
154         d1 = dataptr[1];
155         if ( (d1 == 0) && ((idataptr[1] | idataptr[2] | idataptr[3]) == 0) )
156         {
157       /* AC terms all zero */
158             if (d0)
159             {
160       /* Compute a 32 bit value to assign. */
161                 dctelem_t dcval = (dctelem_t) (d0 << PASS1_BITS);
162                 register int v = (dcval & 0xffff) | (dcval << 16);
163
164                 idataptr[0] = v;
165                 idataptr[1] = v;
166                 idataptr[2] = v;
167                 idataptr[3] = v;
168             }
169
170             dataptr += DCTSIZE; /* advance pointer to next row */
171             continue;
172         }
173         d2 = dataptr[2];
174         d3 = dataptr[3];
175         d4 = dataptr[4];
176         d5 = dataptr[5];
177         d6 = dataptr[6];
178         d7 = dataptr[7];
179
180     /* Even part: reverse the even part of the forward DCT. */
181     /* The rotator is sqrt(2)*c(-6). */
182         if (d6)
183         {
184             if (d4)
185             {
186                 if (d2)
187                 {
188                     if (d0)
189                     {
190             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
191                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
192                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
193                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
194
195                         tmp0 = (d0 + d4) << CONST_BITS;
196                         tmp1 = (d0 - d4) << CONST_BITS;
197
198                         tmp10 = tmp0 + tmp3;
199                         tmp13 = tmp0 - tmp3;
200                         tmp11 = tmp1 + tmp2;
201                         tmp12 = tmp1 - tmp2;
202                     }
203                     else
204                     {
205                     /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
206                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
207                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
208                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
209
210                         tmp0 = d4 << CONST_BITS;
211
212                         tmp10 = tmp0 + tmp3;
213                         tmp13 = tmp0 - tmp3;
214                         tmp11 = tmp2 - tmp0;
215                         tmp12 = -(tmp0 + tmp2);
216                         }
217                 }
218                 else
219                 {
220                     if (d0)
221                     {
222             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
223                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
224                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
225
226                         tmp0 = (d0 + d4) << CONST_BITS;
227                         tmp1 = (d0 - d4) << CONST_BITS;
228
229                         tmp10 = tmp0 + tmp3;
230                         tmp13 = tmp0 - tmp3;
231                         tmp11 = tmp1 + tmp2;
232                         tmp12 = tmp1 - tmp2;
233                         }
234                     else
235                     {
236                     /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
237                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
238                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
239
240                         tmp0 = d4 << CONST_BITS;
241
242                         tmp10 = tmp0 + tmp3;
243                         tmp13 = tmp0 - tmp3;
244                         tmp11 = tmp2 - tmp0;
245                         tmp12 = -(tmp0 + tmp2);
246                         }
247                 }
248             }
249             else
250             {
251                 if (d2)
252                 {
253                     if (d0)
254                     {
255             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
256                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
257                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
258                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
259
260                         tmp0 = d0 << CONST_BITS;
261
262                         tmp10 = tmp0 + tmp3;
263                         tmp13 = tmp0 - tmp3;
264                         tmp11 = tmp0 + tmp2;
265                         tmp12 = tmp0 - tmp2;
266                     }
267                     else
268                     {
269                     /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
270                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
271                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
272                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
273
274                         tmp10 = tmp3;
275                         tmp13 = -tmp3;
276                         tmp11 = tmp2;
277                         tmp12 = -tmp2;
278                             }
279                 }
280                 else
281                 {
282                     if (d0)
283                     {
284             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
285                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
286                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
287
288                         tmp0 = d0 << CONST_BITS;
289
290                         tmp10 = tmp0 + tmp3;
291                         tmp13 = tmp0 - tmp3;
292                         tmp11 = tmp0 + tmp2;
293                         tmp12 = tmp0 - tmp2;
294                     }
295                     else
296                     {
297             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
298                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
299                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
300
301                         tmp10 = tmp3;
302                         tmp13 = -tmp3;
303                         tmp11 = tmp2;
304                         tmp12 = -tmp2;
305                     }
306                 }
307             }
308         }
309         else
310         {
311             if (d4)
312             {
313                 if (d2)
314                 {
315                     if (d0)
316                     {
317                     /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
318                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
319                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
320
321                         tmp0 = (d0 + d4) << CONST_BITS;
322                         tmp1 = (d0 - d4) << CONST_BITS;
323
324                         tmp10 = tmp0 + tmp3;
325                         tmp13 = tmp0 - tmp3;
326                         tmp11 = tmp1 + tmp2;
327                         tmp12 = tmp1 - tmp2;
328                     }
329                     else
330                     {
331             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
332                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
333                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
334
335                         tmp0 = d4 << CONST_BITS;
336
337                         tmp10 = tmp0 + tmp3;
338                         tmp13 = tmp0 - tmp3;
339                         tmp11 = tmp2 - tmp0;
340                         tmp12 = -(tmp0 + tmp2);
341                     }
342                 }
343                 else
344                 {
345                     if (d0)
346                     {
347             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
348                         tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
349                         tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
350                     }
351                     else
352                     {
353             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
354                         tmp10 = tmp13 = d4 << CONST_BITS;
355                         tmp11 = tmp12 = -tmp10;
356                     }
357                 }
358             }
359             else
360             {
361                 if (d2)
362                 {
363                     if (d0)
364                     {
365             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
366                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
367                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
368
369                         tmp0 = d0 << CONST_BITS;
370
371                         tmp10 = tmp0 + tmp3;
372                         tmp13 = tmp0 - tmp3;
373                         tmp11 = tmp0 + tmp2;
374                         tmp12 = tmp0 - tmp2;
375                     }
376                     else
377                     {
378             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
379                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
380                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
381
382                         tmp10 = tmp3;
383                         tmp13 = -tmp3;
384                         tmp11 = tmp2;
385                         tmp12 = -tmp2;
386                     }
387                 }
388                 else
389                 {
390                     if (d0)
391                     {
392             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
393                         tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
394                     }
395                     else
396                     {
397             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
398                         tmp10 = tmp13 = tmp11 = tmp12 = 0;
399                     }
400                 }
401             }
402         }
403
404
405     /* Odd part per figure 8; the matrix is unitary and hence its
406      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
407      */
408
409         if (d7)
410             {
411             if (d5)
412             {
413                 if (d3)
414                 {
415                     if (d1)
416                     {
417             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
418                         z1 = d7 + d1;
419                         z2 = d5 + d3;
420                         z3 = d7 + d3;
421                         z4 = d5 + d1;
422                         z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
423
424                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
425                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
426                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
427                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
428                         z1 = MULTIPLY(z1, - FIX(0.899976223));
429                         z2 = MULTIPLY(z2, - FIX(2.562915447));
430                         z3 = MULTIPLY(z3, - FIX(1.961570560));
431                         z4 = MULTIPLY(z4, - FIX(0.390180644));
432
433                         z3 += z5;
434                         z4 += z5;
435
436                         tmp0 += z1 + z3;
437                         tmp1 += z2 + z4;
438                         tmp2 += z2 + z3;
439                         tmp3 += z1 + z4;
440                     }
441                     else
442                     {
443             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
444                         z2 = d5 + d3;
445                         z3 = d7 + d3;
446                         z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
447
448                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
449                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
450                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
451                         z1 = MULTIPLY(d7, - FIX(0.899976223));
452                         z2 = MULTIPLY(z2, - FIX(2.562915447));
453                         z3 = MULTIPLY(z3, - FIX(1.961570560));
454                         z4 = MULTIPLY(d5, - FIX(0.390180644));
455
456                         z3 += z5;
457                         z4 += z5;
458
459                         tmp0 += z1 + z3;
460                         tmp1 += z2 + z4;
461                         tmp2 += z2 + z3;
462                         tmp3 = z1 + z4;
463                         }
464                     }
465                 else
466                 {
467                     if (d1)
468                     {
469             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
470                         z1 = d7 + d1;
471                         z4 = d5 + d1;
472                         z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
473
474                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
475                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
476                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
477                         z1 = MULTIPLY(z1, - FIX(0.899976223));
478                         z2 = MULTIPLY(d5, - FIX(2.562915447));
479                         z3 = MULTIPLY(d7, - FIX(1.961570560));
480                         z4 = MULTIPLY(z4, - FIX(0.390180644));
481
482                         z3 += z5;
483                         z4 += z5;
484
485                         tmp0 += z1 + z3;
486                         tmp1 += z2 + z4;
487                         tmp2 = z2 + z3;
488                         tmp3 += z1 + z4;
489                     }
490                     else
491                     {
492             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
493                         z5 = MULTIPLY(d7 + d5, FIX(1.175875602));
494
495                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
496                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
497                         z1 = MULTIPLY(d7, - FIX(0.899976223));
498                         z3 = MULTIPLY(d7, - FIX(1.961570560));
499                         z2 = MULTIPLY(d5, - FIX(2.562915447));
500                         z4 = MULTIPLY(d5, - FIX(0.390180644));
501
502                         z3 += z5;
503                         z4 += z5;
504
505                         tmp0 += z3;
506                         tmp1 += z4;
507                         tmp2 = z2 + z3;
508                         tmp3 = z1 + z4;
509                     }
510                 }
511             }
512             else
513             {
514                 if (d3)
515                 {
516                     if (d1)
517                     {
518             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
519                         z1 = d7 + d1;
520                         z3 = d7 + d3;
521                         z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
522
523                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
524                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
525                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
526                         z1 = MULTIPLY(z1, - FIX(0.899976223));
527                         z2 = MULTIPLY(d3, - FIX(2.562915447));
528                         z3 = MULTIPLY(z3, - FIX(1.961570560));
529                         z4 = MULTIPLY(d1, - FIX(0.390180644));
530
531                         z3 += z5;
532                         z4 += z5;
533
534                         tmp0 += z1 + z3;
535                         tmp1 = z2 + z4;
536                         tmp2 += z2 + z3;
537                         tmp3 += z1 + z4;
538                     }
539                     else
540                     {
541             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
542                         z3 = d7 + d3;
543                         z5 = MULTIPLY(z3, FIX(1.175875602));
544
545                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
546                         tmp2 = MULTIPLY(d3, FIX(0.509795579));
547                         z1 = MULTIPLY(d7, - FIX(0.899976223));
548                         z2 = MULTIPLY(d3, - FIX(2.562915447));
549                         z3 = MULTIPLY(z3, - FIX2(0.785694958));
550
551                         tmp0 += z3;
552                         tmp1 = z2 + z5;
553                         tmp2 += z3;
554                         tmp3 = z1 + z5;
555                     }
556                 }
557                 else
558                 {
559                     if (d1)
560                     {
561             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
562                         z1 = d7 + d1;
563                         z5 = MULTIPLY(z1, FIX(1.175875602));
564
565                         tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
566                         tmp3 = MULTIPLY(d1, FIX2(1.111140466));
567                         z1 = MULTIPLY(z1, FIX2(0.275899379));
568                         z3 = MULTIPLY(d7, - FIX(1.961570560));
569                         z4 = MULTIPLY(d1, - FIX(0.390180644));
570
571                         tmp0 += z1;
572                         tmp1 = z4 + z5;
573                         tmp2 = z3 + z5;
574                         tmp3 += z1;
575                     }
576                 else
577                     {
578             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
579                         tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
580                         tmp1 = MULTIPLY(d7, FIX(1.175875602));
581                         tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
582                         tmp3 = MULTIPLY(d7, FIX2(0.275899379));
583                     }
584                 }
585             }
586         }
587         else
588         {
589             if (d5)
590             {
591                 if (d3)
592                 {
593                     if (d1)
594                     {
595             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
596                         z2 = d5 + d3;
597                         z4 = d5 + d1;
598                         z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
599
600                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
601                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
602                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
603                         z1 = MULTIPLY(d1, - FIX(0.899976223));
604                         z2 = MULTIPLY(z2, - FIX(2.562915447));
605                         z3 = MULTIPLY(d3, - FIX(1.961570560));
606                         z4 = MULTIPLY(z4, - FIX(0.390180644));
607
608                         z3 += z5;
609                         z4 += z5;
610
611                         tmp0 = z1 + z3;
612                         tmp1 += z2 + z4;
613                         tmp2 += z2 + z3;
614                         tmp3 += z1 + z4;
615                     }
616                     else
617                     {
618             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
619                         z2 = d5 + d3;
620                         z5 = MULTIPLY(z2, FIX(1.175875602));
621
622                         tmp1 = MULTIPLY(d5, FIX2(1.662939225));
623                         tmp2 = MULTIPLY(d3, FIX2(1.111140466));
624                         z2 = MULTIPLY(z2, - FIX2(1.387039845));
625                         z3 = MULTIPLY(d3, - FIX(1.961570560));
626                         z4 = MULTIPLY(d5, - FIX(0.390180644));
627
628                         tmp0 = z3 + z5;
629                         tmp1 += z2;
630                         tmp2 += z2;
631                         tmp3 = z4 + z5;
632                     }
633                 }
634                 else
635                 {
636                     if (d1)
637                     {
638             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
639                         z4 = d5 + d1;
640                         z5 = MULTIPLY(z4, FIX(1.175875602));
641
642                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
643                         tmp3 = MULTIPLY(d1, FIX2(0.601344887));
644                         z1 = MULTIPLY(d1, - FIX(0.899976223));
645                         z2 = MULTIPLY(d5, - FIX(2.562915447));
646                         z4 = MULTIPLY(z4, FIX2(0.785694958));
647
648                         tmp0 = z1 + z5;
649                         tmp1 += z4;
650                         tmp2 = z2 + z5;
651                         tmp3 += z4;
652                     }
653                     else
654                     {
655             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
656                         tmp0 = MULTIPLY(d5, FIX(1.175875602));
657                         tmp1 = MULTIPLY(d5, FIX2(0.275899380));
658                         tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
659                         tmp3 = MULTIPLY(d5, FIX2(0.785694958));
660                     }
661                 }
662             }
663             else
664             {
665                 if (d3)
666                 {
667                     if (d1)
668                     {
669             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
670                         z5 = d3 + d1;
671
672                         tmp2 = MULTIPLY(d3, - FIX(1.451774981));
673                         tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
674                         z1 = MULTIPLY(d1, FIX(1.061594337));
675                         z2 = MULTIPLY(d3, - FIX(2.172734803));
676                         z4 = MULTIPLY(z5, FIX(0.785694958));
677                         z5 = MULTIPLY(z5, FIX(1.175875602));
678
679                         tmp0 = z1 - z4;
680                         tmp1 = z2 + z4;
681                         tmp2 += z5;
682                         tmp3 += z5;
683                     }
684                     else
685                     {
686             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
687                         tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
688                         tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
689                         tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
690                         tmp3 = MULTIPLY(d3, FIX(1.175875602));
691                     }
692                 }
693                 else
694                 {
695                     if (d1)
696                     {
697             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
698                         tmp0 = MULTIPLY(d1, FIX2(0.275899379));
699                         tmp1 = MULTIPLY(d1, FIX2(0.785694958));
700                         tmp2 = MULTIPLY(d1, FIX(1.175875602));
701                         tmp3 = MULTIPLY(d1, FIX2(1.387039845));
702                     }
703                     else
704                     {
705             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
706                         tmp0 = tmp1 = tmp2 = tmp3 = 0;
707                     }
708                 }
709             }
710         }
711
712     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
713
714         dataptr[0] = (dctelem_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
715         dataptr[7] = (dctelem_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
716         dataptr[1] = (dctelem_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
717         dataptr[6] = (dctelem_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
718         dataptr[2] = (dctelem_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
719         dataptr[5] = (dctelem_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
720         dataptr[3] = (dctelem_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
721         dataptr[4] = (dctelem_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
722
723         dataptr += DCTSIZE;              /* advance pointer to next row */
724     }
725
726   /* Pass 2: process columns. */
727   /* Note that we must descale the results by a factor of 8 == 2**3, */
728   /* and also undo the PASS1_BITS scaling. */
729
730     dataptr = p_block;
731     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
732     {
733     /* Columns of zeroes can be exploited in the same way as we did with rows.
734      * However, the row calculation has created many nonzero AC terms, so the
735      * simplification applies less often (typically 5% to 10% of the time).
736      * On machines with very fast multiplication, it's possible that the
737      * test takes more time than it's worth.  In that case this section
738      * may be commented out.
739      */
740
741         d0 = dataptr[DCTSIZE*0];
742         d1 = dataptr[DCTSIZE*1];
743         d2 = dataptr[DCTSIZE*2];
744         d3 = dataptr[DCTSIZE*3];
745         d4 = dataptr[DCTSIZE*4];
746         d5 = dataptr[DCTSIZE*5];
747         d6 = dataptr[DCTSIZE*6];
748         d7 = dataptr[DCTSIZE*7];
749
750     /* Even part: reverse the even part of the forward DCT. */
751     /* The rotator is sqrt(2)*c(-6). */
752         if (d6)
753         {
754             if (d4)
755             {
756                 if (d2)
757                 {
758                     if (d0)
759                     {
760             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
761                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
762                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
763                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
764
765                         tmp0 = (d0 + d4) << CONST_BITS;
766                         tmp1 = (d0 - d4) << CONST_BITS;
767
768                         tmp10 = tmp0 + tmp3;
769                         tmp13 = tmp0 - tmp3;
770                         tmp11 = tmp1 + tmp2;
771                         tmp12 = tmp1 - tmp2;
772                     }
773                     else
774                     {
775             /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
776                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
777                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
778                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
779
780                         tmp0 = d4 << CONST_BITS;
781
782                         tmp10 = tmp0 + tmp3;
783                         tmp13 = tmp0 - tmp3;
784                         tmp11 = tmp2 - tmp0;
785                         tmp12 = -(tmp0 + tmp2);
786                     }
787                 }
788                 else
789                 {
790                     if (d0)
791                     {
792             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
793                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
794                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
795
796                         tmp0 = (d0 + d4) << CONST_BITS;
797                         tmp1 = (d0 - d4) << CONST_BITS;
798
799                         tmp10 = tmp0 + tmp3;
800                         tmp13 = tmp0 - tmp3;
801                         tmp11 = tmp1 + tmp2;
802                         tmp12 = tmp1 - tmp2;
803                     }
804                     else
805                     {
806             /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
807                         tmp2 = MULTIPLY(d6, -FIX2(1.306562965));
808                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
809
810                         tmp0 = d4 << CONST_BITS;
811
812                         tmp10 = tmp0 + tmp3;
813                         tmp13 = tmp0 - tmp3;
814                         tmp11 = tmp2 - tmp0;
815                         tmp12 = -(tmp0 + tmp2);
816                     }
817                 }
818             }
819             else
820             {
821                 if (d2)
822                 {
823                     if (d0)
824                     {
825             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
826                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
827                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
828                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
829
830                         tmp0 = d0 << CONST_BITS;
831
832                         tmp10 = tmp0 + tmp3;
833                         tmp13 = tmp0 - tmp3;
834                         tmp11 = tmp0 + tmp2;
835                         tmp12 = tmp0 - tmp2;
836                     }
837                     else
838                     {
839             /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
840                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
841                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
842                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
843
844                         tmp10 = tmp3;
845                         tmp13 = -tmp3;
846                         tmp11 = tmp2;
847                         tmp12 = -tmp2;
848                     }
849                 }
850                 else
851                 {
852                     if (d0)
853                     {
854             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
855                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
856                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
857
858                     tmp0 = d0 << CONST_BITS;
859
860                     tmp10 = tmp0 + tmp3;
861                     tmp13 = tmp0 - tmp3;
862                     tmp11 = tmp0 + tmp2;
863                     tmp12 = tmp0 - tmp2;
864                 }
865                 else
866                 {
867             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
868                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
869                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
870                     tmp10 = tmp3;
871                     tmp13 = -tmp3;
872                     tmp11 = tmp2;
873                     tmp12 = -tmp2;
874                 }
875             }
876         }
877     }
878     else
879     {
880         if (d4)
881         {
882             if (d2)
883             {
884                 if (d0)
885                 {
886             /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
887                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
888                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
889
890                     tmp0 = (d0 + d4) << CONST_BITS;
891                     tmp1 = (d0 - d4) << CONST_BITS;
892
893                     tmp10 = tmp0 + tmp3;
894                     tmp13 = tmp0 - tmp3;
895                     tmp11 = tmp1 + tmp2;
896                     tmp12 = tmp1 - tmp2;
897                 }
898                 else
899                 {
900             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
901                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
902                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
903
904                     tmp0 = d4 << CONST_BITS;
905
906                     tmp10 = tmp0 + tmp3;
907                     tmp13 = tmp0 - tmp3;
908                     tmp11 = tmp2 - tmp0;
909                     tmp12 = -(tmp0 + tmp2);
910                 }
911             }
912             else
913             {
914                 if (d0)
915                 {
916             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
917                     tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
918                     tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
919                 }
920                 else
921                 {
922             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
923                     tmp10 = tmp13 = d4 << CONST_BITS;
924                     tmp11 = tmp12 = -tmp10;
925                 }
926             }
927         }
928         else
929         {
930         if (d2)
931         {
932             if (d0)
933             {
934             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
935                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
936                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
937
938                     tmp0 = d0 << CONST_BITS;
939
940                     tmp10 = tmp0 + tmp3;
941                     tmp13 = tmp0 - tmp3;
942                     tmp11 = tmp0 + tmp2;
943                     tmp12 = tmp0 - tmp2;
944             }
945             else
946             {
947             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
948                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
949                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
950
951                     tmp10 = tmp3;
952                     tmp13 = -tmp3;
953                     tmp11 = tmp2;
954                     tmp12 = -tmp2;
955             }
956         }
957         else
958         {
959             if (d0)
960                 {
961             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
962                     tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
963                 }
964                 else
965                 {
966             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
967                     tmp10 = tmp13 = tmp11 = tmp12 = 0;
968                 }
969             }
970         }
971     }
972
973     /* Odd part per figure 8; the matrix is unitary and hence its
974      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively
975      * (d7, d5, d3, d1 here); the results are left in tmp0..tmp3. */
976     if (d7)
977     {
978         if (d5)
979         {
980             if (d3)
981             {
982                 if (d1)
983                 {
984             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
985                     z1 = d7 + d1;
986                     z2 = d5 + d3;
987                     z3 = d7 + d3;
988                     z4 = d5 + d1;
989                     z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
990
991                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
992                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
993                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
994                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
995                     z1 = MULTIPLY(z1, - FIX(0.899976223));
996                     z2 = MULTIPLY(z2, - FIX(2.562915447));
997                     z3 = MULTIPLY(z3, - FIX(1.961570560));
998                     z4 = MULTIPLY(z4, - FIX(0.390180644));
999
1000                     z3 += z5;
1001                     z4 += z5;
1002
1003                     tmp0 += z1 + z3;
1004                     tmp1 += z2 + z4;
1005                     tmp2 += z2 + z3;
1006                     tmp3 += z1 + z4;
1007                 }
1008                 else
1009                 {
1010             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
1011                     z2 = d5 + d3;
1012                     z3 = d7 + d3;
1013                     z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
1014
1015                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1016                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1017                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1018                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1019                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1020                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1021                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1022
1023                     z3 += z5;
1024                     z4 += z5;
1025
1026                     tmp0 += z1 + z3;
1027                     tmp1 += z2 + z4;
1028                     tmp2 += z2 + z3;
1029                     tmp3 = z1 + z4;
1030                 }
1031             }
1032             else
1033             {
1034                 if (d1)
1035                 {
1036             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
1037                     z1 = d7 + d1;
1038                     z4 = d5 + d1;
1039                     z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
1040
1041                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1042                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1043                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1044                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1045                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1046                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1047                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1048
1049                     z3 += z5;
1050                     z4 += z5;
1051
1052                     tmp0 += z1 + z3;
1053                     tmp1 += z2 + z4;
1054                     tmp2 = z2 + z3;
1055                     tmp3 += z1 + z4;
1056                 }
1057                 else
1058                 {
1059             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
1060                     z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
1061
1062                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1063                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1064                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1065                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1066                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1067                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1068
1069                     z3 += z5;
1070                     z4 += z5;
1071
1072                     tmp0 += z3;
1073                     tmp1 += z4;
1074                     tmp2 = z2 + z3;
1075                     tmp3 = z1 + z4;
1076                 }
1077             }
1078         }
1079         else
1080         {
1081             if (d3)
1082             {
1083                 if (d1)
1084                 {
1085             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
1086                     z1 = d7 + d1;
1087                     z3 = d7 + d3;
1088                     z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
1089
1090                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1091                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1092                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1093                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1094                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1095                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1096                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1097
1098                     z3 += z5;
1099                     z4 += z5;
1100
1101                     tmp0 += z1 + z3;
1102                     tmp1 = z2 + z4;
1103                     tmp2 += z2 + z3;
1104                     tmp3 += z1 + z4;
1105                 }
1106                 else
1107                 {
1108             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1109                     z3 = d7 + d3;
1110                     z5 = MULTIPLY(z3, FIX(1.175875602));
1111
1112                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1113                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1114                     tmp2 = MULTIPLY(d3, FIX(0.509795579));
1115                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1116                     z3 = MULTIPLY(z3, - FIX2(0.785694958));
1117
1118                     tmp0 += z3;
1119                     tmp1 = z2 + z5;
1120                     tmp2 += z3;
1121                     tmp3 = z1 + z5;
1122                 }
1123             }
1124             else
1125             {
1126                 if (d1)
1127                 {
1128             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1129                     z1 = d7 + d1;
1130                     z5 = MULTIPLY(z1, FIX(1.175875602));
1131
1132                     tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
1133                     tmp3 = MULTIPLY(d1, FIX2(1.111140466));
1134                     z1 = MULTIPLY(z1, FIX2(0.275899379));
1135                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1136                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1137
1138                     tmp0 += z1;
1139                     tmp1 = z4 + z5;
1140                     tmp2 = z3 + z5;
1141                     tmp3 += z1;
1142                 }
1143                 else
1144                 {
1145             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1146                     tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
1147                     tmp1 = MULTIPLY(d7, FIX(1.175875602));
1148                     tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
1149                     tmp3 = MULTIPLY(d7, FIX2(0.275899379));
1150                 }
1151             }
1152         }
1153     }
1154     else
1155     {
1156         if (d5)
1157         {
1158             if (d3)
1159             {
1160                 if (d1)
1161                 {
1162             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1163                     z2 = d5 + d3;
1164                     z4 = d5 + d1;
1165                     z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
1166
1167                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1168                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1169                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1170                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1171                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1172                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1173                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1174
1175                     z3 += z5;
1176                     z4 += z5;
1177
1178                     tmp0 = z1 + z3;
1179                     tmp1 += z2 + z4;
1180                     tmp2 += z2 + z3;
1181                     tmp3 += z1 + z4;
1182                 }
1183                 else
1184                 {
1185             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1186                     z2 = d5 + d3;
1187                     z5 = MULTIPLY(z2, FIX(1.175875602));
1188
1189                     tmp1 = MULTIPLY(d5, FIX2(1.662939225));
1190                     tmp2 = MULTIPLY(d3, FIX2(1.111140466));
1191                     z2 = MULTIPLY(z2, - FIX2(1.387039845));
1192                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1193                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1194
1195                     tmp0 = z3 + z5;
1196                     tmp1 += z2;
1197                     tmp2 += z2;
1198                     tmp3 = z4 + z5;
1199                 }
1200             }
1201             else
1202             {
1203                 if (d1)
1204                 {
1205             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1206                     z4 = d5 + d1;
1207                     z5 = MULTIPLY(z4, FIX(1.175875602));
1208
1209                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1210                     tmp3 = MULTIPLY(d1, FIX2(0.601344887));
1211                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1212                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1213                     z4 = MULTIPLY(z4, FIX2(0.785694958));
1214
1215                     tmp0 = z1 + z5;
1216                     tmp1 += z4;
1217                     tmp2 = z2 + z5;
1218                     tmp3 += z4;
1219                 }
1220                 else
1221                 {
1222             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1223                     tmp0 = MULTIPLY(d5, FIX(1.175875602));
1224                     tmp1 = MULTIPLY(d5, FIX2(0.275899380));
1225                     tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
1226                     tmp3 = MULTIPLY(d5, FIX2(0.785694958));
1227                 }
1228             }
1229         }
1230         else
1231         {
1232             if (d3)
1233             {
1234                 if (d1)
1235                 {
1236             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1237                     z5 = d3 + d1;
1238
1239                     tmp2 = MULTIPLY(d3, - FIX(1.451774981));
1240                     tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
1241                     z1 = MULTIPLY(d1, FIX(1.061594337));
1242                     z2 = MULTIPLY(d3, - FIX(2.172734803));
1243                     z4 = MULTIPLY(z5, FIX(0.785694958));
1244                     z5 = MULTIPLY(z5, FIX(1.175875602));
1245
1246                     tmp0 = z1 - z4;
1247                     tmp1 = z2 + z4;
1248                     tmp2 += z5;
1249                     tmp3 += z5;
1250                 }
1251                 else
1252                 {
1253             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1254                     tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
1255                     tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
1256                     tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
1257                     tmp3 = MULTIPLY(d3, FIX(1.175875602));
1258                 }
1259             }
1260             else
1261             {
1262                 if (d1)
1263                 {
1264             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1265                     tmp0 = MULTIPLY(d1, FIX2(0.275899379));
1266                     tmp1 = MULTIPLY(d1, FIX2(0.785694958));
1267                     tmp2 = MULTIPLY(d1, FIX(1.175875602));
1268                     tmp3 = MULTIPLY(d1, FIX2(1.387039845));
1269                 }
1270                 else
1271                 {
1272             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1273                     tmp0 = tmp1 = tmp2 = tmp3 = 0;
1274                 }
1275             }
1276         }
1277     }
1278
1279     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1280
1281     dataptr[DCTSIZE*0] = (dctelem_t) DESCALE(tmp10 + tmp3,
1282                        CONST_BITS+PASS1_BITS+3);
1283     dataptr[DCTSIZE*7] = (dctelem_t) DESCALE(tmp10 - tmp3,
1284                        CONST_BITS+PASS1_BITS+3);
1285     dataptr[DCTSIZE*1] = (dctelem_t) DESCALE(tmp11 + tmp2,
1286                        CONST_BITS+PASS1_BITS+3);
1287     dataptr[DCTSIZE*6] = (dctelem_t) DESCALE(tmp11 - tmp2,
1288                        CONST_BITS+PASS1_BITS+3);
1289     dataptr[DCTSIZE*2] = (dctelem_t) DESCALE(tmp12 + tmp1,
1290                        CONST_BITS+PASS1_BITS+3);
1291     dataptr[DCTSIZE*5] = (dctelem_t) DESCALE(tmp12 - tmp1,
1292                        CONST_BITS+PASS1_BITS+3);
1293     dataptr[DCTSIZE*3] = (dctelem_t) DESCALE(tmp13 + tmp0,
1294                        CONST_BITS+PASS1_BITS+3);
1295     dataptr[DCTSIZE*4] = (dctelem_t) DESCALE(tmp13 - tmp0,
1296                        CONST_BITS+PASS1_BITS+3);
1297
1298     dataptr++;             /* advance pointer to next column */
1299     }
1300 }
1301