]> git.sesse.net Git - vlc/blob - plugins/idct/idct.c
2eafe3b79e08c156d6111b55064caa65333ac13b
[vlc] / plugins / idct / idct.c
1 /*****************************************************************************
2  * idct.c : IDCT module
3  *****************************************************************************
4  * Copyright (C) 1999, 2000 VideoLAN
5  * $Id: idct.c,v 1.14 2001/08/22 17:21:45 massiot Exp $
6  *
7  * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  * 
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 #define MODULE_NAME idct
25 #include "modules_inner.h"
26
27 /*****************************************************************************
28  * Preamble
29  *****************************************************************************/
30 #include "defs.h"
31
32 #include <stdlib.h>
33
34 #include "config.h"
35 #include "common.h"
36 #include "threads.h"
37 #include "mtime.h"
38 #include "tests.h"
39
40 #include "vdec_idct.h"
41
42 #include "modules.h"
43 #include "modules_export.h"
44
45 /*****************************************************************************
46  * Local and extern prototypes.
    *
    * Forward declarations of the file-local (static) functions defined below:
    *  - idct_getfunctions: fills the function list passed in at activation
    *  - idct_Probe:        returns this module's preference score
    *  - vdec_NormScan:     scan-table hook; its body is empty in this module
47  *****************************************************************************/
48 static void idct_getfunctions( function_list_t * p_function_list );
49 static int  idct_Probe      ( probedata_t *p_data );
50 static void vdec_NormScan   ( u8 ppi_scan[2][64] );
51
52 /*****************************************************************************
53  * Build configuration tree.
    *
    * Module descriptor, written with the MODULE_* helper macros.
    * NOTE(review): the macros presumably come from modules_inner.h (included
    * at the top of this file); their expansion is not visible here -- confirm.
    *
    *  - CONFIG:     one window titled "Configuration for IDCT module" that
    *                holds a single comment entry; no tunable settings.
    *  - INIT:       sets i_capabilities to MODULE_CAPABILITY_NULL or'ed with
    *                MODULE_CAPABILITY_IDCT, and sets the long name.
    *  - ACTIVATE:   passes the idct entry of the module's function table to
    *                idct_getfunctions(), which fills it in.
    *  - DEACTIVATE: empty.
54  *****************************************************************************/
55 MODULE_CONFIG_START
56 ADD_WINDOW( "Configuration for IDCT module" )
57     ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
58 MODULE_CONFIG_STOP
59
60 MODULE_INIT_START
61     p_module->i_capabilities = MODULE_CAPABILITY_NULL
62                                 | MODULE_CAPABILITY_IDCT;
63     p_module->psz_longname = "IDCT module";
64 MODULE_INIT_STOP
65
66 MODULE_ACTIVATE_START
67     idct_getfunctions( &p_module->p_functions->idct );
68 MODULE_ACTIVATE_STOP
69
70 MODULE_DEACTIVATE_START
71 MODULE_DEACTIVATE_STOP
72
73 /* Following functions are local */
74
75 /*****************************************************************************
76  * Functions exported as capabilities. They are declared as static so that
77  * we don't pollute the namespace too much.
    *
    * idct_getfunctions: fill in the function list of this module.
    *
    *   p_function_list: list to fill. It is only written to: pf_probe
    *                    receives idct_Probe, and the seven functions.idct
    *                    members assigned below receive the vdec_* routines.
    *
    * F is a shorthand for p_function_list->functions.idct; it is defined and
    * undefined inside the function body so it cannot leak into later code.
    * NOTE(review): _M() presumably applies per-module name mangling -- its
    * definition is not visible in this file, confirm in modules_inner.h.
78  *****************************************************************************/
79 static void idct_getfunctions( function_list_t * p_function_list )
80 {
81     p_function_list->pf_probe = idct_Probe;
82 #define F p_function_list->functions.idct
83     F.pf_idct_init = _M( vdec_InitIDCT );
84     F.pf_sparse_idct = _M( vdec_SparseIDCT );
85     F.pf_idct = _M( vdec_IDCT );
86     F.pf_norm_scan = vdec_NormScan;  /* static stub defined in this file */
87     F.pf_decode_init = _M( vdec_InitDecode );
88     F.pf_addblock = _M( vdec_AddBlock );
89     F.pf_copyblock = _M( vdec_CopyBlock );
90 #undef F
91 }
92
93 /*****************************************************************************
94  * idct_Probe: returns a preference score
    *
    *   p_data: not used by this function.
    *
    * Returns 999 when TestMethod( IDCT_METHOD_VAR, ... ) is non-zero for
    * either "idct" or "c", and 50 in every other case.
    * NOTE(review): TestMethod presumably checks whether the user requested
    * this method by name, which would make 999 the "explicitly selected"
    * score -- its definition is not visible here, confirm against tests.h.
95  *****************************************************************************/
96 static int idct_Probe( probedata_t *p_data )
97 {
98     if( TestMethod( IDCT_METHOD_VAR, "idct" )
99          || TestMethod( IDCT_METHOD_VAR, "c" ))
100     {
101         return( 999 );
102     }
103
104     /* This plugin always works */
105     return( 50 );
106 }
107
108 /*****************************************************************************
109  * vdec_NormScan : Unused in this IDCT
     *
     *   ppi_scan: two 64-entry scan tables; neither read nor modified here.
     *
     * Installed as pf_norm_scan by idct_getfunctions(). The body is
     * intentionally empty, so calling it has no effect.
110  *****************************************************************************/
111 static void vdec_NormScan( u8 ppi_scan[2][64] )
112 {
113 }
114
115 /*****************************************************************************
116  * vdec_IDCT : IDCT function for normal matrices
117  *****************************************************************************/
118 void _M( vdec_IDCT )( void * p_unused_data, dctelem_t * p_block,
119                       int i_idontcare )
120 {
121     s32 tmp0, tmp1, tmp2, tmp3;
122     s32 tmp10, tmp11, tmp12, tmp13;
123     s32 z1, z2, z3, z4, z5;
124     s32 d0, d1, d2, d3, d4, d5, d6, d7;
125     dctelem_t * dataptr;
126     int rowctr;
127
128     SHIFT_TEMPS
129
130     /* Pass 1: process rows. */
131     /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
132     /* furthermore, we scale the results by 2**PASS1_BITS. */
133
134     dataptr = p_block;
135
136     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
137     {
138         /* Due to quantization, we will usually find that many of the input
139          * coefficients are zero, especially the AC terms.  We can exploit this
140          * by short-circuiting the IDCT calculation for any row in which all
141          * the AC terms are zero.  In that case each output is equal to the
142          * DC coefficient (with scale factor as needed).
143          * With typical images and quantization tables, half or more of the
144          * row DCT calculations can be simplified this way.
145          */
146
147         register int * idataptr = (int*)dataptr;
148         d0 = dataptr[0];
149         d1 = dataptr[1];
150         if ( (d1 == 0) && ((idataptr[1] | idataptr[2] | idataptr[3]) == 0) )
151         {
152       /* AC terms all zero */
153             if (d0)
154             {
155       /* Compute a 32 bit value to assign. */
156                 dctelem_t dcval = (dctelem_t) (d0 << PASS1_BITS);
157                 register int v = (dcval & 0xffff) | (dcval << 16);
158
159                 idataptr[0] = v;
160                 idataptr[1] = v;
161                 idataptr[2] = v;
162                 idataptr[3] = v;
163             }
164
165             dataptr += DCTSIZE; /* advance pointer to next row */
166             continue;
167         }
168         d2 = dataptr[2];
169         d3 = dataptr[3];
170         d4 = dataptr[4];
171         d5 = dataptr[5];
172         d6 = dataptr[6];
173         d7 = dataptr[7];
174
175     /* Even part: reverse the even part of the forward DCT. */
176     /* The rotator is sqrt(2)*c(-6). */
177         if (d6)
178         {
179             if (d4)
180             {
181                 if (d2)
182                 {
183                     if (d0)
184                     {
185             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
186                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
187                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
188                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
189
190                         tmp0 = (d0 + d4) << CONST_BITS;
191                         tmp1 = (d0 - d4) << CONST_BITS;
192
193                         tmp10 = tmp0 + tmp3;
194                         tmp13 = tmp0 - tmp3;
195                         tmp11 = tmp1 + tmp2;
196                         tmp12 = tmp1 - tmp2;
197                     }
198                     else
199                     {
200                     /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
201                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
202                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
203                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
204
205                         tmp0 = d4 << CONST_BITS;
206
207                         tmp10 = tmp0 + tmp3;
208                         tmp13 = tmp0 - tmp3;
209                         tmp11 = tmp2 - tmp0;
210                         tmp12 = -(tmp0 + tmp2);
211                         }
212                 }
213                 else
214                 {
215                     if (d0)
216                     {
217             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
218                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
219                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
220
221                         tmp0 = (d0 + d4) << CONST_BITS;
222                         tmp1 = (d0 - d4) << CONST_BITS;
223
224                         tmp10 = tmp0 + tmp3;
225                         tmp13 = tmp0 - tmp3;
226                         tmp11 = tmp1 + tmp2;
227                         tmp12 = tmp1 - tmp2;
228                         }
229                     else
230                     {
231                     /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
232                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
233                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
234
235                         tmp0 = d4 << CONST_BITS;
236
237                         tmp10 = tmp0 + tmp3;
238                         tmp13 = tmp0 - tmp3;
239                         tmp11 = tmp2 - tmp0;
240                         tmp12 = -(tmp0 + tmp2);
241                         }
242                 }
243             }
244             else
245             {
246                 if (d2)
247                 {
248                     if (d0)
249                     {
250             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
251                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
252                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
253                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
254
255                         tmp0 = d0 << CONST_BITS;
256
257                         tmp10 = tmp0 + tmp3;
258                         tmp13 = tmp0 - tmp3;
259                         tmp11 = tmp0 + tmp2;
260                         tmp12 = tmp0 - tmp2;
261                     }
262                     else
263                     {
264                     /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
265                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
266                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
267                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
268
269                         tmp10 = tmp3;
270                         tmp13 = -tmp3;
271                         tmp11 = tmp2;
272                         tmp12 = -tmp2;
273                             }
274                 }
275                 else
276                 {
277                     if (d0)
278                     {
279             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
280                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
281                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
282
283                         tmp0 = d0 << CONST_BITS;
284
285                         tmp10 = tmp0 + tmp3;
286                         tmp13 = tmp0 - tmp3;
287                         tmp11 = tmp0 + tmp2;
288                         tmp12 = tmp0 - tmp2;
289                     }
290                     else
291                     {
292             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
293                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
294                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
295
296                         tmp10 = tmp3;
297                         tmp13 = -tmp3;
298                         tmp11 = tmp2;
299                         tmp12 = -tmp2;
300                     }
301                 }
302             }
303         }
304         else
305         {
306             if (d4)
307             {
308                 if (d2)
309                 {
310                     if (d0)
311                     {
312                     /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
313                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
314                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
315
316                         tmp0 = (d0 + d4) << CONST_BITS;
317                         tmp1 = (d0 - d4) << CONST_BITS;
318
319                         tmp10 = tmp0 + tmp3;
320                         tmp13 = tmp0 - tmp3;
321                         tmp11 = tmp1 + tmp2;
322                         tmp12 = tmp1 - tmp2;
323                     }
324                     else
325                     {
326             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
327                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
328                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
329
330                         tmp0 = d4 << CONST_BITS;
331
332                         tmp10 = tmp0 + tmp3;
333                         tmp13 = tmp0 - tmp3;
334                         tmp11 = tmp2 - tmp0;
335                         tmp12 = -(tmp0 + tmp2);
336                     }
337                 }
338                 else
339                 {
340                     if (d0)
341                     {
342             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
343                         tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
344                         tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
345                     }
346                     else
347                     {
348             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
349                         tmp10 = tmp13 = d4 << CONST_BITS;
350                         tmp11 = tmp12 = -tmp10;
351                     }
352                 }
353             }
354             else
355             {
356                 if (d2)
357                 {
358                     if (d0)
359                     {
360             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
361                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
362                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
363
364                         tmp0 = d0 << CONST_BITS;
365
366                         tmp10 = tmp0 + tmp3;
367                         tmp13 = tmp0 - tmp3;
368                         tmp11 = tmp0 + tmp2;
369                         tmp12 = tmp0 - tmp2;
370                     }
371                     else
372                     {
373             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
374                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
375                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
376
377                         tmp10 = tmp3;
378                         tmp13 = -tmp3;
379                         tmp11 = tmp2;
380                         tmp12 = -tmp2;
381                     }
382                 }
383                 else
384                 {
385                     if (d0)
386                     {
387             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
388                         tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
389                     }
390                     else
391                     {
392             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
393                         tmp10 = tmp13 = tmp11 = tmp12 = 0;
394                     }
395                 }
396             }
397         }
398
399
400     /* Odd part per figure 8; the matrix is unitary and hence its
401      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
402      */
403
404         if (d7)
405             {
406             if (d5)
407             {
408                 if (d3)
409                 {
410                     if (d1)
411                     {
412             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
413                         z1 = d7 + d1;
414                         z2 = d5 + d3;
415                         z3 = d7 + d3;
416                         z4 = d5 + d1;
417                         z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
418
419                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
420                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
421                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
422                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
423                         z1 = MULTIPLY(z1, - FIX(0.899976223));
424                         z2 = MULTIPLY(z2, - FIX(2.562915447));
425                         z3 = MULTIPLY(z3, - FIX(1.961570560));
426                         z4 = MULTIPLY(z4, - FIX(0.390180644));
427
428                         z3 += z5;
429                         z4 += z5;
430
431                         tmp0 += z1 + z3;
432                         tmp1 += z2 + z4;
433                         tmp2 += z2 + z3;
434                         tmp3 += z1 + z4;
435                     }
436                     else
437                     {
438             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
439                         z2 = d5 + d3;
440                         z3 = d7 + d3;
441                         z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
442
443                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
444                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
445                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
446                         z1 = MULTIPLY(d7, - FIX(0.899976223));
447                         z2 = MULTIPLY(z2, - FIX(2.562915447));
448                         z3 = MULTIPLY(z3, - FIX(1.961570560));
449                         z4 = MULTIPLY(d5, - FIX(0.390180644));
450
451                         z3 += z5;
452                         z4 += z5;
453
454                         tmp0 += z1 + z3;
455                         tmp1 += z2 + z4;
456                         tmp2 += z2 + z3;
457                         tmp3 = z1 + z4;
458                         }
459                     }
460                 else
461                 {
462                     if (d1)
463                     {
464             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
465                         z1 = d7 + d1;
466                         z4 = d5 + d1;
467                         z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
468
469                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
470                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
471                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
472                         z1 = MULTIPLY(z1, - FIX(0.899976223));
473                         z2 = MULTIPLY(d5, - FIX(2.562915447));
474                         z3 = MULTIPLY(d7, - FIX(1.961570560));
475                         z4 = MULTIPLY(z4, - FIX(0.390180644));
476
477                         z3 += z5;
478                         z4 += z5;
479
480                         tmp0 += z1 + z3;
481                         tmp1 += z2 + z4;
482                         tmp2 = z2 + z3;
483                         tmp3 += z1 + z4;
484                     }
485                     else
486                     {
487             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
488                         z5 = MULTIPLY(d7 + d5, FIX(1.175875602));
489
490                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
491                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
492                         z1 = MULTIPLY(d7, - FIX(0.899976223));
493                         z3 = MULTIPLY(d7, - FIX(1.961570560));
494                         z2 = MULTIPLY(d5, - FIX(2.562915447));
495                         z4 = MULTIPLY(d5, - FIX(0.390180644));
496
497                         z3 += z5;
498                         z4 += z5;
499
500                         tmp0 += z3;
501                         tmp1 += z4;
502                         tmp2 = z2 + z3;
503                         tmp3 = z1 + z4;
504                     }
505                 }
506             }
507             else
508             {
509                 if (d3)
510                 {
511                     if (d1)
512                     {
513             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
514                         z1 = d7 + d1;
515                         z3 = d7 + d3;
516                         z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
517
518                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
519                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
520                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
521                         z1 = MULTIPLY(z1, - FIX(0.899976223));
522                         z2 = MULTIPLY(d3, - FIX(2.562915447));
523                         z3 = MULTIPLY(z3, - FIX(1.961570560));
524                         z4 = MULTIPLY(d1, - FIX(0.390180644));
525
526                         z3 += z5;
527                         z4 += z5;
528
529                         tmp0 += z1 + z3;
530                         tmp1 = z2 + z4;
531                         tmp2 += z2 + z3;
532                         tmp3 += z1 + z4;
533                     }
534                     else
535                     {
536             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
537                         z3 = d7 + d3;
538                         z5 = MULTIPLY(z3, FIX(1.175875602));
539
540                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
541                         tmp2 = MULTIPLY(d3, FIX(0.509795579));
542                         z1 = MULTIPLY(d7, - FIX(0.899976223));
543                         z2 = MULTIPLY(d3, - FIX(2.562915447));
544                         z3 = MULTIPLY(z3, - FIX2(0.785694958));
545
546                         tmp0 += z3;
547                         tmp1 = z2 + z5;
548                         tmp2 += z3;
549                         tmp3 = z1 + z5;
550                     }
551                 }
552                 else
553                 {
554                     if (d1)
555                     {
556             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
557                         z1 = d7 + d1;
558                         z5 = MULTIPLY(z1, FIX(1.175875602));
559
560                         tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
561                         tmp3 = MULTIPLY(d1, FIX2(1.111140466));
562                         z1 = MULTIPLY(z1, FIX2(0.275899379));
563                         z3 = MULTIPLY(d7, - FIX(1.961570560));
564                         z4 = MULTIPLY(d1, - FIX(0.390180644));
565
566                         tmp0 += z1;
567                         tmp1 = z4 + z5;
568                         tmp2 = z3 + z5;
569                         tmp3 += z1;
570                     }
571                 else
572                     {
573             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
574                         tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
575                         tmp1 = MULTIPLY(d7, FIX(1.175875602));
576                         tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
577                         tmp3 = MULTIPLY(d7, FIX2(0.275899379));
578                     }
579                 }
580             }
581         }
582         else
583         {
584             if (d5)
585             {
586                 if (d3)
587                 {
588                     if (d1)
589                     {
590             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
591                         z2 = d5 + d3;
592                         z4 = d5 + d1;
593                         z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
594
595                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
596                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
597                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
598                         z1 = MULTIPLY(d1, - FIX(0.899976223));
599                         z2 = MULTIPLY(z2, - FIX(2.562915447));
600                         z3 = MULTIPLY(d3, - FIX(1.961570560));
601                         z4 = MULTIPLY(z4, - FIX(0.390180644));
602
603                         z3 += z5;
604                         z4 += z5;
605
606                         tmp0 = z1 + z3;
607                         tmp1 += z2 + z4;
608                         tmp2 += z2 + z3;
609                         tmp3 += z1 + z4;
610                     }
611                     else
612                     {
613             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
614                         z2 = d5 + d3;
615                         z5 = MULTIPLY(z2, FIX(1.175875602));
616
617                         tmp1 = MULTIPLY(d5, FIX2(1.662939225));
618                         tmp2 = MULTIPLY(d3, FIX2(1.111140466));
619                         z2 = MULTIPLY(z2, - FIX2(1.387039845));
620                         z3 = MULTIPLY(d3, - FIX(1.961570560));
621                         z4 = MULTIPLY(d5, - FIX(0.390180644));
622
623                         tmp0 = z3 + z5;
624                         tmp1 += z2;
625                         tmp2 += z2;
626                         tmp3 = z4 + z5;
627                     }
628                 }
629                 else
630                 {
631                     if (d1)
632                     {
633             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
634                         z4 = d5 + d1;
635                         z5 = MULTIPLY(z4, FIX(1.175875602));
636
637                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
638                         tmp3 = MULTIPLY(d1, FIX2(0.601344887));
639                         z1 = MULTIPLY(d1, - FIX(0.899976223));
640                         z2 = MULTIPLY(d5, - FIX(2.562915447));
641                         z4 = MULTIPLY(z4, FIX2(0.785694958));
642
643                         tmp0 = z1 + z5;
644                         tmp1 += z4;
645                         tmp2 = z2 + z5;
646                         tmp3 += z4;
647                     }
648                     else
649                     {
650             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
651                         tmp0 = MULTIPLY(d5, FIX(1.175875602));
652                         tmp1 = MULTIPLY(d5, FIX2(0.275899380));
653                         tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
654                         tmp3 = MULTIPLY(d5, FIX2(0.785694958));
655                     }
656                 }
657             }
658             else
659             {
660                 if (d3)
661                 {
662                     if (d1)
663                     {
664             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
665                         z5 = d3 + d1;
666
667                         tmp2 = MULTIPLY(d3, - FIX(1.451774981));
668                         tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
669                         z1 = MULTIPLY(d1, FIX(1.061594337));
670                         z2 = MULTIPLY(d3, - FIX(2.172734803));
671                         z4 = MULTIPLY(z5, FIX(0.785694958));
672                         z5 = MULTIPLY(z5, FIX(1.175875602));
673
674                         tmp0 = z1 - z4;
675                         tmp1 = z2 + z4;
676                         tmp2 += z5;
677                         tmp3 += z5;
678                     }
679                     else
680                     {
681             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
682                         tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
683                         tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
684                         tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
685                         tmp3 = MULTIPLY(d3, FIX(1.175875602));
686                     }
687                 }
688                 else
689                 {
690                     if (d1)
691                     {
692             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
693                         tmp0 = MULTIPLY(d1, FIX2(0.275899379));
694                         tmp1 = MULTIPLY(d1, FIX2(0.785694958));
695                         tmp2 = MULTIPLY(d1, FIX(1.175875602));
696                         tmp3 = MULTIPLY(d1, FIX2(1.387039845));
697                     }
698                     else
699                     {
700             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
701                         tmp0 = tmp1 = tmp2 = tmp3 = 0;
702                     }
703                 }
704             }
705         }
706
707     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
708
709         dataptr[0] = (dctelem_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
710         dataptr[7] = (dctelem_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
711         dataptr[1] = (dctelem_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
712         dataptr[6] = (dctelem_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
713         dataptr[2] = (dctelem_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
714         dataptr[5] = (dctelem_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
715         dataptr[3] = (dctelem_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
716         dataptr[4] = (dctelem_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
717
718         dataptr += DCTSIZE;              /* advance pointer to next row */
719     }
720
721   /* Pass 2: process columns. */
722   /* Note that we must descale the results by a factor of 8 == 2**3, */
723   /* and also undo the PASS1_BITS scaling. */
724
725     dataptr = p_block;
726     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
727     {
728     /* Columns of zeroes can be exploited in the same way as we did with rows.
729      * However, the row calculation has created many nonzero AC terms, so the
730      * simplification applies less often (typically 5% to 10% of the time).
731      * On machines with very fast multiplication, it's possible that the
732      * test takes more time than it's worth.  In that case this section
733      * may be commented out.
734      */
735
736         d0 = dataptr[DCTSIZE*0];
737         d1 = dataptr[DCTSIZE*1];
738         d2 = dataptr[DCTSIZE*2];
739         d3 = dataptr[DCTSIZE*3];
740         d4 = dataptr[DCTSIZE*4];
741         d5 = dataptr[DCTSIZE*5];
742         d6 = dataptr[DCTSIZE*6];
743         d7 = dataptr[DCTSIZE*7];
744
745     /* Even part: reverse the even part of the forward DCT. */
746     /* The rotator is sqrt(2)*c(-6). */
747         if (d6)
748         {
749             if (d4)
750             {
751                 if (d2)
752                 {
753                     if (d0)
754                     {
755             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
756                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
757                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
758                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
759
760                         tmp0 = (d0 + d4) << CONST_BITS;
761                         tmp1 = (d0 - d4) << CONST_BITS;
762
763                         tmp10 = tmp0 + tmp3;
764                         tmp13 = tmp0 - tmp3;
765                         tmp11 = tmp1 + tmp2;
766                         tmp12 = tmp1 - tmp2;
767                     }
768                     else
769                     {
770             /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
771                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
772                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
773                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
774
775                         tmp0 = d4 << CONST_BITS;
776
777                         tmp10 = tmp0 + tmp3;
778                         tmp13 = tmp0 - tmp3;
779                         tmp11 = tmp2 - tmp0;
780                         tmp12 = -(tmp0 + tmp2);
781                     }
782                 }
783                 else
784                 {
785                     if (d0)
786                     {
787             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
788                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
789                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
790
791                         tmp0 = (d0 + d4) << CONST_BITS;
792                         tmp1 = (d0 - d4) << CONST_BITS;
793
794                         tmp10 = tmp0 + tmp3;
795                         tmp13 = tmp0 - tmp3;
796                         tmp11 = tmp1 + tmp2;
797                         tmp12 = tmp1 - tmp2;
798                     }
799                     else
800                     {
801             /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
802                         tmp2 = MULTIPLY(d6, -FIX2(1.306562965));
803                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
804
805                         tmp0 = d4 << CONST_BITS;
806
807                         tmp10 = tmp0 + tmp3;
808                         tmp13 = tmp0 - tmp3;
809                         tmp11 = tmp2 - tmp0;
810                         tmp12 = -(tmp0 + tmp2);
811                     }
812                 }
813             }
814             else
815             {
816                 if (d2)
817                 {
818                     if (d0)
819                     {
820             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
821                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
822                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
823                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
824
825                         tmp0 = d0 << CONST_BITS;
826
827                         tmp10 = tmp0 + tmp3;
828                         tmp13 = tmp0 - tmp3;
829                         tmp11 = tmp0 + tmp2;
830                         tmp12 = tmp0 - tmp2;
831                     }
832                     else
833                     {
834             /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
835                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
836                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
837                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
838
839                         tmp10 = tmp3;
840                         tmp13 = -tmp3;
841                         tmp11 = tmp2;
842                         tmp12 = -tmp2;
843                     }
844                 }
845                 else
846                 {
847                     if (d0)
848                     {
849             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
850                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
851                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
852
853                     tmp0 = d0 << CONST_BITS;
854
855                     tmp10 = tmp0 + tmp3;
856                     tmp13 = tmp0 - tmp3;
857                     tmp11 = tmp0 + tmp2;
858                     tmp12 = tmp0 - tmp2;
859                 }
860                 else
861                 {
862             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
863                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
864                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
865                     tmp10 = tmp3;
866                     tmp13 = -tmp3;
867                     tmp11 = tmp2;
868                     tmp12 = -tmp2;
869                 }
870             }
871         }
872     }
873     else
874     {
875         if (d4)
876         {
877             if (d2)
878             {
879                 if (d0)
880                 {
881             /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
882                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
883                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
884
885                     tmp0 = (d0 + d4) << CONST_BITS;
886                     tmp1 = (d0 - d4) << CONST_BITS;
887
888                     tmp10 = tmp0 + tmp3;
889                     tmp13 = tmp0 - tmp3;
890                     tmp11 = tmp1 + tmp2;
891                     tmp12 = tmp1 - tmp2;
892                 }
893                 else
894                 {
895             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
896                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
897                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
898
899                     tmp0 = d4 << CONST_BITS;
900
901                     tmp10 = tmp0 + tmp3;
902                     tmp13 = tmp0 - tmp3;
903                     tmp11 = tmp2 - tmp0;
904                     tmp12 = -(tmp0 + tmp2);
905                 }
906             }
907             else
908             {
909                 if (d0)
910                 {
911             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
912                     tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
913                     tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
914                 }
915                 else
916                 {
917             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
918                     tmp10 = tmp13 = d4 << CONST_BITS;
919                     tmp11 = tmp12 = -tmp10;
920                 }
921             }
922         }
923         else
924         {
925         if (d2)
926         {
927             if (d0)
928             {
929             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
930                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
931                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
932
933                     tmp0 = d0 << CONST_BITS;
934
935                     tmp10 = tmp0 + tmp3;
936                     tmp13 = tmp0 - tmp3;
937                     tmp11 = tmp0 + tmp2;
938                     tmp12 = tmp0 - tmp2;
939             }
940             else
941             {
942             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
943                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
944                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
945
946                     tmp10 = tmp3;
947                     tmp13 = -tmp3;
948                     tmp11 = tmp2;
949                     tmp12 = -tmp2;
950             }
951         }
952         else
953         {
954             if (d0)
955                 {
956             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
957                     tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
958                 }
959                 else
960                 {
961             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
962                     tmp10 = tmp13 = tmp11 = tmp12 = 0;
963                 }
964             }
965         }
966     }
967
968     /* Odd part per figure 8; the matrix is unitary and hence its
969      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
970      */
971     if (d7)
972     {
973         if (d5)
974         {
975             if (d3)
976             {
977                 if (d1)
978                 {
979             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
980                     z1 = d7 + d1;
981                     z2 = d5 + d3;
982                     z3 = d7 + d3;
983                     z4 = d5 + d1;
984                     z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
985
986                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
987                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
988                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
989                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
990                     z1 = MULTIPLY(z1, - FIX(0.899976223));
991                     z2 = MULTIPLY(z2, - FIX(2.562915447));
992                     z3 = MULTIPLY(z3, - FIX(1.961570560));
993                     z4 = MULTIPLY(z4, - FIX(0.390180644));
994
995                     z3 += z5;
996                     z4 += z5;
997
998                     tmp0 += z1 + z3;
999                     tmp1 += z2 + z4;
1000                     tmp2 += z2 + z3;
1001                     tmp3 += z1 + z4;
1002                 }
1003                 else
1004                 {
1005             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
1006                     z2 = d5 + d3;
1007                     z3 = d7 + d3;
1008                     z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
1009
1010                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1011                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1012                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1013                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1014                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1015                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1016                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1017
1018                     z3 += z5;
1019                     z4 += z5;
1020
1021                     tmp0 += z1 + z3;
1022                     tmp1 += z2 + z4;
1023                     tmp2 += z2 + z3;
1024                     tmp3 = z1 + z4;
1025                 }
1026             }
1027             else
1028             {
1029                 if (d1)
1030                 {
1031             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
1032                     z1 = d7 + d1;
1033                     z4 = d5 + d1;
1034                     z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
1035
1036                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1037                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1038                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1039                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1040                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1041                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1042                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1043
1044                     z3 += z5;
1045                     z4 += z5;
1046
1047                     tmp0 += z1 + z3;
1048                     tmp1 += z2 + z4;
1049                     tmp2 = z2 + z3;
1050                     tmp3 += z1 + z4;
1051                 }
1052                 else
1053                 {
1054             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
1055                     z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
1056
1057                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1058                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1059                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1060                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1061                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1062                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1063
1064                     z3 += z5;
1065                     z4 += z5;
1066
1067                     tmp0 += z3;
1068                     tmp1 += z4;
1069                     tmp2 = z2 + z3;
1070                     tmp3 = z1 + z4;
1071                 }
1072             }
1073         }
1074         else
1075         {
1076             if (d3)
1077             {
1078                 if (d1)
1079                 {
1080             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
1081                     z1 = d7 + d1;
1082                     z3 = d7 + d3;
1083                     z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
1084
1085                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1086                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1087                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1088                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1089                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1090                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1091                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1092
1093                     z3 += z5;
1094                     z4 += z5;
1095
1096                     tmp0 += z1 + z3;
1097                     tmp1 = z2 + z4;
1098                     tmp2 += z2 + z3;
1099                     tmp3 += z1 + z4;
1100                 }
1101                 else
1102                 {
1103             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1104                     z3 = d7 + d3;
1105                     z5 = MULTIPLY(z3, FIX(1.175875602));
1106
1107                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1108                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1109                     tmp2 = MULTIPLY(d3, FIX(0.509795579));
1110                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1111                     z3 = MULTIPLY(z3, - FIX2(0.785694958));
1112
1113                     tmp0 += z3;
1114                     tmp1 = z2 + z5;
1115                     tmp2 += z3;
1116                     tmp3 = z1 + z5;
1117                 }
1118             }
1119             else
1120             {
1121                 if (d1)
1122                 {
1123             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1124                     z1 = d7 + d1;
1125                     z5 = MULTIPLY(z1, FIX(1.175875602));
1126
1127                     tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
1128                     tmp3 = MULTIPLY(d1, FIX2(1.111140466));
1129                     z1 = MULTIPLY(z1, FIX2(0.275899379));
1130                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1131                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1132
1133                     tmp0 += z1;
1134                     tmp1 = z4 + z5;
1135                     tmp2 = z3 + z5;
1136                     tmp3 += z1;
1137                 }
1138                 else
1139                 {
1140             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1141                     tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
1142                     tmp1 = MULTIPLY(d7, FIX(1.175875602));
1143                     tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
1144                     tmp3 = MULTIPLY(d7, FIX2(0.275899379));
1145                 }
1146             }
1147         }
1148     }
1149     else
1150     {
1151         if (d5)
1152         {
1153             if (d3)
1154             {
1155                 if (d1)
1156                 {
1157             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1158                     z2 = d5 + d3;
1159                     z4 = d5 + d1;
1160                     z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
1161
1162                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1163                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1164                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1165                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1166                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1167                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1168                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1169
1170                     z3 += z5;
1171                     z4 += z5;
1172
1173                     tmp0 = z1 + z3;
1174                     tmp1 += z2 + z4;
1175                     tmp2 += z2 + z3;
1176                     tmp3 += z1 + z4;
1177                 }
1178                 else
1179                 {
1180             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1181                     z2 = d5 + d3;
1182                     z5 = MULTIPLY(z2, FIX(1.175875602));
1183
1184                     tmp1 = MULTIPLY(d5, FIX2(1.662939225));
1185                     tmp2 = MULTIPLY(d3, FIX2(1.111140466));
1186                     z2 = MULTIPLY(z2, - FIX2(1.387039845));
1187                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1188                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1189
1190                     tmp0 = z3 + z5;
1191                     tmp1 += z2;
1192                     tmp2 += z2;
1193                     tmp3 = z4 + z5;
1194                 }
1195             }
1196             else
1197             {
1198                 if (d1)
1199                 {
1200             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1201                     z4 = d5 + d1;
1202                     z5 = MULTIPLY(z4, FIX(1.175875602));
1203
1204                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1205                     tmp3 = MULTIPLY(d1, FIX2(0.601344887));
1206                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1207                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1208                     z4 = MULTIPLY(z4, FIX2(0.785694958));
1209
1210                     tmp0 = z1 + z5;
1211                     tmp1 += z4;
1212                     tmp2 = z2 + z5;
1213                     tmp3 += z4;
1214                 }
1215                 else
1216                 {
1217             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1218                     tmp0 = MULTIPLY(d5, FIX(1.175875602));
1219                     tmp1 = MULTIPLY(d5, FIX2(0.275899380));
1220                     tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
1221                     tmp3 = MULTIPLY(d5, FIX2(0.785694958));
1222                 }
1223             }
1224         }
1225         else
1226         {
1227             if (d3)
1228             {
1229                 if (d1)
1230                 {
1231             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1232                     z5 = d3 + d1;
1233
1234                     tmp2 = MULTIPLY(d3, - FIX(1.451774981));
1235                     tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
1236                     z1 = MULTIPLY(d1, FIX(1.061594337));
1237                     z2 = MULTIPLY(d3, - FIX(2.172734803));
1238                     z4 = MULTIPLY(z5, FIX(0.785694958));
1239                     z5 = MULTIPLY(z5, FIX(1.175875602));
1240
1241                     tmp0 = z1 - z4;
1242                     tmp1 = z2 + z4;
1243                     tmp2 += z5;
1244                     tmp3 += z5;
1245                 }
1246                 else
1247                 {
1248             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1249                     tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
1250                     tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
1251                     tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
1252                     tmp3 = MULTIPLY(d3, FIX(1.175875602));
1253                 }
1254             }
1255             else
1256             {
1257                 if (d1)
1258                 {
1259             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1260                     tmp0 = MULTIPLY(d1, FIX2(0.275899379));
1261                     tmp1 = MULTIPLY(d1, FIX2(0.785694958));
1262                     tmp2 = MULTIPLY(d1, FIX(1.175875602));
1263                     tmp3 = MULTIPLY(d1, FIX2(1.387039845));
1264                 }
1265                 else
1266                 {
1267             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1268                     tmp0 = tmp1 = tmp2 = tmp3 = 0;
1269                 }
1270             }
1271         }
1272     }
1273
1274     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1275
1276     dataptr[DCTSIZE*0] = (dctelem_t) DESCALE(tmp10 + tmp3,
1277                        CONST_BITS+PASS1_BITS+3);
1278     dataptr[DCTSIZE*7] = (dctelem_t) DESCALE(tmp10 - tmp3,
1279                        CONST_BITS+PASS1_BITS+3);
1280     dataptr[DCTSIZE*1] = (dctelem_t) DESCALE(tmp11 + tmp2,
1281                        CONST_BITS+PASS1_BITS+3);
1282     dataptr[DCTSIZE*6] = (dctelem_t) DESCALE(tmp11 - tmp2,
1283                        CONST_BITS+PASS1_BITS+3);
1284     dataptr[DCTSIZE*2] = (dctelem_t) DESCALE(tmp12 + tmp1,
1285                        CONST_BITS+PASS1_BITS+3);
1286     dataptr[DCTSIZE*5] = (dctelem_t) DESCALE(tmp12 - tmp1,
1287                        CONST_BITS+PASS1_BITS+3);
1288     dataptr[DCTSIZE*3] = (dctelem_t) DESCALE(tmp13 + tmp0,
1289                        CONST_BITS+PASS1_BITS+3);
1290     dataptr[DCTSIZE*4] = (dctelem_t) DESCALE(tmp13 - tmp0,
1291                        CONST_BITS+PASS1_BITS+3);
1292
1293     dataptr++;             /* advance pointer to next column */
1294     }
1295 }
1296