]> git.sesse.net Git - vlc/blob - src/video_decoder/vdec_idct.c
D�but du portage BeOS. Beaucoup de fuchiers ont �t� modifi� car il a fallu
[vlc] / src / video_decoder / vdec_idct.c
1 /*****************************************************************************
2  * vdec_idct.c : IDCT functions
3  *****************************************************************************
4  * Copyright (C) 1999, 2000 VideoLAN
5  *
6  * Authors:
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public
19  * License along with this program; if not, write to the
20  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27 #include <sys/types.h>                        /* on BSD, uio.h needs types.h */
28 #include <sys/uio.h>                                          /* for input.h */
29
30 #include "threads.h"
31 #include "config.h"
32 #include "common.h"
33 #include "mtime.h"
34 #include "plugins.h"
35
36 #include "intf_msg.h"
37
38 #include "input.h"
39 #include "decoder_fifo.h"
40 #include "video.h"
41 #include "video_output.h"
42
43 #include "vdec_idct.h"
44 #include "video_decoder.h"
45 #include "vdec_motion.h"
46
47 #include "vpar_blocks.h"
48 #include "vpar_headers.h"
49 #include "vpar_synchro.h"
50 #include "video_parser.h"
51 #include "video_fifo.h"
52
53 /*
54  * Local prototypes
55  */
56
57 /* Our current implementation is a fast DCT, we might move to a fast DFT or
58  * an MMX DCT in the future. */
59
60 /*****************************************************************************
61  * vdec_InitIDCT : initialize datas for vdec_SparceIDCT
62  * vdec_SparseIDCT : IDCT function for sparse matrices
63  *****************************************************************************/
64
65 void vdec_InitIDCT (vdec_thread_t * p_vdec)
66 {
67     int i;
68
69     dctelem_t * p_pre = p_vdec->p_pre_idct;
70     memset( p_pre, 0, 64*64*sizeof(dctelem_t) );
71
72     for( i=0 ; i < 64 ; i++ )
73     {
74         p_pre[i*64+i] = 1 << SPARSE_SCALE_FACTOR;
75         vdec_IDCT( p_vdec, &p_pre[i*64], 0) ;
76     }
77     return;
78 }
79
80 void vdec_SparseIDCT (vdec_thread_t * p_vdec, dctelem_t * p_block,
81                       int i_sparse_pos)
82 {
83     short int val;
84     int * dp;
85     int v;
86     short int * p_dest;
87     short int * p_source;
88     int coeff, rr;
89
90     /* If DC Coefficient. */
91     if ( i_sparse_pos == 0 )
92     {
93         dp=(int *)p_block;
94         val=RIGHT_SHIFT((*p_block + 4), 3);
95         /* Compute int to assign.  This speeds things up a bit */
96         v = ((val & 0xffff) | (val << 16));
97         dp[0] = v;     dp[1] = v;     dp[2] = v;     dp[3] = v;
98         dp[4] = v;     dp[5] = v;     dp[6] = v;     dp[7] = v;
99         dp[8] = v;     dp[9] = v;     dp[10] = v;    dp[11] = v;
100         dp[12] = v;    dp[13] = v;    dp[14] = v;    dp[15] = v;
101         dp[16] = v;    dp[17] = v;    dp[18] = v;    dp[19] = v;
102         dp[20] = v;    dp[21] = v;    dp[22] = v;    dp[23] = v;
103         dp[24] = v;    dp[25] = v;    dp[26] = v;    dp[27] = v;
104         dp[28] = v;    dp[29] = v;    dp[30] = v;    dp[31] = v;
105         return;
106     }
107     /* Some other coefficient. */
108     p_dest = (s16*)p_block;
109     p_source = (s16*)&p_vdec->p_pre_idct[i_sparse_pos];
110     coeff = (int)p_dest[i_sparse_pos];
111     for( rr=0 ; rr < 4 ; rr++ )
112     {
113         p_dest[0] = (p_source[0] * coeff) >> SPARSE_SCALE_FACTOR;
114         p_dest[1] = (p_source[1] * coeff) >> SPARSE_SCALE_FACTOR;
115         p_dest[2] = (p_source[2] * coeff) >> SPARSE_SCALE_FACTOR;
116         p_dest[3] = (p_source[3] * coeff) >> SPARSE_SCALE_FACTOR;
117         p_dest[4] = (p_source[4] * coeff) >> SPARSE_SCALE_FACTOR;
118         p_dest[5] = (p_source[5] * coeff) >> SPARSE_SCALE_FACTOR;
119         p_dest[6] = (p_source[6] * coeff) >> SPARSE_SCALE_FACTOR;
120         p_dest[7] = (p_source[7] * coeff) >> SPARSE_SCALE_FACTOR;
121         p_dest[8] = (p_source[8] * coeff) >> SPARSE_SCALE_FACTOR;
122         p_dest[9] = (p_source[9] * coeff) >> SPARSE_SCALE_FACTOR;
123         p_dest[10] = (p_source[10] * coeff) >> SPARSE_SCALE_FACTOR;
124         p_dest[11] = (p_source[11] * coeff) >> SPARSE_SCALE_FACTOR;
125         p_dest[12] = (p_source[12] * coeff) >> SPARSE_SCALE_FACTOR;
126         p_dest[13] = (p_source[13] * coeff) >> SPARSE_SCALE_FACTOR;
127         p_dest[14] = (p_source[14] * coeff) >> SPARSE_SCALE_FACTOR;
128         p_dest[15] = (p_source[15] * coeff) >> SPARSE_SCALE_FACTOR;
129         p_dest += 16;
130         p_source += 16;
131     }
132     return;
133 }
134
135
136 /*****************************************************************************
137  * vdec_IDCT : IDCT function for normal matrices
138  *****************************************************************************/
139
140 #ifndef HAVE_MMX
141 void vdec_IDCT( vdec_thread_t * p_vdec, dctelem_t * p_block, int i_idontcare )
142 {
143 #if 0
144     /* dct classique: pour tester la meilleure entre la classique et la */
145     /* no classique */
146     s32 tmp0, tmp1, tmp2, tmp3;
147     s32 tmp10, tmp11, tmp12, tmp13;
148     s32 z1, z2, z3, z4, z5;
149     dctelem_t * dataptr;
150     int rowctr;
151     SHIFT_TEMPS
152
153   /* Pass 1: process rows. */
154   /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
155   /* furthermore, we scale the results by 2**PASS1_BITS. */
156
157     dataptr = p_block;
158     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
159     {
160     /* Due to quantization, we will usually find that many of the input
161      * coefficients are zero, especially the AC terms.  We can exploit this
162      * by short-circuiting the IDCT calculation for any row in which all
163      * the AC terms are zero.  In that case each output is equal to the
164      * DC coefficient (with scale factor as needed).
165      * With typical images and quantization tables, half or more of the
166      * row DCT calculations can be simplified this way.
167      */
168
169         if ((dataptr[1] | dataptr[2] | dataptr[3] | dataptr[4] |
170                 dataptr[5] | dataptr[6] | dataptr[7]) == 0)
171         {
172       /* AC terms all zero */
173             dctelem_t dcval = (dctelem_t) (dataptr[0] << PASS1_BITS);
174
175             dataptr[0] = dcval;
176             dataptr[1] = dcval;
177             dataptr[2] = dcval;
178             dataptr[3] = dcval;
179             dataptr[4] = dcval;
180             dataptr[5] = dcval;
181             dataptr[6] = dcval;
182             dataptr[7] = dcval;
183
184             dataptr += DCTSIZE; /* advance pointer to next row */
185             continue;
186         }
187
188     /* Even part: reverse the even part of the forward DCT. */
189     /* The rotator is sqrt(2)*c(-6). */
190
191         z2 = (s32) dataptr[2];
192         z3 = (s32) dataptr[6];
193
194         z1 = MULTIPLY(z2 + z3, FIX(0.541196100));
195         tmp2 = z1 + MULTIPLY(z3, - FIX(1.847759065));
196         tmp3 = z1 + MULTIPLY(z2, FIX(0.765366865));
197
198         tmp0 = ((s32) dataptr[0] + (s32) dataptr[4]) << CONST_BITS;
199         tmp1 = ((s32) dataptr[0] - (s32) dataptr[4]) << CONST_BITS;
200
201         tmp10 = tmp0 + tmp3;
202         tmp13 = tmp0 - tmp3;
203         tmp11 = tmp1 + tmp2;
204         tmp12 = tmp1 - tmp2;
205
206     /* Odd part per figure 8; the matrix is unitary and hence its
207      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
208      */
209
210         tmp0 = (s32) dataptr[7];
211         tmp1 = (s32) dataptr[5];
212         tmp2 = (s32) dataptr[3];
213         tmp3 = (s32) dataptr[1];
214
215         z1 = tmp0 + tmp3;
216         z2 = tmp1 + tmp2;
217         z3 = tmp0 + tmp2;
218         z4 = tmp1 + tmp3;
219         z5 = MULTIPLY(z3 + z4, FIX(1.175875602)); /* sqrt(2) * c3 */
220
221         tmp0 = MULTIPLY(tmp0, FIX(0.298631336)); /* sqrt(2) * (-c1+c3+c5-c7) */
222         tmp1 = MULTIPLY(tmp1, FIX(2.053119869)); /* sqrt(2) * ( c1+c3-c5+c7) */
223         tmp2 = MULTIPLY(tmp2, FIX(3.072711026)); /* sqrt(2) * ( c1+c3+c5-c7) */
224         tmp3 = MULTIPLY(tmp3, FIX(1.501321110)); /* sqrt(2) * ( c1+c3-c5-c7) */
225         z1 = MULTIPLY(z1, - FIX(0.899976223)); /* sqrt(2) * (c7-c3) */
226         z2 = MULTIPLY(z2, - FIX(2.562915447)); /* sqrt(2) * (-c1-c3) */
227         z3 = MULTIPLY(z3, - FIX(1.961570560)); /* sqrt(2) * (-c3-c5) */
228         z4 = MULTIPLY(z4, - FIX(0.390180644)); /* sqrt(2) * (c5-c3) */
229
230         z3 += z5;
231         z4 += z5;
232
233         tmp0 += z1 + z3;
234         tmp1 += z2 + z4;
235         tmp2 += z2 + z3;
236         tmp3 += z1 + z4;
237
238     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
239
240         dataptr[0] = (dctelem_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
241         dataptr[7] = (dctelem_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
242         dataptr[1] = (dctelem_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
243         dataptr[6] = (dctelem_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
244         dataptr[2] = (dctelem_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
245         dataptr[5] = (dctelem_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
246         dataptr[3] = (dctelem_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
247         dataptr[4] = (dctelem_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
248
249         dataptr += DCTSIZE;             /* advance pointer to next row */
250     }
251
252   /* Pass 2: process columns. */
253   /* Note that we must descale the results by a factor of 8 == 2**3, */
254   /* and also undo the PASS1_BITS scaling. */
255
256     dataptr = p_block;
257     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
258     {
259     /* Columns of zeroes can be exploited in the same way as we did with rows.
260      * However, the row calculation has created many nonzero AC terms, so the
261      * simplification applies less often (typically 5% to 10% of the time).
262      * On machines with very fast multiplication, it's possible that the
263      * test takes more time than it's worth.  In that case this section
264      * may be commented out.
265      */
266
267 #ifndef NO_ZERO_COLUMN_TEST /*ajoute un test mais evite des calculs */
268         if ((dataptr[DCTSIZE*1] | dataptr[DCTSIZE*2] | dataptr[DCTSIZE*3] |
269             dataptr[DCTSIZE*4] | dataptr[DCTSIZE*5] | dataptr[DCTSIZE*6] |
270             dataptr[DCTSIZE*7]) == 0)
271         {
272       /* AC terms all zero */
273             dctelem_t dcval = (dctelem_t) DESCALE((s32) dataptr[0], PASS1_BITS+3);
274
275             dataptr[DCTSIZE*0] = dcval;
276             dataptr[DCTSIZE*1] = dcval;
277             dataptr[DCTSIZE*2] = dcval;
278             dataptr[DCTSIZE*3] = dcval;
279             dataptr[DCTSIZE*4] = dcval;
280             dataptr[DCTSIZE*5] = dcval;
281             dataptr[DCTSIZE*6] = dcval;
282             dataptr[DCTSIZE*7] = dcval;
283
284             dataptr++;          /* advance pointer to next column */
285             continue;
286         }
287 #endif
288
289     /* Even part: reverse the even part of the forward DCT. */
290     /* The rotator is sqrt(2)*c(-6). */
291
292         z2 = (s32) dataptr[DCTSIZE*2];
293         z3 = (s32) dataptr[DCTSIZE*6];
294
295         z1 = MULTIPLY(z2 + z3, FIX(0.541196100));
296         tmp2 = z1 + MULTIPLY(z3, - FIX(1.847759065));
297         tmp3 = z1 + MULTIPLY(z2, FIX(0.765366865));
298
299         tmp0 = ((s32) dataptr[DCTSIZE*0] + (s32) dataptr[DCTSIZE*4]) << CONST_BITS;
300         tmp1 = ((s32) dataptr[DCTSIZE*0] - (s32) dataptr[DCTSIZE*4]) << CONST_BITS;
301
302         tmp10 = tmp0 + tmp3;
303         tmp13 = tmp0 - tmp3;
304         tmp11 = tmp1 + tmp2;
305         tmp12 = tmp1 - tmp2;
306
307     /* Odd part per figure 8; the matrix is unitary and hence its
308      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
309      */
310
311         tmp0 = (s32) dataptr[DCTSIZE*7];
312         tmp1 = (s32) dataptr[DCTSIZE*5];
313         tmp2 = (s32) dataptr[DCTSIZE*3];
314         tmp3 = (s32) dataptr[DCTSIZE*1];
315
316         z1 = tmp0 + tmp3;
317         z2 = tmp1 + tmp2;
318         z3 = tmp0 + tmp2;
319         z4 = tmp1 + tmp3;
320         z5 = MULTIPLY(z3 + z4, FIX(1.175875602)); /* sqrt(2) * c3 */
321
322         tmp0 = MULTIPLY(tmp0, FIX(0.298631336)); /* sqrt(2) * (-c1+c3+c5-c7) */
323         tmp1 = MULTIPLY(tmp1, FIX(2.053119869)); /* sqrt(2) * ( c1+c3-c5+c7) */
324         tmp2 = MULTIPLY(tmp2, FIX(3.072711026)); /* sqrt(2) * ( c1+c3+c5-c7) */
325         tmp3 = MULTIPLY(tmp3, FIX(1.501321110)); /* sqrt(2) * ( c1+c3-c5-c7) */
326         z1 = MULTIPLY(z1, - FIX(0.899976223)); /* sqrt(2) * (c7-c3) */
327         z2 = MULTIPLY(z2, - FIX(2.562915447)); /* sqrt(2) * (-c1-c3) */
328         z3 = MULTIPLY(z3, - FIX(1.961570560)); /* sqrt(2) * (-c3-c5) */
329         z4 = MULTIPLY(z4, - FIX(0.390180644)); /* sqrt(2) * (c5-c3) */
330
331         z3 += z5;
332         z4 += z5;
333
334         tmp0 += z1 + z3;
335         tmp1 += z2 + z4;
336         tmp2 += z2 + z3;
337         tmp3 += z1 + z4;
338
339     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
340
341         dataptr[DCTSIZE*0] = (dctelem_t) DESCALE(tmp10 + tmp3,
342                                            CONST_BITS+PASS1_BITS+3);
343         dataptr[DCTSIZE*7] = (dctelem_t) DESCALE(tmp10 - tmp3,
344                                            CONST_BITS+PASS1_BITS+3);
345         dataptr[DCTSIZE*1] = (dctelem_t) DESCALE(tmp11 + tmp2,
346                                            CONST_BITS+PASS1_BITS+3);
347         dataptr[DCTSIZE*6] = (dctelem_t) DESCALE(tmp11 - tmp2,
348                                            CONST_BITS+PASS1_BITS+3);
349         dataptr[DCTSIZE*2] = (dctelem_t) DESCALE(tmp12 + tmp1,
350                                            CONST_BITS+PASS1_BITS+3);
351         dataptr[DCTSIZE*5] = (dctelem_t) DESCALE(tmp12 - tmp1,
352                                            CONST_BITS+PASS1_BITS+3);
353         dataptr[DCTSIZE*3] = (dctelem_t) DESCALE(tmp13 + tmp0,
354                                            CONST_BITS+PASS1_BITS+3);
355         dataptr[DCTSIZE*4] = (dctelem_t) DESCALE(tmp13 - tmp0,
356                                            CONST_BITS+PASS1_BITS+3);
357
358         dataptr++;                      /* advance pointer to next column */
359     }
360 #endif
361
362 #if 1  /*dct avec non classique*/
363
364     s32 tmp0, tmp1, tmp2, tmp3;
365     s32 tmp10, tmp11, tmp12, tmp13;
366     s32 z1, z2, z3, z4, z5;
367     s32 d0, d1, d2, d3, d4, d5, d6, d7;
368     dctelem_t * dataptr;
369     int rowctr;
370
371     SHIFT_TEMPS
372
373     /* Pass 1: process rows. */
374     /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
375     /* furthermore, we scale the results by 2**PASS1_BITS. */
376
377     dataptr = p_block;
378
379     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
380     {
381         /* Due to quantization, we will usually find that many of the input
382          * coefficients are zero, especially the AC terms.  We can exploit this
383          * by short-circuiting the IDCT calculation for any row in which all
384          * the AC terms are zero.  In that case each output is equal to the
385          * DC coefficient (with scale factor as needed).
386          * With typical images and quantization tables, half or more of the
387          * row DCT calculations can be simplified this way.
388          */
389
390         register int * idataptr = (int*)dataptr;
391         d0 = dataptr[0];
392         d1 = dataptr[1];
393         if ( (d1 == 0) && ((idataptr[1] | idataptr[2] | idataptr[3]) == 0) )
394         {
395       /* AC terms all zero */
396             if (d0)
397             {
398       /* Compute a 32 bit value to assign. */
399                 dctelem_t dcval = (dctelem_t) (d0 << PASS1_BITS);
400                 register int v = (dcval & 0xffff) | (dcval << 16);
401
402                 idataptr[0] = v;
403                 idataptr[1] = v;
404                 idataptr[2] = v;
405                 idataptr[3] = v;
406             }
407
408             dataptr += DCTSIZE; /* advance pointer to next row */
409             continue;
410         }
411         d2 = dataptr[2];
412         d3 = dataptr[3];
413         d4 = dataptr[4];
414         d5 = dataptr[5];
415         d6 = dataptr[6];
416         d7 = dataptr[7];
417
418     /* Even part: reverse the even part of the forward DCT. */
419     /* The rotator is sqrt(2)*c(-6). */
420         if (d6)
421         {
422             if (d4)
423             {
424                 if (d2)
425                 {
426                     if (d0)
427                     {
428             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
429                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
430                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
431                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
432
433                         tmp0 = (d0 + d4) << CONST_BITS;
434                         tmp1 = (d0 - d4) << CONST_BITS;
435
436                         tmp10 = tmp0 + tmp3;
437                         tmp13 = tmp0 - tmp3;
438                         tmp11 = tmp1 + tmp2;
439                         tmp12 = tmp1 - tmp2;
440                     }
441                     else
442                     {
443                     /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
444                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
445                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
446                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
447
448                         tmp0 = d4 << CONST_BITS;
449
450                         tmp10 = tmp0 + tmp3;
451                         tmp13 = tmp0 - tmp3;
452                         tmp11 = tmp2 - tmp0;
453                         tmp12 = -(tmp0 + tmp2);
454                         }
455                 }
456                 else
457                 {
458                     if (d0)
459                     {
460             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
461                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
462                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
463
464                         tmp0 = (d0 + d4) << CONST_BITS;
465                         tmp1 = (d0 - d4) << CONST_BITS;
466
467                         tmp10 = tmp0 + tmp3;
468                         tmp13 = tmp0 - tmp3;
469                         tmp11 = tmp1 + tmp2;
470                         tmp12 = tmp1 - tmp2;
471                         }
472                     else
473                     {
474                     /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
475                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
476                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
477
478                         tmp0 = d4 << CONST_BITS;
479
480                         tmp10 = tmp0 + tmp3;
481                         tmp13 = tmp0 - tmp3;
482                         tmp11 = tmp2 - tmp0;
483                         tmp12 = -(tmp0 + tmp2);
484                         }
485                 }
486             }
487             else
488             {
489                 if (d2)
490                 {
491                     if (d0)
492                     {
493             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
494                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
495                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
496                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
497
498                         tmp0 = d0 << CONST_BITS;
499
500                         tmp10 = tmp0 + tmp3;
501                         tmp13 = tmp0 - tmp3;
502                         tmp11 = tmp0 + tmp2;
503                         tmp12 = tmp0 - tmp2;
504                     }
505                     else
506                     {
507                     /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
508                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
509                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
510                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
511
512                         tmp10 = tmp3;
513                         tmp13 = -tmp3;
514                         tmp11 = tmp2;
515                         tmp12 = -tmp2;
516                             }
517                 }
518                 else
519                 {
520                     if (d0)
521                     {
522             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
523                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
524                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
525
526                         tmp0 = d0 << CONST_BITS;
527
528                         tmp10 = tmp0 + tmp3;
529                         tmp13 = tmp0 - tmp3;
530                         tmp11 = tmp0 + tmp2;
531                         tmp12 = tmp0 - tmp2;
532                     }
533                     else
534                     {
535             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
536                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
537                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
538
539                         tmp10 = tmp3;
540                         tmp13 = -tmp3;
541                         tmp11 = tmp2;
542                         tmp12 = -tmp2;
543                     }
544                 }
545             }
546         }
547         else
548         {
549             if (d4)
550             {
551                 if (d2)
552                 {
553                     if (d0)
554                     {
555                     /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
556                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
557                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
558
559                         tmp0 = (d0 + d4) << CONST_BITS;
560                         tmp1 = (d0 - d4) << CONST_BITS;
561
562                         tmp10 = tmp0 + tmp3;
563                         tmp13 = tmp0 - tmp3;
564                         tmp11 = tmp1 + tmp2;
565                         tmp12 = tmp1 - tmp2;
566                     }
567                     else
568                     {
569             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
570                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
571                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
572
573                         tmp0 = d4 << CONST_BITS;
574
575                         tmp10 = tmp0 + tmp3;
576                         tmp13 = tmp0 - tmp3;
577                         tmp11 = tmp2 - tmp0;
578                         tmp12 = -(tmp0 + tmp2);
579                     }
580                 }
581                 else
582                 {
583                     if (d0)
584                     {
585             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
586                         tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
587                         tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
588                     }
589                     else
590                     {
591             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
592                         tmp10 = tmp13 = d4 << CONST_BITS;
593                         tmp11 = tmp12 = -tmp10;
594                     }
595                 }
596             }
597             else
598             {
599                 if (d2)
600                 {
601                     if (d0)
602                     {
603             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
604                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
605                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
606
607                         tmp0 = d0 << CONST_BITS;
608
609                         tmp10 = tmp0 + tmp3;
610                         tmp13 = tmp0 - tmp3;
611                         tmp11 = tmp0 + tmp2;
612                         tmp12 = tmp0 - tmp2;
613                     }
614                     else
615                     {
616             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
617                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
618                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
619
620                         tmp10 = tmp3;
621                         tmp13 = -tmp3;
622                         tmp11 = tmp2;
623                         tmp12 = -tmp2;
624                     }
625                 }
626                 else
627                 {
628                     if (d0)
629                     {
630             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
631                         tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
632                     }
633                     else
634                     {
635             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
636                         tmp10 = tmp13 = tmp11 = tmp12 = 0;
637                     }
638                 }
639             }
640         }
641
642
643     /* Odd part per figure 8; the matrix is unitary and hence its
644      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
645      */
646
647         if (d7)
648             {
649             if (d5)
650             {
651                 if (d3)
652                 {
653                     if (d1)
654                     {
655             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
656                         z1 = d7 + d1;
657                         z2 = d5 + d3;
658                         z3 = d7 + d3;
659                         z4 = d5 + d1;
660                         z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
661
662                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
663                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
664                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
665                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
666                         z1 = MULTIPLY(z1, - FIX(0.899976223));
667                         z2 = MULTIPLY(z2, - FIX(2.562915447));
668                         z3 = MULTIPLY(z3, - FIX(1.961570560));
669                         z4 = MULTIPLY(z4, - FIX(0.390180644));
670
671                         z3 += z5;
672                         z4 += z5;
673
674                         tmp0 += z1 + z3;
675                         tmp1 += z2 + z4;
676                         tmp2 += z2 + z3;
677                         tmp3 += z1 + z4;
678                     }
679                     else
680                     {
681             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
682                         z2 = d5 + d3;
683                         z3 = d7 + d3;
684                         z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
685
686                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
687                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
688                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
689                         z1 = MULTIPLY(d7, - FIX(0.899976223));
690                         z2 = MULTIPLY(z2, - FIX(2.562915447));
691                         z3 = MULTIPLY(z3, - FIX(1.961570560));
692                         z4 = MULTIPLY(d5, - FIX(0.390180644));
693
694                         z3 += z5;
695                         z4 += z5;
696
697                         tmp0 += z1 + z3;
698                         tmp1 += z2 + z4;
699                         tmp2 += z2 + z3;
700                         tmp3 = z1 + z4;
701                         }
702                     }
703                 else
704                 {
705                     if (d1)
706                     {
707             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
708                         z1 = d7 + d1;
709                         z4 = d5 + d1;
710                         z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
711
712                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
713                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
714                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
715                         z1 = MULTIPLY(z1, - FIX(0.899976223));
716                         z2 = MULTIPLY(d5, - FIX(2.562915447));
717                         z3 = MULTIPLY(d7, - FIX(1.961570560));
718                         z4 = MULTIPLY(z4, - FIX(0.390180644));
719
720                         z3 += z5;
721                         z4 += z5;
722
723                         tmp0 += z1 + z3;
724                         tmp1 += z2 + z4;
725                         tmp2 = z2 + z3;
726                         tmp3 += z1 + z4;
727                     }
728                     else
729                     {
730             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
731                         z5 = MULTIPLY(d7 + d5, FIX(1.175875602));
732
733                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
734                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
735                         z1 = MULTIPLY(d7, - FIX(0.899976223));
736                         z3 = MULTIPLY(d7, - FIX(1.961570560));
737                         z2 = MULTIPLY(d5, - FIX(2.562915447));
738                         z4 = MULTIPLY(d5, - FIX(0.390180644));
739
740                         z3 += z5;
741                         z4 += z5;
742
743                         tmp0 += z3;
744                         tmp1 += z4;
745                         tmp2 = z2 + z3;
746                         tmp3 = z1 + z4;
747                     }
748                 }
749             }
750             else
751             {
752                 if (d3)
753                 {
754                     if (d1)
755                     {
756             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
757                         z1 = d7 + d1;
758                         z3 = d7 + d3;
759                         z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
760
761                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
762                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
763                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
764                         z1 = MULTIPLY(z1, - FIX(0.899976223));
765                         z2 = MULTIPLY(d3, - FIX(2.562915447));
766                         z3 = MULTIPLY(z3, - FIX(1.961570560));
767                         z4 = MULTIPLY(d1, - FIX(0.390180644));
768
769                         z3 += z5;
770                         z4 += z5;
771
772                         tmp0 += z1 + z3;
773                         tmp1 = z2 + z4;
774                         tmp2 += z2 + z3;
775                         tmp3 += z1 + z4;
776                     }
777                     else
778                     {
779             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
780                         z3 = d7 + d3;
781                         z5 = MULTIPLY(z3, FIX(1.175875602));
782
783                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
784                         tmp2 = MULTIPLY(d3, FIX(0.509795579));
785                         z1 = MULTIPLY(d7, - FIX(0.899976223));
786                         z2 = MULTIPLY(d3, - FIX(2.562915447));
787                         z3 = MULTIPLY(z3, - FIX2(0.785694958));
788
789                         tmp0 += z3;
790                         tmp1 = z2 + z5;
791                         tmp2 += z3;
792                         tmp3 = z1 + z5;
793                     }
794                 }
795                 else
796                 {
797                     if (d1)
798                     {
799             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
800                         z1 = d7 + d1;
801                         z5 = MULTIPLY(z1, FIX(1.175875602));
802
803                         tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
804                         tmp3 = MULTIPLY(d1, FIX2(1.111140466));
805                         z1 = MULTIPLY(z1, FIX2(0.275899379));
806                         z3 = MULTIPLY(d7, - FIX(1.961570560));
807                         z4 = MULTIPLY(d1, - FIX(0.390180644));
808
809                         tmp0 += z1;
810                         tmp1 = z4 + z5;
811                         tmp2 = z3 + z5;
812                         tmp3 += z1;
813                     }
814                 else
815                     {
816             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
817                         tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
818                         tmp1 = MULTIPLY(d7, FIX(1.175875602));
819                         tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
820                         tmp3 = MULTIPLY(d7, FIX2(0.275899379));
821                     }
822                 }
823             }
824         }
825         else
826         {
827             if (d5)
828             {
829                 if (d3)
830                 {
831                     if (d1)
832                     {
833             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
834                         z2 = d5 + d3;
835                         z4 = d5 + d1;
836                         z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
837
838                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
839                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
840                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
841                         z1 = MULTIPLY(d1, - FIX(0.899976223));
842                         z2 = MULTIPLY(z2, - FIX(2.562915447));
843                         z3 = MULTIPLY(d3, - FIX(1.961570560));
844                         z4 = MULTIPLY(z4, - FIX(0.390180644));
845
846                         z3 += z5;
847                         z4 += z5;
848
849                         tmp0 = z1 + z3;
850                         tmp1 += z2 + z4;
851                         tmp2 += z2 + z3;
852                         tmp3 += z1 + z4;
853                     }
854                     else
855                     {
856             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
857                         z2 = d5 + d3;
858                         z5 = MULTIPLY(z2, FIX(1.175875602));
859
860                         tmp1 = MULTIPLY(d5, FIX2(1.662939225));
861                         tmp2 = MULTIPLY(d3, FIX2(1.111140466));
862                         z2 = MULTIPLY(z2, - FIX2(1.387039845));
863                         z3 = MULTIPLY(d3, - FIX(1.961570560));
864                         z4 = MULTIPLY(d5, - FIX(0.390180644));
865
866                         tmp0 = z3 + z5;
867                         tmp1 += z2;
868                         tmp2 += z2;
869                         tmp3 = z4 + z5;
870                     }
871                 }
872                 else
873                 {
874                     if (d1)
875                     {
876             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
877                         z4 = d5 + d1;
878                         z5 = MULTIPLY(z4, FIX(1.175875602));
879
880                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
881                         tmp3 = MULTIPLY(d1, FIX2(0.601344887));
882                         z1 = MULTIPLY(d1, - FIX(0.899976223));
883                         z2 = MULTIPLY(d5, - FIX(2.562915447));
884                         z4 = MULTIPLY(z4, FIX2(0.785694958));
885
886                         tmp0 = z1 + z5;
887                         tmp1 += z4;
888                         tmp2 = z2 + z5;
889                         tmp3 += z4;
890                     }
891                     else
892                     {
893             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
894                         tmp0 = MULTIPLY(d5, FIX(1.175875602));
895                         tmp1 = MULTIPLY(d5, FIX2(0.275899380));
896                         tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
897                         tmp3 = MULTIPLY(d5, FIX2(0.785694958));
898                     }
899                 }
900             }
901             else
902             {
903                 if (d3)
904                 {
905                     if (d1)
906                     {
907             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
908                         z5 = d3 + d1;
909
910                         tmp2 = MULTIPLY(d3, - FIX(1.451774981));
911                         tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
912                         z1 = MULTIPLY(d1, FIX(1.061594337));
913                         z2 = MULTIPLY(d3, - FIX(2.172734803));
914                         z4 = MULTIPLY(z5, FIX(0.785694958));
915                         z5 = MULTIPLY(z5, FIX(1.175875602));
916
917                         tmp0 = z1 - z4;
918                         tmp1 = z2 + z4;
919                         tmp2 += z5;
920                         tmp3 += z5;
921                     }
922                     else
923                     {
924             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
925                         tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
926                         tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
927                         tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
928                         tmp3 = MULTIPLY(d3, FIX(1.175875602));
929                     }
930                 }
931                 else
932                 {
933                     if (d1)
934                     {
935             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
936                         tmp0 = MULTIPLY(d1, FIX2(0.275899379));
937                         tmp1 = MULTIPLY(d1, FIX2(0.785694958));
938                         tmp2 = MULTIPLY(d1, FIX(1.175875602));
939                         tmp3 = MULTIPLY(d1, FIX2(1.387039845));
940                     }
941                     else
942                     {
943             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
944                         tmp0 = tmp1 = tmp2 = tmp3 = 0;
945                     }
946                 }
947             }
948         }
949
950     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
951
952         dataptr[0] = (dctelem_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
953         dataptr[7] = (dctelem_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
954         dataptr[1] = (dctelem_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
955         dataptr[6] = (dctelem_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
956         dataptr[2] = (dctelem_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
957         dataptr[5] = (dctelem_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
958         dataptr[3] = (dctelem_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
959         dataptr[4] = (dctelem_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
960
961         dataptr += DCTSIZE;              /* advance pointer to next row */
962     }
963
964   /* Pass 2: process columns. */
965   /* Note that we must descale the results by a factor of 8 == 2**3, */
966   /* and also undo the PASS1_BITS scaling. */
967
968     dataptr = p_block;
969     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
970     {
971     /* Columns of zeroes can be exploited in the same way as we did with rows.
972      * However, the row calculation has created many nonzero AC terms, so the
973      * simplification applies less often (typically 5% to 10% of the time).
974      * On machines with very fast multiplication, it's possible that the
975      * test takes more time than it's worth.  In that case this section
976      * may be commented out.
977      */
978
979         d0 = dataptr[DCTSIZE*0];
980         d1 = dataptr[DCTSIZE*1];
981         d2 = dataptr[DCTSIZE*2];
982         d3 = dataptr[DCTSIZE*3];
983         d4 = dataptr[DCTSIZE*4];
984         d5 = dataptr[DCTSIZE*5];
985         d6 = dataptr[DCTSIZE*6];
986         d7 = dataptr[DCTSIZE*7];
987
988     /* Even part: reverse the even part of the forward DCT. */
989     /* The rotator is sqrt(2)*c(-6). */
990         if (d6)
991         {
992             if (d4)
993             {
994                 if (d2)
995                 {
996                     if (d0)
997                     {
998             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
999                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
1000                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
1001                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
1002
1003                         tmp0 = (d0 + d4) << CONST_BITS;
1004                         tmp1 = (d0 - d4) << CONST_BITS;
1005
1006                         tmp10 = tmp0 + tmp3;
1007                         tmp13 = tmp0 - tmp3;
1008                         tmp11 = tmp1 + tmp2;
1009                         tmp12 = tmp1 - tmp2;
1010                     }
1011                     else
1012                     {
1013             /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
1014                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
1015                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
1016                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
1017
1018                         tmp0 = d4 << CONST_BITS;
1019
1020                         tmp10 = tmp0 + tmp3;
1021                         tmp13 = tmp0 - tmp3;
1022                         tmp11 = tmp2 - tmp0;
1023                         tmp12 = -(tmp0 + tmp2);
1024                     }
1025                 }
1026                 else
1027                 {
1028                     if (d0)
1029                     {
1030             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
1031                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
1032                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
1033
1034                         tmp0 = (d0 + d4) << CONST_BITS;
1035                         tmp1 = (d0 - d4) << CONST_BITS;
1036
1037                         tmp10 = tmp0 + tmp3;
1038                         tmp13 = tmp0 - tmp3;
1039                         tmp11 = tmp1 + tmp2;
1040                         tmp12 = tmp1 - tmp2;
1041                     }
1042                     else
1043                     {
1044             /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
1045                         tmp2 = MULTIPLY(d6, -FIX2(1.306562965));
1046                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
1047
1048                         tmp0 = d4 << CONST_BITS;
1049
1050                         tmp10 = tmp0 + tmp3;
1051                         tmp13 = tmp0 - tmp3;
1052                         tmp11 = tmp2 - tmp0;
1053                         tmp12 = -(tmp0 + tmp2);
1054                     }
1055                 }
1056             }
1057             else
1058             {
1059                 if (d2)
1060                 {
1061                     if (d0)
1062                     {
1063             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
1064                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
1065                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
1066                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
1067
1068                         tmp0 = d0 << CONST_BITS;
1069
1070                         tmp10 = tmp0 + tmp3;
1071                         tmp13 = tmp0 - tmp3;
1072                         tmp11 = tmp0 + tmp2;
1073                         tmp12 = tmp0 - tmp2;
1074                     }
1075                     else
1076                     {
1077             /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
1078                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
1079                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
1080                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
1081
1082                         tmp10 = tmp3;
1083                         tmp13 = -tmp3;
1084                         tmp11 = tmp2;
1085                         tmp12 = -tmp2;
1086                     }
1087                 }
1088                 else
1089                 {
1090                     if (d0)
1091                     {
1092             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
1093                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
1094                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
1095
1096                     tmp0 = d0 << CONST_BITS;
1097
1098                     tmp10 = tmp0 + tmp3;
1099                     tmp13 = tmp0 - tmp3;
1100                     tmp11 = tmp0 + tmp2;
1101                     tmp12 = tmp0 - tmp2;
1102                 }
1103                 else
1104                 {
1105             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
1106                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
1107                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
1108                     tmp10 = tmp3;
1109                     tmp13 = -tmp3;
1110                     tmp11 = tmp2;
1111                     tmp12 = -tmp2;
1112                 }
1113             }
1114         }
1115     }
1116     else
1117     {
1118         if (d4)
1119         {
1120             if (d2)
1121             {
1122                 if (d0)
1123                 {
1124             /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
1125                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
1126                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
1127
1128                     tmp0 = (d0 + d4) << CONST_BITS;
1129                     tmp1 = (d0 - d4) << CONST_BITS;
1130
1131                     tmp10 = tmp0 + tmp3;
1132                     tmp13 = tmp0 - tmp3;
1133                     tmp11 = tmp1 + tmp2;
1134                     tmp12 = tmp1 - tmp2;
1135                 }
1136                 else
1137                 {
1138             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
1139                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
1140                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
1141
1142                     tmp0 = d4 << CONST_BITS;
1143
1144                     tmp10 = tmp0 + tmp3;
1145                     tmp13 = tmp0 - tmp3;
1146                     tmp11 = tmp2 - tmp0;
1147                     tmp12 = -(tmp0 + tmp2);
1148                 }
1149             }
1150             else
1151             {
1152                 if (d0)
1153                 {
1154             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
1155                     tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
1156                     tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
1157                 }
1158                 else
1159                 {
1160             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
1161                     tmp10 = tmp13 = d4 << CONST_BITS;
1162                     tmp11 = tmp12 = -tmp10;
1163                 }
1164             }
1165         }
1166         else
1167         {
1168         if (d2)
1169         {
1170             if (d0)
1171             {
1172             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
1173                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
1174                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
1175
1176                     tmp0 = d0 << CONST_BITS;
1177
1178                     tmp10 = tmp0 + tmp3;
1179                     tmp13 = tmp0 - tmp3;
1180                     tmp11 = tmp0 + tmp2;
1181                     tmp12 = tmp0 - tmp2;
1182             }
1183             else
1184             {
1185             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
1186                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
1187                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
1188
1189                     tmp10 = tmp3;
1190                     tmp13 = -tmp3;
1191                     tmp11 = tmp2;
1192                     tmp12 = -tmp2;
1193             }
1194         }
1195         else
1196         {
1197             if (d0)
1198                 {
1199             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
1200                     tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
1201                 }
1202                 else
1203                 {
1204             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
1205                     tmp10 = tmp13 = tmp11 = tmp12 = 0;
1206                 }
1207             }
1208         }
1209     }
1210
1211     /* Odd part per figure 8; the matrix is unitary and hence its
1212      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
1213      */
1214     if (d7)
1215     {
1216         if (d5)
1217         {
1218             if (d3)
1219             {
1220                 if (d1)
1221                 {
1222             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
1223                     z1 = d7 + d1;
1224                     z2 = d5 + d3;
1225                     z3 = d7 + d3;
1226                     z4 = d5 + d1;
1227                     z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
1228
1229                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1230                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1231                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1232                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1233                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1234                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1235                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1236                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1237
1238                     z3 += z5;
1239                     z4 += z5;
1240
1241                     tmp0 += z1 + z3;
1242                     tmp1 += z2 + z4;
1243                     tmp2 += z2 + z3;
1244                     tmp3 += z1 + z4;
1245                 }
1246                 else
1247                 {
1248             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
1249                     z2 = d5 + d3;
1250                     z3 = d7 + d3;
1251                     z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
1252
1253                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1254                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1255                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1256                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1257                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1258                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1259                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1260
1261                     z3 += z5;
1262                     z4 += z5;
1263
1264                     tmp0 += z1 + z3;
1265                     tmp1 += z2 + z4;
1266                     tmp2 += z2 + z3;
1267                     tmp3 = z1 + z4;
1268                 }
1269             }
1270             else
1271             {
1272                 if (d1)
1273                 {
1274             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
1275                     z1 = d7 + d1;
1276                     z4 = d5 + d1;
1277                     z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
1278
1279                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1280                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1281                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1282                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1283                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1284                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1285                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1286
1287                     z3 += z5;
1288                     z4 += z5;
1289
1290                     tmp0 += z1 + z3;
1291                     tmp1 += z2 + z4;
1292                     tmp2 = z2 + z3;
1293                     tmp3 += z1 + z4;
1294                 }
1295                 else
1296                 {
1297             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
1298                     z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
1299
1300                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1301                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1302                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1303                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1304                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1305                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1306
1307                     z3 += z5;
1308                     z4 += z5;
1309
1310                     tmp0 += z3;
1311                     tmp1 += z4;
1312                     tmp2 = z2 + z3;
1313                     tmp3 = z1 + z4;
1314                 }
1315             }
1316         }
1317         else
1318         {
1319             if (d3)
1320             {
1321                 if (d1)
1322                 {
1323             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
1324                     z1 = d7 + d1;
1325                     z3 = d7 + d3;
1326                     z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
1327
1328                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1329                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1330                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1331                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1332                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1333                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1334                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1335
1336                     z3 += z5;
1337                     z4 += z5;
1338
1339                     tmp0 += z1 + z3;
1340                     tmp1 = z2 + z4;
1341                     tmp2 += z2 + z3;
1342                     tmp3 += z1 + z4;
1343                 }
1344                 else
1345                 {
1346             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1347                     z3 = d7 + d3;
1348                     z5 = MULTIPLY(z3, FIX(1.175875602));
1349
1350                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1351                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1352                     tmp2 = MULTIPLY(d3, FIX(0.509795579));
1353                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1354                     z3 = MULTIPLY(z3, - FIX2(0.785694958));
1355
1356                     tmp0 += z3;
1357                     tmp1 = z2 + z5;
1358                     tmp2 += z3;
1359                     tmp3 = z1 + z5;
1360                 }
1361             }
1362             else
1363             {
1364                 if (d1)
1365                 {
1366             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1367                     z1 = d7 + d1;
1368                     z5 = MULTIPLY(z1, FIX(1.175875602));
1369
1370                     tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
1371                     tmp3 = MULTIPLY(d1, FIX2(1.111140466));
1372                     z1 = MULTIPLY(z1, FIX2(0.275899379));
1373                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1374                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1375
1376                     tmp0 += z1;
1377                     tmp1 = z4 + z5;
1378                     tmp2 = z3 + z5;
1379                     tmp3 += z1;
1380                 }
1381                 else
1382                 {
1383             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1384                     tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
1385                     tmp1 = MULTIPLY(d7, FIX(1.175875602));
1386                     tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
1387                     tmp3 = MULTIPLY(d7, FIX2(0.275899379));
1388                 }
1389             }
1390         }
1391     }
1392     else
1393     {
1394         if (d5)
1395         {
1396             if (d3)
1397             {
1398                 if (d1)
1399                 {
1400             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1401                     z2 = d5 + d3;
1402                     z4 = d5 + d1;
1403                     z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
1404
1405                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1406                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1407                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1408                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1409                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1410                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1411                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1412
1413                     z3 += z5;
1414                     z4 += z5;
1415
1416                     tmp0 = z1 + z3;
1417                     tmp1 += z2 + z4;
1418                     tmp2 += z2 + z3;
1419                     tmp3 += z1 + z4;
1420                 }
1421                 else
1422                 {
1423             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1424                     z2 = d5 + d3;
1425                     z5 = MULTIPLY(z2, FIX(1.175875602));
1426
1427                     tmp1 = MULTIPLY(d5, FIX2(1.662939225));
1428                     tmp2 = MULTIPLY(d3, FIX2(1.111140466));
1429                     z2 = MULTIPLY(z2, - FIX2(1.387039845));
1430                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1431                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1432
1433                     tmp0 = z3 + z5;
1434                     tmp1 += z2;
1435                     tmp2 += z2;
1436                     tmp3 = z4 + z5;
1437                 }
1438             }
1439             else
1440             {
1441                 if (d1)
1442                 {
1443             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1444                     z4 = d5 + d1;
1445                     z5 = MULTIPLY(z4, FIX(1.175875602));
1446
1447                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1448                     tmp3 = MULTIPLY(d1, FIX2(0.601344887));
1449                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1450                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1451                     z4 = MULTIPLY(z4, FIX2(0.785694958));
1452
1453                     tmp0 = z1 + z5;
1454                     tmp1 += z4;
1455                     tmp2 = z2 + z5;
1456                     tmp3 += z4;
1457                 }
1458                 else
1459                 {
1460             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1461                     tmp0 = MULTIPLY(d5, FIX(1.175875602));
1462                     tmp1 = MULTIPLY(d5, FIX2(0.275899380));
1463                     tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
1464                     tmp3 = MULTIPLY(d5, FIX2(0.785694958));
1465                 }
1466             }
1467         }
1468         else
1469         {
1470             if (d3)
1471             {
1472                 if (d1)
1473                 {
1474             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1475                     z5 = d3 + d1;
1476
1477                     tmp2 = MULTIPLY(d3, - FIX(1.451774981));
1478                     tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
1479                     z1 = MULTIPLY(d1, FIX(1.061594337));
1480                     z2 = MULTIPLY(d3, - FIX(2.172734803));
1481                     z4 = MULTIPLY(z5, FIX(0.785694958));
1482                     z5 = MULTIPLY(z5, FIX(1.175875602));
1483
1484                     tmp0 = z1 - z4;
1485                     tmp1 = z2 + z4;
1486                     tmp2 += z5;
1487                     tmp3 += z5;
1488                 }
1489                 else
1490                 {
1491             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1492                     tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
1493                     tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
1494                     tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
1495                     tmp3 = MULTIPLY(d3, FIX(1.175875602));
1496                 }
1497             }
1498             else
1499             {
1500                 if (d1)
1501                 {
1502             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1503                     tmp0 = MULTIPLY(d1, FIX2(0.275899379));
1504                     tmp1 = MULTIPLY(d1, FIX2(0.785694958));
1505                     tmp2 = MULTIPLY(d1, FIX(1.175875602));
1506                     tmp3 = MULTIPLY(d1, FIX2(1.387039845));
1507                 }
1508                 else
1509                 {
1510             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1511                     tmp0 = tmp1 = tmp2 = tmp3 = 0;
1512                 }
1513             }
1514         }
1515     }
1516
1517     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1518
1519     dataptr[DCTSIZE*0] = (dctelem_t) DESCALE(tmp10 + tmp3,
1520                        CONST_BITS+PASS1_BITS+3);
1521     dataptr[DCTSIZE*7] = (dctelem_t) DESCALE(tmp10 - tmp3,
1522                        CONST_BITS+PASS1_BITS+3);
1523     dataptr[DCTSIZE*1] = (dctelem_t) DESCALE(tmp11 + tmp2,
1524                        CONST_BITS+PASS1_BITS+3);
1525     dataptr[DCTSIZE*6] = (dctelem_t) DESCALE(tmp11 - tmp2,
1526                        CONST_BITS+PASS1_BITS+3);
1527     dataptr[DCTSIZE*2] = (dctelem_t) DESCALE(tmp12 + tmp1,
1528                        CONST_BITS+PASS1_BITS+3);
1529     dataptr[DCTSIZE*5] = (dctelem_t) DESCALE(tmp12 - tmp1,
1530                        CONST_BITS+PASS1_BITS+3);
1531     dataptr[DCTSIZE*3] = (dctelem_t) DESCALE(tmp13 + tmp0,
1532                        CONST_BITS+PASS1_BITS+3);
1533     dataptr[DCTSIZE*4] = (dctelem_t) DESCALE(tmp13 - tmp0,
1534                        CONST_BITS+PASS1_BITS+3);
1535
1536     dataptr++;             /* advance pointer to next column */
1537     }
1538 #endif
1539 }
1540 #endif