]> git.sesse.net Git - vlc/blob - src/video_decoder/vdec_idct.c
. normalement on devrait se prendre 1 seul mail par commit gr�ce aux
[vlc] / src / video_decoder / vdec_idct.c
1 /*****************************************************************************
2  * vdec_idct.c : IDCT functions
3  *****************************************************************************
4  * Copyright (C) 1999, 2000 VideoLAN
5  *
6  * Authors:
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public
19  * License along with this program; if not, write to the
20  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21  * Boston, MA 02111-1307, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27 #include "defs.h"
28
29 #include <sys/types.h>                        /* on BSD, uio.h needs types.h */
30 #include <sys/uio.h>                                          /* for input.h */
31
32 #include "config.h"
33 #include "common.h"
34 #include "threads.h"
35 #include "mtime.h"
36 #include "plugins.h"
37
38 #include "intf_msg.h"
39
40 #include "input.h"
41 #include "decoder_fifo.h"
42 #include "video.h"
43 #include "video_output.h"
44
45 #include "vdec_idct.h"
46 #include "video_decoder.h"
47 #include "vdec_motion.h"
48
49 #include "vpar_blocks.h"
50 #include "vpar_headers.h"
51 #include "vpar_synchro.h"
52 #include "video_parser.h"
53 #include "video_fifo.h"
54
55 /*
56  * Local prototypes
57  */
58
59 /* Our current implementation is a fast DCT, we might move to a fast DFT or
60  * an MMX DCT in the future. */
61
62 /*****************************************************************************
63  * vdec_InitIDCT : initialize datas for vdec_SparceIDCT
64  * vdec_SparseIDCT : IDCT function for sparse matrices
65  *****************************************************************************/
66
67 void vdec_InitIDCT (vdec_thread_t * p_vdec)
68 {
69     int i;
70
71     dctelem_t * p_pre = p_vdec->p_pre_idct;
72     memset( p_pre, 0, 64*64*sizeof(dctelem_t) );
73
74     for( i=0 ; i < 64 ; i++ )
75     {
76         p_pre[i*64+i] = 1 << SPARSE_SCALE_FACTOR;
77         vdec_IDCT( p_vdec, &p_pre[i*64], 0) ;
78     }
79     return;
80 }
81
82 void vdec_SparseIDCT (vdec_thread_t * p_vdec, dctelem_t * p_block,
83                       int i_sparse_pos)
84 {
85     short int val;
86     int * dp;
87     int v;
88     short int * p_dest;
89     short int * p_source;
90     int coeff, rr;
91
92     /* If DC Coefficient. */
93     if ( i_sparse_pos == 0 )
94     {
95         dp=(int *)p_block;
96         val=RIGHT_SHIFT((*p_block + 4), 3);
97         /* Compute int to assign.  This speeds things up a bit */
98         v = ((val & 0xffff) | (val << 16));
99         dp[0] = v;     dp[1] = v;     dp[2] = v;     dp[3] = v;
100         dp[4] = v;     dp[5] = v;     dp[6] = v;     dp[7] = v;
101         dp[8] = v;     dp[9] = v;     dp[10] = v;    dp[11] = v;
102         dp[12] = v;    dp[13] = v;    dp[14] = v;    dp[15] = v;
103         dp[16] = v;    dp[17] = v;    dp[18] = v;    dp[19] = v;
104         dp[20] = v;    dp[21] = v;    dp[22] = v;    dp[23] = v;
105         dp[24] = v;    dp[25] = v;    dp[26] = v;    dp[27] = v;
106         dp[28] = v;    dp[29] = v;    dp[30] = v;    dp[31] = v;
107         return;
108     }
109     /* Some other coefficient. */
110     p_dest = (s16*)p_block;
111     p_source = (s16*)&p_vdec->p_pre_idct[i_sparse_pos];
112     coeff = (int)p_dest[i_sparse_pos];
113     for( rr=0 ; rr < 4 ; rr++ )
114     {
115         p_dest[0] = (p_source[0] * coeff) >> SPARSE_SCALE_FACTOR;
116         p_dest[1] = (p_source[1] * coeff) >> SPARSE_SCALE_FACTOR;
117         p_dest[2] = (p_source[2] * coeff) >> SPARSE_SCALE_FACTOR;
118         p_dest[3] = (p_source[3] * coeff) >> SPARSE_SCALE_FACTOR;
119         p_dest[4] = (p_source[4] * coeff) >> SPARSE_SCALE_FACTOR;
120         p_dest[5] = (p_source[5] * coeff) >> SPARSE_SCALE_FACTOR;
121         p_dest[6] = (p_source[6] * coeff) >> SPARSE_SCALE_FACTOR;
122         p_dest[7] = (p_source[7] * coeff) >> SPARSE_SCALE_FACTOR;
123         p_dest[8] = (p_source[8] * coeff) >> SPARSE_SCALE_FACTOR;
124         p_dest[9] = (p_source[9] * coeff) >> SPARSE_SCALE_FACTOR;
125         p_dest[10] = (p_source[10] * coeff) >> SPARSE_SCALE_FACTOR;
126         p_dest[11] = (p_source[11] * coeff) >> SPARSE_SCALE_FACTOR;
127         p_dest[12] = (p_source[12] * coeff) >> SPARSE_SCALE_FACTOR;
128         p_dest[13] = (p_source[13] * coeff) >> SPARSE_SCALE_FACTOR;
129         p_dest[14] = (p_source[14] * coeff) >> SPARSE_SCALE_FACTOR;
130         p_dest[15] = (p_source[15] * coeff) >> SPARSE_SCALE_FACTOR;
131         p_dest += 16;
132         p_source += 16;
133     }
134     return;
135 }
136
137
138 /*****************************************************************************
139  * vdec_IDCT : IDCT function for normal matrices
140  *****************************************************************************/
141
142 #ifndef HAVE_MMX
143 void vdec_IDCT( vdec_thread_t * p_vdec, dctelem_t * p_block, int i_idontcare )
144 {
145 #if 0
146     /* dct classique: pour tester la meilleure entre la classique et la */
147     /* no classique */
148     s32 tmp0, tmp1, tmp2, tmp3;
149     s32 tmp10, tmp11, tmp12, tmp13;
150     s32 z1, z2, z3, z4, z5;
151     dctelem_t * dataptr;
152     int rowctr;
153     SHIFT_TEMPS
154
155   /* Pass 1: process rows. */
156   /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
157   /* furthermore, we scale the results by 2**PASS1_BITS. */
158
159     dataptr = p_block;
160     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
161     {
162     /* Due to quantization, we will usually find that many of the input
163      * coefficients are zero, especially the AC terms.  We can exploit this
164      * by short-circuiting the IDCT calculation for any row in which all
165      * the AC terms are zero.  In that case each output is equal to the
166      * DC coefficient (with scale factor as needed).
167      * With typical images and quantization tables, half or more of the
168      * row DCT calculations can be simplified this way.
169      */
170
171         if ((dataptr[1] | dataptr[2] | dataptr[3] | dataptr[4] |
172                 dataptr[5] | dataptr[6] | dataptr[7]) == 0)
173         {
174       /* AC terms all zero */
175             dctelem_t dcval = (dctelem_t) (dataptr[0] << PASS1_BITS);
176
177             dataptr[0] = dcval;
178             dataptr[1] = dcval;
179             dataptr[2] = dcval;
180             dataptr[3] = dcval;
181             dataptr[4] = dcval;
182             dataptr[5] = dcval;
183             dataptr[6] = dcval;
184             dataptr[7] = dcval;
185
186             dataptr += DCTSIZE; /* advance pointer to next row */
187             continue;
188         }
189
190     /* Even part: reverse the even part of the forward DCT. */
191     /* The rotator is sqrt(2)*c(-6). */
192
193         z2 = (s32) dataptr[2];
194         z3 = (s32) dataptr[6];
195
196         z1 = MULTIPLY(z2 + z3, FIX(0.541196100));
197         tmp2 = z1 + MULTIPLY(z3, - FIX(1.847759065));
198         tmp3 = z1 + MULTIPLY(z2, FIX(0.765366865));
199
200         tmp0 = ((s32) dataptr[0] + (s32) dataptr[4]) << CONST_BITS;
201         tmp1 = ((s32) dataptr[0] - (s32) dataptr[4]) << CONST_BITS;
202
203         tmp10 = tmp0 + tmp3;
204         tmp13 = tmp0 - tmp3;
205         tmp11 = tmp1 + tmp2;
206         tmp12 = tmp1 - tmp2;
207
208     /* Odd part per figure 8; the matrix is unitary and hence its
209      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
210      */
211
212         tmp0 = (s32) dataptr[7];
213         tmp1 = (s32) dataptr[5];
214         tmp2 = (s32) dataptr[3];
215         tmp3 = (s32) dataptr[1];
216
217         z1 = tmp0 + tmp3;
218         z2 = tmp1 + tmp2;
219         z3 = tmp0 + tmp2;
220         z4 = tmp1 + tmp3;
221         z5 = MULTIPLY(z3 + z4, FIX(1.175875602)); /* sqrt(2) * c3 */
222
223         tmp0 = MULTIPLY(tmp0, FIX(0.298631336)); /* sqrt(2) * (-c1+c3+c5-c7) */
224         tmp1 = MULTIPLY(tmp1, FIX(2.053119869)); /* sqrt(2) * ( c1+c3-c5+c7) */
225         tmp2 = MULTIPLY(tmp2, FIX(3.072711026)); /* sqrt(2) * ( c1+c3+c5-c7) */
226         tmp3 = MULTIPLY(tmp3, FIX(1.501321110)); /* sqrt(2) * ( c1+c3-c5-c7) */
227         z1 = MULTIPLY(z1, - FIX(0.899976223)); /* sqrt(2) * (c7-c3) */
228         z2 = MULTIPLY(z2, - FIX(2.562915447)); /* sqrt(2) * (-c1-c3) */
229         z3 = MULTIPLY(z3, - FIX(1.961570560)); /* sqrt(2) * (-c3-c5) */
230         z4 = MULTIPLY(z4, - FIX(0.390180644)); /* sqrt(2) * (c5-c3) */
231
232         z3 += z5;
233         z4 += z5;
234
235         tmp0 += z1 + z3;
236         tmp1 += z2 + z4;
237         tmp2 += z2 + z3;
238         tmp3 += z1 + z4;
239
240     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
241
242         dataptr[0] = (dctelem_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
243         dataptr[7] = (dctelem_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
244         dataptr[1] = (dctelem_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
245         dataptr[6] = (dctelem_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
246         dataptr[2] = (dctelem_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
247         dataptr[5] = (dctelem_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
248         dataptr[3] = (dctelem_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
249         dataptr[4] = (dctelem_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
250
251         dataptr += DCTSIZE;             /* advance pointer to next row */
252     }
253
254   /* Pass 2: process columns. */
255   /* Note that we must descale the results by a factor of 8 == 2**3, */
256   /* and also undo the PASS1_BITS scaling. */
257
258     dataptr = p_block;
259     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
260     {
261     /* Columns of zeroes can be exploited in the same way as we did with rows.
262      * However, the row calculation has created many nonzero AC terms, so the
263      * simplification applies less often (typically 5% to 10% of the time).
264      * On machines with very fast multiplication, it's possible that the
265      * test takes more time than it's worth.  In that case this section
266      * may be commented out.
267      */
268
269 #ifndef NO_ZERO_COLUMN_TEST /*ajoute un test mais evite des calculs */
270         if ((dataptr[DCTSIZE*1] | dataptr[DCTSIZE*2] | dataptr[DCTSIZE*3] |
271             dataptr[DCTSIZE*4] | dataptr[DCTSIZE*5] | dataptr[DCTSIZE*6] |
272             dataptr[DCTSIZE*7]) == 0)
273         {
274       /* AC terms all zero */
275             dctelem_t dcval = (dctelem_t) DESCALE((s32) dataptr[0], PASS1_BITS+3);
276
277             dataptr[DCTSIZE*0] = dcval;
278             dataptr[DCTSIZE*1] = dcval;
279             dataptr[DCTSIZE*2] = dcval;
280             dataptr[DCTSIZE*3] = dcval;
281             dataptr[DCTSIZE*4] = dcval;
282             dataptr[DCTSIZE*5] = dcval;
283             dataptr[DCTSIZE*6] = dcval;
284             dataptr[DCTSIZE*7] = dcval;
285
286             dataptr++;          /* advance pointer to next column */
287             continue;
288         }
289 #endif
290
291     /* Even part: reverse the even part of the forward DCT. */
292     /* The rotator is sqrt(2)*c(-6). */
293
294         z2 = (s32) dataptr[DCTSIZE*2];
295         z3 = (s32) dataptr[DCTSIZE*6];
296
297         z1 = MULTIPLY(z2 + z3, FIX(0.541196100));
298         tmp2 = z1 + MULTIPLY(z3, - FIX(1.847759065));
299         tmp3 = z1 + MULTIPLY(z2, FIX(0.765366865));
300
301         tmp0 = ((s32) dataptr[DCTSIZE*0] + (s32) dataptr[DCTSIZE*4]) << CONST_BITS;
302         tmp1 = ((s32) dataptr[DCTSIZE*0] - (s32) dataptr[DCTSIZE*4]) << CONST_BITS;
303
304         tmp10 = tmp0 + tmp3;
305         tmp13 = tmp0 - tmp3;
306         tmp11 = tmp1 + tmp2;
307         tmp12 = tmp1 - tmp2;
308
309     /* Odd part per figure 8; the matrix is unitary and hence its
310      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
311      */
312
313         tmp0 = (s32) dataptr[DCTSIZE*7];
314         tmp1 = (s32) dataptr[DCTSIZE*5];
315         tmp2 = (s32) dataptr[DCTSIZE*3];
316         tmp3 = (s32) dataptr[DCTSIZE*1];
317
318         z1 = tmp0 + tmp3;
319         z2 = tmp1 + tmp2;
320         z3 = tmp0 + tmp2;
321         z4 = tmp1 + tmp3;
322         z5 = MULTIPLY(z3 + z4, FIX(1.175875602)); /* sqrt(2) * c3 */
323
324         tmp0 = MULTIPLY(tmp0, FIX(0.298631336)); /* sqrt(2) * (-c1+c3+c5-c7) */
325         tmp1 = MULTIPLY(tmp1, FIX(2.053119869)); /* sqrt(2) * ( c1+c3-c5+c7) */
326         tmp2 = MULTIPLY(tmp2, FIX(3.072711026)); /* sqrt(2) * ( c1+c3+c5-c7) */
327         tmp3 = MULTIPLY(tmp3, FIX(1.501321110)); /* sqrt(2) * ( c1+c3-c5-c7) */
328         z1 = MULTIPLY(z1, - FIX(0.899976223)); /* sqrt(2) * (c7-c3) */
329         z2 = MULTIPLY(z2, - FIX(2.562915447)); /* sqrt(2) * (-c1-c3) */
330         z3 = MULTIPLY(z3, - FIX(1.961570560)); /* sqrt(2) * (-c3-c5) */
331         z4 = MULTIPLY(z4, - FIX(0.390180644)); /* sqrt(2) * (c5-c3) */
332
333         z3 += z5;
334         z4 += z5;
335
336         tmp0 += z1 + z3;
337         tmp1 += z2 + z4;
338         tmp2 += z2 + z3;
339         tmp3 += z1 + z4;
340
341     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
342
343         dataptr[DCTSIZE*0] = (dctelem_t) DESCALE(tmp10 + tmp3,
344                                            CONST_BITS+PASS1_BITS+3);
345         dataptr[DCTSIZE*7] = (dctelem_t) DESCALE(tmp10 - tmp3,
346                                            CONST_BITS+PASS1_BITS+3);
347         dataptr[DCTSIZE*1] = (dctelem_t) DESCALE(tmp11 + tmp2,
348                                            CONST_BITS+PASS1_BITS+3);
349         dataptr[DCTSIZE*6] = (dctelem_t) DESCALE(tmp11 - tmp2,
350                                            CONST_BITS+PASS1_BITS+3);
351         dataptr[DCTSIZE*2] = (dctelem_t) DESCALE(tmp12 + tmp1,
352                                            CONST_BITS+PASS1_BITS+3);
353         dataptr[DCTSIZE*5] = (dctelem_t) DESCALE(tmp12 - tmp1,
354                                            CONST_BITS+PASS1_BITS+3);
355         dataptr[DCTSIZE*3] = (dctelem_t) DESCALE(tmp13 + tmp0,
356                                            CONST_BITS+PASS1_BITS+3);
357         dataptr[DCTSIZE*4] = (dctelem_t) DESCALE(tmp13 - tmp0,
358                                            CONST_BITS+PASS1_BITS+3);
359
360         dataptr++;                      /* advance pointer to next column */
361     }
362 #endif
363
364 #if 1  /*dct avec non classique*/
365
366     s32 tmp0, tmp1, tmp2, tmp3;
367     s32 tmp10, tmp11, tmp12, tmp13;
368     s32 z1, z2, z3, z4, z5;
369     s32 d0, d1, d2, d3, d4, d5, d6, d7;
370     dctelem_t * dataptr;
371     int rowctr;
372
373     SHIFT_TEMPS
374
375     /* Pass 1: process rows. */
376     /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
377     /* furthermore, we scale the results by 2**PASS1_BITS. */
378
379     dataptr = p_block;
380
381     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
382     {
383         /* Due to quantization, we will usually find that many of the input
384          * coefficients are zero, especially the AC terms.  We can exploit this
385          * by short-circuiting the IDCT calculation for any row in which all
386          * the AC terms are zero.  In that case each output is equal to the
387          * DC coefficient (with scale factor as needed).
388          * With typical images and quantization tables, half or more of the
389          * row DCT calculations can be simplified this way.
390          */
391
392         register int * idataptr = (int*)dataptr;
393         d0 = dataptr[0];
394         d1 = dataptr[1];
395         if ( (d1 == 0) && ((idataptr[1] | idataptr[2] | idataptr[3]) == 0) )
396         {
397       /* AC terms all zero */
398             if (d0)
399             {
400       /* Compute a 32 bit value to assign. */
401                 dctelem_t dcval = (dctelem_t) (d0 << PASS1_BITS);
402                 register int v = (dcval & 0xffff) | (dcval << 16);
403
404                 idataptr[0] = v;
405                 idataptr[1] = v;
406                 idataptr[2] = v;
407                 idataptr[3] = v;
408             }
409
410             dataptr += DCTSIZE; /* advance pointer to next row */
411             continue;
412         }
413         d2 = dataptr[2];
414         d3 = dataptr[3];
415         d4 = dataptr[4];
416         d5 = dataptr[5];
417         d6 = dataptr[6];
418         d7 = dataptr[7];
419
420     /* Even part: reverse the even part of the forward DCT. */
421     /* The rotator is sqrt(2)*c(-6). */
422         if (d6)
423         {
424             if (d4)
425             {
426                 if (d2)
427                 {
428                     if (d0)
429                     {
430             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
431                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
432                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
433                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
434
435                         tmp0 = (d0 + d4) << CONST_BITS;
436                         tmp1 = (d0 - d4) << CONST_BITS;
437
438                         tmp10 = tmp0 + tmp3;
439                         tmp13 = tmp0 - tmp3;
440                         tmp11 = tmp1 + tmp2;
441                         tmp12 = tmp1 - tmp2;
442                     }
443                     else
444                     {
445                     /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
446                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
447                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
448                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
449
450                         tmp0 = d4 << CONST_BITS;
451
452                         tmp10 = tmp0 + tmp3;
453                         tmp13 = tmp0 - tmp3;
454                         tmp11 = tmp2 - tmp0;
455                         tmp12 = -(tmp0 + tmp2);
456                         }
457                 }
458                 else
459                 {
460                     if (d0)
461                     {
462             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
463                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
464                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
465
466                         tmp0 = (d0 + d4) << CONST_BITS;
467                         tmp1 = (d0 - d4) << CONST_BITS;
468
469                         tmp10 = tmp0 + tmp3;
470                         tmp13 = tmp0 - tmp3;
471                         tmp11 = tmp1 + tmp2;
472                         tmp12 = tmp1 - tmp2;
473                         }
474                     else
475                     {
476                     /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
477                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
478                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
479
480                         tmp0 = d4 << CONST_BITS;
481
482                         tmp10 = tmp0 + tmp3;
483                         tmp13 = tmp0 - tmp3;
484                         tmp11 = tmp2 - tmp0;
485                         tmp12 = -(tmp0 + tmp2);
486                         }
487                 }
488             }
489             else
490             {
491                 if (d2)
492                 {
493                     if (d0)
494                     {
495             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
496                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
497                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
498                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
499
500                         tmp0 = d0 << CONST_BITS;
501
502                         tmp10 = tmp0 + tmp3;
503                         tmp13 = tmp0 - tmp3;
504                         tmp11 = tmp0 + tmp2;
505                         tmp12 = tmp0 - tmp2;
506                     }
507                     else
508                     {
509                     /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
510                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
511                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
512                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
513
514                         tmp10 = tmp3;
515                         tmp13 = -tmp3;
516                         tmp11 = tmp2;
517                         tmp12 = -tmp2;
518                             }
519                 }
520                 else
521                 {
522                     if (d0)
523                     {
524             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
525                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
526                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
527
528                         tmp0 = d0 << CONST_BITS;
529
530                         tmp10 = tmp0 + tmp3;
531                         tmp13 = tmp0 - tmp3;
532                         tmp11 = tmp0 + tmp2;
533                         tmp12 = tmp0 - tmp2;
534                     }
535                     else
536                     {
537             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
538                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
539                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
540
541                         tmp10 = tmp3;
542                         tmp13 = -tmp3;
543                         tmp11 = tmp2;
544                         tmp12 = -tmp2;
545                     }
546                 }
547             }
548         }
549         else
550         {
551             if (d4)
552             {
553                 if (d2)
554                 {
555                     if (d0)
556                     {
557                     /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
558                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
559                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
560
561                         tmp0 = (d0 + d4) << CONST_BITS;
562                         tmp1 = (d0 - d4) << CONST_BITS;
563
564                         tmp10 = tmp0 + tmp3;
565                         tmp13 = tmp0 - tmp3;
566                         tmp11 = tmp1 + tmp2;
567                         tmp12 = tmp1 - tmp2;
568                     }
569                     else
570                     {
571             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
572                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
573                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
574
575                         tmp0 = d4 << CONST_BITS;
576
577                         tmp10 = tmp0 + tmp3;
578                         tmp13 = tmp0 - tmp3;
579                         tmp11 = tmp2 - tmp0;
580                         tmp12 = -(tmp0 + tmp2);
581                     }
582                 }
583                 else
584                 {
585                     if (d0)
586                     {
587             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
588                         tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
589                         tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
590                     }
591                     else
592                     {
593             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
594                         tmp10 = tmp13 = d4 << CONST_BITS;
595                         tmp11 = tmp12 = -tmp10;
596                     }
597                 }
598             }
599             else
600             {
601                 if (d2)
602                 {
603                     if (d0)
604                     {
605             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
606                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
607                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
608
609                         tmp0 = d0 << CONST_BITS;
610
611                         tmp10 = tmp0 + tmp3;
612                         tmp13 = tmp0 - tmp3;
613                         tmp11 = tmp0 + tmp2;
614                         tmp12 = tmp0 - tmp2;
615                     }
616                     else
617                     {
618             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
619                         tmp2 = MULTIPLY(d2, FIX(0.541196100));
620                         tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
621
622                         tmp10 = tmp3;
623                         tmp13 = -tmp3;
624                         tmp11 = tmp2;
625                         tmp12 = -tmp2;
626                     }
627                 }
628                 else
629                 {
630                     if (d0)
631                     {
632             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
633                         tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
634                     }
635                     else
636                     {
637             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
638                         tmp10 = tmp13 = tmp11 = tmp12 = 0;
639                     }
640                 }
641             }
642         }
643
644
645     /* Odd part per figure 8; the matrix is unitary and hence its
646      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
647      */
648
649         if (d7)
650             {
651             if (d5)
652             {
653                 if (d3)
654                 {
655                     if (d1)
656                     {
657             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
658                         z1 = d7 + d1;
659                         z2 = d5 + d3;
660                         z3 = d7 + d3;
661                         z4 = d5 + d1;
662                         z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
663
664                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
665                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
666                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
667                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
668                         z1 = MULTIPLY(z1, - FIX(0.899976223));
669                         z2 = MULTIPLY(z2, - FIX(2.562915447));
670                         z3 = MULTIPLY(z3, - FIX(1.961570560));
671                         z4 = MULTIPLY(z4, - FIX(0.390180644));
672
673                         z3 += z5;
674                         z4 += z5;
675
676                         tmp0 += z1 + z3;
677                         tmp1 += z2 + z4;
678                         tmp2 += z2 + z3;
679                         tmp3 += z1 + z4;
680                     }
681                     else
682                     {
683             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
684                         z2 = d5 + d3;
685                         z3 = d7 + d3;
686                         z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
687
688                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
689                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
690                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
691                         z1 = MULTIPLY(d7, - FIX(0.899976223));
692                         z2 = MULTIPLY(z2, - FIX(2.562915447));
693                         z3 = MULTIPLY(z3, - FIX(1.961570560));
694                         z4 = MULTIPLY(d5, - FIX(0.390180644));
695
696                         z3 += z5;
697                         z4 += z5;
698
699                         tmp0 += z1 + z3;
700                         tmp1 += z2 + z4;
701                         tmp2 += z2 + z3;
702                         tmp3 = z1 + z4;
703                         }
704                     }
705                 else
706                 {
707                     if (d1)
708                     {
709             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
710                         z1 = d7 + d1;
711                         z4 = d5 + d1;
712                         z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
713
714                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
715                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
716                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
717                         z1 = MULTIPLY(z1, - FIX(0.899976223));
718                         z2 = MULTIPLY(d5, - FIX(2.562915447));
719                         z3 = MULTIPLY(d7, - FIX(1.961570560));
720                         z4 = MULTIPLY(z4, - FIX(0.390180644));
721
722                         z3 += z5;
723                         z4 += z5;
724
725                         tmp0 += z1 + z3;
726                         tmp1 += z2 + z4;
727                         tmp2 = z2 + z3;
728                         tmp3 += z1 + z4;
729                     }
730                     else
731                     {
732             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
733                         z5 = MULTIPLY(d7 + d5, FIX(1.175875602));
734
735                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
736                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
737                         z1 = MULTIPLY(d7, - FIX(0.899976223));
738                         z3 = MULTIPLY(d7, - FIX(1.961570560));
739                         z2 = MULTIPLY(d5, - FIX(2.562915447));
740                         z4 = MULTIPLY(d5, - FIX(0.390180644));
741
742                         z3 += z5;
743                         z4 += z5;
744
745                         tmp0 += z3;
746                         tmp1 += z4;
747                         tmp2 = z2 + z3;
748                         tmp3 = z1 + z4;
749                     }
750                 }
751             }
752             else
753             {
754                 if (d3)
755                 {
756                     if (d1)
757                     {
758             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
759                         z1 = d7 + d1;
760                         z3 = d7 + d3;
761                         z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
762
763                         tmp0 = MULTIPLY(d7, FIX(0.298631336));
764                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
765                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
766                         z1 = MULTIPLY(z1, - FIX(0.899976223));
767                         z2 = MULTIPLY(d3, - FIX(2.562915447));
768                         z3 = MULTIPLY(z3, - FIX(1.961570560));
769                         z4 = MULTIPLY(d1, - FIX(0.390180644));
770
771                         z3 += z5;
772                         z4 += z5;
773
774                         tmp0 += z1 + z3;
775                         tmp1 = z2 + z4;
776                         tmp2 += z2 + z3;
777                         tmp3 += z1 + z4;
778                     }
779                     else
780                     {
781             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
782                         z3 = d7 + d3;
783                         z5 = MULTIPLY(z3, FIX(1.175875602));
784
785                         tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
786                         tmp2 = MULTIPLY(d3, FIX(0.509795579));
787                         z1 = MULTIPLY(d7, - FIX(0.899976223));
788                         z2 = MULTIPLY(d3, - FIX(2.562915447));
789                         z3 = MULTIPLY(z3, - FIX2(0.785694958));
790
791                         tmp0 += z3;
792                         tmp1 = z2 + z5;
793                         tmp2 += z3;
794                         tmp3 = z1 + z5;
795                     }
796                 }
797                 else
798                 {
799                     if (d1)
800                     {
801             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
802                         z1 = d7 + d1;
803                         z5 = MULTIPLY(z1, FIX(1.175875602));
804
805                         tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
806                         tmp3 = MULTIPLY(d1, FIX2(1.111140466));
807                         z1 = MULTIPLY(z1, FIX2(0.275899379));
808                         z3 = MULTIPLY(d7, - FIX(1.961570560));
809                         z4 = MULTIPLY(d1, - FIX(0.390180644));
810
811                         tmp0 += z1;
812                         tmp1 = z4 + z5;
813                         tmp2 = z3 + z5;
814                         tmp3 += z1;
815                     }
816                 else
817                     {
818             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
819                         tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
820                         tmp1 = MULTIPLY(d7, FIX(1.175875602));
821                         tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
822                         tmp3 = MULTIPLY(d7, FIX2(0.275899379));
823                     }
824                 }
825             }
826         }
827         else
828         {
829             if (d5)
830             {
831                 if (d3)
832                 {
833                     if (d1)
834                     {
835             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
836                         z2 = d5 + d3;
837                         z4 = d5 + d1;
838                         z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
839
840                         tmp1 = MULTIPLY(d5, FIX(2.053119869));
841                         tmp2 = MULTIPLY(d3, FIX(3.072711026));
842                         tmp3 = MULTIPLY(d1, FIX(1.501321110));
843                         z1 = MULTIPLY(d1, - FIX(0.899976223));
844                         z2 = MULTIPLY(z2, - FIX(2.562915447));
845                         z3 = MULTIPLY(d3, - FIX(1.961570560));
846                         z4 = MULTIPLY(z4, - FIX(0.390180644));
847
848                         z3 += z5;
849                         z4 += z5;
850
851                         tmp0 = z1 + z3;
852                         tmp1 += z2 + z4;
853                         tmp2 += z2 + z3;
854                         tmp3 += z1 + z4;
855                     }
856                     else
857                     {
858             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
859                         z2 = d5 + d3;
860                         z5 = MULTIPLY(z2, FIX(1.175875602));
861
862                         tmp1 = MULTIPLY(d5, FIX2(1.662939225));
863                         tmp2 = MULTIPLY(d3, FIX2(1.111140466));
864                         z2 = MULTIPLY(z2, - FIX2(1.387039845));
865                         z3 = MULTIPLY(d3, - FIX(1.961570560));
866                         z4 = MULTIPLY(d5, - FIX(0.390180644));
867
868                         tmp0 = z3 + z5;
869                         tmp1 += z2;
870                         tmp2 += z2;
871                         tmp3 = z4 + z5;
872                     }
873                 }
874                 else
875                 {
876                     if (d1)
877                     {
878             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
879                         z4 = d5 + d1;
880                         z5 = MULTIPLY(z4, FIX(1.175875602));
881
882                         tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
883                         tmp3 = MULTIPLY(d1, FIX2(0.601344887));
884                         z1 = MULTIPLY(d1, - FIX(0.899976223));
885                         z2 = MULTIPLY(d5, - FIX(2.562915447));
886                         z4 = MULTIPLY(z4, FIX2(0.785694958));
887
888                         tmp0 = z1 + z5;
889                         tmp1 += z4;
890                         tmp2 = z2 + z5;
891                         tmp3 += z4;
892                     }
893                     else
894                     {
895             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
896                         tmp0 = MULTIPLY(d5, FIX(1.175875602));
897                         tmp1 = MULTIPLY(d5, FIX2(0.275899380));
898                         tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
899                         tmp3 = MULTIPLY(d5, FIX2(0.785694958));
900                     }
901                 }
902             }
903             else
904             {
905                 if (d3)
906                 {
907                     if (d1)
908                     {
909             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
910                         z5 = d3 + d1;
911
912                         tmp2 = MULTIPLY(d3, - FIX(1.451774981));
913                         tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
914                         z1 = MULTIPLY(d1, FIX(1.061594337));
915                         z2 = MULTIPLY(d3, - FIX(2.172734803));
916                         z4 = MULTIPLY(z5, FIX(0.785694958));
917                         z5 = MULTIPLY(z5, FIX(1.175875602));
918
919                         tmp0 = z1 - z4;
920                         tmp1 = z2 + z4;
921                         tmp2 += z5;
922                         tmp3 += z5;
923                     }
924                     else
925                     {
926             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
927                         tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
928                         tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
929                         tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
930                         tmp3 = MULTIPLY(d3, FIX(1.175875602));
931                     }
932                 }
933                 else
934                 {
935                     if (d1)
936                     {
937             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
938                         tmp0 = MULTIPLY(d1, FIX2(0.275899379));
939                         tmp1 = MULTIPLY(d1, FIX2(0.785694958));
940                         tmp2 = MULTIPLY(d1, FIX(1.175875602));
941                         tmp3 = MULTIPLY(d1, FIX2(1.387039845));
942                     }
943                     else
944                     {
945             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
946                         tmp0 = tmp1 = tmp2 = tmp3 = 0;
947                     }
948                 }
949             }
950         }
951
952     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
953
954         dataptr[0] = (dctelem_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
955         dataptr[7] = (dctelem_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
956         dataptr[1] = (dctelem_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
957         dataptr[6] = (dctelem_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
958         dataptr[2] = (dctelem_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
959         dataptr[5] = (dctelem_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
960         dataptr[3] = (dctelem_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
961         dataptr[4] = (dctelem_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
962
963         dataptr += DCTSIZE;              /* advance pointer to next row */
964     }
965
966   /* Pass 2: process columns. */
967   /* Note that we must descale the results by a factor of 8 == 2**3, */
968   /* and also undo the PASS1_BITS scaling. */
969
970     dataptr = p_block;
971     for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
972     {
973     /* Columns of zeroes can be exploited in the same way as we did with rows.
974      * However, the row calculation has created many nonzero AC terms, so the
975      * simplification applies less often (typically 5% to 10% of the time).
976      * On machines with very fast multiplication, it's possible that the
977      * test takes more time than it's worth.  In that case this section
978      * may be commented out.
979      */
980
981         d0 = dataptr[DCTSIZE*0];
982         d1 = dataptr[DCTSIZE*1];
983         d2 = dataptr[DCTSIZE*2];
984         d3 = dataptr[DCTSIZE*3];
985         d4 = dataptr[DCTSIZE*4];
986         d5 = dataptr[DCTSIZE*5];
987         d6 = dataptr[DCTSIZE*6];
988         d7 = dataptr[DCTSIZE*7];
989
990     /* Even part: reverse the even part of the forward DCT. */
991     /* The rotator is sqrt(2)*c(-6). */
992         if (d6)
993         {
994             if (d4)
995             {
996                 if (d2)
997                 {
998                     if (d0)
999                     {
1000             /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
1001                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
1002                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
1003                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
1004
1005                         tmp0 = (d0 + d4) << CONST_BITS;
1006                         tmp1 = (d0 - d4) << CONST_BITS;
1007
1008                         tmp10 = tmp0 + tmp3;
1009                         tmp13 = tmp0 - tmp3;
1010                         tmp11 = tmp1 + tmp2;
1011                         tmp12 = tmp1 - tmp2;
1012                     }
1013                     else
1014                     {
1015             /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
1016                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
1017                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
1018                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
1019
1020                         tmp0 = d4 << CONST_BITS;
1021
1022                         tmp10 = tmp0 + tmp3;
1023                         tmp13 = tmp0 - tmp3;
1024                         tmp11 = tmp2 - tmp0;
1025                         tmp12 = -(tmp0 + tmp2);
1026                     }
1027                 }
1028                 else
1029                 {
1030                     if (d0)
1031                     {
1032             /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
1033                         tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
1034                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
1035
1036                         tmp0 = (d0 + d4) << CONST_BITS;
1037                         tmp1 = (d0 - d4) << CONST_BITS;
1038
1039                         tmp10 = tmp0 + tmp3;
1040                         tmp13 = tmp0 - tmp3;
1041                         tmp11 = tmp1 + tmp2;
1042                         tmp12 = tmp1 - tmp2;
1043                     }
1044                     else
1045                     {
1046             /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
1047                         tmp2 = MULTIPLY(d6, -FIX2(1.306562965));
1048                         tmp3 = MULTIPLY(d6, FIX(0.541196100));
1049
1050                         tmp0 = d4 << CONST_BITS;
1051
1052                         tmp10 = tmp0 + tmp3;
1053                         tmp13 = tmp0 - tmp3;
1054                         tmp11 = tmp2 - tmp0;
1055                         tmp12 = -(tmp0 + tmp2);
1056                     }
1057                 }
1058             }
1059             else
1060             {
1061                 if (d2)
1062                 {
1063                     if (d0)
1064                     {
1065             /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
1066                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
1067                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
1068                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
1069
1070                         tmp0 = d0 << CONST_BITS;
1071
1072                         tmp10 = tmp0 + tmp3;
1073                         tmp13 = tmp0 - tmp3;
1074                         tmp11 = tmp0 + tmp2;
1075                         tmp12 = tmp0 - tmp2;
1076                     }
1077                     else
1078                     {
1079             /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
1080                         z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
1081                         tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
1082                         tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
1083
1084                         tmp10 = tmp3;
1085                         tmp13 = -tmp3;
1086                         tmp11 = tmp2;
1087                         tmp12 = -tmp2;
1088                     }
1089                 }
1090                 else
1091                 {
1092                     if (d0)
1093                     {
1094             /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
1095                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
1096                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
1097
1098                     tmp0 = d0 << CONST_BITS;
1099
1100                     tmp10 = tmp0 + tmp3;
1101                     tmp13 = tmp0 - tmp3;
1102                     tmp11 = tmp0 + tmp2;
1103                     tmp12 = tmp0 - tmp2;
1104                 }
1105                 else
1106                 {
1107             /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
1108                     tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
1109                     tmp3 = MULTIPLY(d6, FIX(0.541196100));
1110                     tmp10 = tmp3;
1111                     tmp13 = -tmp3;
1112                     tmp11 = tmp2;
1113                     tmp12 = -tmp2;
1114                 }
1115             }
1116         }
1117     }
1118     else
1119     {
1120         if (d4)
1121         {
1122             if (d2)
1123             {
1124                 if (d0)
1125                 {
1126             /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
1127                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
1128                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
1129
1130                     tmp0 = (d0 + d4) << CONST_BITS;
1131                     tmp1 = (d0 - d4) << CONST_BITS;
1132
1133                     tmp10 = tmp0 + tmp3;
1134                     tmp13 = tmp0 - tmp3;
1135                     tmp11 = tmp1 + tmp2;
1136                     tmp12 = tmp1 - tmp2;
1137                 }
1138                 else
1139                 {
1140             /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
1141                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
1142                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
1143
1144                     tmp0 = d4 << CONST_BITS;
1145
1146                     tmp10 = tmp0 + tmp3;
1147                     tmp13 = tmp0 - tmp3;
1148                     tmp11 = tmp2 - tmp0;
1149                     tmp12 = -(tmp0 + tmp2);
1150                 }
1151             }
1152             else
1153             {
1154                 if (d0)
1155                 {
1156             /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
1157                     tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
1158                     tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
1159                 }
1160                 else
1161                 {
1162             /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
1163                     tmp10 = tmp13 = d4 << CONST_BITS;
1164                     tmp11 = tmp12 = -tmp10;
1165                 }
1166             }
1167         }
1168         else
1169         {
1170         if (d2)
1171         {
1172             if (d0)
1173             {
1174             /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
1175                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
1176                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
1177
1178                     tmp0 = d0 << CONST_BITS;
1179
1180                     tmp10 = tmp0 + tmp3;
1181                     tmp13 = tmp0 - tmp3;
1182                     tmp11 = tmp0 + tmp2;
1183                     tmp12 = tmp0 - tmp2;
1184             }
1185             else
1186             {
1187             /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
1188                     tmp2 = MULTIPLY(d2, FIX(0.541196100));
1189                     tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
1190
1191                     tmp10 = tmp3;
1192                     tmp13 = -tmp3;
1193                     tmp11 = tmp2;
1194                     tmp12 = -tmp2;
1195             }
1196         }
1197         else
1198         {
1199             if (d0)
1200                 {
1201             /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
1202                     tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
1203                 }
1204                 else
1205                 {
1206             /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
1207                     tmp10 = tmp13 = tmp11 = tmp12 = 0;
1208                 }
1209             }
1210         }
1211     }
1212
1213     /* Odd part per figure 8; the matrix is unitary and hence its
1214      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
1215      */
1216     if (d7)
1217     {
1218         if (d5)
1219         {
1220             if (d3)
1221             {
1222                 if (d1)
1223                 {
1224             /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
1225                     z1 = d7 + d1;
1226                     z2 = d5 + d3;
1227                     z3 = d7 + d3;
1228                     z4 = d5 + d1;
1229                     z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
1230
1231                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1232                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1233                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1234                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1235                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1236                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1237                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1238                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1239
1240                     z3 += z5;
1241                     z4 += z5;
1242
1243                     tmp0 += z1 + z3;
1244                     tmp1 += z2 + z4;
1245                     tmp2 += z2 + z3;
1246                     tmp3 += z1 + z4;
1247                 }
1248                 else
1249                 {
1250             /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
1251                     z2 = d5 + d3;
1252                     z3 = d7 + d3;
1253                     z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
1254
1255                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1256                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1257                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1258                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1259                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1260                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1261                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1262
1263                     z3 += z5;
1264                     z4 += z5;
1265
1266                     tmp0 += z1 + z3;
1267                     tmp1 += z2 + z4;
1268                     tmp2 += z2 + z3;
1269                     tmp3 = z1 + z4;
1270                 }
1271             }
1272             else
1273             {
1274                 if (d1)
1275                 {
1276             /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
1277                     z1 = d7 + d1;
1278                     z4 = d5 + d1;
1279                     z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
1280
1281                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1282                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1283                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1284                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1285                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1286                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1287                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1288
1289                     z3 += z5;
1290                     z4 += z5;
1291
1292                     tmp0 += z1 + z3;
1293                     tmp1 += z2 + z4;
1294                     tmp2 = z2 + z3;
1295                     tmp3 += z1 + z4;
1296                 }
1297                 else
1298                 {
1299             /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
1300                     z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
1301
1302                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1303                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1304                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1305                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1306                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1307                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1308
1309                     z3 += z5;
1310                     z4 += z5;
1311
1312                     tmp0 += z3;
1313                     tmp1 += z4;
1314                     tmp2 = z2 + z3;
1315                     tmp3 = z1 + z4;
1316                 }
1317             }
1318         }
1319         else
1320         {
1321             if (d3)
1322             {
1323                 if (d1)
1324                 {
1325             /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
1326                     z1 = d7 + d1;
1327                     z3 = d7 + d3;
1328                     z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
1329
1330                     tmp0 = MULTIPLY(d7, FIX(0.298631336));
1331                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1332                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1333                     z1 = MULTIPLY(z1, - FIX(0.899976223));
1334                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1335                     z3 = MULTIPLY(z3, - FIX(1.961570560));
1336                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1337
1338                     z3 += z5;
1339                     z4 += z5;
1340
1341                     tmp0 += z1 + z3;
1342                     tmp1 = z2 + z4;
1343                     tmp2 += z2 + z3;
1344                     tmp3 += z1 + z4;
1345                 }
1346                 else
1347                 {
1348             /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1349                     z3 = d7 + d3;
1350                     z5 = MULTIPLY(z3, FIX(1.175875602));
1351
1352                     tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1353                     z1 = MULTIPLY(d7, - FIX(0.899976223));
1354                     tmp2 = MULTIPLY(d3, FIX(0.509795579));
1355                     z2 = MULTIPLY(d3, - FIX(2.562915447));
1356                     z3 = MULTIPLY(z3, - FIX2(0.785694958));
1357
1358                     tmp0 += z3;
1359                     tmp1 = z2 + z5;
1360                     tmp2 += z3;
1361                     tmp3 = z1 + z5;
1362                 }
1363             }
1364             else
1365             {
1366                 if (d1)
1367                 {
1368             /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1369                     z1 = d7 + d1;
1370                     z5 = MULTIPLY(z1, FIX(1.175875602));
1371
1372                     tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
1373                     tmp3 = MULTIPLY(d1, FIX2(1.111140466));
1374                     z1 = MULTIPLY(z1, FIX2(0.275899379));
1375                     z3 = MULTIPLY(d7, - FIX(1.961570560));
1376                     z4 = MULTIPLY(d1, - FIX(0.390180644));
1377
1378                     tmp0 += z1;
1379                     tmp1 = z4 + z5;
1380                     tmp2 = z3 + z5;
1381                     tmp3 += z1;
1382                 }
1383                 else
1384                 {
1385             /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1386                     tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
1387                     tmp1 = MULTIPLY(d7, FIX(1.175875602));
1388                     tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
1389                     tmp3 = MULTIPLY(d7, FIX2(0.275899379));
1390                 }
1391             }
1392         }
1393     }
1394     else
1395     {
1396         if (d5)
1397         {
1398             if (d3)
1399             {
1400                 if (d1)
1401                 {
1402             /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1403                     z2 = d5 + d3;
1404                     z4 = d5 + d1;
1405                     z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
1406
1407                     tmp1 = MULTIPLY(d5, FIX(2.053119869));
1408                     tmp2 = MULTIPLY(d3, FIX(3.072711026));
1409                     tmp3 = MULTIPLY(d1, FIX(1.501321110));
1410                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1411                     z2 = MULTIPLY(z2, - FIX(2.562915447));
1412                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1413                     z4 = MULTIPLY(z4, - FIX(0.390180644));
1414
1415                     z3 += z5;
1416                     z4 += z5;
1417
1418                     tmp0 = z1 + z3;
1419                     tmp1 += z2 + z4;
1420                     tmp2 += z2 + z3;
1421                     tmp3 += z1 + z4;
1422                 }
1423                 else
1424                 {
1425             /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1426                     z2 = d5 + d3;
1427                     z5 = MULTIPLY(z2, FIX(1.175875602));
1428
1429                     tmp1 = MULTIPLY(d5, FIX2(1.662939225));
1430                     tmp2 = MULTIPLY(d3, FIX2(1.111140466));
1431                     z2 = MULTIPLY(z2, - FIX2(1.387039845));
1432                     z3 = MULTIPLY(d3, - FIX(1.961570560));
1433                     z4 = MULTIPLY(d5, - FIX(0.390180644));
1434
1435                     tmp0 = z3 + z5;
1436                     tmp1 += z2;
1437                     tmp2 += z2;
1438                     tmp3 = z4 + z5;
1439                 }
1440             }
1441             else
1442             {
1443                 if (d1)
1444                 {
1445             /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1446                     z4 = d5 + d1;
1447                     z5 = MULTIPLY(z4, FIX(1.175875602));
1448
1449                     tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1450                     tmp3 = MULTIPLY(d1, FIX2(0.601344887));
1451                     z1 = MULTIPLY(d1, - FIX(0.899976223));
1452                     z2 = MULTIPLY(d5, - FIX(2.562915447));
1453                     z4 = MULTIPLY(z4, FIX2(0.785694958));
1454
1455                     tmp0 = z1 + z5;
1456                     tmp1 += z4;
1457                     tmp2 = z2 + z5;
1458                     tmp3 += z4;
1459                 }
1460                 else
1461                 {
1462             /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1463                     tmp0 = MULTIPLY(d5, FIX(1.175875602));
1464                     tmp1 = MULTIPLY(d5, FIX2(0.275899380));
1465                     tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
1466                     tmp3 = MULTIPLY(d5, FIX2(0.785694958));
1467                 }
1468             }
1469         }
1470         else
1471         {
1472             if (d3)
1473             {
1474                 if (d1)
1475                 {
1476             /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1477                     z5 = d3 + d1;
1478
1479                     tmp2 = MULTIPLY(d3, - FIX(1.451774981));
1480                     tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
1481                     z1 = MULTIPLY(d1, FIX(1.061594337));
1482                     z2 = MULTIPLY(d3, - FIX(2.172734803));
1483                     z4 = MULTIPLY(z5, FIX(0.785694958));
1484                     z5 = MULTIPLY(z5, FIX(1.175875602));
1485
1486                     tmp0 = z1 - z4;
1487                     tmp1 = z2 + z4;
1488                     tmp2 += z5;
1489                     tmp3 += z5;
1490                 }
1491                 else
1492                 {
1493             /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1494                     tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
1495                     tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
1496                     tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
1497                     tmp3 = MULTIPLY(d3, FIX(1.175875602));
1498                 }
1499             }
1500             else
1501             {
1502                 if (d1)
1503                 {
1504             /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1505                     tmp0 = MULTIPLY(d1, FIX2(0.275899379));
1506                     tmp1 = MULTIPLY(d1, FIX2(0.785694958));
1507                     tmp2 = MULTIPLY(d1, FIX(1.175875602));
1508                     tmp3 = MULTIPLY(d1, FIX2(1.387039845));
1509                 }
1510                 else
1511                 {
1512             /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1513                     tmp0 = tmp1 = tmp2 = tmp3 = 0;
1514                 }
1515             }
1516         }
1517     }
1518
1519     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1520
1521     dataptr[DCTSIZE*0] = (dctelem_t) DESCALE(tmp10 + tmp3,
1522                        CONST_BITS+PASS1_BITS+3);
1523     dataptr[DCTSIZE*7] = (dctelem_t) DESCALE(tmp10 - tmp3,
1524                        CONST_BITS+PASS1_BITS+3);
1525     dataptr[DCTSIZE*1] = (dctelem_t) DESCALE(tmp11 + tmp2,
1526                        CONST_BITS+PASS1_BITS+3);
1527     dataptr[DCTSIZE*6] = (dctelem_t) DESCALE(tmp11 - tmp2,
1528                        CONST_BITS+PASS1_BITS+3);
1529     dataptr[DCTSIZE*2] = (dctelem_t) DESCALE(tmp12 + tmp1,
1530                        CONST_BITS+PASS1_BITS+3);
1531     dataptr[DCTSIZE*5] = (dctelem_t) DESCALE(tmp12 - tmp1,
1532                        CONST_BITS+PASS1_BITS+3);
1533     dataptr[DCTSIZE*3] = (dctelem_t) DESCALE(tmp13 + tmp0,
1534                        CONST_BITS+PASS1_BITS+3);
1535     dataptr[DCTSIZE*4] = (dctelem_t) DESCALE(tmp13 - tmp0,
1536                        CONST_BITS+PASS1_BITS+3);
1537
1538     dataptr++;             /* advance pointer to next column */
1539     }
1540 #endif
1541 }
1542 #endif