1 /*****************************************************************************
2 * vdec_idct.c : IDCT functions
4 *****************************************************************************/
6 /*****************************************************************************
8 *****************************************************************************/
17 #include <X11/extensions/XShm.h>
22 #include "vlc_thread.h"
25 #include "debug.h" /* ?? temporaire, requis par netlist.h */
28 #include "input_netlist.h"
29 #include "decoder_fifo.h"
31 #include "video_output.h"
33 #include "vdec_idct.h"
34 #include "video_decoder.h"
35 #include "vdec_motion.h"
37 #include "vpar_blocks.h"
38 #include "vpar_headers.h"
39 #include "video_fifo.h"
40 #include "vpar_synchro.h"
41 #include "video_parser.h"
47 /* Our current implementation is a fast DCT, we might move to a fast DFT or
48 * an MMX DCT in the future. */
50 /*****************************************************************************
51 * vdec_DummyIDCT : dummy function that does nothing
52 *****************************************************************************/
/* Takes the same leading parameters as vdec_SparseIDCT and vdec_IDCT
 * (decoder thread structure, then the block of elem_t values); per the
 * banner above, it performs no work on them. */
53 void vdec_DummyIDCT( vdec_thread_t * p_vdec, elem_t * p_block,
58 /*****************************************************************************
59 * vdec_InitIDCT : initialize data for vdec_SparseIDCT
60 * vdec_SparseIDCT : IDCT function for sparse matrices
61 *****************************************************************************/
/* Fills the p_vdec->p_pre_idct table that vdec_SparseIDCT reads.
 * The table is handled as 64 rows of 64 elem_t values: it is cleared, then
 * for every i a single value (1 << SPARSE_SCALE_FACTOR) is stored at
 * position i of row i and vdec_IDCT() is run on that row.
 *   p_vdec - decoder thread structure owning p_pre_idct */
63 void vdec_InitIDCT (vdec_thread_t * p_vdec)
67 elem_t * p_pre = p_vdec->p_pre_idct;
/* Zero all 64*64 entries before the rows are seeded. */
68 memset( p_pre, 0, 64*64*sizeof(elem_t) );
70 for( i=0 ; i < 64 ; i++ )
/* Row i starts at p_pre[i*64]; only its i-th entry is non-zero. */
72 p_pre[i*64+i] = 1 << SPARSE_SCALE_FACTOR;
/* Transform row i through vdec_IDCT; the last argument is passed as 0. */
73 vdec_IDCT( p_vdec, &p_pre[i*64], 0) ;
/* IDCT shortcut driven by i_sparse_pos, the index of the coefficient to
 * expand inside p_block.
 *   - i_sparse_pos == 0 : the output is one 16-bit value repeated.
 *   - otherwise         : outputs are p_pre_idct table entries multiplied by
 *                         the coefficient and shifted right by
 *                         SPARSE_SCALE_FACTOR.
 *   p_vdec  - decoder thread structure holding the p_pre_idct table
 *   p_block - block of elem_t values, overwritten with the result */
77 void vdec_SparseIDCT (vdec_thread_t * p_vdec, elem_t * p_block,
87 /* If DC Coefficient. */
89 if ( i_sparse_pos == 0 )
93 /* Compute int to assign. This speeds things up a bit */
/* v carries the low 16 bits of val in both its low and high halves. */
94 v = ((val & 0xffff) | (val << 16));
/* 32 stores of v through dp; presumably dp is an int pointer onto p_block,
 * so each store covers two 16-bit entries - TODO confirm. */
95 dp[0] = v; dp[1] = v; dp[2] = v; dp[3] = v;
96 dp[4] = v; dp[5] = v; dp[6] = v; dp[7] = v;
97 dp[8] = v; dp[9] = v; dp[10] = v; dp[11] = v;
98 dp[12] = v; dp[13] = v; dp[14] = v; dp[15] = v;
99 dp[16] = v; dp[17] = v; dp[18] = v; dp[19] = v;
100 dp[20] = v; dp[21] = v; dp[22] = v; dp[23] = v;
101 dp[24] = v; dp[25] = v; dp[26] = v; dp[27] = v;
102 dp[28] = v; dp[29] = v; dp[30] = v; dp[31] = v;
105 /* Some other coefficient. */
106 p_dest = (s16*)p_block;
/* NOTE(review): vdec_InitIDCT stores the row for coefficient i starting at
 * p_pre[i*64]; indexing with i_sparse_pos alone (no *64 stride) looks
 * inconsistent with that layout - confirm against the declaration of
 * p_pre_idct. */
107 p_source = (s16*)&p_vdec->p_pre_idct[i_sparse_pos];
/* The coefficient is read before p_dest[] is overwritten below. */
108 coeff = (int)p_dest[i_sparse_pos];
109 for( rr=0 ; rr < 4 ; rr++ )
/* Each pass writes 16 outputs: table entry times coeff, scaled back down. */
111 p_dest[0] = (p_source[0] * coeff) >> SPARSE_SCALE_FACTOR;
112 p_dest[1] = (p_source[1] * coeff) >> SPARSE_SCALE_FACTOR;
113 p_dest[2] = (p_source[2] * coeff) >> SPARSE_SCALE_FACTOR;
114 p_dest[3] = (p_source[3] * coeff) >> SPARSE_SCALE_FACTOR;
115 p_dest[4] = (p_source[4] * coeff) >> SPARSE_SCALE_FACTOR;
116 p_dest[5] = (p_source[5] * coeff) >> SPARSE_SCALE_FACTOR;
117 p_dest[6] = (p_source[6] * coeff) >> SPARSE_SCALE_FACTOR;
118 p_dest[7] = (p_source[7] * coeff) >> SPARSE_SCALE_FACTOR;
119 p_dest[8] = (p_source[8] * coeff) >> SPARSE_SCALE_FACTOR;
120 p_dest[9] = (p_source[9] * coeff) >> SPARSE_SCALE_FACTOR;
121 p_dest[10] = (p_source[10] * coeff) >> SPARSE_SCALE_FACTOR;
122 p_dest[11] = (p_source[11] * coeff) >> SPARSE_SCALE_FACTOR;
123 p_dest[12] = (p_source[12] * coeff) >> SPARSE_SCALE_FACTOR;
124 p_dest[13] = (p_source[13] * coeff) >> SPARSE_SCALE_FACTOR;
125 p_dest[14] = (p_source[14] * coeff) >> SPARSE_SCALE_FACTOR;
126 p_dest[15] = (p_source[15] * coeff) >> SPARSE_SCALE_FACTOR;
134 /*****************************************************************************
135 * vdec_IDCT : IDCT function for normal matrices
136 *****************************************************************************/
/* General inverse DCT in fixed-point arithmetic, done in two passes through
 * dataptr: pass 1 transforms rows of 8 values, pass 2 transforms columns
 * (stride DCTSIZE). Each pass splits its inputs into an even part
 * (d0, d2, d4, d6 -> tmp10..tmp13) and an odd part (d1, d3, d5, d7 ->
 * tmp0..tmp3) and then combines them in a final output stage.
 *   p_vdec      - decoder thread structure
 *   p_block     - block of elem_t values; presumably what dataptr walks,
 *                 transformed in place - TODO confirm
 *   i_idontcare - presumably ignored (vdec_InitIDCT passes 0) - confirm */
137 void vdec_IDCT( vdec_thread_t * p_vdec, elem_t * p_block, int i_idontcare )
139 s32 tmp0, tmp1, tmp2, tmp3;
140 s32 tmp10, tmp11, tmp12, tmp13;
141 s32 z1, z2, z3, z4, z5;
142 s32 d0, d1, d2, d3, d4, d5, d6, d7;
148 /* Pass 1: process rows. */
149 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
150 /* furthermore, we scale the results by 2**PASS1_BITS. */
154 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
156 /* Due to quantization, we will usually find that many of the input
157 * coefficients are zero, especially the AC terms. We can exploit this
158 * by short-circuiting the IDCT calculation for any row in which all
159 * the AC terms are zero. In that case each output is equal to the
160 * DC coefficient (with scale factor as needed).
161 * With typical images and quantization tables, half or more of the
162 * row DCT calculations can be simplified this way.
165 register int * idataptr = (int*)dataptr;
168 if ( (d1 == 0) && ((idataptr[1] | idataptr[2] | idataptr[3]) == 0) )
170 /* AC terms all zero */
173 /* Compute a 32 bit value to assign. */
174 elem_t dcval = (elem_t) (d0 << PASS1_BITS);
175 register int v = (dcval & 0xffff) | (dcval << 16);
183 dataptr += DCTSIZE; /* advance pointer to next row */
193 /* Even part: reverse the even part of the forward DCT. */
194 /* The rotator is sqrt(2)*c(-6). */
/* The sixteen zero / non-zero combinations of d0, d2, d4 and d6 are handled
 * separately; the comment heading each case states which inputs are zero,
 * so terms known to vanish are not computed. */
203 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
204 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
205 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
206 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
208 tmp0 = (d0 + d4) << CONST_BITS;
209 tmp1 = (d0 - d4) << CONST_BITS;
218 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
219 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
220 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
221 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
223 tmp0 = d4 << CONST_BITS;
228 tmp12 = -(tmp0 + tmp2);
235 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
236 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
237 tmp3 = MULTIPLY(d6, FIX(0.541196100));
239 tmp0 = (d0 + d4) << CONST_BITS;
240 tmp1 = (d0 - d4) << CONST_BITS;
249 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
250 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
251 tmp3 = MULTIPLY(d6, FIX(0.541196100));
253 tmp0 = d4 << CONST_BITS;
258 tmp12 = -(tmp0 + tmp2);
268 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
269 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
270 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
271 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
273 tmp0 = d0 << CONST_BITS;
282 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
283 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
284 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
285 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
297 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
298 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
299 tmp3 = MULTIPLY(d6, FIX(0.541196100));
301 tmp0 = d0 << CONST_BITS;
310 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
311 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
312 tmp3 = MULTIPLY(d6, FIX(0.541196100));
330 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
/* NOTE(review): in the d6 == 0 cases the second factor is written as
 * FIX(1.306562965) + .5; if FIX() yields an integer this makes the operand
 * floating-point - confirm that this is intended. */
331 tmp2 = MULTIPLY(d2, FIX(0.541196100));
332 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
334 tmp0 = (d0 + d4) << CONST_BITS;
335 tmp1 = (d0 - d4) << CONST_BITS;
344 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
345 tmp2 = MULTIPLY(d2, FIX(0.541196100));
346 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
348 tmp0 = d4 << CONST_BITS;
353 tmp12 = -(tmp0 + tmp2);
360 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
361 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
362 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
366 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
367 tmp10 = tmp13 = d4 << CONST_BITS;
368 tmp11 = tmp12 = -tmp10;
378 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
379 tmp2 = MULTIPLY(d2, FIX(0.541196100));
380 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
382 tmp0 = d0 << CONST_BITS;
391 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
392 tmp2 = MULTIPLY(d2, FIX(0.541196100));
393 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
405 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
406 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
410 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
411 tmp10 = tmp13 = tmp11 = tmp12 = 0;
/* The odd inputs d1, d3, d5 and d7 get the same sixteen-way case split,
 * producing tmp0..tmp3. */
418 /* Odd part per figure 8; the matrix is unitary and hence its
419 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
430 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
435 z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
437 tmp0 = MULTIPLY(d7, FIX(0.298631336));
438 tmp1 = MULTIPLY(d5, FIX(2.053119869));
439 tmp2 = MULTIPLY(d3, FIX(3.072711026));
440 tmp3 = MULTIPLY(d1, FIX(1.501321110));
441 z1 = MULTIPLY(z1, - FIX(0.899976223));
442 z2 = MULTIPLY(z2, - FIX(2.562915447));
443 z3 = MULTIPLY(z3, - FIX(1.961570560));
444 z4 = MULTIPLY(z4, - FIX(0.390180644));
456 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
459 z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
461 tmp0 = MULTIPLY(d7, FIX(0.298631336));
462 tmp1 = MULTIPLY(d5, FIX(2.053119869));
463 tmp2 = MULTIPLY(d3, FIX(3.072711026));
464 z1 = MULTIPLY(d7, - FIX(0.899976223));
465 z2 = MULTIPLY(z2, - FIX(2.562915447));
466 z3 = MULTIPLY(z3, - FIX(1.961570560));
467 z4 = MULTIPLY(d5, - FIX(0.390180644));
482 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
485 z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
487 tmp0 = MULTIPLY(d7, FIX(0.298631336));
488 tmp1 = MULTIPLY(d5, FIX(2.053119869));
489 tmp3 = MULTIPLY(d1, FIX(1.501321110));
490 z1 = MULTIPLY(z1, - FIX(0.899976223));
491 z2 = MULTIPLY(d5, - FIX(2.562915447));
492 z3 = MULTIPLY(d7, - FIX(1.961570560));
493 z4 = MULTIPLY(z4, - FIX(0.390180644));
505 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
506 z5 = MULTIPLY(d7 + d5, FIX(1.175875602));
508 tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
509 tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
510 z1 = MULTIPLY(d7, - FIX(0.899976223));
511 z3 = MULTIPLY(d7, - FIX(1.961570560));
512 z2 = MULTIPLY(d5, - FIX(2.562915447));
513 z4 = MULTIPLY(d5, - FIX(0.390180644));
531 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
534 z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
536 tmp0 = MULTIPLY(d7, FIX(0.298631336));
537 tmp2 = MULTIPLY(d3, FIX(3.072711026));
538 tmp3 = MULTIPLY(d1, FIX(1.501321110));
539 z1 = MULTIPLY(z1, - FIX(0.899976223));
540 z2 = MULTIPLY(d3, - FIX(2.562915447));
541 z3 = MULTIPLY(z3, - FIX(1.961570560));
542 z4 = MULTIPLY(d1, - FIX(0.390180644));
554 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
556 z5 = MULTIPLY(z3, FIX(1.175875602));
558 tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
559 tmp2 = MULTIPLY(d3, FIX(0.509795579));
560 z1 = MULTIPLY(d7, - FIX(0.899976223));
561 z2 = MULTIPLY(d3, - FIX(2.562915447));
562 z3 = MULTIPLY(z3, - FIX2(0.785694958));
574 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
576 z5 = MULTIPLY(z1, FIX(1.175875602));
578 tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
579 tmp3 = MULTIPLY(d1, FIX2(1.111140466));
580 z1 = MULTIPLY(z1, FIX2(0.275899379));
581 z3 = MULTIPLY(d7, - FIX(1.961570560));
582 z4 = MULTIPLY(d1, - FIX(0.390180644));
591 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
592 tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
593 tmp1 = MULTIPLY(d7, FIX(1.175875602));
594 tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
595 tmp3 = MULTIPLY(d7, FIX2(0.275899379));
608 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
611 z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
613 tmp1 = MULTIPLY(d5, FIX(2.053119869));
614 tmp2 = MULTIPLY(d3, FIX(3.072711026));
615 tmp3 = MULTIPLY(d1, FIX(1.501321110));
616 z1 = MULTIPLY(d1, - FIX(0.899976223));
617 z2 = MULTIPLY(z2, - FIX(2.562915447));
618 z3 = MULTIPLY(d3, - FIX(1.961570560));
619 z4 = MULTIPLY(z4, - FIX(0.390180644));
631 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
633 z5 = MULTIPLY(z2, FIX(1.175875602));
635 tmp1 = MULTIPLY(d5, FIX2(1.662939225));
636 tmp2 = MULTIPLY(d3, FIX2(1.111140466));
637 z2 = MULTIPLY(z2, - FIX2(1.387039845));
638 z3 = MULTIPLY(d3, - FIX(1.961570560));
639 z4 = MULTIPLY(d5, - FIX(0.390180644));
651 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
653 z5 = MULTIPLY(z4, FIX(1.175875602));
655 tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
656 tmp3 = MULTIPLY(d1, FIX2(0.601344887));
657 z1 = MULTIPLY(d1, - FIX(0.899976223));
658 z2 = MULTIPLY(d5, - FIX(2.562915447));
659 z4 = MULTIPLY(z4, FIX2(0.785694958));
668 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
669 tmp0 = MULTIPLY(d5, FIX(1.175875602));
670 tmp1 = MULTIPLY(d5, FIX2(0.275899380));
671 tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
672 tmp3 = MULTIPLY(d5, FIX2(0.785694958));
682 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
685 tmp2 = MULTIPLY(d3, - FIX(1.451774981));
686 tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
687 z1 = MULTIPLY(d1, FIX(1.061594337));
688 z2 = MULTIPLY(d3, - FIX(2.172734803));
689 z4 = MULTIPLY(z5, FIX(0.785694958));
690 z5 = MULTIPLY(z5, FIX(1.175875602));
699 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
700 tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
701 tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
702 tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
703 tmp3 = MULTIPLY(d3, FIX(1.175875602));
710 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
711 tmp0 = MULTIPLY(d1, FIX2(0.275899379));
712 tmp1 = MULTIPLY(d1, FIX2(0.785694958));
713 tmp2 = MULTIPLY(d1, FIX(1.175875602));
714 tmp3 = MULTIPLY(d1, FIX2(1.387039845));
718 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
719 tmp0 = tmp1 = tmp2 = tmp3 = 0;
725 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
/* Outputs k and 7-k are the sum and difference of the same even/odd pair;
 * pass 1 keeps PASS1_BITS of extra precision by descaling only by
 * CONST_BITS-PASS1_BITS. */
727 dataptr[0] = (elem_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
728 dataptr[7] = (elem_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
729 dataptr[1] = (elem_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
730 dataptr[6] = (elem_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
731 dataptr[2] = (elem_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
732 dataptr[5] = (elem_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
733 dataptr[3] = (elem_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
734 dataptr[4] = (elem_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
736 dataptr += DCTSIZE; /* advance pointer to next row */
739 /* Pass 2: process columns. */
740 /* Note that we must descale the results by a factor of 8 == 2**3, */
741 /* and also undo the PASS1_BITS scaling. */
/* Same even/odd case analysis as pass 1, but elements are read and written
 * DCTSIZE apart and dataptr advances by one per iteration. The counter is
 * still named rowctr although it now counts columns. */
744 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
746 /* Columns of zeroes can be exploited in the same way as we did with rows.
747 * However, the row calculation has created many nonzero AC terms, so the
748 * simplification applies less often (typically 5% to 10% of the time).
749 * On machines with very fast multiplication, it's possible that the
750 * test takes more time than it's worth. In that case this section
751 * may be commented out.
754 d0 = dataptr[DCTSIZE*0];
755 d1 = dataptr[DCTSIZE*1];
756 d2 = dataptr[DCTSIZE*2];
757 d3 = dataptr[DCTSIZE*3];
758 d4 = dataptr[DCTSIZE*4];
759 d5 = dataptr[DCTSIZE*5];
760 d6 = dataptr[DCTSIZE*6];
761 d7 = dataptr[DCTSIZE*7];
763 /* Even part: reverse the even part of the forward DCT. */
764 /* The rotator is sqrt(2)*c(-6). */
773 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
774 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
775 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
776 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
778 tmp0 = (d0 + d4) << CONST_BITS;
779 tmp1 = (d0 - d4) << CONST_BITS;
788 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
789 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
790 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
791 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
793 tmp0 = d4 << CONST_BITS;
798 tmp12 = -(tmp0 + tmp2);
805 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
806 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
807 tmp3 = MULTIPLY(d6, FIX(0.541196100));
809 tmp0 = (d0 + d4) << CONST_BITS;
810 tmp1 = (d0 - d4) << CONST_BITS;
819 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
820 tmp2 = MULTIPLY(d6, -FIX2(1.306562965));
821 tmp3 = MULTIPLY(d6, FIX(0.541196100));
823 tmp0 = d4 << CONST_BITS;
828 tmp12 = -(tmp0 + tmp2);
838 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
839 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
840 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
841 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
843 tmp0 = d0 << CONST_BITS;
852 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
853 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
854 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
855 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
867 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
868 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
869 tmp3 = MULTIPLY(d6, FIX(0.541196100));
871 tmp0 = d0 << CONST_BITS;
880 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
881 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
882 tmp3 = MULTIPLY(d6, FIX(0.541196100));
899 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
900 tmp2 = MULTIPLY(d2, FIX(0.541196100));
901 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
903 tmp0 = (d0 + d4) << CONST_BITS;
904 tmp1 = (d0 - d4) << CONST_BITS;
913 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
914 tmp2 = MULTIPLY(d2, FIX(0.541196100));
915 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
917 tmp0 = d4 << CONST_BITS;
922 tmp12 = -(tmp0 + tmp2);
929 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
930 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
931 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
935 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
936 tmp10 = tmp13 = d4 << CONST_BITS;
937 tmp11 = tmp12 = -tmp10;
947 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
948 tmp2 = MULTIPLY(d2, FIX(0.541196100));
949 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
951 tmp0 = d0 << CONST_BITS;
960 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
961 tmp2 = MULTIPLY(d2, FIX(0.541196100));
962 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
974 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
975 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
979 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
980 tmp10 = tmp13 = tmp11 = tmp12 = 0;
986 /* Odd part per figure 8; the matrix is unitary and hence its
987 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
997 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
1002 z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
1004 tmp0 = MULTIPLY(d7, FIX(0.298631336));
1005 tmp1 = MULTIPLY(d5, FIX(2.053119869));
1006 tmp2 = MULTIPLY(d3, FIX(3.072711026));
1007 tmp3 = MULTIPLY(d1, FIX(1.501321110));
1008 z1 = MULTIPLY(z1, - FIX(0.899976223));
1009 z2 = MULTIPLY(z2, - FIX(2.562915447));
1010 z3 = MULTIPLY(z3, - FIX(1.961570560));
1011 z4 = MULTIPLY(z4, - FIX(0.390180644));
1023 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
1026 z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
1028 tmp0 = MULTIPLY(d7, FIX(0.298631336));
1029 tmp1 = MULTIPLY(d5, FIX(2.053119869));
1030 tmp2 = MULTIPLY(d3, FIX(3.072711026));
1031 z1 = MULTIPLY(d7, - FIX(0.899976223));
1032 z2 = MULTIPLY(z2, - FIX(2.562915447));
1033 z3 = MULTIPLY(z3, - FIX(1.961570560));
1034 z4 = MULTIPLY(d5, - FIX(0.390180644));
1049 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
1052 z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
1054 tmp0 = MULTIPLY(d7, FIX(0.298631336));
1055 tmp1 = MULTIPLY(d5, FIX(2.053119869));
1056 tmp3 = MULTIPLY(d1, FIX(1.501321110));
1057 z1 = MULTIPLY(z1, - FIX(0.899976223));
1058 z2 = MULTIPLY(d5, - FIX(2.562915447));
1059 z3 = MULTIPLY(d7, - FIX(1.961570560));
1060 z4 = MULTIPLY(z4, - FIX(0.390180644));
1072 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
1073 z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
1075 tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1076 tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1077 z1 = MULTIPLY(d7, - FIX(0.899976223));
1078 z3 = MULTIPLY(d7, - FIX(1.961570560));
1079 z2 = MULTIPLY(d5, - FIX(2.562915447));
1080 z4 = MULTIPLY(d5, - FIX(0.390180644));
1098 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
1101 z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
1103 tmp0 = MULTIPLY(d7, FIX(0.298631336));
1104 tmp2 = MULTIPLY(d3, FIX(3.072711026));
1105 tmp3 = MULTIPLY(d1, FIX(1.501321110));
1106 z1 = MULTIPLY(z1, - FIX(0.899976223));
1107 z2 = MULTIPLY(d3, - FIX(2.562915447));
1108 z3 = MULTIPLY(z3, - FIX(1.961570560));
1109 z4 = MULTIPLY(d1, - FIX(0.390180644));
1121 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1123 z5 = MULTIPLY(z3, FIX(1.175875602));
1125 tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1126 z1 = MULTIPLY(d7, - FIX(0.899976223));
1127 tmp2 = MULTIPLY(d3, FIX(0.509795579));
1128 z2 = MULTIPLY(d3, - FIX(2.562915447));
1129 z3 = MULTIPLY(z3, - FIX2(0.785694958));
1141 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1143 z5 = MULTIPLY(z1, FIX(1.175875602));
1145 tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
1146 tmp3 = MULTIPLY(d1, FIX2(1.111140466));
1147 z1 = MULTIPLY(z1, FIX2(0.275899379));
1148 z3 = MULTIPLY(d7, - FIX(1.961570560));
1149 z4 = MULTIPLY(d1, - FIX(0.390180644));
1158 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1159 tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
1160 tmp1 = MULTIPLY(d7, FIX(1.175875602));
1161 tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
1162 tmp3 = MULTIPLY(d7, FIX2(0.275899379));
1175 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1178 z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
1180 tmp1 = MULTIPLY(d5, FIX(2.053119869));
1181 tmp2 = MULTIPLY(d3, FIX(3.072711026));
1182 tmp3 = MULTIPLY(d1, FIX(1.501321110));
1183 z1 = MULTIPLY(d1, - FIX(0.899976223));
1184 z2 = MULTIPLY(z2, - FIX(2.562915447));
1185 z3 = MULTIPLY(d3, - FIX(1.961570560));
1186 z4 = MULTIPLY(z4, - FIX(0.390180644));
1198 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1200 z5 = MULTIPLY(z2, FIX(1.175875602));
1202 tmp1 = MULTIPLY(d5, FIX2(1.662939225));
1203 tmp2 = MULTIPLY(d3, FIX2(1.111140466));
1204 z2 = MULTIPLY(z2, - FIX2(1.387039845));
1205 z3 = MULTIPLY(d3, - FIX(1.961570560));
1206 z4 = MULTIPLY(d5, - FIX(0.390180644));
1218 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1220 z5 = MULTIPLY(z4, FIX(1.175875602));
1222 tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1223 tmp3 = MULTIPLY(d1, FIX2(0.601344887));
1224 z1 = MULTIPLY(d1, - FIX(0.899976223));
1225 z2 = MULTIPLY(d5, - FIX(2.562915447));
1226 z4 = MULTIPLY(z4, FIX2(0.785694958));
1235 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1236 tmp0 = MULTIPLY(d5, FIX(1.175875602));
1237 tmp1 = MULTIPLY(d5, FIX2(0.275899380));
1238 tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
1239 tmp3 = MULTIPLY(d5, FIX2(0.785694958));
1249 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1252 tmp2 = MULTIPLY(d3, - FIX(1.451774981));
1253 tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
1254 z1 = MULTIPLY(d1, FIX(1.061594337));
1255 z2 = MULTIPLY(d3, - FIX(2.172734803));
1256 z4 = MULTIPLY(z5, FIX(0.785694958));
1257 z5 = MULTIPLY(z5, FIX(1.175875602));
1266 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1267 tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
1268 tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
1269 tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
1270 tmp3 = MULTIPLY(d3, FIX(1.175875602));
1277 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1278 tmp0 = MULTIPLY(d1, FIX2(0.275899379));
1279 tmp1 = MULTIPLY(d1, FIX2(0.785694958));
1280 tmp2 = MULTIPLY(d1, FIX(1.175875602));
1281 tmp3 = MULTIPLY(d1, FIX2(1.387039845));
1285 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1286 tmp0 = tmp1 = tmp2 = tmp3 = 0;
1292 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
/* Same sum/difference pairing as pass 1; the larger shift also removes the
 * PASS1_BITS scaling and the factor of 8 mentioned at the top of pass 2. */
1294 dataptr[DCTSIZE*0] = (elem_t) DESCALE(tmp10 + tmp3,
1295 CONST_BITS+PASS1_BITS+3);
1296 dataptr[DCTSIZE*7] = (elem_t) DESCALE(tmp10 - tmp3,
1297 CONST_BITS+PASS1_BITS+3);
1298 dataptr[DCTSIZE*1] = (elem_t) DESCALE(tmp11 + tmp2,
1299 CONST_BITS+PASS1_BITS+3);
1300 dataptr[DCTSIZE*6] = (elem_t) DESCALE(tmp11 - tmp2,
1301 CONST_BITS+PASS1_BITS+3);
1302 dataptr[DCTSIZE*2] = (elem_t) DESCALE(tmp12 + tmp1,
1303 CONST_BITS+PASS1_BITS+3);
1304 dataptr[DCTSIZE*5] = (elem_t) DESCALE(tmp12 - tmp1,
1305 CONST_BITS+PASS1_BITS+3);
1306 dataptr[DCTSIZE*3] = (elem_t) DESCALE(tmp13 + tmp0,
1307 CONST_BITS+PASS1_BITS+3);
1308 dataptr[DCTSIZE*4] = (elem_t) DESCALE(tmp13 - tmp0,
1309 CONST_BITS+PASS1_BITS+3);
1311 dataptr++; /* advance pointer to next column */