1 /*****************************************************************************
3 *****************************************************************************
4 * Copyright (C) 1999, 2000 VideoLAN
5 * $Id: idct.c,v 1.13 2001/07/17 09:48:07 massiot Exp $
7 * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
24 #define MODULE_NAME idct
25 #include "modules_inner.h"
27 /*****************************************************************************
29 *****************************************************************************/
41 #include "video_output.h"
43 #include "vdec_ext-plugins.h"
45 #include "vdec_block.h"
46 #include "vdec_idct.h"
49 #include "modules_export.h"
51 /*****************************************************************************
52 * Local and extern prototypes.
53 *****************************************************************************/
/* Fills a function list with this plugin's probe and IDCT entry points. */
54 static void idct_getfunctions( function_list_t * p_function_list );
/* Probe callback: returns a preference score for this plugin. */
55 static int idct_Probe ( probedata_t *p_data );
/* Scan-table hook installed as pf_norm_scan; described further down as
 * unused in this IDCT. */
56 static void vdec_NormScan ( u8 ppi_scan[2][64] );
58 /*****************************************************************************
59 * Build configuration tree.
60 *****************************************************************************/
/* Configuration entries for this module: one window title and one comment
 * string. No other configuration items appear in this listing.
 * NOTE(review): ADD_WINDOW / ADD_COMMENT are macros defined in the module
 * headers, not visible here -- confirm their exact expansion there. */
62 ADD_WINDOW( "Configuration for IDCT module" )
63 ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
/* Capability mask: the null capability OR'ed with the IDCT capability. */
67 p_module->i_capabilities = MODULE_CAPABILITY_NULL
68 | MODULE_CAPABILITY_IDCT;
/* Descriptive name stored in the module structure. */
69 p_module->psz_longname = "IDCT module";
/* Fill the module's IDCT function list (probe callback plus the IDCT and
 * decoder entry points) via idct_getfunctions(). */
73 idct_getfunctions( &p_module->p_functions->idct );
/* Deactivation section: no statements are listed between the START and STOP
 * markers. */
76 MODULE_DEACTIVATE_START
77 MODULE_DEACTIVATE_STOP
79 /* Following functions are local */
81 /*****************************************************************************
82 * Functions exported as capabilities. They are declared as static so that
83 * we don't pollute the namespace too much.
 *
 * idct_getfunctions: fill p_function_list with this plugin's entry points.
 *
 * p_function_list: the function list to populate. Its pf_probe member and
 *                  the members of its functions.idct sub-structure are
 *                  written; nothing is read from it.
 *
 * Every entry except pf_probe and pf_norm_scan is wrapped in _M(), a macro
 * defined outside this file.
 * NOTE(review): _M() presumably derives a module-specific symbol name from
 * MODULE_NAME -- confirm against the modules headers.
84 *****************************************************************************/
85 static void idct_getfunctions( function_list_t * p_function_list )
/* Probe callback used to obtain this plugin's preference score. */
87 p_function_list->pf_probe = idct_Probe;
/* F is a local shorthand for the IDCT sub-structure being filled in. */
88 #define F p_function_list->functions.idct
/* IDCT entry points: initialisation, sparse-block variant, full variant. */
89 F.pf_idct_init = _M( vdec_InitIDCT );
90 F.pf_sparse_idct = _M( vdec_SparseIDCT );
91 F.pf_idct = _M( vdec_IDCT );
/* The scan hook is the file-local static function, hence no _M() wrapper. */
92 F.pf_norm_scan = vdec_NormScan;
/* Decoder entry points: initialisation, then the two macroblock decoders
 * (the _c and _bw members). */
93 F.pf_decode_init = _M( vdec_InitDecode );
94 F.pf_decode_mb_c = _M( vdec_DecodeMacroblockC );
95 F.pf_decode_mb_bw = _M( vdec_DecodeMacroblockBW );
99 /*****************************************************************************
100 * idct_Probe: returns a preference score
 *
 * p_data: probe data supplied by the caller; it is not referenced in any of
 *         the statements visible in this listing.
 *
 * Returns an int preference score. The actual score values are not visible
 * in this listing, so they are not documented here.
101 *****************************************************************************/
102 static int idct_Probe( probedata_t *p_data )
/* Special case: TestMethod() succeeds for IDCT_METHOD_VAR with either
 * "idct" or "c".
 * NOTE(review): presumably this means the user explicitly requested this
 * implementation by name -- confirm against TestMethod()'s definition. */
104 if( TestMethod( IDCT_METHOD_VAR, "idct" )
105 || TestMethod( IDCT_METHOD_VAR, "c" ))
110 /* This plugin always works */
114 /*****************************************************************************
115 * vdec_NormScan : Unused in this IDCT
 *
 * ppi_scan: a 2 x 64 table of u8 values.
 *
 * This function is installed as the pf_norm_scan entry by
 * idct_getfunctions(). Its body is not visible in this listing.
 * NOTE(review): the "unused" remark suggests it leaves ppi_scan untouched --
 * confirm against the full source.
116 *****************************************************************************/
117 static void vdec_NormScan( u8 ppi_scan[2][64] )
121 /*****************************************************************************
122 * vdec_IDCT : IDCT function for normal matrices
123 *****************************************************************************/
124 void _M( vdec_IDCT )( void * p_idct_data, dctelem_t * p_block, int i_idontcare )
126 s32 tmp0, tmp1, tmp2, tmp3;
127 s32 tmp10, tmp11, tmp12, tmp13;
128 s32 z1, z2, z3, z4, z5;
129 s32 d0, d1, d2, d3, d4, d5, d6, d7;
135 /* Pass 1: process rows. */
136 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
137 /* furthermore, we scale the results by 2**PASS1_BITS. */
141 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
143 /* Due to quantization, we will usually find that many of the input
144 * coefficients are zero, especially the AC terms. We can exploit this
145 * by short-circuiting the IDCT calculation for any row in which all
146 * the AC terms are zero. In that case each output is equal to the
147 * DC coefficient (with scale factor as needed).
148 * With typical images and quantization tables, half or more of the
149 * row DCT calculations can be simplified this way.
152 register int * idataptr = (int*)dataptr;
155 if ( (d1 == 0) && ((idataptr[1] | idataptr[2] | idataptr[3]) == 0) )
157 /* AC terms all zero */
160 /* Compute a 32 bit value to assign. */
161 dctelem_t dcval = (dctelem_t) (d0 << PASS1_BITS);
162 register int v = (dcval & 0xffff) | (dcval << 16);
170 dataptr += DCTSIZE; /* advance pointer to next row */
180 /* Even part: reverse the even part of the forward DCT. */
181 /* The rotator is sqrt(2)*c(-6). */
190 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
191 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
192 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
193 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
195 tmp0 = (d0 + d4) << CONST_BITS;
196 tmp1 = (d0 - d4) << CONST_BITS;
205 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
206 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
207 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
208 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
210 tmp0 = d4 << CONST_BITS;
215 tmp12 = -(tmp0 + tmp2);
222 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
223 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
224 tmp3 = MULTIPLY(d6, FIX(0.541196100));
226 tmp0 = (d0 + d4) << CONST_BITS;
227 tmp1 = (d0 - d4) << CONST_BITS;
236 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
237 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
238 tmp3 = MULTIPLY(d6, FIX(0.541196100));
240 tmp0 = d4 << CONST_BITS;
245 tmp12 = -(tmp0 + tmp2);
255 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
256 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
257 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
258 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
260 tmp0 = d0 << CONST_BITS;
269 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
270 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
271 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
272 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
284 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
285 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
286 tmp3 = MULTIPLY(d6, FIX(0.541196100));
288 tmp0 = d0 << CONST_BITS;
297 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
298 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
299 tmp3 = MULTIPLY(d6, FIX(0.541196100));
317 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
318 tmp2 = MULTIPLY(d2, FIX(0.541196100));
319 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
321 tmp0 = (d0 + d4) << CONST_BITS;
322 tmp1 = (d0 - d4) << CONST_BITS;
331 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
332 tmp2 = MULTIPLY(d2, FIX(0.541196100));
333 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
335 tmp0 = d4 << CONST_BITS;
340 tmp12 = -(tmp0 + tmp2);
347 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
348 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
349 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
353 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
354 tmp10 = tmp13 = d4 << CONST_BITS;
355 tmp11 = tmp12 = -tmp10;
365 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
366 tmp2 = MULTIPLY(d2, FIX(0.541196100));
367 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
369 tmp0 = d0 << CONST_BITS;
378 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
379 tmp2 = MULTIPLY(d2, FIX(0.541196100));
380 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
392 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
393 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
397 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
398 tmp10 = tmp13 = tmp11 = tmp12 = 0;
405 /* Odd part per figure 8; the matrix is unitary and hence its
406 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
417 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
422 z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
424 tmp0 = MULTIPLY(d7, FIX(0.298631336));
425 tmp1 = MULTIPLY(d5, FIX(2.053119869));
426 tmp2 = MULTIPLY(d3, FIX(3.072711026));
427 tmp3 = MULTIPLY(d1, FIX(1.501321110));
428 z1 = MULTIPLY(z1, - FIX(0.899976223));
429 z2 = MULTIPLY(z2, - FIX(2.562915447));
430 z3 = MULTIPLY(z3, - FIX(1.961570560));
431 z4 = MULTIPLY(z4, - FIX(0.390180644));
443 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
446 z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
448 tmp0 = MULTIPLY(d7, FIX(0.298631336));
449 tmp1 = MULTIPLY(d5, FIX(2.053119869));
450 tmp2 = MULTIPLY(d3, FIX(3.072711026));
451 z1 = MULTIPLY(d7, - FIX(0.899976223));
452 z2 = MULTIPLY(z2, - FIX(2.562915447));
453 z3 = MULTIPLY(z3, - FIX(1.961570560));
454 z4 = MULTIPLY(d5, - FIX(0.390180644));
469 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
472 z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
474 tmp0 = MULTIPLY(d7, FIX(0.298631336));
475 tmp1 = MULTIPLY(d5, FIX(2.053119869));
476 tmp3 = MULTIPLY(d1, FIX(1.501321110));
477 z1 = MULTIPLY(z1, - FIX(0.899976223));
478 z2 = MULTIPLY(d5, - FIX(2.562915447));
479 z3 = MULTIPLY(d7, - FIX(1.961570560));
480 z4 = MULTIPLY(z4, - FIX(0.390180644));
492 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
493 z5 = MULTIPLY(d7 + d5, FIX(1.175875602));
495 tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
496 tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
497 z1 = MULTIPLY(d7, - FIX(0.899976223));
498 z3 = MULTIPLY(d7, - FIX(1.961570560));
499 z2 = MULTIPLY(d5, - FIX(2.562915447));
500 z4 = MULTIPLY(d5, - FIX(0.390180644));
518 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
521 z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
523 tmp0 = MULTIPLY(d7, FIX(0.298631336));
524 tmp2 = MULTIPLY(d3, FIX(3.072711026));
525 tmp3 = MULTIPLY(d1, FIX(1.501321110));
526 z1 = MULTIPLY(z1, - FIX(0.899976223));
527 z2 = MULTIPLY(d3, - FIX(2.562915447));
528 z3 = MULTIPLY(z3, - FIX(1.961570560));
529 z4 = MULTIPLY(d1, - FIX(0.390180644));
541 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
543 z5 = MULTIPLY(z3, FIX(1.175875602));
545 tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
546 tmp2 = MULTIPLY(d3, FIX(0.509795579));
547 z1 = MULTIPLY(d7, - FIX(0.899976223));
548 z2 = MULTIPLY(d3, - FIX(2.562915447));
549 z3 = MULTIPLY(z3, - FIX2(0.785694958));
561 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
563 z5 = MULTIPLY(z1, FIX(1.175875602));
565 tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
566 tmp3 = MULTIPLY(d1, FIX2(1.111140466));
567 z1 = MULTIPLY(z1, FIX2(0.275899379));
568 z3 = MULTIPLY(d7, - FIX(1.961570560));
569 z4 = MULTIPLY(d1, - FIX(0.390180644));
578 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
579 tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
580 tmp1 = MULTIPLY(d7, FIX(1.175875602));
581 tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
582 tmp3 = MULTIPLY(d7, FIX2(0.275899379));
595 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
598 z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
600 tmp1 = MULTIPLY(d5, FIX(2.053119869));
601 tmp2 = MULTIPLY(d3, FIX(3.072711026));
602 tmp3 = MULTIPLY(d1, FIX(1.501321110));
603 z1 = MULTIPLY(d1, - FIX(0.899976223));
604 z2 = MULTIPLY(z2, - FIX(2.562915447));
605 z3 = MULTIPLY(d3, - FIX(1.961570560));
606 z4 = MULTIPLY(z4, - FIX(0.390180644));
618 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
620 z5 = MULTIPLY(z2, FIX(1.175875602));
622 tmp1 = MULTIPLY(d5, FIX2(1.662939225));
623 tmp2 = MULTIPLY(d3, FIX2(1.111140466));
624 z2 = MULTIPLY(z2, - FIX2(1.387039845));
625 z3 = MULTIPLY(d3, - FIX(1.961570560));
626 z4 = MULTIPLY(d5, - FIX(0.390180644));
638 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
640 z5 = MULTIPLY(z4, FIX(1.175875602));
642 tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
643 tmp3 = MULTIPLY(d1, FIX2(0.601344887));
644 z1 = MULTIPLY(d1, - FIX(0.899976223));
645 z2 = MULTIPLY(d5, - FIX(2.562915447));
646 z4 = MULTIPLY(z4, FIX2(0.785694958));
655 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
656 tmp0 = MULTIPLY(d5, FIX(1.175875602));
657 tmp1 = MULTIPLY(d5, FIX2(0.275899380));
658 tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
659 tmp3 = MULTIPLY(d5, FIX2(0.785694958));
669 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
672 tmp2 = MULTIPLY(d3, - FIX(1.451774981));
673 tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
674 z1 = MULTIPLY(d1, FIX(1.061594337));
675 z2 = MULTIPLY(d3, - FIX(2.172734803));
676 z4 = MULTIPLY(z5, FIX(0.785694958));
677 z5 = MULTIPLY(z5, FIX(1.175875602));
686 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
687 tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
688 tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
689 tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
690 tmp3 = MULTIPLY(d3, FIX(1.175875602));
697 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
698 tmp0 = MULTIPLY(d1, FIX2(0.275899379));
699 tmp1 = MULTIPLY(d1, FIX2(0.785694958));
700 tmp2 = MULTIPLY(d1, FIX(1.175875602));
701 tmp3 = MULTIPLY(d1, FIX2(1.387039845));
705 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
706 tmp0 = tmp1 = tmp2 = tmp3 = 0;
712 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
714 dataptr[0] = (dctelem_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
715 dataptr[7] = (dctelem_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
716 dataptr[1] = (dctelem_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
717 dataptr[6] = (dctelem_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
718 dataptr[2] = (dctelem_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
719 dataptr[5] = (dctelem_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
720 dataptr[3] = (dctelem_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
721 dataptr[4] = (dctelem_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
723 dataptr += DCTSIZE; /* advance pointer to next row */
726 /* Pass 2: process columns. */
727 /* Note that we must descale the results by a factor of 8 == 2**3, */
728 /* and also undo the PASS1_BITS scaling. */
731 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
733 /* Columns of zeroes can be exploited in the same way as we did with rows.
734 * However, the row calculation has created many nonzero AC terms, so the
735 * simplification applies less often (typically 5% to 10% of the time).
736 * On machines with very fast multiplication, it's possible that the
737 * test takes more time than it's worth. In that case this section
738 * may be commented out.
741 d0 = dataptr[DCTSIZE*0];
742 d1 = dataptr[DCTSIZE*1];
743 d2 = dataptr[DCTSIZE*2];
744 d3 = dataptr[DCTSIZE*3];
745 d4 = dataptr[DCTSIZE*4];
746 d5 = dataptr[DCTSIZE*5];
747 d6 = dataptr[DCTSIZE*6];
748 d7 = dataptr[DCTSIZE*7];
750 /* Even part: reverse the even part of the forward DCT. */
751 /* The rotator is sqrt(2)*c(-6). */
760 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
761 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
762 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
763 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
765 tmp0 = (d0 + d4) << CONST_BITS;
766 tmp1 = (d0 - d4) << CONST_BITS;
775 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
776 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
777 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
778 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
780 tmp0 = d4 << CONST_BITS;
785 tmp12 = -(tmp0 + tmp2);
792 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
793 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
794 tmp3 = MULTIPLY(d6, FIX(0.541196100));
796 tmp0 = (d0 + d4) << CONST_BITS;
797 tmp1 = (d0 - d4) << CONST_BITS;
806 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
807 tmp2 = MULTIPLY(d6, -FIX2(1.306562965));
808 tmp3 = MULTIPLY(d6, FIX(0.541196100));
810 tmp0 = d4 << CONST_BITS;
815 tmp12 = -(tmp0 + tmp2);
825 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
826 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
827 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
828 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
830 tmp0 = d0 << CONST_BITS;
839 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
840 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
841 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
842 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
854 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
855 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
856 tmp3 = MULTIPLY(d6, FIX(0.541196100));
858 tmp0 = d0 << CONST_BITS;
867 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
868 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
869 tmp3 = MULTIPLY(d6, FIX(0.541196100));
886 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
887 tmp2 = MULTIPLY(d2, FIX(0.541196100));
888 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
890 tmp0 = (d0 + d4) << CONST_BITS;
891 tmp1 = (d0 - d4) << CONST_BITS;
900 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
901 tmp2 = MULTIPLY(d2, FIX(0.541196100));
902 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
904 tmp0 = d4 << CONST_BITS;
909 tmp12 = -(tmp0 + tmp2);
916 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
917 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
918 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
922 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
923 tmp10 = tmp13 = d4 << CONST_BITS;
924 tmp11 = tmp12 = -tmp10;
934 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
935 tmp2 = MULTIPLY(d2, FIX(0.541196100));
936 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
938 tmp0 = d0 << CONST_BITS;
947 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
948 tmp2 = MULTIPLY(d2, FIX(0.541196100));
949 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
961 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
962 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
966 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
967 tmp10 = tmp13 = tmp11 = tmp12 = 0;
973 /* Odd part per figure 8; the matrix is unitary and hence its
974 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
984 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
989 z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
991 tmp0 = MULTIPLY(d7, FIX(0.298631336));
992 tmp1 = MULTIPLY(d5, FIX(2.053119869));
993 tmp2 = MULTIPLY(d3, FIX(3.072711026));
994 tmp3 = MULTIPLY(d1, FIX(1.501321110));
995 z1 = MULTIPLY(z1, - FIX(0.899976223));
996 z2 = MULTIPLY(z2, - FIX(2.562915447));
997 z3 = MULTIPLY(z3, - FIX(1.961570560));
998 z4 = MULTIPLY(z4, - FIX(0.390180644));
1010 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
1013 z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
1015 tmp0 = MULTIPLY(d7, FIX(0.298631336));
1016 tmp1 = MULTIPLY(d5, FIX(2.053119869));
1017 tmp2 = MULTIPLY(d3, FIX(3.072711026));
1018 z1 = MULTIPLY(d7, - FIX(0.899976223));
1019 z2 = MULTIPLY(z2, - FIX(2.562915447));
1020 z3 = MULTIPLY(z3, - FIX(1.961570560));
1021 z4 = MULTIPLY(d5, - FIX(0.390180644));
1036 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
1039 z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
1041 tmp0 = MULTIPLY(d7, FIX(0.298631336));
1042 tmp1 = MULTIPLY(d5, FIX(2.053119869));
1043 tmp3 = MULTIPLY(d1, FIX(1.501321110));
1044 z1 = MULTIPLY(z1, - FIX(0.899976223));
1045 z2 = MULTIPLY(d5, - FIX(2.562915447));
1046 z3 = MULTIPLY(d7, - FIX(1.961570560));
1047 z4 = MULTIPLY(z4, - FIX(0.390180644));
1059 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
1060 z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
1062 tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1063 tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1064 z1 = MULTIPLY(d7, - FIX(0.899976223));
1065 z3 = MULTIPLY(d7, - FIX(1.961570560));
1066 z2 = MULTIPLY(d5, - FIX(2.562915447));
1067 z4 = MULTIPLY(d5, - FIX(0.390180644));
1085 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
1088 z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
1090 tmp0 = MULTIPLY(d7, FIX(0.298631336));
1091 tmp2 = MULTIPLY(d3, FIX(3.072711026));
1092 tmp3 = MULTIPLY(d1, FIX(1.501321110));
1093 z1 = MULTIPLY(z1, - FIX(0.899976223));
1094 z2 = MULTIPLY(d3, - FIX(2.562915447));
1095 z3 = MULTIPLY(z3, - FIX(1.961570560));
1096 z4 = MULTIPLY(d1, - FIX(0.390180644));
1108 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1110 z5 = MULTIPLY(z3, FIX(1.175875602));
1112 tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1113 z1 = MULTIPLY(d7, - FIX(0.899976223));
1114 tmp2 = MULTIPLY(d3, FIX(0.509795579));
1115 z2 = MULTIPLY(d3, - FIX(2.562915447));
1116 z3 = MULTIPLY(z3, - FIX2(0.785694958));
1128 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1130 z5 = MULTIPLY(z1, FIX(1.175875602));
1132 tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
1133 tmp3 = MULTIPLY(d1, FIX2(1.111140466));
1134 z1 = MULTIPLY(z1, FIX2(0.275899379));
1135 z3 = MULTIPLY(d7, - FIX(1.961570560));
1136 z4 = MULTIPLY(d1, - FIX(0.390180644));
1145 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1146 tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
1147 tmp1 = MULTIPLY(d7, FIX(1.175875602));
1148 tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
1149 tmp3 = MULTIPLY(d7, FIX2(0.275899379));
1162 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1165 z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
1167 tmp1 = MULTIPLY(d5, FIX(2.053119869));
1168 tmp2 = MULTIPLY(d3, FIX(3.072711026));
1169 tmp3 = MULTIPLY(d1, FIX(1.501321110));
1170 z1 = MULTIPLY(d1, - FIX(0.899976223));
1171 z2 = MULTIPLY(z2, - FIX(2.562915447));
1172 z3 = MULTIPLY(d3, - FIX(1.961570560));
1173 z4 = MULTIPLY(z4, - FIX(0.390180644));
1185 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1187 z5 = MULTIPLY(z2, FIX(1.175875602));
1189 tmp1 = MULTIPLY(d5, FIX2(1.662939225));
1190 tmp2 = MULTIPLY(d3, FIX2(1.111140466));
1191 z2 = MULTIPLY(z2, - FIX2(1.387039845));
1192 z3 = MULTIPLY(d3, - FIX(1.961570560));
1193 z4 = MULTIPLY(d5, - FIX(0.390180644));
1205 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1207 z5 = MULTIPLY(z4, FIX(1.175875602));
1209 tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1210 tmp3 = MULTIPLY(d1, FIX2(0.601344887));
1211 z1 = MULTIPLY(d1, - FIX(0.899976223));
1212 z2 = MULTIPLY(d5, - FIX(2.562915447));
1213 z4 = MULTIPLY(z4, FIX2(0.785694958));
1222 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1223 tmp0 = MULTIPLY(d5, FIX(1.175875602));
1224 tmp1 = MULTIPLY(d5, FIX2(0.275899380));
1225 tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
1226 tmp3 = MULTIPLY(d5, FIX2(0.785694958));
1236 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1239 tmp2 = MULTIPLY(d3, - FIX(1.451774981));
1240 tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
1241 z1 = MULTIPLY(d1, FIX(1.061594337));
1242 z2 = MULTIPLY(d3, - FIX(2.172734803));
1243 z4 = MULTIPLY(z5, FIX(0.785694958));
1244 z5 = MULTIPLY(z5, FIX(1.175875602));
1253 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1254 tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
1255 tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
1256 tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
1257 tmp3 = MULTIPLY(d3, FIX(1.175875602));
1264 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1265 tmp0 = MULTIPLY(d1, FIX2(0.275899379));
1266 tmp1 = MULTIPLY(d1, FIX2(0.785694958));
1267 tmp2 = MULTIPLY(d1, FIX(1.175875602));
1268 tmp3 = MULTIPLY(d1, FIX2(1.387039845));
1272 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1273 tmp0 = tmp1 = tmp2 = tmp3 = 0;
1279 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1281 dataptr[DCTSIZE*0] = (dctelem_t) DESCALE(tmp10 + tmp3,
1282 CONST_BITS+PASS1_BITS+3);
1283 dataptr[DCTSIZE*7] = (dctelem_t) DESCALE(tmp10 - tmp3,
1284 CONST_BITS+PASS1_BITS+3);
1285 dataptr[DCTSIZE*1] = (dctelem_t) DESCALE(tmp11 + tmp2,
1286 CONST_BITS+PASS1_BITS+3);
1287 dataptr[DCTSIZE*6] = (dctelem_t) DESCALE(tmp11 - tmp2,
1288 CONST_BITS+PASS1_BITS+3);
1289 dataptr[DCTSIZE*2] = (dctelem_t) DESCALE(tmp12 + tmp1,
1290 CONST_BITS+PASS1_BITS+3);
1291 dataptr[DCTSIZE*5] = (dctelem_t) DESCALE(tmp12 - tmp1,
1292 CONST_BITS+PASS1_BITS+3);
1293 dataptr[DCTSIZE*3] = (dctelem_t) DESCALE(tmp13 + tmp0,
1294 CONST_BITS+PASS1_BITS+3);
1295 dataptr[DCTSIZE*4] = (dctelem_t) DESCALE(tmp13 - tmp0,
1296 CONST_BITS+PASS1_BITS+3);
1298 dataptr++; /* advance pointer to next column */