1 /*****************************************************************************
3 *****************************************************************************
4 * Copyright (C) 1999, 2000 VideoLAN
5 * $Id: idct.c,v 1.10 2001/05/30 17:03:12 sam Exp $
7 * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
24 #define MODULE_NAME idct
25 #include "modules_inner.h"
27 /*****************************************************************************
29 *****************************************************************************/
41 #include "video_output.h"
43 #include "video_decoder.h"
47 #include "vdec_block.h"
48 #include "vdec_idct.h"
50 /*****************************************************************************
51 * Local and extern prototypes.
52 *****************************************************************************/
53 static void idct_getfunctions( function_list_t * p_function_list ); /* fills the module's function table */
54 static int idct_Probe ( probedata_t *p_data ); /* returns a preference score */
55 static void vdec_NormScan ( u8 ppi_scan[2][64] ); /* unused in this IDCT; registered as pf_norm_scan */
57 /*****************************************************************************
58 * Build configuration tree.
59 *****************************************************************************/
61 ADD_WINDOW( "Configuration for IDCT module" ) /* NOTE(review): presumably the title of this module's configuration window -- confirm against ADD_WINDOW */
62 ADD_COMMENT( "Ha, ha -- nothing to configure yet" ) /* only a placeholder text entry; no option is declared in the lines visible here */
66 p_module->i_capabilities = MODULE_CAPABILITY_NULL
67 | MODULE_CAPABILITY_IDCT; /* advertise the IDCT capability flag */
68 p_module->psz_longname = "IDCT module"; /* long name string of the module */
72 idct_getfunctions( &p_module->p_functions->idct ); /* fill the idct entry of the module's function list (probe + IDCT/decoder pointers) */
75 MODULE_DEACTIVATE_START /* no module-specific statement sits between START and STOP */
76 MODULE_DEACTIVATE_STOP
78 /* Following functions are local */
80 /*****************************************************************************
81 * Functions exported as capabilities. They are declared as static so that
82 * we don't pollute the namespace too much.
83 *****************************************************************************/
84 static void idct_getfunctions( function_list_t * p_function_list ) /* stores this module's callbacks into *p_function_list */
86 p_function_list->pf_probe = idct_Probe; /* probe callback (returns a preference score) */
/* F is a local shorthand for the idct function sub-table filled in below. */
87 #define F p_function_list->functions.idct
88 F.pf_idct_init = _M( vdec_InitIDCT ); /* NOTE(review): _M() presumably yields the module-specific symbol name -- confirm in modules_inner.h */
89 F.pf_sparse_idct = _M( vdec_SparseIDCT );
90 F.pf_idct = _M( vdec_IDCT ); /* the IDCT for normal matrices defined in this file */
91 F.pf_norm_scan = vdec_NormScan; /* file-local static function, registered without _M() */
92 F.pf_decode_init = _M( vdec_InitDecode );
93 F.pf_decode_mb_c = _M( vdec_DecodeMacroblockC ); /* NOTE(review): "_c" / "_bw" presumably mean colour / black-and-white macroblock decoding -- confirm in vdec_block.h */
94 F.pf_decode_mb_bw = _M( vdec_DecodeMacroblockBW );
98 /*****************************************************************************
99 * idct_Probe: returns a preference score
100 *****************************************************************************/
101 static int idct_Probe( probedata_t *p_data ) /* installed as pf_probe by idct_getfunctions(); p_data is not referenced in the lines visible here */
103 if( TestMethod( IDCT_METHOD_VAR, "idct" ) ) /* NOTE(review): presumably true when "idct" was explicitly requested through IDCT_METHOD_VAR -- confirm against TestMethod() */
108 /* This plugin always works */
112 /*****************************************************************************
113 * vdec_NormScan : Unused in this IDCT
114 *****************************************************************************/
115 static void vdec_NormScan( u8 ppi_scan[2][64] ) /* registered as pf_norm_scan by idct_getfunctions(); takes the 2x64 scan tables as its only argument */
119 /*****************************************************************************
120 * vdec_IDCT : IDCT function for normal matrices
121 *****************************************************************************/
122 void _M( vdec_IDCT )( vdec_thread_t * p_vdec, dctelem_t * p_block,
125 s32 tmp0, tmp1, tmp2, tmp3;
126 s32 tmp10, tmp11, tmp12, tmp13;
127 s32 z1, z2, z3, z4, z5;
128 s32 d0, d1, d2, d3, d4, d5, d6, d7;
134 /* Pass 1: process rows. */
135 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
136 /* furthermore, we scale the results by 2**PASS1_BITS. */
140 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
142 /* Due to quantization, we will usually find that many of the input
143 * coefficients are zero, especially the AC terms. We can exploit this
144 * by short-circuiting the IDCT calculation for any row in which all
145 * the AC terms are zero. In that case each output is equal to the
146 * DC coefficient (with scale factor as needed).
147 * With typical images and quantization tables, half or more of the
148 * row DCT calculations can be simplified this way.
151 register int * idataptr = (int*)dataptr;
154 if ( (d1 == 0) && ((idataptr[1] | idataptr[2] | idataptr[3]) == 0) )
156 /* AC terms all zero */
159 /* Compute a 32 bit value to assign. */
160 dctelem_t dcval = (dctelem_t) (d0 << PASS1_BITS);
161 register int v = (dcval & 0xffff) | (dcval << 16);
169 dataptr += DCTSIZE; /* advance pointer to next row */
179 /* Even part: reverse the even part of the forward DCT. */
180 /* The rotator is sqrt(2)*c(-6). */
189 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
190 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
191 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
192 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
194 tmp0 = (d0 + d4) << CONST_BITS;
195 tmp1 = (d0 - d4) << CONST_BITS;
204 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
205 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
206 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
207 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
209 tmp0 = d4 << CONST_BITS;
214 tmp12 = -(tmp0 + tmp2);
221 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
222 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
223 tmp3 = MULTIPLY(d6, FIX(0.541196100));
225 tmp0 = (d0 + d4) << CONST_BITS;
226 tmp1 = (d0 - d4) << CONST_BITS;
235 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
236 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
237 tmp3 = MULTIPLY(d6, FIX(0.541196100));
239 tmp0 = d4 << CONST_BITS;
244 tmp12 = -(tmp0 + tmp2);
254 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
255 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
256 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
257 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
259 tmp0 = d0 << CONST_BITS;
268 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
269 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
270 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
271 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
283 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
284 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
285 tmp3 = MULTIPLY(d6, FIX(0.541196100));
287 tmp0 = d0 << CONST_BITS;
296 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
297 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
298 tmp3 = MULTIPLY(d6, FIX(0.541196100));
316 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
317 tmp2 = MULTIPLY(d2, FIX(0.541196100));
318 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
320 tmp0 = (d0 + d4) << CONST_BITS;
321 tmp1 = (d0 - d4) << CONST_BITS;
330 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
331 tmp2 = MULTIPLY(d2, FIX(0.541196100));
332 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
334 tmp0 = d4 << CONST_BITS;
339 tmp12 = -(tmp0 + tmp2);
346 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
347 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
348 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
352 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
353 tmp10 = tmp13 = d4 << CONST_BITS;
354 tmp11 = tmp12 = -tmp10;
364 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
365 tmp2 = MULTIPLY(d2, FIX(0.541196100));
366 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
368 tmp0 = d0 << CONST_BITS;
377 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
378 tmp2 = MULTIPLY(d2, FIX(0.541196100));
379 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
391 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
392 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
396 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
397 tmp10 = tmp13 = tmp11 = tmp12 = 0;
404 /* Odd part per figure 8; the matrix is unitary and hence its
405 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
416 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
421 z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
423 tmp0 = MULTIPLY(d7, FIX(0.298631336));
424 tmp1 = MULTIPLY(d5, FIX(2.053119869));
425 tmp2 = MULTIPLY(d3, FIX(3.072711026));
426 tmp3 = MULTIPLY(d1, FIX(1.501321110));
427 z1 = MULTIPLY(z1, - FIX(0.899976223));
428 z2 = MULTIPLY(z2, - FIX(2.562915447));
429 z3 = MULTIPLY(z3, - FIX(1.961570560));
430 z4 = MULTIPLY(z4, - FIX(0.390180644));
442 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
445 z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
447 tmp0 = MULTIPLY(d7, FIX(0.298631336));
448 tmp1 = MULTIPLY(d5, FIX(2.053119869));
449 tmp2 = MULTIPLY(d3, FIX(3.072711026));
450 z1 = MULTIPLY(d7, - FIX(0.899976223));
451 z2 = MULTIPLY(z2, - FIX(2.562915447));
452 z3 = MULTIPLY(z3, - FIX(1.961570560));
453 z4 = MULTIPLY(d5, - FIX(0.390180644));
468 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
471 z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
473 tmp0 = MULTIPLY(d7, FIX(0.298631336));
474 tmp1 = MULTIPLY(d5, FIX(2.053119869));
475 tmp3 = MULTIPLY(d1, FIX(1.501321110));
476 z1 = MULTIPLY(z1, - FIX(0.899976223));
477 z2 = MULTIPLY(d5, - FIX(2.562915447));
478 z3 = MULTIPLY(d7, - FIX(1.961570560));
479 z4 = MULTIPLY(z4, - FIX(0.390180644));
491 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
492 z5 = MULTIPLY(d7 + d5, FIX(1.175875602));
494 tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
495 tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
496 z1 = MULTIPLY(d7, - FIX(0.899976223));
497 z3 = MULTIPLY(d7, - FIX(1.961570560));
498 z2 = MULTIPLY(d5, - FIX(2.562915447));
499 z4 = MULTIPLY(d5, - FIX(0.390180644));
517 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
520 z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
522 tmp0 = MULTIPLY(d7, FIX(0.298631336));
523 tmp2 = MULTIPLY(d3, FIX(3.072711026));
524 tmp3 = MULTIPLY(d1, FIX(1.501321110));
525 z1 = MULTIPLY(z1, - FIX(0.899976223));
526 z2 = MULTIPLY(d3, - FIX(2.562915447));
527 z3 = MULTIPLY(z3, - FIX(1.961570560));
528 z4 = MULTIPLY(d1, - FIX(0.390180644));
540 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
542 z5 = MULTIPLY(z3, FIX(1.175875602));
544 tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
545 tmp2 = MULTIPLY(d3, FIX(0.509795579));
546 z1 = MULTIPLY(d7, - FIX(0.899976223));
547 z2 = MULTIPLY(d3, - FIX(2.562915447));
548 z3 = MULTIPLY(z3, - FIX2(0.785694958));
560 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
562 z5 = MULTIPLY(z1, FIX(1.175875602));
564 tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
565 tmp3 = MULTIPLY(d1, FIX2(1.111140466));
566 z1 = MULTIPLY(z1, FIX2(0.275899379));
567 z3 = MULTIPLY(d7, - FIX(1.961570560));
568 z4 = MULTIPLY(d1, - FIX(0.390180644));
577 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
578 tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
579 tmp1 = MULTIPLY(d7, FIX(1.175875602));
580 tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
581 tmp3 = MULTIPLY(d7, FIX2(0.275899379));
594 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
597 z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
599 tmp1 = MULTIPLY(d5, FIX(2.053119869));
600 tmp2 = MULTIPLY(d3, FIX(3.072711026));
601 tmp3 = MULTIPLY(d1, FIX(1.501321110));
602 z1 = MULTIPLY(d1, - FIX(0.899976223));
603 z2 = MULTIPLY(z2, - FIX(2.562915447));
604 z3 = MULTIPLY(d3, - FIX(1.961570560));
605 z4 = MULTIPLY(z4, - FIX(0.390180644));
617 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
619 z5 = MULTIPLY(z2, FIX(1.175875602));
621 tmp1 = MULTIPLY(d5, FIX2(1.662939225));
622 tmp2 = MULTIPLY(d3, FIX2(1.111140466));
623 z2 = MULTIPLY(z2, - FIX2(1.387039845));
624 z3 = MULTIPLY(d3, - FIX(1.961570560));
625 z4 = MULTIPLY(d5, - FIX(0.390180644));
637 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
639 z5 = MULTIPLY(z4, FIX(1.175875602));
641 tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
642 tmp3 = MULTIPLY(d1, FIX2(0.601344887));
643 z1 = MULTIPLY(d1, - FIX(0.899976223));
644 z2 = MULTIPLY(d5, - FIX(2.562915447));
645 z4 = MULTIPLY(z4, FIX2(0.785694958));
654 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
655 tmp0 = MULTIPLY(d5, FIX(1.175875602));
656 tmp1 = MULTIPLY(d5, FIX2(0.275899380));
657 tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
658 tmp3 = MULTIPLY(d5, FIX2(0.785694958));
668 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
671 tmp2 = MULTIPLY(d3, - FIX(1.451774981));
672 tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
673 z1 = MULTIPLY(d1, FIX(1.061594337));
674 z2 = MULTIPLY(d3, - FIX(2.172734803));
675 z4 = MULTIPLY(z5, FIX(0.785694958));
676 z5 = MULTIPLY(z5, FIX(1.175875602));
685 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
686 tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
687 tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
688 tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
689 tmp3 = MULTIPLY(d3, FIX(1.175875602));
696 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
697 tmp0 = MULTIPLY(d1, FIX2(0.275899379));
698 tmp1 = MULTIPLY(d1, FIX2(0.785694958));
699 tmp2 = MULTIPLY(d1, FIX(1.175875602));
700 tmp3 = MULTIPLY(d1, FIX2(1.387039845));
704 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
705 tmp0 = tmp1 = tmp2 = tmp3 = 0;
711 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
713 dataptr[0] = (dctelem_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
714 dataptr[7] = (dctelem_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
715 dataptr[1] = (dctelem_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
716 dataptr[6] = (dctelem_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
717 dataptr[2] = (dctelem_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
718 dataptr[5] = (dctelem_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
719 dataptr[3] = (dctelem_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
720 dataptr[4] = (dctelem_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
722 dataptr += DCTSIZE; /* advance pointer to next row */
725 /* Pass 2: process columns. */
726 /* Note that we must descale the results by a factor of 8 == 2**3, */
727 /* and also undo the PASS1_BITS scaling. */
730 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
732 /* Columns of zeroes can be exploited in the same way as we did with rows.
733 * However, the row calculation has created many nonzero AC terms, so the
734 * simplification applies less often (typically 5% to 10% of the time).
735 * On machines with very fast multiplication, it's possible that the
736 * test takes more time than it's worth. In that case this section
737 * may be commented out.
740 d0 = dataptr[DCTSIZE*0];
741 d1 = dataptr[DCTSIZE*1];
742 d2 = dataptr[DCTSIZE*2];
743 d3 = dataptr[DCTSIZE*3];
744 d4 = dataptr[DCTSIZE*4];
745 d5 = dataptr[DCTSIZE*5];
746 d6 = dataptr[DCTSIZE*6];
747 d7 = dataptr[DCTSIZE*7];
749 /* Even part: reverse the even part of the forward DCT. */
750 /* The rotator is sqrt(2)*c(-6). */
759 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
760 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
761 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
762 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
764 tmp0 = (d0 + d4) << CONST_BITS;
765 tmp1 = (d0 - d4) << CONST_BITS;
774 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
775 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
776 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
777 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
779 tmp0 = d4 << CONST_BITS;
784 tmp12 = -(tmp0 + tmp2);
791 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
792 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
793 tmp3 = MULTIPLY(d6, FIX(0.541196100));
795 tmp0 = (d0 + d4) << CONST_BITS;
796 tmp1 = (d0 - d4) << CONST_BITS;
805 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
806 tmp2 = MULTIPLY(d6, -FIX2(1.306562965));
807 tmp3 = MULTIPLY(d6, FIX(0.541196100));
809 tmp0 = d4 << CONST_BITS;
814 tmp12 = -(tmp0 + tmp2);
824 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
825 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
826 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
827 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
829 tmp0 = d0 << CONST_BITS;
838 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
839 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
840 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
841 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
853 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
854 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
855 tmp3 = MULTIPLY(d6, FIX(0.541196100));
857 tmp0 = d0 << CONST_BITS;
866 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
867 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
868 tmp3 = MULTIPLY(d6, FIX(0.541196100));
885 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
886 tmp2 = MULTIPLY(d2, FIX(0.541196100));
887 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
889 tmp0 = (d0 + d4) << CONST_BITS;
890 tmp1 = (d0 - d4) << CONST_BITS;
899 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
900 tmp2 = MULTIPLY(d2, FIX(0.541196100));
901 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
903 tmp0 = d4 << CONST_BITS;
908 tmp12 = -(tmp0 + tmp2);
915 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
916 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
917 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
921 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
922 tmp10 = tmp13 = d4 << CONST_BITS;
923 tmp11 = tmp12 = -tmp10;
933 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
934 tmp2 = MULTIPLY(d2, FIX(0.541196100));
935 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
937 tmp0 = d0 << CONST_BITS;
946 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
947 tmp2 = MULTIPLY(d2, FIX(0.541196100));
948 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
960 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
961 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
965 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
966 tmp10 = tmp13 = tmp11 = tmp12 = 0;
972 /* Odd part per figure 8; the matrix is unitary and hence its
973 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
983 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
988 z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
990 tmp0 = MULTIPLY(d7, FIX(0.298631336));
991 tmp1 = MULTIPLY(d5, FIX(2.053119869));
992 tmp2 = MULTIPLY(d3, FIX(3.072711026));
993 tmp3 = MULTIPLY(d1, FIX(1.501321110));
994 z1 = MULTIPLY(z1, - FIX(0.899976223));
995 z2 = MULTIPLY(z2, - FIX(2.562915447));
996 z3 = MULTIPLY(z3, - FIX(1.961570560));
997 z4 = MULTIPLY(z4, - FIX(0.390180644));
1009 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
1012 z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
1014 tmp0 = MULTIPLY(d7, FIX(0.298631336));
1015 tmp1 = MULTIPLY(d5, FIX(2.053119869));
1016 tmp2 = MULTIPLY(d3, FIX(3.072711026));
1017 z1 = MULTIPLY(d7, - FIX(0.899976223));
1018 z2 = MULTIPLY(z2, - FIX(2.562915447));
1019 z3 = MULTIPLY(z3, - FIX(1.961570560));
1020 z4 = MULTIPLY(d5, - FIX(0.390180644));
1035 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
1038 z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
1040 tmp0 = MULTIPLY(d7, FIX(0.298631336));
1041 tmp1 = MULTIPLY(d5, FIX(2.053119869));
1042 tmp3 = MULTIPLY(d1, FIX(1.501321110));
1043 z1 = MULTIPLY(z1, - FIX(0.899976223));
1044 z2 = MULTIPLY(d5, - FIX(2.562915447));
1045 z3 = MULTIPLY(d7, - FIX(1.961570560));
1046 z4 = MULTIPLY(z4, - FIX(0.390180644));
1058 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
1059 z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
1061 tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1062 tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1063 z1 = MULTIPLY(d7, - FIX(0.899976223));
1064 z3 = MULTIPLY(d7, - FIX(1.961570560));
1065 z2 = MULTIPLY(d5, - FIX(2.562915447));
1066 z4 = MULTIPLY(d5, - FIX(0.390180644));
1084 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
1087 z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
1089 tmp0 = MULTIPLY(d7, FIX(0.298631336));
1090 tmp2 = MULTIPLY(d3, FIX(3.072711026));
1091 tmp3 = MULTIPLY(d1, FIX(1.501321110));
1092 z1 = MULTIPLY(z1, - FIX(0.899976223));
1093 z2 = MULTIPLY(d3, - FIX(2.562915447));
1094 z3 = MULTIPLY(z3, - FIX(1.961570560));
1095 z4 = MULTIPLY(d1, - FIX(0.390180644));
1107 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1109 z5 = MULTIPLY(z3, FIX(1.175875602));
1111 tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1112 z1 = MULTIPLY(d7, - FIX(0.899976223));
1113 tmp2 = MULTIPLY(d3, FIX(0.509795579));
1114 z2 = MULTIPLY(d3, - FIX(2.562915447));
1115 z3 = MULTIPLY(z3, - FIX2(0.785694958));
1127 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1129 z5 = MULTIPLY(z1, FIX(1.175875602));
1131 tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
1132 tmp3 = MULTIPLY(d1, FIX2(1.111140466));
1133 z1 = MULTIPLY(z1, FIX2(0.275899379));
1134 z3 = MULTIPLY(d7, - FIX(1.961570560));
1135 z4 = MULTIPLY(d1, - FIX(0.390180644));
1144 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1145 tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
1146 tmp1 = MULTIPLY(d7, FIX(1.175875602));
1147 tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
1148 tmp3 = MULTIPLY(d7, FIX2(0.275899379));
1161 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1164 z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
1166 tmp1 = MULTIPLY(d5, FIX(2.053119869));
1167 tmp2 = MULTIPLY(d3, FIX(3.072711026));
1168 tmp3 = MULTIPLY(d1, FIX(1.501321110));
1169 z1 = MULTIPLY(d1, - FIX(0.899976223));
1170 z2 = MULTIPLY(z2, - FIX(2.562915447));
1171 z3 = MULTIPLY(d3, - FIX(1.961570560));
1172 z4 = MULTIPLY(z4, - FIX(0.390180644));
1184 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1186 z5 = MULTIPLY(z2, FIX(1.175875602));
1188 tmp1 = MULTIPLY(d5, FIX2(1.662939225));
1189 tmp2 = MULTIPLY(d3, FIX2(1.111140466));
1190 z2 = MULTIPLY(z2, - FIX2(1.387039845));
1191 z3 = MULTIPLY(d3, - FIX(1.961570560));
1192 z4 = MULTIPLY(d5, - FIX(0.390180644));
1204 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1206 z5 = MULTIPLY(z4, FIX(1.175875602));
1208 tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1209 tmp3 = MULTIPLY(d1, FIX2(0.601344887));
1210 z1 = MULTIPLY(d1, - FIX(0.899976223));
1211 z2 = MULTIPLY(d5, - FIX(2.562915447));
1212 z4 = MULTIPLY(z4, FIX2(0.785694958));
1221 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1222 tmp0 = MULTIPLY(d5, FIX(1.175875602));
1223 tmp1 = MULTIPLY(d5, FIX2(0.275899380));
1224 tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
1225 tmp3 = MULTIPLY(d5, FIX2(0.785694958));
1235 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1238 tmp2 = MULTIPLY(d3, - FIX(1.451774981));
1239 tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
1240 z1 = MULTIPLY(d1, FIX(1.061594337));
1241 z2 = MULTIPLY(d3, - FIX(2.172734803));
1242 z4 = MULTIPLY(z5, FIX(0.785694958));
1243 z5 = MULTIPLY(z5, FIX(1.175875602));
1252 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1253 tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
1254 tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
1255 tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
1256 tmp3 = MULTIPLY(d3, FIX(1.175875602));
1263 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1264 tmp0 = MULTIPLY(d1, FIX2(0.275899379));
1265 tmp1 = MULTIPLY(d1, FIX2(0.785694958));
1266 tmp2 = MULTIPLY(d1, FIX(1.175875602));
1267 tmp3 = MULTIPLY(d1, FIX2(1.387039845));
1271 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1272 tmp0 = tmp1 = tmp2 = tmp3 = 0;
1278 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1280 dataptr[DCTSIZE*0] = (dctelem_t) DESCALE(tmp10 + tmp3,
1281 CONST_BITS+PASS1_BITS+3);
1282 dataptr[DCTSIZE*7] = (dctelem_t) DESCALE(tmp10 - tmp3,
1283 CONST_BITS+PASS1_BITS+3);
1284 dataptr[DCTSIZE*1] = (dctelem_t) DESCALE(tmp11 + tmp2,
1285 CONST_BITS+PASS1_BITS+3);
1286 dataptr[DCTSIZE*6] = (dctelem_t) DESCALE(tmp11 - tmp2,
1287 CONST_BITS+PASS1_BITS+3);
1288 dataptr[DCTSIZE*2] = (dctelem_t) DESCALE(tmp12 + tmp1,
1289 CONST_BITS+PASS1_BITS+3);
1290 dataptr[DCTSIZE*5] = (dctelem_t) DESCALE(tmp12 - tmp1,
1291 CONST_BITS+PASS1_BITS+3);
1292 dataptr[DCTSIZE*3] = (dctelem_t) DESCALE(tmp13 + tmp0,
1293 CONST_BITS+PASS1_BITS+3);
1294 dataptr[DCTSIZE*4] = (dctelem_t) DESCALE(tmp13 - tmp0,
1295 CONST_BITS+PASS1_BITS+3);
1297 dataptr++; /* advance pointer to next column */