1 /*****************************************************************************
3 *****************************************************************************
4 * Copyright (C) 1999, 2000 VideoLAN
5 * $Id: idct.c,v 1.9 2001/05/06 04:32:02 sam Exp $
7 * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
24 #define MODULE_NAME idct
25 #include "modules_inner.h"
27 /*****************************************************************************
29 *****************************************************************************/
41 #include "video_output.h"
43 #include "video_decoder.h"
47 #include "vdec_block.h"
48 #include "vdec_idct.h"
50 /*****************************************************************************
51 * Local and extern prototypes.
52 *****************************************************************************/
53 static void idct_getfunctions( function_list_t * p_function_list );
54 static int idct_Probe ( probedata_t *p_data );
55 static void vdec_NormScan ( u8 ppi_scan[2][64] );
58 /*****************************************************************************
59 * Build configuration tree.
60 *****************************************************************************/
62 ADD_WINDOW( "Configuration for IDCT module" )
63 ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
66 /*****************************************************************************
67 * InitModule: get the module structure and configuration.
68 *****************************************************************************
69 * We have to fill psz_name, psz_longname and psz_version. These variables
70 * will be strdup()ed later by the main application because the module can
71 * be unloaded later to save memory, and we want to be able to access this
72 * data even after the module has been unloaded.
73 *****************************************************************************/
76 p_module->psz_name = MODULE_STRING;
77 p_module->psz_longname = "IDCT module";
78 p_module->psz_version = VERSION;
80 p_module->i_capabilities = MODULE_CAPABILITY_NULL
81 | MODULE_CAPABILITY_IDCT;
86 /*****************************************************************************
87 * ActivateModule: set the module to a usable state.
88 *****************************************************************************
89 * This function fills the capability functions and the configuration
90 * structure. Once ActivateModule() has been called, i_usage can be
91 * set to 0 and NeedModule() can be called to increment it. To unload
92 * the module, one has to wait until i_usage == 0 and call DeactivateModule().
93 *****************************************************************************/
96 p_module->p_functions = malloc( sizeof( module_functions_t ) );
97 if( p_module->p_functions == NULL )
102 idct_getfunctions( &p_module->p_functions->idct );
104 p_module->p_config = p_config;
109 /*****************************************************************************
110 * DeactivateModule: make sure the module can be unloaded.
111 *****************************************************************************
112 * This function must only be called when i_usage == 0. If it successfully
113 * returns, i_usage can be set to -1 and the module unloaded. Be careful to
114 * lock usage_lock during the whole process.
115 *****************************************************************************/
118 free( p_module->p_functions );
123 /* Following functions are local */
125 /*****************************************************************************
126 * Functions exported as capabilities. They are declared as static so that
127 * we don't pollute the namespace too much.
128 *****************************************************************************/
129 static void idct_getfunctions( function_list_t * p_function_list )
131 p_function_list->pf_probe = idct_Probe;
132 #define F p_function_list->functions.idct
133 F.pf_idct_init = _M( vdec_InitIDCT );
134 F.pf_sparse_idct = _M( vdec_SparseIDCT );
135 F.pf_idct = _M( vdec_IDCT );
136 F.pf_norm_scan = vdec_NormScan;
137 F.pf_vdec_init = _M( vdec_Init );
138 F.pf_decode_mb_c = _M( vdec_DecodeMacroblockC );
139 F.pf_decode_mb_bw = _M( vdec_DecodeMacroblockBW );
143 /*****************************************************************************
144 * idct_Probe: returns a preference score
145 *****************************************************************************/
146 static int idct_Probe( probedata_t *p_data )
148 if( TestMethod( IDCT_METHOD_VAR, "idct" ) )
153 /* This plugin always works */
157 /*****************************************************************************
158 * vdec_NormScan : Unused in this IDCT
159 *****************************************************************************/
160 static void vdec_NormScan( u8 ppi_scan[2][64] )
164 /*****************************************************************************
165 * vdec_IDCT : IDCT function for normal matrices
166 *****************************************************************************/
167 void _M( vdec_IDCT )( vdec_thread_t * p_vdec, dctelem_t * p_block,
170 s32 tmp0, tmp1, tmp2, tmp3;
171 s32 tmp10, tmp11, tmp12, tmp13;
172 s32 z1, z2, z3, z4, z5;
173 s32 d0, d1, d2, d3, d4, d5, d6, d7;
179 /* Pass 1: process rows. */
180 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
181 /* furthermore, we scale the results by 2**PASS1_BITS. */
185 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
187 /* Due to quantization, we will usually find that many of the input
188 * coefficients are zero, especially the AC terms. We can exploit this
189 * by short-circuiting the IDCT calculation for any row in which all
190 * the AC terms are zero. In that case each output is equal to the
191 * DC coefficient (with scale factor as needed).
192 * With typical images and quantization tables, half or more of the
193 * row DCT calculations can be simplified this way.
196 register int * idataptr = (int*)dataptr;
199 if ( (d1 == 0) && ((idataptr[1] | idataptr[2] | idataptr[3]) == 0) )
201 /* AC terms all zero */
204 /* Compute a 32 bit value to assign. */
205 dctelem_t dcval = (dctelem_t) (d0 << PASS1_BITS);
206 register int v = (dcval & 0xffff) | (dcval << 16);
214 dataptr += DCTSIZE; /* advance pointer to next row */
224 /* Even part: reverse the even part of the forward DCT. */
225 /* The rotator is sqrt(2)*c(-6). */
234 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
235 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
236 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
237 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
239 tmp0 = (d0 + d4) << CONST_BITS;
240 tmp1 = (d0 - d4) << CONST_BITS;
249 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
250 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
251 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
252 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
254 tmp0 = d4 << CONST_BITS;
259 tmp12 = -(tmp0 + tmp2);
266 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
267 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
268 tmp3 = MULTIPLY(d6, FIX(0.541196100));
270 tmp0 = (d0 + d4) << CONST_BITS;
271 tmp1 = (d0 - d4) << CONST_BITS;
280 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
281 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
282 tmp3 = MULTIPLY(d6, FIX(0.541196100));
284 tmp0 = d4 << CONST_BITS;
289 tmp12 = -(tmp0 + tmp2);
299 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
300 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
301 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
302 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
304 tmp0 = d0 << CONST_BITS;
313 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
314 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
315 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
316 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
328 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
329 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
330 tmp3 = MULTIPLY(d6, FIX(0.541196100));
332 tmp0 = d0 << CONST_BITS;
341 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
342 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
343 tmp3 = MULTIPLY(d6, FIX(0.541196100));
361 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
362 tmp2 = MULTIPLY(d2, FIX(0.541196100));
363 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
365 tmp0 = (d0 + d4) << CONST_BITS;
366 tmp1 = (d0 - d4) << CONST_BITS;
375 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
376 tmp2 = MULTIPLY(d2, FIX(0.541196100));
377 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
379 tmp0 = d4 << CONST_BITS;
384 tmp12 = -(tmp0 + tmp2);
391 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
392 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
393 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
397 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
398 tmp10 = tmp13 = d4 << CONST_BITS;
399 tmp11 = tmp12 = -tmp10;
409 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
410 tmp2 = MULTIPLY(d2, FIX(0.541196100));
411 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
413 tmp0 = d0 << CONST_BITS;
422 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
423 tmp2 = MULTIPLY(d2, FIX(0.541196100));
424 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
436 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
437 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
441 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
442 tmp10 = tmp13 = tmp11 = tmp12 = 0;
449 /* Odd part per figure 8; the matrix is unitary and hence its
450 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
461 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
466 z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
468 tmp0 = MULTIPLY(d7, FIX(0.298631336));
469 tmp1 = MULTIPLY(d5, FIX(2.053119869));
470 tmp2 = MULTIPLY(d3, FIX(3.072711026));
471 tmp3 = MULTIPLY(d1, FIX(1.501321110));
472 z1 = MULTIPLY(z1, - FIX(0.899976223));
473 z2 = MULTIPLY(z2, - FIX(2.562915447));
474 z3 = MULTIPLY(z3, - FIX(1.961570560));
475 z4 = MULTIPLY(z4, - FIX(0.390180644));
487 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
490 z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
492 tmp0 = MULTIPLY(d7, FIX(0.298631336));
493 tmp1 = MULTIPLY(d5, FIX(2.053119869));
494 tmp2 = MULTIPLY(d3, FIX(3.072711026));
495 z1 = MULTIPLY(d7, - FIX(0.899976223));
496 z2 = MULTIPLY(z2, - FIX(2.562915447));
497 z3 = MULTIPLY(z3, - FIX(1.961570560));
498 z4 = MULTIPLY(d5, - FIX(0.390180644));
513 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
516 z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
518 tmp0 = MULTIPLY(d7, FIX(0.298631336));
519 tmp1 = MULTIPLY(d5, FIX(2.053119869));
520 tmp3 = MULTIPLY(d1, FIX(1.501321110));
521 z1 = MULTIPLY(z1, - FIX(0.899976223));
522 z2 = MULTIPLY(d5, - FIX(2.562915447));
523 z3 = MULTIPLY(d7, - FIX(1.961570560));
524 z4 = MULTIPLY(z4, - FIX(0.390180644));
536 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
537 z5 = MULTIPLY(d7 + d5, FIX(1.175875602));
539 tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
540 tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
541 z1 = MULTIPLY(d7, - FIX(0.899976223));
542 z3 = MULTIPLY(d7, - FIX(1.961570560));
543 z2 = MULTIPLY(d5, - FIX(2.562915447));
544 z4 = MULTIPLY(d5, - FIX(0.390180644));
562 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
565 z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
567 tmp0 = MULTIPLY(d7, FIX(0.298631336));
568 tmp2 = MULTIPLY(d3, FIX(3.072711026));
569 tmp3 = MULTIPLY(d1, FIX(1.501321110));
570 z1 = MULTIPLY(z1, - FIX(0.899976223));
571 z2 = MULTIPLY(d3, - FIX(2.562915447));
572 z3 = MULTIPLY(z3, - FIX(1.961570560));
573 z4 = MULTIPLY(d1, - FIX(0.390180644));
585 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
587 z5 = MULTIPLY(z3, FIX(1.175875602));
589 tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
590 tmp2 = MULTIPLY(d3, FIX(0.509795579));
591 z1 = MULTIPLY(d7, - FIX(0.899976223));
592 z2 = MULTIPLY(d3, - FIX(2.562915447));
593 z3 = MULTIPLY(z3, - FIX2(0.785694958));
605 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
607 z5 = MULTIPLY(z1, FIX(1.175875602));
609 tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
610 tmp3 = MULTIPLY(d1, FIX2(1.111140466));
611 z1 = MULTIPLY(z1, FIX2(0.275899379));
612 z3 = MULTIPLY(d7, - FIX(1.961570560));
613 z4 = MULTIPLY(d1, - FIX(0.390180644));
622 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
623 tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
624 tmp1 = MULTIPLY(d7, FIX(1.175875602));
625 tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
626 tmp3 = MULTIPLY(d7, FIX2(0.275899379));
639 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
642 z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
644 tmp1 = MULTIPLY(d5, FIX(2.053119869));
645 tmp2 = MULTIPLY(d3, FIX(3.072711026));
646 tmp3 = MULTIPLY(d1, FIX(1.501321110));
647 z1 = MULTIPLY(d1, - FIX(0.899976223));
648 z2 = MULTIPLY(z2, - FIX(2.562915447));
649 z3 = MULTIPLY(d3, - FIX(1.961570560));
650 z4 = MULTIPLY(z4, - FIX(0.390180644));
662 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
664 z5 = MULTIPLY(z2, FIX(1.175875602));
666 tmp1 = MULTIPLY(d5, FIX2(1.662939225));
667 tmp2 = MULTIPLY(d3, FIX2(1.111140466));
668 z2 = MULTIPLY(z2, - FIX2(1.387039845));
669 z3 = MULTIPLY(d3, - FIX(1.961570560));
670 z4 = MULTIPLY(d5, - FIX(0.390180644));
682 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
684 z5 = MULTIPLY(z4, FIX(1.175875602));
686 tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
687 tmp3 = MULTIPLY(d1, FIX2(0.601344887));
688 z1 = MULTIPLY(d1, - FIX(0.899976223));
689 z2 = MULTIPLY(d5, - FIX(2.562915447));
690 z4 = MULTIPLY(z4, FIX2(0.785694958));
699 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
700 tmp0 = MULTIPLY(d5, FIX(1.175875602));
701 tmp1 = MULTIPLY(d5, FIX2(0.275899380));
702 tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
703 tmp3 = MULTIPLY(d5, FIX2(0.785694958));
713 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
716 tmp2 = MULTIPLY(d3, - FIX(1.451774981));
717 tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
718 z1 = MULTIPLY(d1, FIX(1.061594337));
719 z2 = MULTIPLY(d3, - FIX(2.172734803));
720 z4 = MULTIPLY(z5, FIX(0.785694958));
721 z5 = MULTIPLY(z5, FIX(1.175875602));
730 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
731 tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
732 tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
733 tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
734 tmp3 = MULTIPLY(d3, FIX(1.175875602));
741 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
742 tmp0 = MULTIPLY(d1, FIX2(0.275899379));
743 tmp1 = MULTIPLY(d1, FIX2(0.785694958));
744 tmp2 = MULTIPLY(d1, FIX(1.175875602));
745 tmp3 = MULTIPLY(d1, FIX2(1.387039845));
749 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
750 tmp0 = tmp1 = tmp2 = tmp3 = 0;
756 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
758 dataptr[0] = (dctelem_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
759 dataptr[7] = (dctelem_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
760 dataptr[1] = (dctelem_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
761 dataptr[6] = (dctelem_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
762 dataptr[2] = (dctelem_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
763 dataptr[5] = (dctelem_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
764 dataptr[3] = (dctelem_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
765 dataptr[4] = (dctelem_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
767 dataptr += DCTSIZE; /* advance pointer to next row */
770 /* Pass 2: process columns. */
771 /* Note that we must descale the results by a factor of 8 == 2**3, */
772 /* and also undo the PASS1_BITS scaling. */
775 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--)
777 /* Columns of zeroes can be exploited in the same way as we did with rows.
778 * However, the row calculation has created many nonzero AC terms, so the
779 * simplification applies less often (typically 5% to 10% of the time).
780 * On machines with very fast multiplication, it's possible that the
781 * test takes more time than it's worth. In that case this section
782 * may be commented out.
785 d0 = dataptr[DCTSIZE*0];
786 d1 = dataptr[DCTSIZE*1];
787 d2 = dataptr[DCTSIZE*2];
788 d3 = dataptr[DCTSIZE*3];
789 d4 = dataptr[DCTSIZE*4];
790 d5 = dataptr[DCTSIZE*5];
791 d6 = dataptr[DCTSIZE*6];
792 d7 = dataptr[DCTSIZE*7];
794 /* Even part: reverse the even part of the forward DCT. */
795 /* The rotator is sqrt(2)*c(-6). */
804 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
805 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
806 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
807 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
809 tmp0 = (d0 + d4) << CONST_BITS;
810 tmp1 = (d0 - d4) << CONST_BITS;
819 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
820 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
821 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
822 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
824 tmp0 = d4 << CONST_BITS;
829 tmp12 = -(tmp0 + tmp2);
836 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
837 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
838 tmp3 = MULTIPLY(d6, FIX(0.541196100));
840 tmp0 = (d0 + d4) << CONST_BITS;
841 tmp1 = (d0 - d4) << CONST_BITS;
850 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
851 tmp2 = MULTIPLY(d6, -FIX2(1.306562965));
852 tmp3 = MULTIPLY(d6, FIX(0.541196100));
854 tmp0 = d4 << CONST_BITS;
859 tmp12 = -(tmp0 + tmp2);
869 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
870 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
871 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
872 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
874 tmp0 = d0 << CONST_BITS;
883 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
884 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
885 tmp2 = z1 + MULTIPLY(d6, - FIX(1.847759065));
886 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
898 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
899 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
900 tmp3 = MULTIPLY(d6, FIX(0.541196100));
902 tmp0 = d0 << CONST_BITS;
911 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
912 tmp2 = MULTIPLY(d6, - FIX2(1.306562965));
913 tmp3 = MULTIPLY(d6, FIX(0.541196100));
930 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
931 tmp2 = MULTIPLY(d2, FIX(0.541196100));
932 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
934 tmp0 = (d0 + d4) << CONST_BITS;
935 tmp1 = (d0 - d4) << CONST_BITS;
944 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
945 tmp2 = MULTIPLY(d2, FIX(0.541196100));
946 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
948 tmp0 = d4 << CONST_BITS;
953 tmp12 = -(tmp0 + tmp2);
960 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
961 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
962 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
966 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
967 tmp10 = tmp13 = d4 << CONST_BITS;
968 tmp11 = tmp12 = -tmp10;
978 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
979 tmp2 = MULTIPLY(d2, FIX(0.541196100));
980 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
982 tmp0 = d0 << CONST_BITS;
991 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
992 tmp2 = MULTIPLY(d2, FIX(0.541196100));
993 tmp3 = MULTIPLY(d2, (FIX(1.306562965) + .5));
1005 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
1006 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
1010 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
1011 tmp10 = tmp13 = tmp11 = tmp12 = 0;
1017 /* Odd part per figure 8; the matrix is unitary and hence its
1018 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
1028 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
1033 z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
1035 tmp0 = MULTIPLY(d7, FIX(0.298631336));
1036 tmp1 = MULTIPLY(d5, FIX(2.053119869));
1037 tmp2 = MULTIPLY(d3, FIX(3.072711026));
1038 tmp3 = MULTIPLY(d1, FIX(1.501321110));
1039 z1 = MULTIPLY(z1, - FIX(0.899976223));
1040 z2 = MULTIPLY(z2, - FIX(2.562915447));
1041 z3 = MULTIPLY(z3, - FIX(1.961570560));
1042 z4 = MULTIPLY(z4, - FIX(0.390180644));
1054 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
1057 z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
1059 tmp0 = MULTIPLY(d7, FIX(0.298631336));
1060 tmp1 = MULTIPLY(d5, FIX(2.053119869));
1061 tmp2 = MULTIPLY(d3, FIX(3.072711026));
1062 z1 = MULTIPLY(d7, - FIX(0.899976223));
1063 z2 = MULTIPLY(z2, - FIX(2.562915447));
1064 z3 = MULTIPLY(z3, - FIX(1.961570560));
1065 z4 = MULTIPLY(d5, - FIX(0.390180644));
1080 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
1083 z5 = MULTIPLY(d7 + z4, FIX(1.175875602));
1085 tmp0 = MULTIPLY(d7, FIX(0.298631336));
1086 tmp1 = MULTIPLY(d5, FIX(2.053119869));
1087 tmp3 = MULTIPLY(d1, FIX(1.501321110));
1088 z1 = MULTIPLY(z1, - FIX(0.899976223));
1089 z2 = MULTIPLY(d5, - FIX(2.562915447));
1090 z3 = MULTIPLY(d7, - FIX(1.961570560));
1091 z4 = MULTIPLY(z4, - FIX(0.390180644));
1103 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
1104 z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
1106 tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1107 tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1108 z1 = MULTIPLY(d7, - FIX(0.899976223));
1109 z3 = MULTIPLY(d7, - FIX(1.961570560));
1110 z2 = MULTIPLY(d5, - FIX(2.562915447));
1111 z4 = MULTIPLY(d5, - FIX(0.390180644));
1129 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
1132 z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
1134 tmp0 = MULTIPLY(d7, FIX(0.298631336));
1135 tmp2 = MULTIPLY(d3, FIX(3.072711026));
1136 tmp3 = MULTIPLY(d1, FIX(1.501321110));
1137 z1 = MULTIPLY(z1, - FIX(0.899976223));
1138 z2 = MULTIPLY(d3, - FIX(2.562915447));
1139 z3 = MULTIPLY(z3, - FIX(1.961570560));
1140 z4 = MULTIPLY(d1, - FIX(0.390180644));
1152 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1154 z5 = MULTIPLY(z3, FIX(1.175875602));
1156 tmp0 = MULTIPLY(d7, - FIX2(0.601344887));
1157 z1 = MULTIPLY(d7, - FIX(0.899976223));
1158 tmp2 = MULTIPLY(d3, FIX(0.509795579));
1159 z2 = MULTIPLY(d3, - FIX(2.562915447));
1160 z3 = MULTIPLY(z3, - FIX2(0.785694958));
1172 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1174 z5 = MULTIPLY(z1, FIX(1.175875602));
1176 tmp0 = MULTIPLY(d7, - FIX2(1.662939224));
1177 tmp3 = MULTIPLY(d1, FIX2(1.111140466));
1178 z1 = MULTIPLY(z1, FIX2(0.275899379));
1179 z3 = MULTIPLY(d7, - FIX(1.961570560));
1180 z4 = MULTIPLY(d1, - FIX(0.390180644));
1189 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1190 tmp0 = MULTIPLY(d7, - FIX2(1.387039845));
1191 tmp1 = MULTIPLY(d7, FIX(1.175875602));
1192 tmp2 = MULTIPLY(d7, - FIX2(0.785694958));
1193 tmp3 = MULTIPLY(d7, FIX2(0.275899379));
1206 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1209 z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
1211 tmp1 = MULTIPLY(d5, FIX(2.053119869));
1212 tmp2 = MULTIPLY(d3, FIX(3.072711026));
1213 tmp3 = MULTIPLY(d1, FIX(1.501321110));
1214 z1 = MULTIPLY(d1, - FIX(0.899976223));
1215 z2 = MULTIPLY(z2, - FIX(2.562915447));
1216 z3 = MULTIPLY(d3, - FIX(1.961570560));
1217 z4 = MULTIPLY(z4, - FIX(0.390180644));
1229 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1231 z5 = MULTIPLY(z2, FIX(1.175875602));
1233 tmp1 = MULTIPLY(d5, FIX2(1.662939225));
1234 tmp2 = MULTIPLY(d3, FIX2(1.111140466));
1235 z2 = MULTIPLY(z2, - FIX2(1.387039845));
1236 z3 = MULTIPLY(d3, - FIX(1.961570560));
1237 z4 = MULTIPLY(d5, - FIX(0.390180644));
1249 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1251 z5 = MULTIPLY(z4, FIX(1.175875602));
1253 tmp1 = MULTIPLY(d5, - FIX2(0.509795578));
1254 tmp3 = MULTIPLY(d1, FIX2(0.601344887));
1255 z1 = MULTIPLY(d1, - FIX(0.899976223));
1256 z2 = MULTIPLY(d5, - FIX(2.562915447));
1257 z4 = MULTIPLY(z4, FIX2(0.785694958));
1266 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1267 tmp0 = MULTIPLY(d5, FIX(1.175875602));
1268 tmp1 = MULTIPLY(d5, FIX2(0.275899380));
1269 tmp2 = MULTIPLY(d5, - FIX2(1.387039845));
1270 tmp3 = MULTIPLY(d5, FIX2(0.785694958));
1280 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1283 tmp2 = MULTIPLY(d3, - FIX(1.451774981));
1284 tmp3 = MULTIPLY(d1, (FIX(0.211164243) - 1));
1285 z1 = MULTIPLY(d1, FIX(1.061594337));
1286 z2 = MULTIPLY(d3, - FIX(2.172734803));
1287 z4 = MULTIPLY(z5, FIX(0.785694958));
1288 z5 = MULTIPLY(z5, FIX(1.175875602));
1297 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1298 tmp0 = MULTIPLY(d3, - FIX2(0.785694958));
1299 tmp1 = MULTIPLY(d3, - FIX2(1.387039845));
1300 tmp2 = MULTIPLY(d3, - FIX2(0.275899379));
1301 tmp3 = MULTIPLY(d3, FIX(1.175875602));
1308 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1309 tmp0 = MULTIPLY(d1, FIX2(0.275899379));
1310 tmp1 = MULTIPLY(d1, FIX2(0.785694958));
1311 tmp2 = MULTIPLY(d1, FIX(1.175875602));
1312 tmp3 = MULTIPLY(d1, FIX2(1.387039845));
1316 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1317 tmp0 = tmp1 = tmp2 = tmp3 = 0;
1323 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1325 dataptr[DCTSIZE*0] = (dctelem_t) DESCALE(tmp10 + tmp3,
1326 CONST_BITS+PASS1_BITS+3);
1327 dataptr[DCTSIZE*7] = (dctelem_t) DESCALE(tmp10 - tmp3,
1328 CONST_BITS+PASS1_BITS+3);
1329 dataptr[DCTSIZE*1] = (dctelem_t) DESCALE(tmp11 + tmp2,
1330 CONST_BITS+PASS1_BITS+3);
1331 dataptr[DCTSIZE*6] = (dctelem_t) DESCALE(tmp11 - tmp2,
1332 CONST_BITS+PASS1_BITS+3);
1333 dataptr[DCTSIZE*2] = (dctelem_t) DESCALE(tmp12 + tmp1,
1334 CONST_BITS+PASS1_BITS+3);
1335 dataptr[DCTSIZE*5] = (dctelem_t) DESCALE(tmp12 - tmp1,
1336 CONST_BITS+PASS1_BITS+3);
1337 dataptr[DCTSIZE*3] = (dctelem_t) DESCALE(tmp13 + tmp0,
1338 CONST_BITS+PASS1_BITS+3);
1339 dataptr[DCTSIZE*4] = (dctelem_t) DESCALE(tmp13 - tmp0,
1340 CONST_BITS+PASS1_BITS+3);
1342 dataptr++; /* advance pointer to next column */