1 /***************************************************************
3 * Copyright: (c) Copyright Motorola Inc. 1998
7 * Function: Matrix_Transpose
9 * Description: The following Matrix Transpose is adapted
10 * from an algorithm developed by Brett Olsson
11 * from IBM. It performs an 8x8 16-bit element
12 * full matrix transpose.
14 * Inputs: array elements stored in input
15 * input[0] = [ 00 01 02 03 04 05 06 07 ]
16 * input[1] = [ 10 11 12 13 14 15 16 17 ]
17 * input[2] = [ 20 21 22 23 24 25 26 27 ]
18 * input[3] = [ 30 31 32 33 34 35 36 37 ]
19 * input[4] = [ 40 41 42 43 44 45 46 47 ]
20 * input[5] = [ 50 51 52 53 54 55 56 57 ]
21 * input[6] = [ 60 61 62 63 64 65 66 67 ]
22 * input[7] = [ 70 71 72 73 74 75 76 77 ]
24 * Outputs: transposed elements in output
26 **************************************************************/
/* Transposes the eight vectors in input into output using three passes of
 * vec_mergeh/vec_mergel; the trailing comment on each line shows the
 * resulting element layout.
 * NOTE(review): no opening or closing brace for the function body is visible
 * in this listing -- presumably dropped along with the blank lines; confirm
 * against the original source. */
28 static __inline__ void Matrix_Transpose ( vector signed short *input,
29 vector signed short *output )
31 vector signed short a0, a1, a2, a3, a4, a5, a6, a7;
32 vector signed short b0, b1, b2, b3, b4, b5, b6, b7;
/* Pass 1: interleave input row r with input row r+4 (high half, then low half). */
34 b0 = vec_mergeh( input[0], input[4] ); /* [ 00 40 01 41 02 42 03 43 ]*/
35 b1 = vec_mergel( input[0], input[4] ); /* [ 04 44 05 45 06 46 07 47 ]*/
36 b2 = vec_mergeh( input[1], input[5] ); /* [ 10 50 11 51 12 52 13 53 ]*/
37 b3 = vec_mergel( input[1], input[5] ); /* [ 14 54 15 55 16 56 17 57 ]*/
38 b4 = vec_mergeh( input[2], input[6] ); /* [ 20 60 21 61 22 62 23 63 ]*/
39 b5 = vec_mergel( input[2], input[6] ); /* [ 24 64 25 65 26 66 27 67 ]*/
40 b6 = vec_mergeh( input[3], input[7] ); /* [ 30 70 31 71 32 72 33 73 ]*/
41 b7 = vec_mergel( input[3], input[7] ); /* [ 34 74 35 75 36 76 37 77 ]*/
/* Pass 2: interleave the pass-1 vectors pairwise (b0/b4, b1/b5, b2/b6, b3/b7). */
43 a0 = vec_mergeh( b0, b4 ); /* [ 00 20 40 60 01 21 41 61 ]*/
44 a1 = vec_mergel( b0, b4 ); /* [ 02 22 42 62 03 23 43 63 ]*/
45 a2 = vec_mergeh( b1, b5 ); /* [ 04 24 44 64 05 25 45 65 ]*/
46 a3 = vec_mergel( b1, b5 ); /* [ 06 26 46 66 07 27 47 67 ]*/
47 a4 = vec_mergeh( b2, b6 ); /* [ 10 30 50 70 11 31 51 71 ]*/
48 a5 = vec_mergel( b2, b6 ); /* [ 12 32 52 72 13 33 53 73 ]*/
49 a6 = vec_mergeh( b3, b7 ); /* [ 14 34 54 74 15 35 55 75 ]*/
50 a7 = vec_mergel( b3, b7 ); /* [ 16 36 56 76 17 37 57 77 ]*/
/* Pass 3: interleave the pass-2 vectors pairwise (a0/a4, a1/a5, a2/a6, a3/a7);
 * output[k] ends up holding column k of the input. */
52 output[0] = vec_mergeh( a0, a4 ); /* [ 00 10 20 30 40 50 60 70 ]*/
53 output[1] = vec_mergel( a0, a4 ); /* [ 01 11 21 31 41 51 61 71 ]*/
54 output[2] = vec_mergeh( a1, a5 ); /* [ 02 12 22 32 42 52 62 72 ]*/
55 output[3] = vec_mergel( a1, a5 ); /* [ 03 13 23 33 43 53 63 73 ]*/
56 output[4] = vec_mergeh( a2, a6 ); /* [ 04 14 24 34 44 54 64 74 ]*/
57 output[5] = vec_mergel( a2, a6 ); /* [ 05 15 25 35 45 55 65 75 ]*/
58 output[6] = vec_mergeh( a3, a7 ); /* [ 06 16 26 36 46 56 66 76 ]*/
59 output[7] = vec_mergel( a3, a7 ); /* [ 07 17 27 37 47 57 67 77 ]*/
64 /***************************************************************
66 * Copyright: (c) Copyright Motorola Inc. 1998
68 * Date: April 20, 1998
70 * Macro: IDCT_Transform
72 * Description: Discrete Cosine Transform implemented by the
73 * Scaled Chen (III) Algorithm developed by Haifa
74 * Research Lab. The major difference between this
75 * algorithm and the Scaled Chen (I) is that
76 * certain multiply-subtracts are replaced by
77 * multiply adds. A full description of the
78 * Scaled Chen (I) algorithm can be found in:
79 * W.C.Chen, C.H.Smith and S.C.Fralick, "A Fast
80 * Computational Algorithm for the Discrete Cosine
81 * Transform", IEEE Transactions on Communications,
82 * Vol. COM-25, No. 9, pp 1004-1009, Sept. 1977.
84 * Inputs: vx : array of vector short
85 * t0-t9 : temporary vector variables set up by caller
92 * zero : an array of zero elements
94 * Outputs: vy : array of vector short
96 **************************************************************/
98 #define IDCT_Transform(vx,vy) \
101 t9 = vec_mradds( a1, vx[1], zero ); /* t9 = (a1) * x1 */ \
102 t8 = vec_subs( t9, vx[7]); /* t8 = t9 - x7 = (a1) * x1 - x7 */ \
103 t1 = vec_mradds( a1, vx[7], vx[1] ); /* t1 = (a1) * x7 + x1 */ \
104 t7 = vec_mradds( a2, vx[5], vx[3] ); /* t7 = (a2) * x5 + x3 */ \
105 t3 = vec_mradds( ma2, vx[3], vx[5] );/* t3 = (-a2) * x3 + x5 */ \
108 t5 = vec_adds( vx[0], vx[4] ); /* t5 = x0 + x4 */ \
109 t0 = vec_subs( vx[0], vx[4] ); /* t0 = x0 - x4 */ \
110 t9 = vec_mradds( a0, vx[2], zero ); /* t9 = (a0) * x2 */ \
111 t4 = vec_subs( t9, vx[6] ); /* t4 = t9 - x6 = (a0) * x2 - x6 */ \
112 t2 = vec_mradds( a0, vx[6], vx[2] ); /* t2 = (a0) * x6 + x2 */ \
114 t6 = vec_adds( t8, t3 ); /* t6 = t8 + t3 */ \
115 t3 = vec_subs( t8, t3 ); /* t3 = t8 - t3 */ \
116 t8 = vec_subs( t1, t7 ); /* t8 = t1 - t7 */ \
117 t1 = vec_adds( t1, t7 ); /* t1 = t1 + t7 */ \
120 t7 = vec_adds( t5, t2 ); /* t7 = t5 + t2 */ \
121 t2 = vec_subs( t5, t2 ); /* t2 = t5 - t2 */ \
122 t5 = vec_adds( t0, t4 ); /* t5 = t0 + t4 */ \
123 t0 = vec_subs( t0, t4 ); /* t0 = t0 - t4 */ \
125 t4 = vec_subs( t8, t3 ); /* t4 = t8 - t3 */ \
126 t3 = vec_adds( t8, t3 ); /* t3 = t8 + t3 */ \
129 vy[0] = vec_adds( t7, t1 ); /* y0 = t7 + t1 */ \
130 vy[7] = vec_subs( t7, t1 ); /* y7 = t7 - t1 */ \
131 vy[1] = vec_mradds( c4, t3, t5 ); /* y1 = (c4) * t3 + t5 */ \
132 vy[6] = vec_mradds( mc4, t3, t5 ); /* y6 = (-c4) * t3 + t5 */ \
133 vy[2] = vec_mradds( c4, t4, t0 ); /* y2 = (c4) * t4 + t0 */ \
134 vy[5] = vec_mradds( mc4, t4, t0 ); /* y5 = (-c4) * t4 + t0 */ \
135 vy[3] = vec_adds( t2, t6 ); /* y3 = t2 + t6 */ \
136 vy[4] = vec_subs( t2, t6 ); /* y4 = t2 - t6 */
139 /* Pre-Scaling matrix -- scaled by 1.
 * One vector per input row: IDCT() combines row i of its input with
 * PreScale[i] through vec_mradds before the first transform pass.
 * Rows 0/4, 1/7, 2/6 and 3/5 hold identical values.
 * NOTE(review): the closing of this initializer is not visible in this
 * listing -- presumably dropped with the blank lines; confirm against the
 * original source. */
140 static vector signed short PreScale[8] = {
141 (vector signed short)( 4095, 5681, 5351, 4816, 4095, 4816, 5351, 5681 ),
142 (vector signed short)( 5681, 7880, 7422, 6680, 5681, 6680, 7422, 7880 ),
143 (vector signed short)( 5351, 7422, 6992, 6292, 5351, 6292, 6992, 7422 ),
144 (vector signed short)( 4816, 6680, 6292, 5663, 4816, 5663, 6292, 6680 ),
145 (vector signed short)( 4095, 5681, 5351, 4816, 4095, 4816, 5351, 5681 ),
146 (vector signed short)( 4816, 6680, 6292, 5663, 4816, 5663, 6292, 6680 ),
147 (vector signed short)( 5351, 7422, 6992, 6292, 5351, 6292, 6992, 7422 ),
148 (vector signed short)( 5681, 7880, 7422, 6680, 5681, 6680, 7422, 7880 )
151 /***************************************************************
153 * Copyright: (c) Copyright Motorola Inc. 1998
155 * Date: April 17, 1998
159 * Description: Scaled Chen (III) algorithm for IDCT
160 * Arithmetic is 16-bit fixed point.
162 * Inputs: input - Pointer to input data (short), which
163 * must be between -2048 to +2047.
164 * It is assumed that the allocated array
165 * has been 128-bit aligned and contains
166 * 8x8 short elements.
168 * Outputs: output - Pointer to output area for the transformed
169 * data. The output values are between -255
170 * and 255. It is assumed that a 128-bit
171 * aligned 8x8 array of short has been
176 ***************************************************************/
178 static __inline__ void IDCT(short *input, short *output) {
180 vector signed short t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
181 vector signed short a0, a1, a2, ma2, c4, mc4, zero;
182 vector signed short vx[8], vy[8];
183 vector signed short *vec_ptr; /* used for conversion between
184 arrays of short and vector
185 signed short array. */
188 /* Load the multiplication constants. Note: these constants
189 * could all be loaded directly ( like zero case ), but using the
190 * SpecialConstants approach causes vsplth instructions to be
191 * generated instead of lvx which is more efficient given the remainder
192 * of the instruction mix.
194 vector signed short SpecialConstants =
195 (vector signed short)( 23170, 13573, 6518, 21895, -23170, -21895, 0 , 0
198 c4 = vec_splat( SpecialConstants, 0 ); /* c4 = cos(4*pi/16) */
199 a0 = vec_splat( SpecialConstants, 1 ); /* a0 = c6/c2 */
200 a1 = vec_splat( SpecialConstants, 2 ); /* a1 = c7/c1 */
201 a2 = vec_splat( SpecialConstants, 3 ); /* a2 = c5/c3 */
202 mc4 = vec_splat( SpecialConstants, 4 ); /* -c4 */
203 ma2 = vec_splat( SpecialConstants, 5 ); /* -a2 */
204 zero = (vector signed short)(0);
206 /* Load the rows of input data and Pre-Scale them. */
207 vec_ptr = ( vector signed short * ) input;
208 vx[0] = vec_mradds( vec_ptr[0], PreScale[0], zero );
209 vx[1] = vec_mradds( vec_ptr[1], PreScale[1], zero );
210 vx[2] = vec_mradds( vec_ptr[2], PreScale[2], zero );
211 vx[3] = vec_mradds( vec_ptr[3], PreScale[3], zero );
212 vx[4] = vec_mradds( vec_ptr[4], PreScale[4], zero );
213 vx[5] = vec_mradds( vec_ptr[5], PreScale[5], zero );
214 vx[6] = vec_mradds( vec_ptr[6], PreScale[6], zero );
215 vx[7] = vec_mradds( vec_ptr[7], PreScale[7], zero );
217 /* Perform IDCT first on the 8 columns */
218 IDCT_Transform( vx, vy );
220 /* Transpose matrix to work on rows */
221 Matrix_Transpose( vy, vx );
223 /* Perform IDCT next on the 8 rows */
224 IDCT_Transform( vx, vy );
226 /* Post-scale and store result. */
227 vec_ptr = (vector signed short *) output;