/*****************************************************************************
 * ppccommon.h: h264 encoder
 *****************************************************************************
 * Copyright (C) 2003 Laurent Aimar
 * $Id: ppccommon.h,v 1.1 2004/06/03 19:27:07 fenrir Exp $
 *
 * Authors: Eric Petit <titer@m0k.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/

/* Handy shorthand for the AltiVec vector types */
#define vector_u8_t  vector unsigned char
#define vector_s16_t vector signed short
#define vector_u32_t vector unsigned int
#define vector_s32_t vector signed int

/* Declare a zero vector once, then reinterpret it per element type */
#define LOAD_ZERO    vector_s32_t zero = vec_splat_s32( 0 )
#define zero_u8      (vector_u8_t)  zero
#define zero_s16     (vector_s16_t) zero
#define zero_s32     (vector_s32_t) zero

/* Zero-extend the 8 leftmost unsigned bytes of a to signed shorts
   (big-endian: the zero byte becomes the high byte of each short).
   Requires LOAD_ZERO in scope. */
#define CONVERT_U8_TO_S16( a ) \
    a = (vector_s16_t) vec_mergeh( zero_u8, (vector_u8_t) a )

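/* A minimal usage sketch (hypothetical names, not from the original
 * source): load 8 pixels and widen them for 16-bit arithmetic.
 *
 *     LOAD_ZERO;
 *     vector_u8_t  pix8;
 *     vector_s16_t pix16;
 *     LOAD_8( p_pix, pix8 );            // LOAD_8 is defined below
 *     pix16 = (vector_s16_t) pix8;
 *     CONVERT_U8_TO_S16( pix16 );       // 8 pixels as signed shorts
 */
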
/* Macros to load aligned or unaligned data without risking buffer
   overflows: vec_ld ignores the low four address bits, so an unaligned
   load fetches the two enclosing 16-byte blocks and vec_perm shifts
   the wanted bytes into place. */
#define LOAD_16( p, v )                                \
    if( (int) p & 0xF )                                \
    {                                                  \
        v = vec_perm( vec_ld( 0, p ), vec_ld( 16, p ), \
                      vec_lvsl( 0, p ) );              \
    }                                                  \
    else                                               \
    {                                                  \
        v = vec_ld( 0, p );                            \
    }

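/* Usage sketch (hypothetical names, not from the original source):
 * fetch one 16-pixel row from a strided frame buffer, aligned or not.
 *
 *     vector_u8_t row;
 *     LOAD_16( &p_src[y * i_stride], row );
 */
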
/* Load 8 bytes; when the offset within the block is at most 8 the data
   cannot cross into the next 16-byte block, so the second vec_ld (which
   could read past the end of the buffer) is replaced by the zero vector */
#define LOAD_8( p, v )                                             \
    if( !( (int) p & 0xF ) )                                       \
    {                                                              \
        v = vec_ld( 0, p );                                        \
    }                                                              \
    else if( ( (int) p & 0xF ) < 9 )                               \
    {                                                              \
        v = vec_perm( vec_ld( 0, p ), (vector unsigned char) zero, \
                      vec_lvsl( 0, p ) );                          \
    }                                                              \
    else                                                           \
    {                                                              \
        v = vec_perm( vec_ld( 0, p ), vec_ld( 16, p ),             \
                      vec_lvsl( 0, p ) );                          \
    }

/* Load 4 bytes; same idea as LOAD_8, with the cutoff at offset 12 */
#define LOAD_4( p, v )                                             \
    if( !( (int) p & 0xF ) )                                       \
    {                                                              \
        v = vec_ld( 0, p );                                        \
    }                                                              \
    else if( ( (int) p & 0xF ) < 13 )                              \
    {                                                              \
        v = vec_perm( vec_ld( 0, p ), (vector unsigned char) zero, \
                      vec_lvsl( 0, p ) );                          \
    }                                                              \
    else                                                           \
    {                                                              \
        v = vec_perm( vec_ld( 0, p ), vec_ld( 16, p ),             \
                      vec_lvsl( 0, p ) );                          \
    }

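/* Usage sketch (hypothetical names, not from the original source):
 * load two 8-pixel rows and compute their 16-bit difference.
 *
 *     LOAD_ZERO;
 *     vector_u8_t  a8, b8;
 *     vector_s16_t a16, b16, diff;
 *     LOAD_8( p_pix1, a8 );
 *     LOAD_8( p_pix2, b8 );
 *     a16 = (vector_s16_t) a8;  CONVERT_U8_TO_S16( a16 );
 *     b16 = (vector_s16_t) b8;  CONVERT_U8_TO_S16( b16 );
 *     diff = vec_sub( a16, b16 );
 */
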
/* Store aligned or unaligned data. The unaligned case reads both
   enclosing blocks, merges the new bytes in with a vec_lvsr-derived
   select mask, and writes the blocks back (a read-modify-write). */
#define STORE_16( v, p )                              \
    if( (int) p & 0xF )                               \
    {                                                 \
        vector unsigned char tmp1, tmp2;              \
        vector unsigned char align, mask;             \
        tmp1 = vec_ld( 0, p );                        \
        tmp2 = vec_ld( 16, p );                       \
        align = vec_lvsr( 0, p );                     \
        mask = vec_perm( (vector unsigned char) (0),  \
                         (vector unsigned char) (-1), \
                         align );                     \
        v = vec_perm( v, v, align );                  \
        tmp1 = vec_sel( tmp1, v, mask );              \
        tmp2 = vec_sel( v, tmp2, mask );              \
        vec_st( tmp1, 0, p );                         \
        vec_st( tmp2, 16, p );                        \
    }                                                 \
    else                                              \
    {                                                 \
        vec_st( v, 0, p );                            \
    }

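/* Usage sketch (hypothetical names, not from the original source):
 * copy a row to a possibly unaligned destination. Note the unaligned
 * path rewrites bytes of both enclosing blocks, so the surrounding
 * memory must not be concurrently owned by another thread.
 *
 *     vector_u8_t row;
 *     LOAD_16( p_src, row );
 *     STORE_16( row, p_dst );
 */
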
/* Transpose 8x8 (vector_s16_t [8]): three interleave passes; the
   transposed matrix ends up in b, a is clobbered as scratch */
#define TRANSPOSE8x8( a, b )               \
    (b)[0] = vec_mergeh( (a)[0], (a)[4] ); \
    (b)[1] = vec_mergel( (a)[0], (a)[4] ); \
    (b)[2] = vec_mergeh( (a)[1], (a)[5] ); \
    (b)[3] = vec_mergel( (a)[1], (a)[5] ); \
    (b)[4] = vec_mergeh( (a)[2], (a)[6] ); \
    (b)[5] = vec_mergel( (a)[2], (a)[6] ); \
    (b)[6] = vec_mergeh( (a)[3], (a)[7] ); \
    (b)[7] = vec_mergel( (a)[3], (a)[7] ); \
    (a)[0] = vec_mergeh( (b)[0], (b)[4] ); \
    (a)[1] = vec_mergel( (b)[0], (b)[4] ); \
    (a)[2] = vec_mergeh( (b)[1], (b)[5] ); \
    (a)[3] = vec_mergel( (b)[1], (b)[5] ); \
    (a)[4] = vec_mergeh( (b)[2], (b)[6] ); \
    (a)[5] = vec_mergel( (b)[2], (b)[6] ); \
    (a)[6] = vec_mergeh( (b)[3], (b)[7] ); \
    (a)[7] = vec_mergel( (b)[3], (b)[7] ); \
    (b)[0] = vec_mergeh( (a)[0], (a)[4] ); \
    (b)[1] = vec_mergel( (a)[0], (a)[4] ); \
    (b)[2] = vec_mergeh( (a)[1], (a)[5] ); \
    (b)[3] = vec_mergel( (a)[1], (a)[5] ); \
    (b)[4] = vec_mergeh( (a)[2], (a)[6] ); \
    (b)[5] = vec_mergel( (a)[2], (a)[6] ); \
    (b)[6] = vec_mergeh( (a)[3], (a)[7] ); \
    (b)[7] = vec_mergel( (a)[3], (a)[7] );

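/* Usage sketch (hypothetical names, not from the original source):
 *
 *     vector_s16_t dct[8], dctt[8];
 *     TRANSPOSE8x8( dct, dctt );    // dctt[i] now holds column i of dct
 */
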
/* Transpose 4x4 (vector_s16_t [4]): the result ends up in b with the
   four unused lanes of each vector cleared to zero. Requires LOAD_ZERO
   in scope; see the sketch after HADAMAR below for a typical use. */
#define TRANSPOSE4x4( a, b ) \
    (b)[0] = vec_mergeh( (a)[0], zero_s16 ); \
    (b)[1] = vec_mergeh( (a)[1], zero_s16 ); \
    (b)[2] = vec_mergeh( (a)[2], zero_s16 ); \
    (b)[3] = vec_mergeh( (a)[3], zero_s16 ); \
    (a)[0] = vec_mergeh( (b)[0], (b)[2] );   \
    (a)[1] = vec_mergel( (b)[0], (b)[2] );   \
    (a)[2] = vec_mergeh( (b)[1], (b)[3] );   \
    (a)[3] = vec_mergel( (b)[1], (b)[3] );   \
    (b)[0] = vec_mergeh( (a)[0], (a)[2] );   \
    (b)[1] = vec_mergel( (a)[0], (a)[2] );   \
    (b)[2] = vec_mergeh( (a)[1], (a)[3] );   \
    (b)[3] = vec_mergel( (a)[1], (a)[3] );

/* Hadamard butterfly (vector_s16_t [4]): one 1-D transform pass; the
   caller must declare the s01v, s23v, d01v and d23v temporaries */
#define HADAMAR( a, b ) \
    s01v   = vec_add( (a)[0], (a)[1] ); \
    s23v   = vec_add( (a)[2], (a)[3] ); \
    d01v   = vec_sub( (a)[0], (a)[1] ); \
    d23v   = vec_sub( (a)[2], (a)[3] ); \
    (b)[0] = vec_add( s01v, s23v );     \
    (b)[1] = vec_sub( s01v, s23v );     \
    (b)[2] = vec_sub( d01v, d23v );     \
    (b)[3] = vec_add( d01v, d23v );

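/* A sketch of the intended use (variable names are hypothetical, but
 * this mirrors how a 4x4 2-D Hadamard transform is typically built on
 * AltiVec): one butterfly pass per dimension, with a transpose in
 * between.
 *
 *     LOAD_ZERO;                            // needed by TRANSPOSE4x4
 *     vector_s16_t diff[4], tmp[4];
 *     vector_s16_t s01v, s23v, d01v, d23v;  // needed by HADAMAR
 *     HADAMAR( diff, tmp );                 // transform the rows
 *     TRANSPOSE4x4( tmp, diff );            // result back in diff
 *     HADAMAR( diff, tmp );                 // transform the columns
 */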