]> git.sesse.net Git - x264/blob - common/csp.c
* Made -DNEED_ALTIVEC unnecessary, thanks to Guillaume Poirier.
[x264] / common / csp.c
1 /*****************************************************************************
2  * csp.c: h264 encoder library
3  *****************************************************************************
4  * Copyright (C) 2004 Laurent Aimar
5  * $Id: csp.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $
6  *
7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 #include <stdio.h>
25 #include <string.h>
26
27 #include "common.h"
28
29 static inline void plane_copy_vflip( x264_mc_functions_t *mc,
30                                      uint8_t *dst, int i_dst,
31                                      uint8_t *src, int i_src, int w, int h)
32 {
33     mc->plane_copy( dst, i_dst, src + (h -1)*i_src, -i_src, w, h );
34 }
35
/* Halve a plane vertically.
 * Produces h output rows of w bytes each.  Output byte x of a row is the
 * rounded-up mean of src[x] and src[x + i_src]; afterwards the source moves
 * on by two strides and the destination by i_dst.  i_src may be negative. */
static inline void plane_subsamplev2( uint8_t *dst, int i_dst,
                                      uint8_t *src, int i_src, int w, int h)
{
    while( h > 0 )
    {
        int x;

        for( x = 0; x < w; x++ )
            dst[x] = ( src[x] + src[x + i_src] + 1 ) >> 1;

        dst += i_dst;
        src += 2 * i_src;
        h--;
    }
}
53
/* Vertically flipped variant of plane_subsamplev2: starts on source row
 * 2*h-1 (the last of the 2*h rows consumed) and passes a negated stride. */
static inline void plane_subsamplev2_vlip( uint8_t *dst, int i_dst,
                                           uint8_t *src, int i_src, int w, int h)
{
    uint8_t *bottom = src + (2 * h - 1) * i_src;

    plane_subsamplev2( dst, i_dst, bottom, -i_src, w, h );
}
59
/* Halve a plane in both directions.
 * Produces h output rows of w bytes.  Output byte x is computed from the
 * 2x2 source block at columns 2x, 2x+1 on the current row and the row i_src
 * away: the four bytes are summed, 1 is added, and the result is shifted
 * right by 2.  The source then advances by two strides, dst by i_dst. */
static inline void plane_subsamplehv2( uint8_t *dst, int i_dst,
                                       uint8_t *src, int i_src, int w, int h)
{
    while( h > 0 )
    {
        int x;

        for( x = 0; x < w; x++ )
        {
            const uint8_t *blk = src + 2 * x;

            dst[x] = ( blk[0] + blk[1] + blk[i_src] + blk[i_src+1] + 1 ) >> 2;
        }

        dst += i_dst;
        src += 2 * i_src;
        h--;
    }
}
77
/* Vertically flipped variant of plane_subsamplehv2: starts on source row
 * 2*h-1 (the last of the 2*h rows consumed) and passes a negated stride. */
static inline void plane_subsamplehv2_vlip( uint8_t *dst, int i_dst,
                                            uint8_t *src, int i_src, int w, int h)
{
    uint8_t *bottom = src + (2 * h - 1) * i_src;

    plane_subsamplehv2( dst, i_dst, bottom, -i_src, w, h );
}
83
84 static void i420_to_i420( x264_mc_functions_t *mc,
85                           x264_frame_t *frm, x264_image_t *img,
86                           int i_width, int i_height )
87 {
88     if( img->i_csp & X264_CSP_VFLIP )
89     {
90         plane_copy_vflip( mc, frm->plane[0], frm->i_stride[0],
91                           img->plane[0], img->i_stride[0],
92                           i_width, i_height );
93         plane_copy_vflip( mc, frm->plane[1], frm->i_stride[1],
94                           img->plane[1], img->i_stride[1],
95                           i_width / 2, i_height / 2 );
96         plane_copy_vflip( mc, frm->plane[2], frm->i_stride[2],
97                           img->plane[2], img->i_stride[2],
98                           i_width / 2, i_height / 2 );
99     }
100     else
101     {
102         mc->plane_copy( frm->plane[0], frm->i_stride[0],
103                         img->plane[0], img->i_stride[0],
104                         i_width, i_height );
105         mc->plane_copy( frm->plane[1], frm->i_stride[1],
106                         img->plane[1], img->i_stride[1],
107                         i_width / 2, i_height / 2 );
108         mc->plane_copy( frm->plane[2], frm->i_stride[2],
109                         img->plane[2], img->i_stride[2],
110                         i_width / 2, i_height / 2 );
111     }
112 }
113
114 static void yv12_to_i420( x264_mc_functions_t *mc,
115                           x264_frame_t *frm, x264_image_t *img,
116                           int i_width, int i_height )
117 {
118     if( img->i_csp & X264_CSP_VFLIP )
119     {
120         plane_copy_vflip( mc, frm->plane[0], frm->i_stride[0],
121                           img->plane[0], img->i_stride[0],
122                           i_width, i_height );
123         plane_copy_vflip( mc, frm->plane[2], frm->i_stride[2],
124                           img->plane[1], img->i_stride[1],
125                           i_width / 2, i_height / 2 );
126         plane_copy_vflip( mc, frm->plane[1], frm->i_stride[1],
127                           img->plane[2], img->i_stride[2],
128                           i_width / 2, i_height / 2 );
129     }
130     else
131     {
132         mc->plane_copy( frm->plane[0], frm->i_stride[0],
133                         img->plane[0], img->i_stride[0],
134                         i_width, i_height );
135         mc->plane_copy( frm->plane[2], frm->i_stride[2],
136                         img->plane[1], img->i_stride[1],
137                         i_width / 2, i_height / 2 );
138         mc->plane_copy( frm->plane[1], frm->i_stride[1],
139                         img->plane[2], img->i_stride[2],
140                         i_width / 2, i_height / 2 );
141     }
142 }
143
144 static void i422_to_i420( x264_mc_functions_t *mc,
145                           x264_frame_t *frm, x264_image_t *img,
146                           int i_width, int i_height )
147 {
148     if( img->i_csp & X264_CSP_VFLIP )
149     {
150         plane_copy_vflip( mc, frm->plane[0], frm->i_stride[0],
151                           img->plane[0], img->i_stride[0],
152                           i_width, i_height );
153
154         plane_subsamplev2_vlip( frm->plane[1], frm->i_stride[1],
155                                 img->plane[1], img->i_stride[1],
156                                 i_width / 2, i_height / 2 );
157         plane_subsamplev2_vlip( frm->plane[2], frm->i_stride[2],
158                                 img->plane[2], img->i_stride[2],
159                                 i_width / 2, i_height / 2 );
160     }
161     else
162     {
163         mc->plane_copy( frm->plane[0], frm->i_stride[0],
164                         img->plane[0], img->i_stride[0],
165                         i_width, i_height );
166
167         plane_subsamplev2( frm->plane[1], frm->i_stride[1],
168                            img->plane[1], img->i_stride[1],
169                            i_width / 2, i_height / 2 );
170         plane_subsamplev2( frm->plane[2], frm->i_stride[2],
171                            img->plane[2], img->i_stride[2],
172                            i_width / 2, i_height / 2 );
173     }
174 }
175
176 static void i444_to_i420( x264_mc_functions_t *mc,
177                           x264_frame_t *frm, x264_image_t *img,
178                           int i_width, int i_height )
179 {
180     if( img->i_csp & X264_CSP_VFLIP )
181     {
182         plane_copy_vflip( mc, frm->plane[0], frm->i_stride[0],
183                           img->plane[0], img->i_stride[0],
184                           i_width, i_height );
185
186         plane_subsamplehv2_vlip( frm->plane[1], frm->i_stride[1],
187                                  img->plane[1], img->i_stride[1],
188                                  i_width / 2, i_height / 2 );
189         plane_subsamplehv2_vlip( frm->plane[2], frm->i_stride[2],
190                                  img->plane[2], img->i_stride[2],
191                                  i_width / 2, i_height / 2 );
192     }
193     else
194     {
195         mc->plane_copy( frm->plane[0], frm->i_stride[0],
196                         img->plane[0], img->i_stride[0],
197                         i_width, i_height );
198
199         plane_subsamplehv2( frm->plane[1], frm->i_stride[1],
200                             img->plane[1], img->i_stride[1],
201                             i_width / 2, i_height / 2 );
202         plane_subsamplehv2( frm->plane[2], frm->i_stride[2],
203                             img->plane[2], img->i_stride[2],
204                             i_width / 2, i_height / 2 );
205     }
206 }
207 static void yuyv_to_i420( x264_mc_functions_t *mc,
208                           x264_frame_t *frm, x264_image_t *img,
209                           int i_width, int i_height )
210 {
211     uint8_t *src = img->plane[0];
212     int     i_src= img->i_stride[0];
213
214     uint8_t *y   = frm->plane[0];
215     uint8_t *u   = frm->plane[1];
216     uint8_t *v   = frm->plane[2];
217
218     if( img->i_csp & X264_CSP_VFLIP )
219     {
220         src += ( i_height - 1 ) * i_src;
221         i_src = -i_src;
222     }
223
224     for( ; i_height > 0; i_height -= 2 )
225     {
226         uint8_t *ss = src;
227         uint8_t *yy = y;
228         uint8_t *uu = u;
229         uint8_t *vv = v;
230         int w;
231
232         for( w = i_width; w > 0; w -= 2 )
233         {
234             *yy++ = ss[0];
235             *yy++ = ss[2];
236
237             *uu++ = ( ss[1] + ss[1+i_src] + 1 ) >> 1;
238             *vv++ = ( ss[3] + ss[3+i_src] + 1 ) >> 1;
239
240             ss += 4;
241         }
242         src += i_src;
243         y += frm->i_stride[0];
244         u += frm->i_stride[1];
245         v += frm->i_stride[2];
246
247         ss = src;
248         yy = y;
249         for( w = i_width; w > 0; w -= 2 )
250         {
251             *yy++ = ss[0];
252             *yy++ = ss[2];
253             ss += 4;
254         }
255         src += i_src;
256         y += frm->i_stride[0];
257     }
258 }
259
/* Fixed-point coefficients; same values as in XviD.
 * BITS is the number of fractional bits.  FIX(f) scales f by 2^BITS and adds
 * 0.5 before the conversion to int truncates. */
#define BITS    8
#define FIX(f)  ((int)((f) * (1 << BITS) + 0.5))

/* constant offsets */
#define Y_ADD   16
#define U_ADD   128
#define V_ADD   128

/* weights of the R, G and B components for Y */
#define Y_R     FIX(0.257)
#define Y_G     FIX(0.504)
#define Y_B     FIX(0.098)

/* weights of the R, G and B components for U */
#define U_R     FIX(0.148)
#define U_G     FIX(0.291)
#define U_B     FIX(0.439)

/* weights of the R, G and B components for V */
#define V_R     FIX(0.439)
#define V_G     FIX(0.368)
#define V_B     FIX(0.071)
/* Generate a static converter `name` from one packed source plane
 * (img->plane[0]) to the three planes of frm.
 *   POS_R, POS_G, POS_B  byte offsets of the components inside one pixel
 *   S_RGB                number of bytes from one pixel to the next
 * Each pass of the column loop handles a 2x2 pixel block: four Y samples are
 * written (two on the current output row, two on the row i_y further on) and
 * the component sums over the four pixels produce one U and one V sample.
 * With X264_CSP_VFLIP set in img->i_csp the source is walked from its last
 * line backwards.  mc is not used by the generated function. */
#define RGB_TO_I420( name, POS_R, POS_G, POS_B, S_RGB ) \
static void name( x264_mc_functions_t *mc,              \
                  x264_frame_t *frm, x264_image_t *img, \
                  int i_width, int i_height )           \
{                                                       \
    uint8_t *line  = img->plane[0];                     \
    int     pitch  = img->i_stride[0];                  \
    int     i_y    = frm->i_stride[0];                  \
    uint8_t *dst_y = frm->plane[0];                     \
    uint8_t *dst_u = frm->plane[1];                     \
    uint8_t *dst_v = frm->plane[2];                     \
                                                        \
    if( img->i_csp & X264_CSP_VFLIP )                   \
    {                                                   \
        line += ( i_height - 1 ) * pitch;               \
        pitch = -pitch;                                 \
    }                                                   \
                                                        \
    while( i_height > 0 )                               \
    {                                                   \
        uint8_t *px = line;                             \
        uint8_t *py = dst_y;                            \
        uint8_t *pu = dst_u;                            \
        uint8_t *pv = dst_v;                            \
        int w;                                          \
                                                        \
        for( w = i_width; w > 0; w -= 2 )               \
        {                                               \
            int sum_r = 0, sum_g = 0, sum_b = 0;        \
            int col;                                    \
                                                        \
            /* Luma: two columns, two rows each */      \
            for( col = 0; col < 2; col++ )              \
            {                                           \
                int r, g, b;                            \
                                                        \
                r = px[POS_R];                          \
                g = px[POS_G];                          \
                b = px[POS_B];                          \
                sum_r += r;                             \
                sum_g += g;                             \
                sum_b += b;                             \
                py[0] = Y_ADD + ((Y_R * r + Y_G * g + Y_B * b) >> BITS);    \
                                                        \
                r = px[POS_R+pitch];                    \
                g = px[POS_G+pitch];                    \
                b = px[POS_B+pitch];                    \
                sum_r += r;                             \
                sum_g += g;                             \
                sum_b += b;                             \
                py[i_y] = Y_ADD + ((Y_R * r + Y_G * g + Y_B * b) >> BITS);  \
                                                        \
                py++;                                   \
                px += S_RGB;                            \
            }                                           \
                                                        \
            /* Chroma: from the sums over the 2x2 block */ \
            *pu++ = (uint8_t)(U_ADD + ((-U_R * sum_r - U_G * sum_g + U_B * sum_b) >> (BITS+2)) ); \
            *pv++ = (uint8_t)(V_ADD + (( V_R * sum_r - V_G * sum_g - V_B * sum_b) >> (BITS+2)) ); \
        }                                               \
                                                        \
        line  += 2*pitch;                               \
        dst_y += 2*frm->i_stride[0];                    \
        dst_u += frm->i_stride[1];                      \
        dst_v += frm->i_stride[2];                      \
        i_height -= 2;                                  \
    }                                                   \
}
347
348 RGB_TO_I420( rgb_to_i420,  0, 1, 2, 3 );
349 RGB_TO_I420( bgr_to_i420,  2, 1, 0, 3 );
350 RGB_TO_I420( bgra_to_i420, 2, 1, 0, 4 );
351
352 void x264_csp_init( int cpu, int i_csp, x264_csp_function_t *pf )
353 {
354     switch( i_csp )
355     {
356         case X264_CSP_I420:
357             pf->convert[X264_CSP_I420] = i420_to_i420;
358             pf->convert[X264_CSP_I422] = i422_to_i420;
359             pf->convert[X264_CSP_I444] = i444_to_i420;
360             pf->convert[X264_CSP_YV12] = yv12_to_i420;
361             pf->convert[X264_CSP_YUYV] = yuyv_to_i420;
362             pf->convert[X264_CSP_RGB ] =  rgb_to_i420;
363             pf->convert[X264_CSP_BGR ] =  bgr_to_i420;
364             pf->convert[X264_CSP_BGRA] = bgra_to_i420;
365             break;
366
367         default:
368             /* For now, can't happen */
369             fprintf( stderr, "arg in x264_csp_init\n" );
370             exit( -1 );
371             break;
372     }
373 }
374