git.sesse.net Git - x264/blob - common/csp.c
SSE2 pixel comparison functions
[x264] / common / csp.c
1 /*****************************************************************************
2  * csp.c: h264 encoder library
3  *****************************************************************************
4  * Copyright (C) 2004 Laurent Aimar
5  * $Id: csp.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $
6  *
7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <string.h>
27
28 #include "common.h"
29
/* Copy h rows of w bytes each from src to dst.
 * i_dst / i_src are the byte distances between the starts of consecutive
 * rows in the destination and the source. Nothing is copied when h <= 0. */
static inline void plane_copy( uint8_t *dst, int i_dst,
                               uint8_t *src, int i_src, int w, int h)
{
    int row;

    for( row = 0; row < h; row++ )
    {
        memcpy( dst, src, w );
        dst += i_dst;
        src += i_src;
    }
}
/* Copy h rows of w bytes from src to dst with the row order reversed:
 * the copy starts at source row h-1 and steps backwards through the source
 * while the destination is filled from its first row onwards. */
static inline void plane_copy_vflip( uint8_t *dst, int i_dst,
                                     uint8_t *src, int i_src, int w, int h)
{
    uint8_t *last_row = src + ( h - 1 ) * i_src;

    plane_copy( dst, i_dst, last_row, -i_src, w, h );
}
45
/* Halve the number of rows of a plane.
 * Produces h rows of w bytes; every output byte is the rounded-up average
 * of the source byte in the same column and the byte i_src further on
 * (the next source row). Two source rows are consumed per output row. */
static inline void plane_subsamplev2( uint8_t *dst, int i_dst,
                                      uint8_t *src, int i_src, int w, int h)
{
    int row;
    int x;

    for( row = 0; row < h; row++ )
    {
        for( x = 0; x < w; x++ )
        {
            dst[x] = ( src[x] + src[x + i_src] + 1 ) >> 1;
        }
        dst += i_dst;
        src += 2 * i_src;
    }
}
63
/* Vertically flipped variant of plane_subsamplev2: starts at source row
 * 2*h-1 and walks the source backwards, so the first output row is built
 * from the last two source rows. */
static inline void plane_subsamplev2_vlip( uint8_t *dst, int i_dst,
                                           uint8_t *src, int i_src, int w, int h)
{
    uint8_t *last_row   = src + ( 2 * h - 1 ) * i_src;
    int      i_backward = -i_src;

    plane_subsamplev2( dst, i_dst, last_row, i_backward, w, h );
}
69
/* Halve a plane in both directions.
 * Produces h rows of w bytes; every output byte is the average of a 2x2
 * group of source bytes (two adjacent columns in one row and the same two
 * columns i_src bytes further on, i.e. in the next row).
 * The sum of the four samples gets +2 before the divide by four so that the
 * result is rounded to nearest, halves rounding up, in the same way
 * plane_subsamplev2 rounds its two-sample average. */
static inline void plane_subsamplehv2( uint8_t *dst, int i_dst,
                                       uint8_t *src, int i_src, int w, int h)
{
    for( ; h > 0; h-- )
    {
        uint8_t *d = dst;
        uint8_t *s = src;
        int     i;
        for( i = 0; i < w; i++ )
        {
            /* +2 is half of the divisor: round to nearest instead of
             * truncating most of the fractional values */
            *d++ = ( s[0] + s[1] + s[i_src] + s[i_src+1] + 2 ) >> 2;
            s += 2;
        }
        dst += i_dst;
        src += 2 * i_src;
    }
}
87
/* Vertically flipped variant of plane_subsamplehv2: starts at source row
 * 2*h-1 and walks the source backwards, so the first output row is built
 * from the last two source rows. */
static inline void plane_subsamplehv2_vlip( uint8_t *dst, int i_dst,
                                            uint8_t *src, int i_src, int w, int h)
{
    uint8_t *last_row   = src + ( 2 * h - 1 ) * i_src;
    int      i_backward = -i_src;

    plane_subsamplehv2( dst, i_dst, last_row, i_backward, w, h );
}
93
94 static void i420_to_i420( x264_frame_t *frm, x264_image_t *img,
95                           int i_width, int i_height )
96 {
97     if( img->i_csp & X264_CSP_VFLIP )
98     {
99         plane_copy_vflip( frm->plane[0], frm->i_stride[0],
100                           img->plane[0], img->i_stride[0],
101                           i_width, i_height );
102         plane_copy_vflip( frm->plane[1], frm->i_stride[1],
103                           img->plane[1], img->i_stride[1],
104                           i_width / 2, i_height / 2 );
105         plane_copy_vflip( frm->plane[2], frm->i_stride[2],
106                           img->plane[2], img->i_stride[2],
107                           i_width / 2, i_height / 2 );
108     }
109     else
110     {
111         plane_copy( frm->plane[0], frm->i_stride[0],
112                     img->plane[0], img->i_stride[0],
113                     i_width, i_height );
114         plane_copy( frm->plane[1], frm->i_stride[1],
115                     img->plane[1], img->i_stride[1],
116                     i_width / 2, i_height / 2 );
117         plane_copy( frm->plane[2], frm->i_stride[2],
118                     img->plane[2], img->i_stride[2],
119                     i_width / 2, i_height / 2 );
120     }
121 }
122
123 static void yv12_to_i420( x264_frame_t *frm, x264_image_t *img,
124                           int i_width, int i_height )
125 {
126     if( img->i_csp & X264_CSP_VFLIP )
127     {
128         plane_copy_vflip( frm->plane[0], frm->i_stride[0],
129                           img->plane[0], img->i_stride[0],
130                           i_width, i_height );
131         plane_copy_vflip( frm->plane[2], frm->i_stride[2],
132                           img->plane[1], img->i_stride[1],
133                           i_width / 2, i_height / 2 );
134         plane_copy_vflip( frm->plane[1], frm->i_stride[1],
135                           img->plane[2], img->i_stride[2],
136                           i_width / 2, i_height / 2 );
137     }
138     else
139     {
140         plane_copy( frm->plane[0], frm->i_stride[0],
141                     img->plane[0], img->i_stride[0],
142                     i_width, i_height );
143         plane_copy( frm->plane[2], frm->i_stride[2],
144                     img->plane[1], img->i_stride[1],
145                     i_width / 2, i_height / 2 );
146         plane_copy( frm->plane[1], frm->i_stride[1],
147                     img->plane[2], img->i_stride[2],
148                     i_width / 2, i_height / 2 );
149     }
150 }
151
152 static void i422_to_i420( x264_frame_t *frm, x264_image_t *img,
153                           int i_width, int i_height )
154 {
155     if( img->i_csp & X264_CSP_VFLIP )
156     {
157         plane_copy_vflip( frm->plane[0], frm->i_stride[0],
158                           img->plane[0], img->i_stride[0],
159                           i_width, i_height );
160
161         plane_subsamplev2_vlip( frm->plane[1], frm->i_stride[1],
162                                 img->plane[1], img->i_stride[1],
163                                 i_width / 2, i_height / 2 );
164         plane_subsamplev2_vlip( frm->plane[2], frm->i_stride[2],
165                                 img->plane[2], img->i_stride[2],
166                                 i_width / 2, i_height / 2 );
167     }
168     else
169     {
170         plane_copy( frm->plane[0], frm->i_stride[0],
171                     img->plane[0], img->i_stride[0],
172                     i_width, i_height );
173
174         plane_subsamplev2( frm->plane[1], frm->i_stride[1],
175                            img->plane[1], img->i_stride[1],
176                            i_width / 2, i_height / 2 );
177         plane_subsamplev2( frm->plane[2], frm->i_stride[2],
178                            img->plane[2], img->i_stride[2],
179                            i_width / 2, i_height / 2 );
180     }
181 }
182
183 static void i444_to_i420( x264_frame_t *frm, x264_image_t *img,
184                           int i_width, int i_height )
185 {
186     if( img->i_csp & X264_CSP_VFLIP )
187     {
188         plane_copy_vflip( frm->plane[0], frm->i_stride[0],
189                           img->plane[0], img->i_stride[0],
190                           i_width, i_height );
191
192         plane_subsamplehv2_vlip( frm->plane[1], frm->i_stride[1],
193                                  img->plane[1], img->i_stride[1],
194                                  i_width / 2, i_height / 2 );
195         plane_subsamplehv2_vlip( frm->plane[2], frm->i_stride[2],
196                                  img->plane[2], img->i_stride[2],
197                                  i_width / 2, i_height / 2 );
198     }
199     else
200     {
201         plane_copy( frm->plane[0], frm->i_stride[0],
202                     img->plane[0], img->i_stride[0],
203                     i_width, i_height );
204
205         plane_subsamplehv2( frm->plane[1], frm->i_stride[1],
206                             img->plane[1], img->i_stride[1],
207                             i_width / 2, i_height / 2 );
208         plane_subsamplehv2( frm->plane[2], frm->i_stride[2],
209                             img->plane[2], img->i_stride[2],
210                             i_width / 2, i_height / 2 );
211     }
212 }
213 static void yuyv_to_i420( x264_frame_t *frm, x264_image_t *img,
214                           int i_width, int i_height )
215 {
216     uint8_t *src = img->plane[0];
217     int     i_src= img->i_stride[0];
218
219     uint8_t *y   = frm->plane[0];
220     uint8_t *u   = frm->plane[1];
221     uint8_t *v   = frm->plane[2];
222
223     if( img->i_csp & X264_CSP_VFLIP )
224     {
225         src += ( i_height - 1 ) * i_src;
226         i_src = -i_src;
227     }
228
229     for( ; i_height > 0; i_height -= 2 )
230     {
231         uint8_t *ss = src;
232         uint8_t *yy = y;
233         uint8_t *uu = u;
234         uint8_t *vv = v;
235         int w;
236
237         for( w = i_width; w > 0; w -= 2 )
238         {
239             *yy++ = ss[0];
240             *yy++ = ss[2];
241
242             *uu++ = ( ss[1] + ss[1+i_src] + 1 ) >> 1;
243             *vv++ = ( ss[3] + ss[3+i_src] + 1 ) >> 1;
244
245             ss += 4;
246         }
247         src += i_src;
248         y += frm->i_stride[0];
249         u += frm->i_stride[1];
250         v += frm->i_stride[2];
251
252         ss = src;
253         yy = y;
254         for( w = i_width; w > 0; w -= 2 )
255         {
256             *yy++ = ss[0];
257             *yy++ = ss[2];
258             ss += 4;
259         }
260         src += i_src;
261         y += frm->i_stride[0];
262     }
263 }
264
/* Fixed-point RGB -> YUV coefficients (same values as in XviD).
 * FIX(f) turns a fractional coefficient into an integer scaled by 2^BITS,
 * rounded to nearest. */
#define BITS 8
#define FIX(f) ((int)((f) * (1 << BITS) + 0.5))

/* luma weights and offset */
#define Y_ADD 16
#define Y_R   FIX(0.257)
#define Y_G   FIX(0.504)
#define Y_B   FIX(0.098)

/* first chroma component: weights and offset */
#define U_ADD 128
#define U_R   FIX(0.148)
#define U_G   FIX(0.291)
#define U_B   FIX(0.439)

/* second chroma component: weights and offset */
#define V_ADD 128
#define V_R   FIX(0.439)
#define V_G   FIX(0.368)
#define V_B   FIX(0.071)
/* RGB_TO_I420( name, POS_R, POS_G, POS_B, S_RGB )
 * Expands to the definition of
 *     static void name( x264_frame_t *frm, x264_image_t *img,
 *                       int i_width, int i_height )
 * which converts the packed pixels in img->plane[0]: one luma sample per
 * pixel is written to frm->plane[0], and one chroma sample per 2x2 group of
 * pixels is written to each of frm->plane[1] and frm->plane[2].
 *
 *   POS_R, POS_G, POS_B  byte offset of each component inside a packed pixel
 *   S_RGB                size of one packed pixel in bytes
 *
 * The R, G and B values of the four pixels of a group are summed; the two
 * extra bits in the chroma shift (BITS+2) divide that sum by four.
 * When X264_CSP_VFLIP is set in img->i_csp the source is read from its last
 * row backwards.
 * NOTE(review): the loops step by two in both directions, so even i_width
 * and i_height appear to be assumed -- confirm with the callers.
 *
 * The chroma offset is added before the shift so the shifted value is never
 * negative (right-shifting a negative int is implementation-defined in C).
 * Where ">>" on a negative int is an arithmetic shift, the result is
 * identical to ADD + (x >> (BITS+2)).
 */
#define RGB_TO_I420( name, POS_R, POS_G, POS_B, S_RGB ) \
static void name( x264_frame_t *frm, x264_image_t *img, \
                  int i_width, int i_height )           \
{                                                       \
    uint8_t *src = img->plane[0];                       \
    int     i_src= img->i_stride[0];                    \
    int     i_y  = frm->i_stride[0];                    \
    uint8_t *y   = frm->plane[0];                       \
    uint8_t *u   = frm->plane[1];                       \
    uint8_t *v   = frm->plane[2];                       \
                                                        \
    if( img->i_csp & X264_CSP_VFLIP )                   \
    {                                                   \
        src += ( i_height - 1 ) * i_src;                \
        i_src = -i_src;                                 \
    }                                                   \
                                                        \
    for(  ; i_height > 0; i_height -= 2 )               \
    {                                                   \
        uint8_t *ss = src;                              \
        uint8_t *yy = y;                                \
        uint8_t *uu = u;                                \
        uint8_t *vv = v;                                \
        int w;                                          \
                                                        \
        for( w = i_width; w > 0; w -= 2 )               \
        {                                               \
            int cr, cg, cb; /* component sums of the 2x2 group */ \
            int r, g, b;                                \
                                                        \
            /* Luma: left column, first row */          \
            cr = r = ss[POS_R];                         \
            cg = g = ss[POS_G];                         \
            cb = b = ss[POS_B];                         \
            yy[0] = Y_ADD + ((Y_R * r + Y_G * g + Y_B * b) >> BITS);    \
                                                        \
            /* left column, second row */               \
            cr+= r = ss[POS_R+i_src];                   \
            cg+= g = ss[POS_G+i_src];                   \
            cb+= b = ss[POS_B+i_src];                   \
            yy[i_y] = Y_ADD + ((Y_R * r + Y_G * g + Y_B * b) >> BITS);  \
            yy++;                                       \
            ss += S_RGB;                                \
                                                        \
            /* right column, first row */               \
            cr+= r = ss[POS_R];                         \
            cg+= g = ss[POS_G];                         \
            cb+= b = ss[POS_B];                         \
            yy[0] = Y_ADD + ((Y_R * r + Y_G * g + Y_B * b) >> BITS);    \
                                                        \
            /* right column, second row */              \
            cr+= r = ss[POS_R+i_src];                   \
            cg+= g = ss[POS_G+i_src];                   \
            cb+= b = ss[POS_B+i_src];                   \
            yy[i_y] = Y_ADD + ((Y_R * r + Y_G * g + Y_B * b) >> BITS);  \
            yy++;                                       \
            ss += S_RGB;                                \
                                                        \
            /* Chroma: offset folded in before the shift, operand >= 0 */ \
            *uu++ = (uint8_t)( ( (U_ADD << (BITS+2)) - U_R * cr - U_G * cg + U_B * cb ) >> (BITS+2) ); \
            *vv++ = (uint8_t)( ( (V_ADD << (BITS+2)) + V_R * cr - V_G * cg - V_B * cb ) >> (BITS+2) ); \
        }                                               \
                                                        \
        src += 2*i_src;                                 \
        y += 2*frm->i_stride[0];                        \
        u += frm->i_stride[1];                          \
        v += frm->i_stride[2];                          \
    }                                                   \
}
351
352 RGB_TO_I420( rgb_to_i420,  0, 1, 2, 3 );
353 RGB_TO_I420( bgr_to_i420,  2, 1, 0, 3 );
354 RGB_TO_I420( bgra_to_i420, 2, 1, 0, 4 );
355
356 void x264_csp_init( int cpu, int i_csp, x264_csp_function_t *pf )
357 {
358     switch( i_csp )
359     {
360         case X264_CSP_I420:
361             pf->i420 = i420_to_i420;
362             pf->i422 = i422_to_i420;
363             pf->i444 = i444_to_i420;
364             pf->yv12 = yv12_to_i420;
365             pf->yuyv = yuyv_to_i420;
366             pf->rgb  = rgb_to_i420;
367             pf->bgr  = bgr_to_i420;
368             pf->bgra = bgra_to_i420;
369             break;
370
371         default:
372             /* For now, can't happen */
373             fprintf( stderr, "arg in x264_csp_init\n" );
374             exit( -1 );
375             break;
376     }
377 }
378