]> git.sesse.net Git - vlc/blob - modules/video_chroma/i420_yuy2.c
i420_yuy2.c: made the altivec optim a bit nicer (probably a bit faster, too)
[vlc] / modules / video_chroma / i420_yuy2.c
1 /*****************************************************************************
2  * i420_yuy2.c : YUV to YUV conversion module for vlc
3  *****************************************************************************
4  * Copyright (C) 2000, 2001 VideoLAN
5  * $Id: i420_yuy2.c,v 1.7 2004/01/27 03:22:03 titer Exp $
6  *
7  * Authors: Samuel Hocevar <sam@zoy.org>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27 #include <string.h>                                            /* strerror() */
28 #include <stdlib.h>                                      /* malloc(), free() */
29
30 #include <vlc/vlc.h>
31 #include <vlc/vout.h>
32
33 #include "i420_yuy2.h"
34
35 #define SRC_FOURCC  "I420,IYUV,YV12" /* source chromas named in the module description strings */
36
37 #if defined (MODULE_NAME_IS_i420_yuy2) /* plain C build: the only variant that lists Y211 */
38 #    define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
39 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx) /* MMX build: same list without Y211 */
40 #    define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
41 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec) /* AltiVec build: YUY2/YUNV only */
42 #    define DEST_FOURCC "YUY2,YUNV"
43 #endif
44
45 /*****************************************************************************
46  * Local and extern prototypes.
47  *****************************************************************************/
48 static int  Activate ( vlc_object_t * ); /* registered through set_callbacks() in the module descriptor */
49
50 static void I420_YUY2           ( vout_thread_t *, picture_t *, picture_t * ); /* built in every variant */
51 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec) /* these converters do not exist in the AltiVec build */
52 static void I420_YVYU           ( vout_thread_t *, picture_t *, picture_t * );
53 static void I420_UYVY           ( vout_thread_t *, picture_t *, picture_t * );
54 static void I420_IUYV           ( vout_thread_t *, picture_t *, picture_t * ); /* stub: only logs an error */
55 static void I420_cyuv           ( vout_thread_t *, picture_t *, picture_t * );
56 #endif
57 #if defined (MODULE_NAME_IS_i420_yuy2) /* Y211 output is only offered by the plain C build */
58 static void I420_Y211           ( vout_thread_t *, picture_t *, picture_t * );
59 #endif
60
61 #ifdef MODULE_NAME_IS_i420_yuy2_mmx /* constants assigned in the module descriptor; presumably read by the MMX macros - confirm in i420_yuy2.h */
62 static uint64_t i_00ffw;
63 static uint64_t i_80w;
64 #endif
65
66 /*****************************************************************************
67  * Module descriptor.
68  *****************************************************************************/
69 vlc_module_begin(); /* descriptor contents depend on which MODULE_NAME_IS_* variant is being compiled */
70 #if defined (MODULE_NAME_IS_i420_yuy2) /* plain C variant */
71     set_description( _("Conversions from " SRC_FOURCC " to " DEST_FOURCC) );
72     set_capability( "chroma", 80 ); /* lower value than the SIMD variants (100); presumably a selection priority - confirm */
73 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx) /* MMX variant */
74     set_description( _("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) );
75     set_capability( "chroma", 100 );
76     add_requirement( MMX );
77     /* Initialize MMX-specific constants */
78     i_00ffw = 0x00ff00ff00ff00ffULL; /* 0x00ff in each of the four 16-bit words */
79     i_80w   = 0x0000000080808080ULL; /* NOTE(review): only the low four bytes are 0x80 - confirm this is intended */
80 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec) /* AltiVec variant */
81     set_description(
82             _("Altivec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
83     set_capability( "chroma", 100 );
84     add_requirement( ALTIVEC );
85 #endif
86     set_callbacks( Activate, NULL ); /* Activate is the open callback; no second callback is supplied */
87 vlc_module_end();
88
89 /*****************************************************************************
90  * Activate: allocate a chroma function
91  *****************************************************************************
92  * This function allocates and initializes a chroma function
93  *****************************************************************************/
94 static int Activate( vlc_object_t *p_this )
95 {
96     vout_thread_t *p_vout = (vout_thread_t *)p_this;
97
98     if( p_vout->render.i_width & 1 || p_vout->render.i_height & 1 )
99     {
100         return -1;
101     }
102
103     switch( p_vout->render.i_chroma )
104     {
105         case VLC_FOURCC('Y','V','1','2'):
106         case VLC_FOURCC('I','4','2','0'):
107         case VLC_FOURCC('I','Y','U','V'):
108             switch( p_vout->output.i_chroma )
109             {
110                 case VLC_FOURCC('Y','U','Y','2'):
111                 case VLC_FOURCC('Y','U','N','V'):
112                     p_vout->chroma.pf_convert = I420_YUY2;
113                     break;
114
115 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
116                 case VLC_FOURCC('Y','V','Y','U'):
117                     p_vout->chroma.pf_convert = I420_YVYU;
118                     break;
119
120                 case VLC_FOURCC('U','Y','V','Y'):
121                 case VLC_FOURCC('U','Y','N','V'):
122                 case VLC_FOURCC('Y','4','2','2'):
123                     p_vout->chroma.pf_convert = I420_UYVY;
124                     break;
125
126                 case VLC_FOURCC('I','U','Y','V'):
127                     p_vout->chroma.pf_convert = I420_IUYV;
128                     break;
129
130                 case VLC_FOURCC('c','y','u','v'):
131                     p_vout->chroma.pf_convert = I420_cyuv;
132                     break;
133 #endif
134
135 #if defined (MODULE_NAME_IS_i420_yuy2)
136                 case VLC_FOURCC('Y','2','1','1'):
137                     p_vout->chroma.pf_convert = I420_Y211;
138                     break;
139 #endif
140
141                 default:
142                     return -1;
143             }
144             break;
145
146         default:
147             return -1;
148     }
149
150     return 0;
151 }
152
153 /* Following functions are local */
154
155 /*****************************************************************************
156  * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
157  *****************************************************************************/
158 static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source,
159                                               picture_t *p_dest )
160 {
161     uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
162     uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
163     uint8_t *p_u = p_source->U_PIXELS;
164     uint8_t *p_v = p_source->V_PIXELS;
165
166     int i_x, i_y;
167
168 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
169     const int i_source_margin = p_source->p->i_pitch
170                                  - p_source->p->i_visible_pitch;
171     const int i_dest_margin = p_dest->p->i_pitch
172                                - p_dest->p->i_visible_pitch;
173
174     for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
175     {
176         p_line1 = p_line2;
177         p_line2 += p_dest->p->i_pitch;
178
179         p_y1 = p_y2;
180         p_y2 += p_source->p[Y_PLANE].i_pitch;
181
182         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
183         {
184 #if defined (MODULE_NAME_IS_i420_yuy2)
185             C_YUV420_YUYV( );
186             C_YUV420_YUYV( );
187             C_YUV420_YUYV( );
188             C_YUV420_YUYV( );
189 #else
190             MMX_CALL( MMX_YUV420_YUYV );
191 #endif
192         }
193
194         p_y1 += i_source_margin;
195         p_y2 += i_source_margin;
196         p_line1 += i_dest_margin;
197         p_line2 += i_dest_margin;
198     }
199 #else
200 #define VEC_NEXT_LINES( ) \
201     p_line1  = p_line2; \
202     p_line2 += p_dest->p->i_pitch; \
203     p_y1     = p_y2; \
204     p_y2    += p_source->p[Y_PLANE].i_pitch;
205
206 #define VEC_LOAD_UV( ) \
207     u_vec = vec_ld( 0, p_u ); p_u += 16; \
208     v_vec = vec_ld( 0, p_v ); p_v += 16;
209
210 #define VEC_MERGE( a ) \
211     uv_vec = a( u_vec, v_vec ); \
212     y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
213     vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
214     vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
215     y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
216     vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
217     vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
218
219     vector unsigned char u_vec;
220     vector unsigned char v_vec;
221     vector unsigned char uv_vec;
222     vector unsigned char y_vec;
223
224     if( !( p_vout->render.i_width % 32 ) )
225     {
226         /* Width is a multiple of 32, we take 2 lines at a time */
227         for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
228         {
229             VEC_NEXT_LINES( );
230             for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
231             {
232                 VEC_LOAD_UV( );
233                 VEC_MERGE( vec_mergeh );
234                 VEC_MERGE( vec_mergel );
235             }
236         }
237     }
238     else
239     {
240         /* Width is only a multiple of 16, we take 4 lines at a time */
241         for( i_y = p_vout->render.i_height / 4 ; i_y-- ; )
242         {
243             /* Line 1 and 2, pixels 0 to ( width - 16 ) */
244             VEC_NEXT_LINES( );
245             for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
246             {
247                 VEC_LOAD_UV( );
248                 VEC_MERGE( vec_mergeh );
249                 VEC_MERGE( vec_mergel );
250             }
251
252             /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
253             VEC_LOAD_UV( );
254             VEC_MERGE( vec_mergeh );
255
256             /* Line 3 and 4, pixels 0 to 16 */
257             VEC_NEXT_LINES( );
258             VEC_MERGE( vec_mergel );
259
260             /* Line 3 and 4, pixels 16 to ( width ) */
261             for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
262             {
263                 VEC_LOAD_UV( );
264                 VEC_MERGE( vec_mergeh );
265                 VEC_MERGE( vec_mergel );
266             }
267         }
268     }
269 #undef VEC_NEXT_LINES
270 #undef VEC_LOAD_UV
271 #undef VEC_MERGE
272 #endif
273 }
274
275 /*****************************************************************************
276  * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
277  *****************************************************************************/
278 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
279 static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
280                                               picture_t *p_dest )
281 {
282     uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
283     uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
284     uint8_t *p_u = p_source->U_PIXELS;
285     uint8_t *p_v = p_source->V_PIXELS;
286
287     int i_x, i_y;
288
289     const int i_source_margin = p_source->p->i_pitch
290                                  - p_source->p->i_visible_pitch;
291     const int i_dest_margin = p_dest->p->i_pitch
292                                - p_dest->p->i_visible_pitch;
293
294     for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
295     {
296         p_line1 = p_line2;
297         p_line2 += p_dest->p->i_pitch;
298
299         p_y1 = p_y2;
300         p_y2 += p_source->p[Y_PLANE].i_pitch;
301
302         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
303         {
304 #if defined (MODULE_NAME_IS_i420_yuy2)
305             C_YUV420_YVYU( );
306             C_YUV420_YVYU( );
307             C_YUV420_YVYU( );
308             C_YUV420_YVYU( );
309 #else
310             MMX_CALL( MMX_YUV420_YVYU );
311 #endif
312         }
313
314         p_y1 += i_source_margin;
315         p_y2 += i_source_margin;
316         p_line1 += i_dest_margin;
317         p_line2 += i_dest_margin;
318     }
319 }
320
321 /*****************************************************************************
322  * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2
323  *****************************************************************************/
324 static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source,
325                                               picture_t *p_dest )
326 {
327     uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
328     uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
329     uint8_t *p_u = p_source->U_PIXELS;
330     uint8_t *p_v = p_source->V_PIXELS;
331
332     int i_x, i_y;
333
334     const int i_source_margin = p_source->p->i_pitch
335                                  - p_source->p->i_visible_pitch;
336     const int i_dest_margin = p_dest->p->i_pitch
337                                - p_dest->p->i_visible_pitch;
338
339     for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
340     {
341         p_line1 = p_line2;
342         p_line2 += p_dest->p->i_pitch;
343
344         p_y1 = p_y2;
345         p_y2 += p_source->p[Y_PLANE].i_pitch;
346
347         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
348         {
349 #if defined (MODULE_NAME_IS_i420_yuy2)
350             C_YUV420_UYVY( );
351             C_YUV420_UYVY( );
352             C_YUV420_UYVY( );
353             C_YUV420_UYVY( );
354 #else
355             MMX_CALL( MMX_YUV420_UYVY );
356 #endif
357         }
358
359         p_y1 += i_source_margin;
360         p_y2 += i_source_margin;
361         p_line1 += i_dest_margin;
362         p_line2 += i_dest_margin;
363     }
364 }
365
366 /*****************************************************************************
367  * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2
368  *****************************************************************************/
369 static void I420_IUYV( vout_thread_t *p_vout, picture_t *p_source,
370                                               picture_t *p_dest )
371 {
372     /* FIXME: TODO ! */
373     msg_Err( p_vout, "I420_IUYV unimplemented, please harass <sam@zoy.org>" );
374 }
375
376 /*****************************************************************************
377  * I420_cyuv: planar YUV 4:2:0 to upside-down packed UYVY 4:2:2
378  *****************************************************************************/
379 static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source,
380                                               picture_t *p_dest )
381 {
382     uint8_t *p_line1 = p_dest->p->p_pixels +
383                        p_dest->p->i_lines * p_dest->p->i_pitch
384                        + p_dest->p->i_pitch;
385     uint8_t *p_line2 = p_dest->p->p_pixels +
386                        p_dest->p->i_lines * p_dest->p->i_pitch;
387     uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
388     uint8_t *p_u = p_source->U_PIXELS;
389     uint8_t *p_v = p_source->V_PIXELS;
390
391     int i_x, i_y;
392
393     const int i_source_margin = p_source->p->i_pitch
394                                  - p_source->p->i_visible_pitch;
395     const int i_dest_margin = p_dest->p->i_pitch
396                                - p_dest->p->i_visible_pitch;
397
398     for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
399     {
400         p_line1 -= 3 * p_dest->p->i_pitch;
401         p_line2 -= 3 * p_dest->p->i_pitch;
402
403         p_y1 = p_y2;
404         p_y2 += p_source->p[Y_PLANE].i_pitch;
405
406         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
407         {
408 #if defined (MODULE_NAME_IS_i420_yuy2)
409             C_YUV420_UYVY( );
410             C_YUV420_UYVY( );
411             C_YUV420_UYVY( );
412             C_YUV420_UYVY( );
413 #else
414             MMX_CALL( MMX_YUV420_UYVY );
415 #endif
416         }
417
418         p_y1 += i_source_margin;
419         p_y2 += i_source_margin;
420         p_line1 += i_dest_margin;
421         p_line2 += i_dest_margin;
422     }
423 }
424 #endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec)
425
426 /*****************************************************************************
427  * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
428  *****************************************************************************/
#if defined (MODULE_NAME_IS_i420_yuy2)
/*
 * Convert the planar 4:2:0 picture p_source into the packed Y211 picture
 * p_dest, two luma lines (one shared chroma line) per outer iteration.
 * Only built in the plain C variant of the module.
 *
 * The pointer names p_line1/p_line2/p_y1/p_y2/p_u/p_v are fixed: the
 * argument-less conversion macro picks them up by name and advances them.
 *
 * NOTE(review): the loop still handles whole groups of 8 pixels only, so
 * widths that are not a multiple of 8 lose their right edge here.
 */
static void I420_Y211( vout_thread_t *p_vout, picture_t *p_source,
                                              picture_t *p_dest )
{
    uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
    uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
    uint8_t *p_u = p_source->U_PIXELS;
    uint8_t *p_v = p_source->V_PIXELS;

    int i_x, i_y;

    /* Padding bytes between the visible end of a line and the next line */
    const int i_source_margin = p_source->p->i_pitch
                                 - p_source->p->i_visible_pitch;
    /* Same for the chroma planes.  Plane 1 is taken as representative;
     * assumes both chroma planes share one pitch - TODO confirm. */
    const int i_source_margin_c = p_source->p[1].i_pitch
                                   - p_source->p[1].i_visible_pitch;
    const int i_dest_margin = p_dest->p->i_pitch
                               - p_dest->p->i_visible_pitch;

    for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
    {
        p_line1 = p_line2;
        p_line2 += p_dest->p->i_pitch;

        p_y1 = p_y2;
        p_y2 += p_source->p[Y_PLANE].i_pitch;

        /* 8 source pixels of both lines per iteration (two macro calls) */
        for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
        {
            C_YUV420_Y211( );
            C_YUV420_Y211( );
        }

        /* Skip the padding of every plane that was walked, including the
         * chroma planes, so the next pair starts on a line boundary. */
        p_y1 += i_source_margin;
        p_y2 += i_source_margin;
        p_u += i_source_margin_c;
        p_v += i_source_margin_c;
        p_line1 += i_dest_margin;
        p_line2 += i_dest_margin;
    }
}
#endif
466