git.sesse.net Git - vlc/blob - modules/video_chroma/i420_yuy2.c
Use emms after I420_UYVY because the OpenGL vout uses floats in the same thread.
[vlc] / modules / video_chroma / i420_yuy2.c
1 /*****************************************************************************
2  * i420_yuy2.c : YUV to YUV conversion module for vlc
3  *****************************************************************************
4  * Copyright (C) 2000, 2001 the VideoLAN team
5  * $Id$
6  *
7  * Authors: Samuel Hocevar <sam@zoy.org>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27 #include <string.h>                                            /* strerror() */
28 #include <stdlib.h>                                      /* malloc(), free() */
29
30 #include <vlc/vlc.h>
31 #include <vlc/vout.h>
32
33 #if defined (MODULE_NAME_IS_i420_yuy2_altivec) && defined(HAVE_ALTIVEC_H)
34 #   include <altivec.h>
35 #endif
36
37 #include "i420_yuy2.h"
38
/* Source chromas, as spelled in the module description strings. */
#define SRC_FOURCC  "I420,IYUV,YV12"

/* Destination chromas advertised in the module description; the list
 * depends on which build variant (plain C, MMX or AltiVec) is compiled. */
#if defined (MODULE_NAME_IS_i420_yuy2)
#   define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
#elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
#   define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
#elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
#   define DEST_FOURCC "YUY2,YUNV"
#endif
48
49 /*****************************************************************************
50  * Local and extern prototypes.
51  *****************************************************************************/
52 static int  Activate ( vlc_object_t * );
53
54 static void I420_YUY2           ( vout_thread_t *, picture_t *, picture_t * );
55 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
56 static void I420_YVYU           ( vout_thread_t *, picture_t *, picture_t * );
57 static void I420_UYVY           ( vout_thread_t *, picture_t *, picture_t * );
58 static void I420_IUYV           ( vout_thread_t *, picture_t *, picture_t * );
59 static void I420_cyuv           ( vout_thread_t *, picture_t *, picture_t * );
60 #endif
61 #if defined (MODULE_NAME_IS_i420_yuy2)
62 static void I420_Y211           ( vout_thread_t *, picture_t *, picture_t * );
63 #endif
64
65 #ifdef MODULE_NAME_IS_i420_yuy2_mmx
66 static uint64_t i_00ffw;
67 static uint64_t i_80w;
68 #endif
69
/*****************************************************************************
 * Module descriptor.
 *****************************************************************************
 * Registers the module as a "chroma" provider. The plain C build passes 80
 * to set_capability(), the MMX and AltiVec builds pass 100 and additionally
 * declare the CPU feature they require.
 *****************************************************************************/
vlc_module_begin();
#if defined (MODULE_NAME_IS_i420_yuy2)
    /* Plain C build */
    set_description( _("Conversions from " SRC_FOURCC " to " DEST_FOURCC) );
    set_capability( "chroma", 80 );
#elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
    /* MMX build */
    set_description( _("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) );
    set_capability( "chroma", 100 );
    add_requirement( MMX );
    /* Fill in the file-scope 64-bit constants of the MMX build */
    i_00ffw = 0x00ff00ff00ff00ffULL;
    i_80w   = 0x0000000080808080ULL;
#elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
    /* AltiVec build */
    set_description(
            _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
    set_capability( "chroma", 100 );
    add_requirement( ALTIVEC );
#endif
    /* Activate is the open callback; no close callback is registered */
    set_callbacks( Activate, NULL );
vlc_module_end();
92
93 /*****************************************************************************
94  * Activate: allocate a chroma function
95  *****************************************************************************
96  * This function allocates and initializes a chroma function
97  *****************************************************************************/
98 static int Activate( vlc_object_t *p_this )
99 {
100     vout_thread_t *p_vout = (vout_thread_t *)p_this;
101
102     if( p_vout->render.i_width & 1 || p_vout->render.i_height & 1 )
103     {
104         return -1;
105     }
106
107     switch( p_vout->render.i_chroma )
108     {
109         case VLC_FOURCC('Y','V','1','2'):
110         case VLC_FOURCC('I','4','2','0'):
111         case VLC_FOURCC('I','Y','U','V'):
112             switch( p_vout->output.i_chroma )
113             {
114                 case VLC_FOURCC('Y','U','Y','2'):
115                 case VLC_FOURCC('Y','U','N','V'):
116                     p_vout->chroma.pf_convert = I420_YUY2;
117                     break;
118
119 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
120                 case VLC_FOURCC('Y','V','Y','U'):
121                     p_vout->chroma.pf_convert = I420_YVYU;
122                     break;
123
124                 case VLC_FOURCC('U','Y','V','Y'):
125                 case VLC_FOURCC('U','Y','N','V'):
126                 case VLC_FOURCC('Y','4','2','2'):
127                     p_vout->chroma.pf_convert = I420_UYVY;
128                     break;
129
130                 case VLC_FOURCC('I','U','Y','V'):
131                     p_vout->chroma.pf_convert = I420_IUYV;
132                     break;
133
134                 case VLC_FOURCC('c','y','u','v'):
135                     p_vout->chroma.pf_convert = I420_cyuv;
136                     break;
137 #endif
138
139 #if defined (MODULE_NAME_IS_i420_yuy2)
140                 case VLC_FOURCC('Y','2','1','1'):
141                     p_vout->chroma.pf_convert = I420_Y211;
142                     break;
143 #endif
144
145                 default:
146                     return -1;
147             }
148             break;
149
150         default:
151             return -1;
152     }
153
154     return 0;
155 }
156
157 /* Following functions are local */
158
159 /*****************************************************************************
160  * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
161  *****************************************************************************/
162 static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source,
163                                               picture_t *p_dest )
164 {
165     uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
166     uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
167     uint8_t *p_u = p_source->U_PIXELS;
168     uint8_t *p_v = p_source->V_PIXELS;
169
170     int i_x, i_y;
171
172 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
173 #define VEC_NEXT_LINES( ) \
174     p_line1  = p_line2; \
175     p_line2 += p_dest->p->i_pitch; \
176     p_y1     = p_y2; \
177     p_y2    += p_source->p[Y_PLANE].i_pitch;
178
179 #define VEC_LOAD_UV( ) \
180     u_vec = vec_ld( 0, p_u ); p_u += 16; \
181     v_vec = vec_ld( 0, p_v ); p_v += 16;
182
183 #define VEC_MERGE( a ) \
184     uv_vec = a( u_vec, v_vec ); \
185     y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
186     vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
187     vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
188     y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
189     vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
190     vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
191
192     vector unsigned char u_vec;
193     vector unsigned char v_vec;
194     vector unsigned char uv_vec;
195     vector unsigned char y_vec;
196
197     if( !( ( p_vout->render.i_width % 32 ) |
198            ( p_vout->render.i_height % 2 ) ) )
199     {
200         /* Width is a multiple of 32, we take 2 lines at a time */
201         for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
202         {
203             VEC_NEXT_LINES( );
204             for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
205             {
206                 VEC_LOAD_UV( );
207                 VEC_MERGE( vec_mergeh );
208                 VEC_MERGE( vec_mergel );
209             }
210         }
211     }
212     else if( !( ( p_vout->render.i_width % 16 ) |
213                 ( p_vout->render.i_height % 4 ) ) )
214     {
215         /* Width is only a multiple of 16, we take 4 lines at a time */
216         for( i_y = p_vout->render.i_height / 4 ; i_y-- ; )
217         {
218             /* Line 1 and 2, pixels 0 to ( width - 16 ) */
219             VEC_NEXT_LINES( );
220             for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
221             {
222                 VEC_LOAD_UV( );
223                 VEC_MERGE( vec_mergeh );
224                 VEC_MERGE( vec_mergel );
225             }
226
227             /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
228             VEC_LOAD_UV( );
229             VEC_MERGE( vec_mergeh );
230
231             /* Line 3 and 4, pixels 0 to 16 */
232             VEC_NEXT_LINES( );
233             VEC_MERGE( vec_mergel );
234
235             /* Line 3 and 4, pixels 16 to ( width ) */
236             for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
237             {
238                 VEC_LOAD_UV( );
239                 VEC_MERGE( vec_mergeh );
240                 VEC_MERGE( vec_mergel );
241             }
242         }
243     }
244     else
245     {
246         /* Crap, use the C version */
247 #undef VEC_NEXT_LINES
248 #undef VEC_LOAD_UV
249 #undef VEC_MERGE
250 #endif
251
252     const int i_source_margin = p_source->p[0].i_pitch
253                                  - p_source->p[0].i_visible_pitch;
254     const int i_source_margin_c = p_source->p[1].i_pitch
255                                  - p_source->p[1].i_visible_pitch;
256     const int i_dest_margin = p_dest->p->i_pitch
257                                - p_dest->p->i_visible_pitch;
258
259     for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
260     {
261         p_line1 = p_line2;
262         p_line2 += p_dest->p->i_pitch;
263
264         p_y1 = p_y2;
265         p_y2 += p_source->p[Y_PLANE].i_pitch;
266
267 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
268         for( i_x = p_vout->render.i_width / 2 ; i_x-- ; )
269         {
270             C_YUV420_YUYV( );
271         }
272 #else
273         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
274         {
275             MMX_CALL( MMX_YUV420_YUYV );
276         }
277         for( i_x = ( p_vout->render.i_width % 8 ) / 2; i_x-- ; )
278         {
279             C_YUV420_YUYV( );
280         }
281 #endif
282
283         p_y1 += i_source_margin;
284         p_y2 += i_source_margin;
285         p_u += i_source_margin_c;
286         p_v += i_source_margin_c;
287         p_line1 += i_dest_margin;
288         p_line2 += i_dest_margin;
289     }
290
291 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
292     }
293 #endif
294 }
295
296 /*****************************************************************************
297  * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
298  *****************************************************************************/
299 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
300 static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
301                                               picture_t *p_dest )
302 {
303     uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
304     uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
305     uint8_t *p_u = p_source->U_PIXELS;
306     uint8_t *p_v = p_source->V_PIXELS;
307
308     int i_x, i_y;
309
310     const int i_source_margin = p_source->p[0].i_pitch
311                                  - p_source->p[0].i_visible_pitch;
312     const int i_source_margin_c = p_source->p[1].i_pitch
313                                  - p_source->p[1].i_visible_pitch;
314     const int i_dest_margin = p_dest->p->i_pitch
315                                - p_dest->p->i_visible_pitch;
316
317     for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
318     {
319         p_line1 = p_line2;
320         p_line2 += p_dest->p->i_pitch;
321
322         p_y1 = p_y2;
323         p_y2 += p_source->p[Y_PLANE].i_pitch;
324
325         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
326         {
327 #if defined (MODULE_NAME_IS_i420_yuy2)
328             C_YUV420_YVYU( );
329             C_YUV420_YVYU( );
330             C_YUV420_YVYU( );
331             C_YUV420_YVYU( );
332 #else
333             MMX_CALL( MMX_YUV420_YVYU );
334 #endif
335         }
336
337         p_y1 += i_source_margin;
338         p_y2 += i_source_margin;
339         p_u += i_source_margin_c;
340         p_v += i_source_margin_c;
341         p_line1 += i_dest_margin;
342         p_line2 += i_dest_margin;
343     }
344 }
345
346 /*****************************************************************************
347  * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2
348  *****************************************************************************/
349 static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source,
350                                               picture_t *p_dest )
351 {
352     uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
353     uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
354     uint8_t *p_u = p_source->U_PIXELS;
355     uint8_t *p_v = p_source->V_PIXELS;
356
357     int i_x, i_y;
358
359     const int i_source_margin = p_source->p[0].i_pitch
360                                  - p_source->p[0].i_visible_pitch;
361     const int i_source_margin_c = p_source->p[1].i_pitch
362                                  - p_source->p[1].i_visible_pitch;
363     const int i_dest_margin = p_dest->p->i_pitch
364                                - p_dest->p->i_visible_pitch;
365
366     for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
367     {
368         p_line1 = p_line2;
369         p_line2 += p_dest->p->i_pitch;
370
371         p_y1 = p_y2;
372         p_y2 += p_source->p[Y_PLANE].i_pitch;
373
374         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
375         {
376 #if defined (MODULE_NAME_IS_i420_yuy2)
377             C_YUV420_UYVY( );
378             C_YUV420_UYVY( );
379             C_YUV420_UYVY( );
380             C_YUV420_UYVY( );
381 #else
382             MMX_CALL( MMX_YUV420_UYVY );
383 #endif
384         }
385
386         p_y1 += i_source_margin;
387         p_y2 += i_source_margin;
388         p_u += i_source_margin_c;
389         p_v += i_source_margin_c;
390         p_line1 += i_dest_margin;
391         p_line2 += i_dest_margin;
392     }
393
394 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
395     __asm__ __volatile__("emms" :: );
396 #endif
397 }
398
399 /*****************************************************************************
400  * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2
401  *****************************************************************************/
402 static void I420_IUYV( vout_thread_t *p_vout, picture_t *p_source,
403                                               picture_t *p_dest )
404 {
405     /* FIXME: TODO ! */
406     msg_Err( p_vout, "I420_IUYV unimplemented, please harass <sam@zoy.org>" );
407 }
408
409 /*****************************************************************************
410  * I420_cyuv: planar YUV 4:2:0 to upside-down packed UYVY 4:2:2
411  *****************************************************************************/
412 static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source,
413                                               picture_t *p_dest )
414 {
415     uint8_t *p_line1 = p_dest->p->p_pixels +
416                        p_dest->p->i_visible_lines * p_dest->p->i_pitch
417                        + p_dest->p->i_pitch;
418     uint8_t *p_line2 = p_dest->p->p_pixels +
419                        p_dest->p->i_visible_lines * p_dest->p->i_pitch;
420     uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
421     uint8_t *p_u = p_source->U_PIXELS;
422     uint8_t *p_v = p_source->V_PIXELS;
423
424     int i_x, i_y;
425
426     const int i_source_margin = p_source->p[0].i_pitch
427                                  - p_source->p[0].i_visible_pitch;
428     const int i_source_margin_c = p_source->p[1].i_pitch
429                                  - p_source->p[1].i_visible_pitch;
430     const int i_dest_margin = p_dest->p->i_pitch
431                                - p_dest->p->i_visible_pitch;
432
433     for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
434     {
435         p_line1 -= 3 * p_dest->p->i_pitch;
436         p_line2 -= 3 * p_dest->p->i_pitch;
437
438         p_y1 = p_y2;
439         p_y2 += p_source->p[Y_PLANE].i_pitch;
440
441         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
442         {
443 #if defined (MODULE_NAME_IS_i420_yuy2)
444             C_YUV420_UYVY( );
445             C_YUV420_UYVY( );
446             C_YUV420_UYVY( );
447             C_YUV420_UYVY( );
448 #else
449             MMX_CALL( MMX_YUV420_UYVY );
450 #endif
451         }
452
453         p_y1 += i_source_margin;
454         p_y2 += i_source_margin;
455         p_u += i_source_margin_c;
456         p_v += i_source_margin_c;
457         p_line1 += i_dest_margin;
458         p_line2 += i_dest_margin;
459     }
460 }
461 #endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec)
462
/*****************************************************************************
 * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
 *****************************************************************************
 * Only built in the plain C variant of the module.
 *
 * p_vout:   supplies the picture size (render.i_width / render.i_height)
 * p_source: planar picture, read through Y_PIXELS / U_PIXELS / V_PIXELS
 * p_dest:   packed picture, written through p_dest->p
 *
 * Each row is processed in groups of 8 pixels (two macro calls per group);
 * pixels beyond the last complete group of a row are not converted.
 *****************************************************************************/
#if defined (MODULE_NAME_IS_i420_yuy2)
static void I420_Y211( vout_thread_t *p_vout, picture_t *p_source,
                                              picture_t *p_dest )
{
    /* The pointer names below are the ones the conversion macro works on */
    uint8_t *p_line1;
    uint8_t *p_line2 = p_dest->p->p_pixels;
    uint8_t *p_y1;
    uint8_t *p_y2 = p_source->Y_PIXELS;
    uint8_t *p_u = p_source->U_PIXELS;
    uint8_t *p_v = p_source->V_PIXELS;

    int i_x, i_y;

    /* Row strides */
    const int i_dest_pitch = p_dest->p->i_pitch;
    const int i_luma_pitch = p_source->p[Y_PLANE].i_pitch;

    /* Bytes to skip at the end of each row to reach the start of the next */
    const int i_source_margin = p_source->p[0].i_pitch
                                 - p_source->p[0].i_visible_pitch;
    const int i_source_margin_c = p_source->p[1].i_pitch
                                 - p_source->p[1].i_visible_pitch;
    const int i_dest_margin = i_dest_pitch - p_dest->p->i_visible_pitch;

    const int i_row_pairs = p_vout->render.i_height / 2;

    for( i_y = 0 ; i_y < i_row_pairs ; i_y++ )
    {
        /* Upper row of the pair goes to *1, lower row to *2 */
        p_line1 = p_line2;
        p_line2 = p_line1 + i_dest_pitch;

        p_y1 = p_y2;
        p_y2 = p_y1 + i_luma_pitch;

        i_x = p_vout->render.i_width / 8;
        while( i_x-- )
        {
            C_YUV420_Y211( );
            C_YUV420_Y211( );
        }

        /* Skip the padding at the end of every row just walked */
        p_line1 += i_dest_margin;
        p_line2 += i_dest_margin;
        p_y1 += i_source_margin;
        p_y2 += i_source_margin;
        p_u += i_source_margin_c;
        p_v += i_source_margin_c;
    }
}
#endif
507