]> git.sesse.net Git - vlc/blob - modules/video_chroma/i420_yuy2.c
i420_yuy2.c: fixed AltiVec conversion for unusual image sizes
[vlc] / modules / video_chroma / i420_yuy2.c
1 /*****************************************************************************
2  * i420_yuy2.c : YUV to YUV conversion module for vlc
3  *****************************************************************************
4  * Copyright (C) 2000, 2001 VideoLAN
5  * $Id$
6  *
7  * Authors: Samuel Hocevar <sam@zoy.org>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27 #include <string.h>                                            /* strerror() */
28 #include <stdlib.h>                                      /* malloc(), free() */
29
30 #include <vlc/vlc.h>
31 #include <vlc/vout.h>
32
33 #include "i420_yuy2.h"
34
/* Source chromas named in the module description string; Activate() accepts
 * exactly these three as the render chroma. */
35 #define SRC_FOURCC  "I420,IYUV,YV12"
36
/* Destination chromas named in the module description string.  The list
 * depends on which variant of the module is being built and mirrors the
 * cases that Activate() compiles in for that variant: the plain C build
 * handles everything including Y211, the MMX build drops Y211, and the
 * AltiVec build only provides YUY2/YUNV.
 * DEST_FOURCC is left undefined when none of the three MODULE_NAME_IS_*
 * macros is defined. */
37 #if defined (MODULE_NAME_IS_i420_yuy2)
38 #    define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
39 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
40 #    define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
41 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
42 #    define DEST_FOURCC "YUY2,YUNV"
43 #endif
44
45 /*****************************************************************************
46  * Local and extern prototypes.
47  *****************************************************************************/
/* Module callback registered through set_callbacks(); picks a converter. */
48 static int  Activate ( vlc_object_t * );
49
/* Converters.  Each takes the video output, the source picture and the
 * destination picture.  I420_YUY2 exists in every build; the next four are
 * compiled out of the AltiVec build; I420_Y211 exists only in the plain C
 * build. */
50 static void I420_YUY2           ( vout_thread_t *, picture_t *, picture_t * );
51 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
52 static void I420_YVYU           ( vout_thread_t *, picture_t *, picture_t * );
53 static void I420_UYVY           ( vout_thread_t *, picture_t *, picture_t * );
54 static void I420_IUYV           ( vout_thread_t *, picture_t *, picture_t * );
55 static void I420_cyuv           ( vout_thread_t *, picture_t *, picture_t * );
56 #endif
57 #if defined (MODULE_NAME_IS_i420_yuy2)
58 static void I420_Y211           ( vout_thread_t *, picture_t *, picture_t * );
59 #endif
60
/* 64-bit constants for the MMX build.  They are assigned inside the module
 * descriptor and are not read anywhere else in this file; presumably the
 * MMX macros from i420_yuy2.h use them -- TODO confirm. */
61 #ifdef MODULE_NAME_IS_i420_yuy2_mmx
62 static uint64_t i_00ffw;
63 static uint64_t i_80w;
64 #endif
65
66 /*****************************************************************************
67  * Module descriptor.
68  *****************************************************************************/
/* Module descriptor.  One of three variants is described depending on the
 * MODULE_NAME_IS_* macro in effect; all of them register Activate() as the
 * only callback (the second set_callbacks() argument is NULL). */
69 vlc_module_begin();
70 #if defined (MODULE_NAME_IS_i420_yuy2)
/* Plain C variant: "chroma" capability with score 80, lower than the score
 * of 100 used by the two accelerated variants below (presumably so that an
 * accelerated variant is preferred when usable -- TODO confirm). */
71     set_description( _("Conversions from " SRC_FOURCC " to " DEST_FOURCC) );
72     set_capability( "chroma", 80 );
73 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
/* MMX variant: declares an MMX requirement. */
74     set_description( _("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) );
75     set_capability( "chroma", 100 );
76     add_requirement( MMX );
77     /* Initialize MMX-specific constants */
/* NOTE(review): these are plain assignments placed between
 * vlc_module_begin() and vlc_module_end(), so the pair presumably expands
 * to a function body.  i_80w only sets the low four bytes to 0x80 although
 * its name suggests a per-word pattern -- confirm against the MMX macros. */
78     i_00ffw = 0x00ff00ff00ff00ffULL;
79     i_80w   = 0x0000000080808080ULL;
80 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
/* AltiVec variant: declares an ALTIVEC requirement. */
81     set_description(
82             _("Altivec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
83     set_capability( "chroma", 100 );
84     add_requirement( ALTIVEC );
85 #endif
86     set_callbacks( Activate, NULL );
87 vlc_module_end();
88
89 /*****************************************************************************
90  * Activate: allocate a chroma function
91  *****************************************************************************
92  * This function allocates and initializes a chroma function
93  *****************************************************************************/
94 static int Activate( vlc_object_t *p_this )
95 {
96     vout_thread_t *p_vout = (vout_thread_t *)p_this;
97
98     if( p_vout->render.i_width & 1 || p_vout->render.i_height & 1 )
99     {
100         return -1;
101     }
102
103     switch( p_vout->render.i_chroma )
104     {
105         case VLC_FOURCC('Y','V','1','2'):
106         case VLC_FOURCC('I','4','2','0'):
107         case VLC_FOURCC('I','Y','U','V'):
108             switch( p_vout->output.i_chroma )
109             {
110                 case VLC_FOURCC('Y','U','Y','2'):
111                 case VLC_FOURCC('Y','U','N','V'):
112                     p_vout->chroma.pf_convert = I420_YUY2;
113                     break;
114
115 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
116                 case VLC_FOURCC('Y','V','Y','U'):
117                     p_vout->chroma.pf_convert = I420_YVYU;
118                     break;
119
120                 case VLC_FOURCC('U','Y','V','Y'):
121                 case VLC_FOURCC('U','Y','N','V'):
122                 case VLC_FOURCC('Y','4','2','2'):
123                     p_vout->chroma.pf_convert = I420_UYVY;
124                     break;
125
126                 case VLC_FOURCC('I','U','Y','V'):
127                     p_vout->chroma.pf_convert = I420_IUYV;
128                     break;
129
130                 case VLC_FOURCC('c','y','u','v'):
131                     p_vout->chroma.pf_convert = I420_cyuv;
132                     break;
133 #endif
134
135 #if defined (MODULE_NAME_IS_i420_yuy2)
136                 case VLC_FOURCC('Y','2','1','1'):
137                     p_vout->chroma.pf_convert = I420_Y211;
138                     break;
139 #endif
140
141                 default:
142                     return -1;
143             }
144             break;
145
146         default:
147             return -1;
148     }
149
150     return 0;
151 }
152
153 /* Following functions are local */
154
155 /*****************************************************************************
156  * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
157  *****************************************************************************/
158 static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source,
159                                               picture_t *p_dest )
160 {
161     uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
162     uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
163     uint8_t *p_u = p_source->U_PIXELS;
164     uint8_t *p_v = p_source->V_PIXELS;
165
166     int i_x, i_y;
167
168 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
169 #define VEC_NEXT_LINES( ) \
170     p_line1  = p_line2; \
171     p_line2 += p_dest->p->i_pitch; \
172     p_y1     = p_y2; \
173     p_y2    += p_source->p[Y_PLANE].i_pitch;
174
175 #define VEC_LOAD_UV( ) \
176     u_vec = vec_ld( 0, p_u ); p_u += 16; \
177     v_vec = vec_ld( 0, p_v ); p_v += 16;
178
179 #define VEC_MERGE( a ) \
180     uv_vec = a( u_vec, v_vec ); \
181     y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
182     vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
183     vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
184     y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
185     vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
186     vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
187
188     vector unsigned char u_vec;
189     vector unsigned char v_vec;
190     vector unsigned char uv_vec;
191     vector unsigned char y_vec;
192
193     if( !( ( p_vout->render.i_width % 32 ) |
194            ( p_vout->render.i_height % 2 ) ) )
195     {
196         /* Width is a multiple of 32, we take 2 lines at a time */
197         for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
198         {
199             VEC_NEXT_LINES( );
200             for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
201             {
202                 VEC_LOAD_UV( );
203                 VEC_MERGE( vec_mergeh );
204                 VEC_MERGE( vec_mergel );
205             }
206         }
207     }
208     else if( !( ( p_vout->render.i_width % 16 ) |
209                 ( p_vout->render.i_height % 4 ) ) )
210     {
211         /* Width is only a multiple of 16, we take 4 lines at a time */
212         for( i_y = p_vout->render.i_height / 4 ; i_y-- ; )
213         {
214             /* Line 1 and 2, pixels 0 to ( width - 16 ) */
215             VEC_NEXT_LINES( );
216             for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
217             {
218                 VEC_LOAD_UV( );
219                 VEC_MERGE( vec_mergeh );
220                 VEC_MERGE( vec_mergel );
221             }
222
223             /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
224             VEC_LOAD_UV( );
225             VEC_MERGE( vec_mergeh );
226
227             /* Line 3 and 4, pixels 0 to 16 */
228             VEC_NEXT_LINES( );
229             VEC_MERGE( vec_mergel );
230
231             /* Line 3 and 4, pixels 16 to ( width ) */
232             for( i_x = p_vout->render.i_width / 32 ; i_x-- ; )
233             {
234                 VEC_LOAD_UV( );
235                 VEC_MERGE( vec_mergeh );
236                 VEC_MERGE( vec_mergel );
237             }
238         }
239     }
240     else
241     {
242         /* Crap, use the C version */
243 #undef VEC_NEXT_LINES
244 #undef VEC_LOAD_UV
245 #undef VEC_MERGE
246 #endif
247
248     const int i_source_margin = p_source->p->i_pitch
249                                  - p_source->p->i_visible_pitch;
250     const int i_dest_margin = p_dest->p->i_pitch
251                                - p_dest->p->i_visible_pitch;
252
253     for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
254     {
255         p_line1 = p_line2;
256         p_line2 += p_dest->p->i_pitch;
257
258         p_y1 = p_y2;
259         p_y2 += p_source->p[Y_PLANE].i_pitch;
260
261         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
262         {
263 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
264             C_YUV420_YUYV( );
265             C_YUV420_YUYV( );
266             C_YUV420_YUYV( );
267             C_YUV420_YUYV( );
268 #else
269             MMX_CALL( MMX_YUV420_YUYV );
270 #endif
271         }
272
273         p_y1 += i_source_margin;
274         p_y2 += i_source_margin;
275         p_line1 += i_dest_margin;
276         p_line2 += i_dest_margin;
277     }
278
279 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
280     }
281 #endif
282 }
283
284 /*****************************************************************************
285  * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
286  *****************************************************************************/
287 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
288 static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
289                                               picture_t *p_dest )
290 {
291     uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
292     uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
293     uint8_t *p_u = p_source->U_PIXELS;
294     uint8_t *p_v = p_source->V_PIXELS;
295
296     int i_x, i_y;
297
298     const int i_source_margin = p_source->p->i_pitch
299                                  - p_source->p->i_visible_pitch;
300     const int i_dest_margin = p_dest->p->i_pitch
301                                - p_dest->p->i_visible_pitch;
302
303     for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
304     {
305         p_line1 = p_line2;
306         p_line2 += p_dest->p->i_pitch;
307
308         p_y1 = p_y2;
309         p_y2 += p_source->p[Y_PLANE].i_pitch;
310
311         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
312         {
313 #if defined (MODULE_NAME_IS_i420_yuy2)
314             C_YUV420_YVYU( );
315             C_YUV420_YVYU( );
316             C_YUV420_YVYU( );
317             C_YUV420_YVYU( );
318 #else
319             MMX_CALL( MMX_YUV420_YVYU );
320 #endif
321         }
322
323         p_y1 += i_source_margin;
324         p_y2 += i_source_margin;
325         p_line1 += i_dest_margin;
326         p_line2 += i_dest_margin;
327     }
328 }
329
330 /*****************************************************************************
331  * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2
332  *****************************************************************************/
333 static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source,
334                                               picture_t *p_dest )
335 {
336     uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
337     uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
338     uint8_t *p_u = p_source->U_PIXELS;
339     uint8_t *p_v = p_source->V_PIXELS;
340
341     int i_x, i_y;
342
343     const int i_source_margin = p_source->p->i_pitch
344                                  - p_source->p->i_visible_pitch;
345     const int i_dest_margin = p_dest->p->i_pitch
346                                - p_dest->p->i_visible_pitch;
347
348     for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
349     {
350         p_line1 = p_line2;
351         p_line2 += p_dest->p->i_pitch;
352
353         p_y1 = p_y2;
354         p_y2 += p_source->p[Y_PLANE].i_pitch;
355
356         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
357         {
358 #if defined (MODULE_NAME_IS_i420_yuy2)
359             C_YUV420_UYVY( );
360             C_YUV420_UYVY( );
361             C_YUV420_UYVY( );
362             C_YUV420_UYVY( );
363 #else
364             MMX_CALL( MMX_YUV420_UYVY );
365 #endif
366         }
367
368         p_y1 += i_source_margin;
369         p_y2 += i_source_margin;
370         p_line1 += i_dest_margin;
371         p_line2 += i_dest_margin;
372     }
373 }
374
375 /*****************************************************************************
376  * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2
377  *****************************************************************************/
378 static void I420_IUYV( vout_thread_t *p_vout, picture_t *p_source,
379                                               picture_t *p_dest )
380 {
381     /* FIXME: TODO ! */
382     msg_Err( p_vout, "I420_IUYV unimplemented, please harass <sam@zoy.org>" );
383 }
384
385 /*****************************************************************************
386  * I420_cyuv: planar YUV 4:2:0 to upside-down packed UYVY 4:2:2
387  *****************************************************************************/
388 static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source,
389                                               picture_t *p_dest )
390 {
391     uint8_t *p_line1 = p_dest->p->p_pixels +
392                        p_dest->p->i_lines * p_dest->p->i_pitch
393                        + p_dest->p->i_pitch;
394     uint8_t *p_line2 = p_dest->p->p_pixels +
395                        p_dest->p->i_lines * p_dest->p->i_pitch;
396     uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
397     uint8_t *p_u = p_source->U_PIXELS;
398     uint8_t *p_v = p_source->V_PIXELS;
399
400     int i_x, i_y;
401
402     const int i_source_margin = p_source->p->i_pitch
403                                  - p_source->p->i_visible_pitch;
404     const int i_dest_margin = p_dest->p->i_pitch
405                                - p_dest->p->i_visible_pitch;
406
407     for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
408     {
409         p_line1 -= 3 * p_dest->p->i_pitch;
410         p_line2 -= 3 * p_dest->p->i_pitch;
411
412         p_y1 = p_y2;
413         p_y2 += p_source->p[Y_PLANE].i_pitch;
414
415         for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
416         {
417 #if defined (MODULE_NAME_IS_i420_yuy2)
418             C_YUV420_UYVY( );
419             C_YUV420_UYVY( );
420             C_YUV420_UYVY( );
421             C_YUV420_UYVY( );
422 #else
423             MMX_CALL( MMX_YUV420_UYVY );
424 #endif
425         }
426
427         p_y1 += i_source_margin;
428         p_y2 += i_source_margin;
429         p_line1 += i_dest_margin;
430         p_line2 += i_dest_margin;
431     }
432 }
433 #endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec)
434
/*****************************************************************************
 * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
 *****************************************************************************
 * p_vout:   video output; only render.i_width and render.i_height are read
 * p_source: planar source picture (Y, U and V planes are read)
 * p_dest:   packed destination picture (its first plane is written)
 *
 * Two source lines sharing one chroma line are converted per iteration.
 * Only built in the plain C variant of the module.
 *****************************************************************************/
#if defined (MODULE_NAME_IS_i420_yuy2)
static void I420_Y211( vout_thread_t *p_vout, picture_t *p_source,
                                              picture_t *p_dest )
{
    /* Working pointers.  The conversion macro takes no arguments, so it
     * presumably refers to these six variables by name: do not rename. */
    uint8_t *p_line1, *p_line2;
    uint8_t *p_y1, *p_y2;
    uint8_t *p_u, *p_v;

    /* Start of the current line pair in each plane.  Every plane is stepped
     * by its own pitch, so row padding in any of them (chroma included) and
     * pixels left unconverted at the end of a row cannot shift later rows. */
    uint8_t *p_dest_row = p_dest->p->p_pixels;
    uint8_t *p_y_row    = p_source->Y_PIXELS;
    uint8_t *p_u_row    = p_source->U_PIXELS;
    uint8_t *p_v_row    = p_source->V_PIXELS;

    int i_x, i_y;

    for( i_y = p_vout->render.i_height / 2 ; i_y-- ; )
    {
        p_line1 = p_dest_row;
        p_line2 = p_dest_row + p_dest->p->i_pitch;

        p_y1 = p_y_row;
        p_y2 = p_y_row + p_source->p[Y_PLANE].i_pitch;

        p_u = p_u_row;
        p_v = p_v_row;

        /* Two macro calls per 8 source pixels, i.e. 4 pixels per call */
        for( i_x = p_vout->render.i_width / 8 ; i_x-- ; )
        {
            C_YUV420_Y211( );
            C_YUV420_Y211( );
        }

        /* One more group of 4 when at least 4 pixels remain; a final group
         * of 2 pixels cannot be expressed with the macro and is left out */
        for( i_x = ( p_vout->render.i_width % 8 ) / 4 ; i_x-- ; )
        {
            C_YUV420_Y211( );
        }

        /* Two luma / destination lines and one chroma line were consumed */
        p_dest_row += 2 * p_dest->p->i_pitch;
        p_y_row    += 2 * p_source->p[Y_PLANE].i_pitch;
        p_u_row    += p_source->p[U_PLANE].i_pitch;
        p_v_row    += p_source->p[V_PLANE].i_pitch;
    }
}
#endif
475