1 /*****************************************************************************
2 * i420_rgb16.c : YUV to bitmap RGB conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000 the VideoLAN team
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damienf@videolan.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
28 #include <string.h> /* strerror() */
29 #include <stdlib.h> /* malloc(), free() */
35 #if defined (MODULE_NAME_IS_i420_rgb)
36 # include "i420_rgb_c.h"
37 #elif defined (MODULE_NAME_IS_i420_rgb_mmx)
38 # include "i420_rgb_mmx.h"
39 #elif defined (MODULE_NAME_IS_i420_rgb_sse2)
40 # include "i420_rgb_mmx.h"
/* Forward declaration of the file-local helper that the conversion functions
 * below call; its definition appears later in this file. */
43 static void SetOffset( int, int, int, int, vlc_bool_t *,
44 unsigned int *, int * );
46 #if defined (MODULE_NAME_IS_i420_rgb)
47 /*****************************************************************************
48 * I420_RGB16_dither: color YUV 4:2:0 to RGB 16 bpp with dithering
49 *****************************************************************************
50 * Horizontal alignment needed:
51 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
52 * - output: 1 pixel (2 bytes), margins allowed
53 * Vertical alignment needed:
54 * - input: 2 lines (2 Y lines, 1 U/V line)
56 *****************************************************************************/
/* I420_RGB16_dither: convert the Y, U and V planes of p_src into 16-bit
 * pixels stored through p_dest->p->p_pixels, using the four 4-entry dither
 * tables declared below.
 * NOTE(review): in this view the end of the parameter list, the braces and
 * several declarations (e.g. i_right_margin, i_rewind, p_buffer) are not
 * visible; the comments only describe the statements that are. */
57 void E_(I420_RGB16_dither)( vout_thread_t *p_vout, picture_t *p_src,
60 /* We got this one from the old arguments */
61 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
62 uint8_t *p_y = p_src->Y_PIXELS;
63 uint8_t *p_u = p_src->U_PIXELS;
64 uint8_t *p_v = p_src->V_PIXELS;
66 vlc_bool_t b_hscale; /* horizontal scaling type */
67 unsigned int i_vscale; /* vertical scaling type */
68 unsigned int i_x, i_y; /* horizontal and vertical indexes */
69 unsigned int i_real_y; /* y % 4 */
73 int i_scale_count; /* scale modulo counter */
74 int i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
75 uint16_t * p_pic_start; /* beginning of the current line for copy */
76 int i_uval, i_vval; /* U and V samples */
77 int i_red, i_green, i_blue; /* U and V modified samples */
78 uint16_t * p_yuv = p_vout->chroma.p_sys->p_rgb16;
79 uint16_t * p_ybase; /* Y dependent conversion table */
81 /* Conversion buffer pointer */
82 uint16_t * p_buffer_start = (uint16_t*)p_vout->chroma.p_sys->p_buffer;
85 /* Offset array pointer */
86 int * p_offset_start = p_vout->chroma.p_sys->p_offset;
/* Source padding per line: full pitch minus visible pitch, taken from
 * plane 0 for luma and from plane 1 for both chroma pointers. */
89 const int i_source_margin = p_src->p[0].i_pitch
90 - p_src->p[0].i_visible_pitch;
91 const int i_source_margin_c = p_src->p[1].i_pitch
92 - p_src->p[1].i_visible_pitch;
94 /* The dithering matrices */
95 int dither10[4] = { 0x0, 0x8, 0x2, 0xa };
96 int dither11[4] = { 0xc, 0x4, 0xe, 0x6 };
97 int dither12[4] = { 0x3, 0xb, 0x1, 0x9 };
98 int dither13[4] = { 0xf, 0x7, 0xd, 0x5 };
/* Every entry of the four tables is shifted left by the same amount,
 * SHIFT - 4 + p_vout->output.i_rrshift. */
100 for(i_x = 0; i_x < 4; i_x++)
102 dither10[i_x] = dither10[i_x] << (SHIFT - 4 + p_vout->output.i_rrshift);
103 dither11[i_x] = dither11[i_x] << (SHIFT - 4 + p_vout->output.i_rrshift);
104 dither12[i_x] = dither12[i_x] << (SHIFT - 4 + p_vout->output.i_rrshift);
105 dither13[i_x] = dither13[i_x] << (SHIFT - 4 + p_vout->output.i_rrshift);
/* Destination padding: pitch minus visible pitch of the destination plane. */
108 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
/* When the render width is not a multiple of 8, i_rewind is the number of
 * pixels missing to complete the last group of 8. */
110 if( p_vout->render.i_width & 7 )
112 i_rewind = 8 - ( p_vout->render.i_width & 7 );
119 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
120 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
121 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
122 SetOffset( p_vout->render.i_width, p_vout->render.i_height,
123 p_vout->output.i_width, p_vout->output.i_height,
124 &b_hscale, &i_vscale, p_offset_start );
/* The modulo counter starts at the output height when i_vscale is 1 and at
 * the render height otherwise. */
129 i_scale_count = ( i_vscale == 1 ) ?
130 p_vout->output.i_height : p_vout->render.i_height;
/* One pass per source line. */
131 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
/* Line index modulo 4. */
133 i_real_y = i_y & 0x3;
/* Convert into the intermediate buffer when horizontal scaling is needed,
 * otherwise straight into the destination picture. */
135 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* One iteration per complete group of 8 source pixels; the macros are
 * defined outside this view. */
137 for ( i_x = p_vout->render.i_width / 8; i_x--; )
139 int *p_dither = dither10;
140 CONVERT_YUV_PIXEL_DITHER(2);
142 CONVERT_Y_PIXEL_DITHER(2);
144 CONVERT_YUV_PIXEL_DITHER(2);
146 CONVERT_Y_PIXEL_DITHER(2);
148 CONVERT_YUV_PIXEL_DITHER(2);
150 CONVERT_Y_PIXEL_DITHER(2);
152 CONVERT_YUV_PIXEL_DITHER(2);
154 CONVERT_Y_PIXEL_DITHER(2);
157 /* Here we do some unaligned reads and duplicate conversions, but
158 * at least we have all the pixels */
161 int *p_dither = dither10;
/* Step the chroma pointers back by half the rewind and the output pointer
 * by the full rewind before converting one more group of 8. */
163 p_u -= i_rewind >> 1;
164 p_v -= i_rewind >> 1;
165 p_buffer -= i_rewind;
166 CONVERT_YUV_PIXEL_DITHER(2);
168 CONVERT_Y_PIXEL_DITHER(2);
170 CONVERT_YUV_PIXEL_DITHER(2);
172 CONVERT_Y_PIXEL_DITHER(2);
174 CONVERT_YUV_PIXEL_DITHER(2);
176 CONVERT_Y_PIXEL_DITHER(2);
178 CONVERT_YUV_PIXEL_DITHER(2);
180 CONVERT_Y_PIXEL_DITHER(2);
183 SCALE_HEIGHT( 420, 2 );
/* Skip the source padding. NOTE(review): any condition guarding the chroma
 * updates is not visible in this view. */
185 p_y += i_source_margin;
188 p_u += i_source_margin_c;
189 p_v += i_source_margin_c;
195 /*****************************************************************************
196 * I420_RGB16: color YUV 4:2:0 to RGB 16 bpp
197 *****************************************************************************
198 * Horizontal alignment needed:
199 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
200 * - output: 1 pixel (2 bytes), margins allowed
201 * Vertical alignment needed:
202 * - input: 2 lines (2 Y lines, 1 U/V line)
204 *****************************************************************************/
206 #if defined (MODULE_NAME_IS_i420_rgb)
/* I420_RGB16: convert the Y, U and V planes of p_src into 16-bit pixels
 * stored through p_dest->p->p_pixels, without dithering.
 * NOTE(review): the end of the parameter list, the braces and some
 * declarations (e.g. i_right_margin, i_rewind, p_buffer) are not visible in
 * this view. */
208 void E_(I420_RGB16)( vout_thread_t *p_vout, picture_t *p_src,
211 /* We got this one from the old arguments */
212 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
213 uint8_t *p_y = p_src->Y_PIXELS;
214 uint8_t *p_u = p_src->U_PIXELS;
215 uint8_t *p_v = p_src->V_PIXELS;
217 vlc_bool_t b_hscale; /* horizontal scaling type */
218 unsigned int i_vscale; /* vertical scaling type */
219 unsigned int i_x, i_y; /* horizontal and vertical indexes */
223 int i_scale_count; /* scale modulo counter */
224 int i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
225 uint16_t * p_pic_start; /* beginning of the current line for copy */
226 int i_uval, i_vval; /* U and V samples */
227 int i_red, i_green, i_blue; /* U and V modified samples */
228 uint16_t * p_yuv = p_vout->chroma.p_sys->p_rgb16;
229 uint16_t * p_ybase; /* Y dependent conversion table */
231 /* Conversion buffer pointer */
232 uint16_t * p_buffer_start = (uint16_t*)p_vout->chroma.p_sys->p_buffer;
235 /* Offset array pointer */
236 int * p_offset_start = p_vout->chroma.p_sys->p_offset;
/* Source padding per line: full pitch minus visible pitch, taken from
 * plane 0 for luma and from plane 1 for both chroma pointers. */
239 const int i_source_margin = p_src->p[0].i_pitch
240 - p_src->p[0].i_visible_pitch;
241 const int i_source_margin_c = p_src->p[1].i_pitch
242 - p_src->p[1].i_visible_pitch;
/* Destination padding: pitch minus visible pitch of the destination plane. */
244 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
/* When the render width is not a multiple of 8, i_rewind is the number of
 * pixels missing to complete the last group of 8. */
246 if( p_vout->render.i_width & 7 )
248 i_rewind = 8 - ( p_vout->render.i_width & 7 );
255 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
256 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
257 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
258 SetOffset( p_vout->render.i_width, p_vout->render.i_height,
259 p_vout->output.i_width, p_vout->output.i_height,
260 &b_hscale, &i_vscale, p_offset_start );
/* The modulo counter starts at the output height when i_vscale is 1 and at
 * the render height otherwise. */
265 i_scale_count = ( i_vscale == 1 ) ?
266 p_vout->output.i_height : p_vout->render.i_height;
/* One pass per source line. */
267 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
/* Convert into the intermediate buffer when horizontal scaling is needed,
 * otherwise straight into the destination picture. */
270 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* One iteration per complete group of 8 source pixels; the macros are
 * defined outside this view. */
272 for ( i_x = p_vout->render.i_width / 8; i_x--; )
274 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
275 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
276 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
277 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
280 /* Here we do some unaligned reads and duplicate conversions, but
281 * at least we have all the pixels */
/* Step the chroma pointers back by half the rewind and the output pointer
 * by the full rewind before converting one more group of 8. */
285 p_u -= i_rewind >> 1;
286 p_v -= i_rewind >> 1;
287 p_buffer -= i_rewind;
289 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
290 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
291 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
292 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
295 SCALE_HEIGHT( 420, 2 );
/* Skip the source padding. NOTE(review): any condition guarding the chroma
 * updates is not visible in this view. */
297 p_y += i_source_margin;
300 p_u += i_source_margin_c;
301 p_v += i_source_margin_c;
306 #else // ! defined (MODULE_NAME_IS_i420_rgb)
/* I420_R5G5B5: convert the Y, U and V planes of p_src into 16-bit pixels
 * stored through p_dest->p->p_pixels. The SSE2 build works on groups of 16
 * pixels with the SSE2_UNPACK_15_* macros; the other build works on groups
 * of 8. The name presumably denotes a 5-5-5 bit layout; confirm against the
 * macro header.
 * NOTE(review): the end of the parameter list, the braces, several
 * declarations and the lines between the visible macro invocations are not
 * visible in this view. */
308 void E_(I420_R5G5B5)( vout_thread_t *p_vout, picture_t *p_src,
311 /* We got this one from the old arguments */
312 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
313 uint8_t *p_y = p_src->Y_PIXELS;
314 uint8_t *p_u = p_src->U_PIXELS;
315 uint8_t *p_v = p_src->V_PIXELS;
317 vlc_bool_t b_hscale; /* horizontal scaling type */
318 unsigned int i_vscale; /* vertical scaling type */
319 unsigned int i_x, i_y; /* horizontal and vertical indexes */
323 int i_scale_count; /* scale modulo counter */
324 int i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
325 uint16_t * p_pic_start; /* beginning of the current line for copy */
327 /* Conversion buffer pointer */
328 uint16_t * p_buffer_start = (uint16_t*)p_vout->chroma.p_sys->p_buffer;
331 /* Offset array pointer */
332 int * p_offset_start = p_vout->chroma.p_sys->p_offset;
/* Source padding per line: full pitch minus visible pitch, taken from
 * plane 0 for luma and from plane 1 for both chroma pointers. */
335 const int i_source_margin = p_src->p[0].i_pitch
336 - p_src->p[0].i_visible_pitch;
337 const int i_source_margin_c = p_src->p[1].i_pitch
338 - p_src->p[1].i_visible_pitch;
/* Destination padding: pitch minus visible pitch of the destination plane. */
340 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
342 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
343 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
344 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
345 SetOffset( p_vout->render.i_width, p_vout->render.i_height,
346 p_vout->output.i_width, p_vout->output.i_height,
347 &b_hscale, &i_vscale, p_offset_start );
/* The modulo counter starts at the output height when i_vscale is 1 and at
 * the render height otherwise. */
353 i_scale_count = ( i_vscale == 1 ) ?
354 p_vout->output.i_height : p_vout->render.i_height;
/* SSE2 build: the rewind is computed modulo 16 because the loops below
 * consume 16 pixels per iteration. */
356 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
358 if( p_vout->render.i_width & 15 )
360 i_rewind = 16 - ( p_vout->render.i_width & 15 );
368 ** SSE2 128 bits fetch/store instructions are faster
369 ** if memory access is 16 bytes aligned
372 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* 16-byte alignment test on the source Y pitch OR-ed with further operands
 * that are not visible in this view. */
373 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
378 /* use faster SSE2 aligned fetch and store */
379 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
383 for ( i_x = p_vout->render.i_width/16; i_x--; )
389 SSE2_UNPACK_15_ALIGNED
396 /* Here we do some unaligned reads and duplicate conversions, but
397 * at least we have all the pixels */
/* Step the chroma pointers back by half the rewind and the output pointer
 * by the full rewind; the rewound group uses the unaligned macro variants
 * even on this aligned path. */
401 p_u -= i_rewind >> 1;
402 p_v -= i_rewind >> 1;
403 p_buffer -= i_rewind;
406 SSE2_INIT_16_UNALIGNED
409 SSE2_UNPACK_15_UNALIGNED
416 SCALE_HEIGHT( 420, 2 );
/* Skip the source padding. NOTE(review): any condition guarding the chroma
 * updates is not visible in this view. */
418 p_y += i_source_margin;
421 p_u += i_source_margin_c;
422 p_v += i_source_margin_c;
424 p_buffer = b_hscale ? p_buffer_start : p_pic;
429 /* use slower SSE2 unaligned fetch and store */
430 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
433 p_buffer = b_hscale ? p_buffer_start : p_pic;
435 for ( i_x = p_vout->render.i_width/16; i_x--; )
438 SSE2_INIT_16_UNALIGNED
441 SSE2_UNPACK_15_UNALIGNED
448 /* Here we do some unaligned reads and duplicate conversions, but
449 * at least we have all the pixels */
453 p_u -= i_rewind >> 1;
454 p_v -= i_rewind >> 1;
455 p_buffer -= i_rewind;
458 SSE2_INIT_16_UNALIGNED
461 SSE2_UNPACK_15_UNALIGNED
468 SCALE_HEIGHT( 420, 2 );
470 p_y += i_source_margin;
473 p_u += i_source_margin_c;
474 p_v += i_source_margin_c;
476 p_buffer = b_hscale ? p_buffer_start : p_pic;
480 /* make sure all SSE2 stores are visible thereafter */
/* Non-SSE2 build: groups of 8 pixels, so the rewind is computed modulo 8. */
483 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
485 if( p_vout->render.i_width & 7 )
487 i_rewind = 8 - ( p_vout->render.i_width & 7 );
494 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
497 p_buffer = b_hscale ? p_buffer_start : p_pic;
499 for ( i_x = p_vout->render.i_width / 8; i_x--; )
513 /* Here we do some unaligned reads and duplicate conversions, but
514 * at least we have all the pixels */
518 p_u -= i_rewind >> 1;
519 p_v -= i_rewind >> 1;
520 p_buffer -= i_rewind;
534 SCALE_HEIGHT( 420, 2 );
536 p_y += i_source_margin;
539 p_u += i_source_margin_c;
540 p_v += i_source_margin_c;
543 /* re-enable FPU registers */
/* I420_R5G6B5: convert the Y, U and V planes of p_src into 16-bit pixels
 * stored through p_dest->p->p_pixels. The SSE2 build works on groups of 16
 * pixels with the SSE2_UNPACK_16_* macros; the other build works on groups
 * of 8. The name presumably denotes a 5-6-5 bit layout; confirm against the
 * macro header.
 * NOTE(review): the end of the parameter list, the braces, several
 * declarations and the lines between the visible macro invocations are not
 * visible in this view. */
549 void E_(I420_R5G6B5)( vout_thread_t *p_vout, picture_t *p_src,
552 /* We got this one from the old arguments */
553 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
554 uint8_t *p_y = p_src->Y_PIXELS;
555 uint8_t *p_u = p_src->U_PIXELS;
556 uint8_t *p_v = p_src->V_PIXELS;
558 vlc_bool_t b_hscale; /* horizontal scaling type */
559 unsigned int i_vscale; /* vertical scaling type */
560 unsigned int i_x, i_y; /* horizontal and vertical indexes */
564 int i_scale_count; /* scale modulo counter */
565 int i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
566 uint16_t * p_pic_start; /* beginning of the current line for copy */
568 /* Conversion buffer pointer */
569 uint16_t * p_buffer_start = (uint16_t*)p_vout->chroma.p_sys->p_buffer;
572 /* Offset array pointer */
573 int * p_offset_start = p_vout->chroma.p_sys->p_offset;
/* Source padding per line: full pitch minus visible pitch, taken from
 * plane 0 for luma and from plane 1 for both chroma pointers. */
576 const int i_source_margin = p_src->p[0].i_pitch
577 - p_src->p[0].i_visible_pitch;
578 const int i_source_margin_c = p_src->p[1].i_pitch
579 - p_src->p[1].i_visible_pitch;
/* Destination padding: pitch minus visible pitch of the destination plane. */
581 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
583 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
584 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
585 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
586 SetOffset( p_vout->render.i_width, p_vout->render.i_height,
587 p_vout->output.i_width, p_vout->output.i_height,
588 &b_hscale, &i_vscale, p_offset_start );
/* The modulo counter starts at the output height when i_vscale is 1 and at
 * the render height otherwise. */
594 i_scale_count = ( i_vscale == 1 ) ?
595 p_vout->output.i_height : p_vout->render.i_height;
/* SSE2 build: the rewind is computed modulo 16 because the loops below
 * consume 16 pixels per iteration. */
597 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
599 if( p_vout->render.i_width & 15 )
601 i_rewind = 16 - ( p_vout->render.i_width & 15 );
609 ** SSE2 128 bits fetch/store instructions are faster
610 ** if memory access is 16 bytes aligned
613 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* 16-byte alignment test on the source Y pitch OR-ed with further operands
 * that are not visible in this view. */
614 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
619 /* use faster SSE2 aligned fetch and store */
620 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
624 for ( i_x = p_vout->render.i_width/16; i_x--; )
630 SSE2_UNPACK_16_ALIGNED
637 /* Here we do some unaligned reads and duplicate conversions, but
638 * at least we have all the pixels */
/* Step the chroma pointers back by half the rewind and the output pointer
 * by the full rewind; the rewound group uses the unaligned macro variants
 * even on this aligned path. */
642 p_u -= i_rewind >> 1;
643 p_v -= i_rewind >> 1;
644 p_buffer -= i_rewind;
647 SSE2_INIT_16_UNALIGNED
650 SSE2_UNPACK_16_UNALIGNED
657 SCALE_HEIGHT( 420, 2 );
/* Skip the source padding. NOTE(review): any condition guarding the chroma
 * updates is not visible in this view. */
659 p_y += i_source_margin;
662 p_u += i_source_margin_c;
663 p_v += i_source_margin_c;
665 p_buffer = b_hscale ? p_buffer_start : p_pic;
670 /* use slower SSE2 unaligned fetch and store */
671 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
674 p_buffer = b_hscale ? p_buffer_start : p_pic;
676 for ( i_x = p_vout->render.i_width/16; i_x--; )
679 SSE2_INIT_16_UNALIGNED
682 SSE2_UNPACK_16_UNALIGNED
689 /* Here we do some unaligned reads and duplicate conversions, but
690 * at least we have all the pixels */
694 p_u -= i_rewind >> 1;
695 p_v -= i_rewind >> 1;
696 p_buffer -= i_rewind;
699 SSE2_INIT_16_UNALIGNED
702 SSE2_UNPACK_16_UNALIGNED
709 SCALE_HEIGHT( 420, 2 );
711 p_y += i_source_margin;
714 p_u += i_source_margin_c;
715 p_v += i_source_margin_c;
717 p_buffer = b_hscale ? p_buffer_start : p_pic;
721 /* make sure all SSE2 stores are visible thereafter */
/* Non-SSE2 build: groups of 8 pixels, so the rewind is computed modulo 8. */
724 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
726 if( p_vout->render.i_width & 7 )
728 i_rewind = 8 - ( p_vout->render.i_width & 7 );
735 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
738 p_buffer = b_hscale ? p_buffer_start : p_pic;
740 for ( i_x = p_vout->render.i_width / 8; i_x--; )
754 /* Here we do some unaligned reads and duplicate conversions, but
755 * at least we have all the pixels */
759 p_u -= i_rewind >> 1;
760 p_v -= i_rewind >> 1;
761 p_buffer -= i_rewind;
775 SCALE_HEIGHT( 420, 2 );
777 p_y += i_source_margin;
780 p_u += i_source_margin_c;
781 p_v += i_source_margin_c;
784 /* re-enable FPU registers */
792 /*****************************************************************************
793 * I420_RGB32: color YUV 4:2:0 to RGB 32 bpp
794 *****************************************************************************
795 * Horizontal alignment needed:
796 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
797 * - output: 1 pixel (4 bytes), margins allowed
798 * Vertical alignment needed:
799 * - input: 2 lines (2 Y lines, 1 U/V line)
801 *****************************************************************************/
803 #if defined (MODULE_NAME_IS_i420_rgb)
/* I420_RGB32: convert the Y, U and V planes of p_src into 32-bit pixels
 * stored through p_dest->p->p_pixels.
 * NOTE(review): the end of the parameter list, the braces and some
 * declarations (e.g. i_right_margin, i_rewind, p_buffer) are not visible in
 * this view. */
805 void E_(I420_RGB32)( vout_thread_t *p_vout, picture_t *p_src,
808 /* We got this one from the old arguments */
809 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
810 uint8_t *p_y = p_src->Y_PIXELS;
811 uint8_t *p_u = p_src->U_PIXELS;
812 uint8_t *p_v = p_src->V_PIXELS;
814 vlc_bool_t b_hscale; /* horizontal scaling type */
815 unsigned int i_vscale; /* vertical scaling type */
816 unsigned int i_x, i_y; /* horizontal and vertical indexes */
820 int i_scale_count; /* scale modulo counter */
821 int i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
822 uint32_t * p_pic_start; /* beginning of the current line for copy */
823 int i_uval, i_vval; /* U and V samples */
824 int i_red, i_green, i_blue; /* U and V modified samples */
825 uint32_t * p_yuv = p_vout->chroma.p_sys->p_rgb32;
826 uint32_t * p_ybase; /* Y dependent conversion table */
828 /* Conversion buffer pointer */
829 uint32_t * p_buffer_start = (uint32_t*)p_vout->chroma.p_sys->p_buffer;
832 /* Offset array pointer */
833 int * p_offset_start = p_vout->chroma.p_sys->p_offset;
/* Source padding per line: full pitch minus visible pitch, taken from
 * plane 0 for luma and from plane 1 for both chroma pointers. */
836 const int i_source_margin = p_src->p[0].i_pitch
837 - p_src->p[0].i_visible_pitch;
838 const int i_source_margin_c = p_src->p[1].i_pitch
839 - p_src->p[1].i_visible_pitch;
/* Destination padding: pitch minus visible pitch of the destination plane. */
841 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
/* When the render width is not a multiple of 8, i_rewind is the number of
 * pixels missing to complete the last group of 8. */
843 if( p_vout->render.i_width & 7 )
845 i_rewind = 8 - ( p_vout->render.i_width & 7 );
852 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
853 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
854 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
855 SetOffset( p_vout->render.i_width, p_vout->render.i_height,
856 p_vout->output.i_width, p_vout->output.i_height,
857 &b_hscale, &i_vscale, p_offset_start );
/* The modulo counter starts at the output height when i_vscale is 1 and at
 * the render height otherwise. */
862 i_scale_count = ( i_vscale == 1 ) ?
863 p_vout->output.i_height : p_vout->render.i_height;
/* One pass per source line. */
864 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
/* Convert into the intermediate buffer when horizontal scaling is needed,
 * otherwise straight into the destination picture. */
867 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* One iteration per complete group of 8 source pixels; the macros are
 * defined outside this view. */
869 for ( i_x = p_vout->render.i_width / 8; i_x--; )
871 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
872 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
873 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
874 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
877 /* Here we do some unaligned reads and duplicate conversions, but
878 * at least we have all the pixels */
/* Step the chroma pointers back by half the rewind and the output pointer
 * by the full rewind before converting one more group of 8. */
882 p_u -= i_rewind >> 1;
883 p_v -= i_rewind >> 1;
884 p_buffer -= i_rewind;
885 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
886 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
887 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
888 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
891 SCALE_HEIGHT( 420, 4 );
/* Skip the source padding. NOTE(review): any condition guarding the chroma
 * updates is not visible in this view. */
893 p_y += i_source_margin;
896 p_u += i_source_margin_c;
897 p_v += i_source_margin_c;
902 #else // defined (MODULE_NAME_IS_i420_rgb_mmx) || defined (MODULE_NAME_IS_i420_rgb_sse2)
/* I420_A8R8G8B8: convert the Y, U and V planes of p_src into 32-bit pixels
 * stored through p_dest->p->p_pixels. The SSE2 build works on groups of 16
 * pixels with the SSE2_UNPACK_32_ARGB_* macros; the other build works on
 * groups of 8. The channel order is presumably the one spelled in the name;
 * confirm against the macro header.
 * NOTE(review): the end of the parameter list, the braces, several
 * declarations and the lines between the visible macro invocations are not
 * visible in this view. */
904 void E_(I420_A8R8G8B8)( vout_thread_t *p_vout, picture_t *p_src,
907 /* We got this one from the old arguments */
908 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
909 uint8_t *p_y = p_src->Y_PIXELS;
910 uint8_t *p_u = p_src->U_PIXELS;
911 uint8_t *p_v = p_src->V_PIXELS;
913 vlc_bool_t b_hscale; /* horizontal scaling type */
914 unsigned int i_vscale; /* vertical scaling type */
915 unsigned int i_x, i_y; /* horizontal and vertical indexes */
919 int i_scale_count; /* scale modulo counter */
920 int i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
921 uint32_t * p_pic_start; /* beginning of the current line for copy */
922 /* Conversion buffer pointer */
923 uint32_t * p_buffer_start = (uint32_t*)p_vout->chroma.p_sys->p_buffer;
926 /* Offset array pointer */
927 int * p_offset_start = p_vout->chroma.p_sys->p_offset;
/* Source padding per line: full pitch minus visible pitch, taken from
 * plane 0 for luma and from plane 1 for both chroma pointers. */
930 const int i_source_margin = p_src->p[0].i_pitch
931 - p_src->p[0].i_visible_pitch;
932 const int i_source_margin_c = p_src->p[1].i_pitch
933 - p_src->p[1].i_visible_pitch;
/* Destination padding: pitch minus visible pitch of the destination plane. */
935 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
937 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
938 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
939 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
940 SetOffset( p_vout->render.i_width, p_vout->render.i_height,
941 p_vout->output.i_width, p_vout->output.i_height,
942 &b_hscale, &i_vscale, p_offset_start );
/* The modulo counter starts at the output height when i_vscale is 1 and at
 * the render height otherwise. */
947 i_scale_count = ( i_vscale == 1 ) ?
948 p_vout->output.i_height : p_vout->render.i_height;
/* SSE2 build: the rewind is computed modulo 16 because the loops below
 * consume 16 pixels per iteration. */
950 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
952 if( p_vout->render.i_width & 15 )
954 i_rewind = 16 - ( p_vout->render.i_width & 15 );
962 ** SSE2 128 bits fetch/store instructions are faster
963 ** if memory access is 16 bytes aligned
966 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* 16-byte alignment test on the source Y pitch OR-ed with further operands
 * that are not visible in this view. */
967 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
972 /* use faster SSE2 aligned fetch and store */
973 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
977 for ( i_x = p_vout->render.i_width / 16; i_x--; )
983 SSE2_UNPACK_32_ARGB_ALIGNED
991 /* Here we do some unaligned reads and duplicate conversions, but
992 * at least we have all the pixels */
/* Step the chroma pointers back by half the rewind and the output pointer
 * by the full rewind; the rewound group uses the unaligned macro variants
 * even on this aligned path. */
996 p_u -= i_rewind >> 1;
997 p_v -= i_rewind >> 1;
998 p_buffer -= i_rewind;
1000 SSE2_INIT_32_UNALIGNED
1003 SSE2_UNPACK_32_ARGB_UNALIGNED
1010 SCALE_HEIGHT( 420, 4 );
/* Skip the source padding. NOTE(review): any condition guarding the chroma
 * updates is not visible in this view. */
1012 p_y += i_source_margin;
1015 p_u += i_source_margin_c;
1016 p_v += i_source_margin_c;
1018 p_buffer = b_hscale ? p_buffer_start : p_pic;
1023 /* use slower SSE2 unaligned fetch and store */
1024 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
/* Remember where the current destination line starts. */
1026 p_pic_start = p_pic;
1027 p_buffer = b_hscale ? p_buffer_start : p_pic;
1029 for ( i_x = p_vout->render.i_width / 16; i_x--; )
1032 SSE2_INIT_32_UNALIGNED
1035 SSE2_UNPACK_32_ARGB_UNALIGNED
1043 /* Here we do some unaligned reads and duplicate conversions, but
1044 * at least we have all the pixels */
1048 p_u -= i_rewind >> 1;
1049 p_v -= i_rewind >> 1;
1050 p_buffer -= i_rewind;
1052 SSE2_INIT_32_UNALIGNED
1055 SSE2_UNPACK_32_ARGB_UNALIGNED
1062 SCALE_HEIGHT( 420, 4 );
1064 p_y += i_source_margin;
1067 p_u += i_source_margin_c;
1068 p_v += i_source_margin_c;
1070 p_buffer = b_hscale ? p_buffer_start : p_pic;
1074 /* make sure all SSE2 stores are visible thereafter */
/* Non-SSE2 build: groups of 8 pixels, so the rewind is computed modulo 8. */
1077 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
1079 if( p_vout->render.i_width & 7 )
1081 i_rewind = 8 - ( p_vout->render.i_width & 7 );
1088 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
1090 p_pic_start = p_pic;
1091 p_buffer = b_hscale ? p_buffer_start : p_pic;
1093 for ( i_x = p_vout->render.i_width / 8; i_x--; )
1107 /* Here we do some unaligned reads and duplicate conversions, but
1108 * at least we have all the pixels */
1112 p_u -= i_rewind >> 1;
1113 p_v -= i_rewind >> 1;
1114 p_buffer -= i_rewind;
1127 SCALE_HEIGHT( 420, 4 );
1129 p_y += i_source_margin;
1132 p_u += i_source_margin_c;
1133 p_v += i_source_margin_c;
1137 /* re-enable FPU registers */
/* I420_B8G8R8A8: convert the Y, U and V planes of p_src into 32-bit pixels
 * stored through p_dest->p->p_pixels. The SSE2 build works on groups of 16
 * pixels with the SSE2_UNPACK_32_BGRA_* macros; a second path works on
 * groups of 8. The channel order is presumably the one spelled in the name;
 * confirm against the macro header.
 * NOTE(review): the end of the parameter list, the braces, several
 * declarations, some preprocessor directives and the lines between the
 * visible macro invocations are not visible in this view. */
1143 void E_(I420_B8G8R8A8)( vout_thread_t *p_vout, picture_t *p_src,
1146 /* We got this one from the old arguments */
1147 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1148 uint8_t *p_y = p_src->Y_PIXELS;
1149 uint8_t *p_u = p_src->U_PIXELS;
1150 uint8_t *p_v = p_src->V_PIXELS;
1152 vlc_bool_t b_hscale; /* horizontal scaling type */
1153 unsigned int i_vscale; /* vertical scaling type */
1154 unsigned int i_x, i_y; /* horizontal and vertical indexes */
1158 int i_scale_count; /* scale modulo counter */
1159 int i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
1160 uint32_t * p_pic_start; /* beginning of the current line for copy */
1161 /* Conversion buffer pointer */
1162 uint32_t * p_buffer_start = (uint32_t*)p_vout->chroma.p_sys->p_buffer;
1163 uint32_t * p_buffer;
1165 /* Offset array pointer */
1166 int * p_offset_start = p_vout->chroma.p_sys->p_offset;
/* Source padding per line: full pitch minus visible pitch, taken from
 * plane 0 for luma and from plane 1 for both chroma pointers. */
1169 const int i_source_margin = p_src->p[0].i_pitch
1170 - p_src->p[0].i_visible_pitch;
1171 const int i_source_margin_c = p_src->p[1].i_pitch
1172 - p_src->p[1].i_visible_pitch;
/* Destination padding: pitch minus visible pitch of the destination plane. */
1174 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1176 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1177 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1178 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1179 SetOffset( p_vout->render.i_width, p_vout->render.i_height,
1180 p_vout->output.i_width, p_vout->output.i_height,
1181 &b_hscale, &i_vscale, p_offset_start );
1184 * Perform conversion
/* The modulo counter starts at the output height when i_vscale is 1 and at
 * the render height otherwise. */
1186 i_scale_count = ( i_vscale == 1 ) ?
1187 p_vout->output.i_height : p_vout->render.i_height;
/* SSE2 build: the rewind is computed modulo 16 because the loops below
 * consume 16 pixels per iteration. */
1189 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1191 if( p_vout->render.i_width & 15 )
1193 i_rewind = 16 - ( p_vout->render.i_width & 15 );
1201 ** SSE2 128 bits fetch/store instructions are faster
1202 ** if memory access is 16 bytes aligned
1205 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* 16-byte alignment test on the source Y pitch OR-ed with further operands
 * that are not visible in this view. */
1206 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1211 /* use faster SSE2 aligned fetch and store */
1212 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
/* Remember where the current destination line starts. */
1214 p_pic_start = p_pic;
1216 for ( i_x = p_vout->render.i_width / 16; i_x--; )
1219 SSE2_INIT_32_ALIGNED
1222 SSE2_UNPACK_32_BGRA_ALIGNED
1230 /* Here we do some unaligned reads and duplicate conversions, but
1231 * at least we have all the pixels */
/* Step the chroma pointers back by half the rewind and the output pointer
 * by the full rewind; the rewound group uses the unaligned macro variants
 * even on this aligned path. */
1235 p_u -= i_rewind >> 1;
1236 p_v -= i_rewind >> 1;
1237 p_buffer -= i_rewind;
1239 SSE2_INIT_32_UNALIGNED
1242 SSE2_UNPACK_32_BGRA_UNALIGNED
1249 SCALE_HEIGHT( 420, 4 );
/* Skip the source padding. NOTE(review): any condition guarding the chroma
 * updates is not visible in this view. */
1251 p_y += i_source_margin;
1254 p_u += i_source_margin_c;
1255 p_v += i_source_margin_c;
1257 p_buffer = b_hscale ? p_buffer_start : p_pic;
1262 /* use slower SSE2 unaligned fetch and store */
1263 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
1265 p_pic_start = p_pic;
1266 p_buffer = b_hscale ? p_buffer_start : p_pic;
1268 for ( i_x = p_vout->render.i_width / 16; i_x--; )
1271 SSE2_INIT_32_UNALIGNED
1274 SSE2_UNPACK_32_BGRA_UNALIGNED
1282 /* Here we do some unaligned reads and duplicate conversions, but
1283 * at least we have all the pixels */
1287 p_u -= i_rewind >> 1;
1288 p_v -= i_rewind >> 1;
1289 p_buffer -= i_rewind;
1291 SSE2_INIT_32_UNALIGNED
1294 SSE2_UNPACK_32_BGRA_UNALIGNED
1301 SCALE_HEIGHT( 420, 4 );
1303 p_y += i_source_margin;
1306 p_u += i_source_margin_c;
1307 p_v += i_source_margin_c;
1309 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* Path working on groups of 8 pixels: the rewind is computed modulo 8.
 * NOTE(review): the preprocessor directive separating this path from the
 * SSE2 one is not visible in this view. */
1315 if( p_vout->render.i_width & 7 )
1317 i_rewind = 8 - ( p_vout->render.i_width & 7 );
1324 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
1326 p_pic_start = p_pic;
1327 p_buffer = b_hscale ? p_buffer_start : p_pic;
1329 for ( i_x = p_vout->render.i_width / 8; i_x--; )
1343 /* Here we do some unaligned reads and duplicate conversions, but
1344 * at least we have all the pixels */
1348 p_u -= i_rewind >> 1;
1349 p_v -= i_rewind >> 1;
1350 p_buffer -= i_rewind;
1363 SCALE_HEIGHT( 420, 4 );
1365 p_y += i_source_margin;
1368 p_u += i_source_margin_c;
1369 p_v += i_source_margin_c;
1373 /* re-enable FPU registers */
/* I420_A8B8G8R8: convert the Y, U and V planes of p_src into 32-bit pixels
 * stored through p_dest->p->p_pixels. The SSE2 build works on groups of 16
 * pixels with the SSE2_UNPACK_32_ABGR_* macros; a second path works on
 * groups of 8. The channel order is presumably the one spelled in the name;
 * confirm against the macro header.
 * NOTE(review): the end of the parameter list, the braces, several
 * declarations, some preprocessor directives and the lines between the
 * visible macro invocations are not visible in this view. */
1379 void E_(I420_A8B8G8R8)( vout_thread_t *p_vout, picture_t *p_src,
1382 /* We got this one from the old arguments */
1383 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1384 uint8_t *p_y = p_src->Y_PIXELS;
1385 uint8_t *p_u = p_src->U_PIXELS;
1386 uint8_t *p_v = p_src->V_PIXELS;
1388 vlc_bool_t b_hscale; /* horizontal scaling type */
1389 unsigned int i_vscale; /* vertical scaling type */
1390 unsigned int i_x, i_y; /* horizontal and vertical indexes */
1394 int i_scale_count; /* scale modulo counter */
1395 int i_chroma_width = p_vout->render.i_width / 2; /* chroma width */
1396 uint32_t * p_pic_start; /* beginning of the current line for copy */
1397 /* Conversion buffer pointer */
1398 uint32_t * p_buffer_start = (uint32_t*)p_vout->chroma.p_sys->p_buffer;
1399 uint32_t * p_buffer;
1401 /* Offset array pointer */
1402 int * p_offset_start = p_vout->chroma.p_sys->p_offset;
/* Source padding per line: full pitch minus visible pitch, taken from
 * plane 0 for luma and from plane 1 for both chroma pointers. */
1405 const int i_source_margin = p_src->p[0].i_pitch
1406 - p_src->p[0].i_visible_pitch;
1407 const int i_source_margin_c = p_src->p[1].i_pitch
1408 - p_src->p[1].i_visible_pitch;
/* Destination padding: pitch minus visible pitch of the destination plane. */
1410 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1412 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1413 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1414 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1415 SetOffset( p_vout->render.i_width, p_vout->render.i_height,
1416 p_vout->output.i_width, p_vout->output.i_height,
1417 &b_hscale, &i_vscale, p_offset_start );
1420 * Perform conversion
/* The modulo counter starts at the output height when i_vscale is 1 and at
 * the render height otherwise. */
1422 i_scale_count = ( i_vscale == 1 ) ?
1423 p_vout->output.i_height : p_vout->render.i_height;
/* SSE2 build: the rewind is computed modulo 16 because the loops below
 * consume 16 pixels per iteration. */
1425 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1427 if( p_vout->render.i_width & 15 )
1429 i_rewind = 16 - ( p_vout->render.i_width & 15 );
1437 ** SSE2 128 bits fetch/store instructions are faster
1438 ** if memory access is 16 bytes aligned
1441 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* 16-byte alignment test on the source Y pitch OR-ed with further operands
 * that are not visible in this view. */
1442 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1447 /* use faster SSE2 aligned fetch and store */
1448 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
/* Remember where the current destination line starts. */
1450 p_pic_start = p_pic;
1452 for ( i_x = p_vout->render.i_width / 16; i_x--; )
1455 SSE2_INIT_32_ALIGNED
1458 SSE2_UNPACK_32_ABGR_ALIGNED
1466 /* Here we do some unaligned reads and duplicate conversions, but
1467 * at least we have all the pixels */
/* Step the chroma pointers back by half the rewind and the output pointer
 * by the full rewind; the rewound group uses the unaligned macro variants
 * even on this aligned path. */
1471 p_u -= i_rewind >> 1;
1472 p_v -= i_rewind >> 1;
1473 p_buffer -= i_rewind;
1475 SSE2_INIT_32_UNALIGNED
1478 SSE2_UNPACK_32_ABGR_UNALIGNED
1485 SCALE_HEIGHT( 420, 4 );
/* Skip the source padding. NOTE(review): any condition guarding the chroma
 * updates is not visible in this view. */
1487 p_y += i_source_margin;
1490 p_u += i_source_margin_c;
1491 p_v += i_source_margin_c;
1493 p_buffer = b_hscale ? p_buffer_start : p_pic;
1498 /* use slower SSE2 unaligned fetch and store */
1499 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
1501 p_pic_start = p_pic;
1502 p_buffer = b_hscale ? p_buffer_start : p_pic;
1504 for ( i_x = p_vout->render.i_width / 16; i_x--; )
1507 SSE2_INIT_32_UNALIGNED
1510 SSE2_UNPACK_32_ABGR_UNALIGNED
1518 /* Here we do some unaligned reads and duplicate conversions, but
1519 * at least we have all the pixels */
1523 p_u -= i_rewind >> 1;
1524 p_v -= i_rewind >> 1;
1525 p_buffer -= i_rewind;
1527 SSE2_INIT_32_UNALIGNED
1530 SSE2_UNPACK_32_ABGR_UNALIGNED
1537 SCALE_HEIGHT( 420, 4 );
1539 p_y += i_source_margin;
1542 p_u += i_source_margin_c;
1543 p_v += i_source_margin_c;
1545 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* Path working on groups of 8 pixels: the rewind is computed modulo 8.
 * NOTE(review): the preprocessor directive separating this path from the
 * SSE2 one is not visible in this view. */
1551 if( p_vout->render.i_width & 7 )
1553 i_rewind = 8 - ( p_vout->render.i_width & 7 );
1560 for( i_y = 0; i_y < p_vout->render.i_height; i_y++ )
1562 p_pic_start = p_pic;
1563 p_buffer = b_hscale ? p_buffer_start : p_pic;
1565 for ( i_x = p_vout->render.i_width / 8; i_x--; )
1579 /* Here we do some unaligned reads and duplicate conversions, but
1580 * at least we have all the pixels */
1584 p_u -= i_rewind >> 1;
1585 p_v -= i_rewind >> 1;
1586 p_buffer -= i_rewind;
1599 SCALE_HEIGHT( 420, 4 );
1601 p_y += i_source_margin;
1604 p_u += i_source_margin_c;
1605 p_v += i_source_margin_c;
1609 /* re-enable FPU registers */
1617 /* Following functions are local */
1619 /*****************************************************************************
1620 * SetOffset: build offset array for conversion functions
1621 *****************************************************************************
1622 * This function will build an offset array used in later conversion functions.
1623 * It will also set horizontal and vertical scaling indicators.
1624 *****************************************************************************/
1625 static void SetOffset( int i_width, int i_height, int i_pic_width,
1626 int i_pic_height, vlc_bool_t *pb_hscale,
1627 unsigned int *pi_vscale, int *p_offset )
1629 int i_x; /* x position in destination */
1630 int i_scale_count; /* modulo counter */
1633 * Prepare horizontal offset array
1635 if( i_pic_width - i_width == 0 )
1637 /* No horizontal scaling: YUV conversion is done directly to picture */
1640 else if( i_pic_width - i_width > 0 )
1642 /* Prepare scaling array for horizontal extension */
1644 i_scale_count = i_pic_width;
1645 for( i_x = i_width; i_x--; )
1647 while( (i_scale_count -= i_width) > 0 )
1652 i_scale_count += i_pic_width;
1655 else /* if( i_pic_width - i_width < 0 ) */
1657 /* Prepare scaling array for horizontal reduction */
1659 i_scale_count = i_width;
1660 for( i_x = i_pic_width; i_x--; )
1663 while( (i_scale_count -= i_pic_width) > 0 )
1668 i_scale_count += i_width;
1673 * Set vertical scaling indicator
1675 if( i_pic_height - i_height == 0 )
1679 else if( i_pic_height - i_height > 0 )
1683 else /* if( i_pic_height - i_height < 0 ) */