1 /*****************************************************************************
2 * i420_rgb16_x86.c : YUV to bitmap RGB conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000 VLC authors and VideoLAN
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damienf@videolan.org>
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU Lesser General Public License as published by
12 * the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this program; if not, write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
29 #include <vlc_common.h>
30 #include <vlc_filter.h>
35 # include "i420_rgb_sse2.h"
36 # define VLC_TARGET VLC_SSE
38 # include "i420_rgb_mmx.h"
39 # define VLC_TARGET VLC_MMX
42 /*****************************************************************************
43 * SetOffset: build offset array for conversion functions
44 *****************************************************************************
45 * This function will build an offset array used in later conversion functions.
46 * It will also set horizontal and vertical scaling indicators.
47 *****************************************************************************/
/* Parameters: i_width / i_height are the source dimensions and
 * i_pic_width / i_pic_height the output dimensions (the callers in this
 * file pass the fmt_in and fmt_out video sizes).  pb_hscale and pi_vscale
 * receive the horizontal and vertical scaling indicators, and p_offset is
 * the offset array to fill. */
48 static void SetOffset( int i_width, int i_height, int i_pic_width,
49 int i_pic_height, bool *pb_hscale,
50 unsigned int *pi_vscale, int *p_offset )
53 * Prepare horizontal offset array
55 if( i_pic_width - i_width == 0 )
56 { /* No horizontal scaling: YUV conversion is done directly to picture */
59 else if( i_pic_width - i_width > 0 )
60 { /* Prepare scaling array for horizontal extension */
61 int i_scale_count = i_pic_width;
/* One iteration per source column: i_scale_count is decreased by i_width
 * until it is no longer positive, then topped up with i_pic_width for the
 * next column. */
64 for( int i_x = i_width; i_x--; )
66 while( (i_scale_count -= i_width) > 0 )
71 i_scale_count += i_pic_width;
74 else /* if( i_pic_width - i_width < 0 ) */
75 { /* Prepare scaling array for horizontal reduction */
76 int i_scale_count = i_pic_width;
/* Same scheme with the roles swapped: one iteration per output column,
 * decreasing by i_pic_width and topping up with i_width. */
79 for( int i_x = i_pic_width; i_x--; )
82 while( (i_scale_count -= i_pic_width) > 0 )
87 i_scale_count += i_width;
92 * Set vertical scaling indicator
94 if( i_pic_height - i_height == 0 )
96 else if( i_pic_height - i_height > 0 )
98 else /* if( i_pic_height - i_height < 0 ) */
/* I420_R5G5B5: converts the planar YUV picture p_src (Y, U and V planes)
 * into the 16-bit-per-pixel picture p_dest, using the input and output
 * sizes stored in p_filter.
 * NOTE(review): the function name and the SSE2_UNPACK_15_* macros suggest a
 * 5-5-5 RGB layout; those macros are defined elsewhere -- confirm there. */
103 void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
105 /* We got this one from the old arguments */
106 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
107 uint8_t *p_y = p_src->Y_PIXELS;
108 uint8_t *p_u = p_src->U_PIXELS;
109 uint8_t *p_v = p_src->V_PIXELS;
111 bool b_hscale; /* horizontal scaling type */
112 unsigned int i_vscale; /* vertical scaling type */
113 unsigned int i_x, i_y; /* horizontal and vertical indexes */
117 int i_scale_count; /* scale modulo counter */
118 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
119 uint16_t * p_pic_start; /* beginning of the current line for copy */
121 /* Conversion buffer pointer */
122 uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
125 /* Offset array pointer */
126 int * p_offset_start = p_filter->p_sys->p_offset;
/* Per-line padding of the source planes: pitch minus visible pitch, for
 * plane 0 (added to p_y below) and plane 1 (added to p_u and p_v below). */
129 const int i_source_margin = p_src->p[0].i_pitch
130 - p_src->p[0].i_visible_pitch;
131 const int i_source_margin_c = p_src->p[1].i_pitch
132 - p_src->p[1].i_visible_pitch;
134 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
136 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
137 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
138 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
139 SetOffset( p_filter->fmt_in.video.i_width,
140 p_filter->fmt_in.video.i_height,
141 p_filter->fmt_out.video.i_width,
142 p_filter->fmt_out.video.i_height,
143 &b_hscale, &i_vscale, p_offset_start );
149 i_scale_count = ( i_vscale == 1 ) ?
150 p_filter->fmt_out.video.i_height :
151 p_filter->fmt_in.video.i_height;
/* i_scale_count (above) starts from the output height when i_vscale is 1
 * and from the input height otherwise.
 * i_rewind is how many pixels the width falls short of a multiple of 16
 * (0 when it already is one); the end-of-line code below moves p_u, p_v and
 * p_buffer back by it before converting one more group, so some pixels are
 * converted twice. */
155 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
158 ** SSE2 128 bits fetch/store instructions are faster
159 ** if memory access is 16 bytes aligned
162 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* p_buffer, the conversion target, is the intermediate buffer when b_hscale
 * is set and the destination picture otherwise.  The aligned path is taken
 * only if every value ORed together in the test below has its four low bits
 * clear, i.e. is a multiple of 16. */
163 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
166 ((intptr_t)p_buffer))) )
168 /* use faster SSE2 aligned fetch and store */
169 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
173 for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
179 SSE2_UNPACK_15_ALIGNED
186 /* Here we do some unaligned reads and duplicate conversions, but
187 * at least we have all the pixels */
/* U and V step back by half of i_rewind: the chroma width is half the
 * luma width (see i_chroma_width). */
191 p_u -= i_rewind >> 1;
192 p_v -= i_rewind >> 1;
193 p_buffer -= i_rewind;
196 SSE2_INIT_16_UNALIGNED
199 SSE2_UNPACK_15_UNALIGNED
/* SCALE_HEIGHT is a macro defined outside this file.
 * NOTE(review): its second argument matches sizeof(*p_pic) (2 here),
 * presumably the output pixel size in bytes -- confirm. */
206 SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the luma line. */
208 p_y += i_source_margin;
211 p_u += i_source_margin_c;
212 p_v += i_source_margin_c;
214 p_buffer = b_hscale ? p_buffer_start : p_pic;
219 /* use slower SSE2 unaligned fetch and store */
220 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
223 p_buffer = b_hscale ? p_buffer_start : p_pic;
225 for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
228 SSE2_INIT_16_UNALIGNED
231 SSE2_UNPACK_15_UNALIGNED
238 /* Here we do some unaligned reads and duplicate conversions, but
239 * at least we have all the pixels */
243 p_u -= i_rewind >> 1;
244 p_v -= i_rewind >> 1;
245 p_buffer -= i_rewind;
248 SSE2_INIT_16_UNALIGNED
251 SSE2_UNPACK_15_UNALIGNED
258 SCALE_HEIGHT( 420, 2 );
260 p_y += i_source_margin;
263 p_u += i_source_margin_c;
264 p_v += i_source_margin_c;
266 p_buffer = b_hscale ? p_buffer_start : p_pic;
270 /* make sure all SSE2 stores are visible thereafter */
/* From here on pixels are handled in groups of 8 rather than 16 (mask 7,
 * width / 8).
 * NOTE(review): presumably the MMX variant of this function, given the
 * i420_rgb_mmx.h include and the FPU comment at the end -- confirm. */
275 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
277 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
280 p_buffer = b_hscale ? p_buffer_start : p_pic;
282 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
296 /* Here we do some unaligned reads and duplicate conversions, but
297 * at least we have all the pixels */
301 p_u -= i_rewind >> 1;
302 p_v -= i_rewind >> 1;
303 p_buffer -= i_rewind;
317 SCALE_HEIGHT( 420, 2 );
319 p_y += i_source_margin;
322 p_u += i_source_margin_c;
323 p_v += i_source_margin_c;
326 /* re-enable FPU registers */
/* I420_R5G6B5: converts the planar YUV picture p_src (Y, U and V planes)
 * into the 16-bit-per-pixel picture p_dest, using the input and output
 * sizes stored in p_filter.
 * NOTE(review): the function name and the SSE2_UNPACK_16_* macros suggest a
 * 5-6-5 RGB layout; those macros are defined elsewhere -- confirm there. */
333 void I420_R5G6B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
335 /* We got this one from the old arguments */
336 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
337 uint8_t *p_y = p_src->Y_PIXELS;
338 uint8_t *p_u = p_src->U_PIXELS;
339 uint8_t *p_v = p_src->V_PIXELS;
341 bool b_hscale; /* horizontal scaling type */
342 unsigned int i_vscale; /* vertical scaling type */
343 unsigned int i_x, i_y; /* horizontal and vertical indexes */
347 int i_scale_count; /* scale modulo counter */
348 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
349 uint16_t * p_pic_start; /* beginning of the current line for copy */
351 /* Conversion buffer pointer */
352 uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
355 /* Offset array pointer */
356 int * p_offset_start = p_filter->p_sys->p_offset;
/* Per-line padding of the source planes: pitch minus visible pitch, for
 * plane 0 (added to p_y below) and plane 1 (added to p_u and p_v below). */
359 const int i_source_margin = p_src->p[0].i_pitch
360 - p_src->p[0].i_visible_pitch;
361 const int i_source_margin_c = p_src->p[1].i_pitch
362 - p_src->p[1].i_visible_pitch;
364 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
366 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
367 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
368 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
369 SetOffset( p_filter->fmt_in.video.i_width,
370 p_filter->fmt_in.video.i_height,
371 p_filter->fmt_out.video.i_width,
372 p_filter->fmt_out.video.i_height,
373 &b_hscale, &i_vscale, p_offset_start );
379 i_scale_count = ( i_vscale == 1 ) ?
380 p_filter->fmt_out.video.i_height :
381 p_filter->fmt_in.video.i_height;
/* i_scale_count (above) starts from the output height when i_vscale is 1
 * and from the input height otherwise.
 * i_rewind is how many pixels the width falls short of a multiple of 16
 * (0 when it already is one); the end-of-line code below moves p_u, p_v and
 * p_buffer back by it before converting one more group, so some pixels are
 * converted twice. */
385 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
388 ** SSE2 128 bits fetch/store instructions are faster
389 ** if memory access is 16 bytes aligned
392 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* p_buffer, the conversion target, is the intermediate buffer when b_hscale
 * is set and the destination picture otherwise.  The aligned path is taken
 * only if every value ORed together in the test below has its four low bits
 * clear, i.e. is a multiple of 16. */
393 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
396 ((intptr_t)p_buffer))) )
398 /* use faster SSE2 aligned fetch and store */
399 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
403 for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
409 SSE2_UNPACK_16_ALIGNED
416 /* Here we do some unaligned reads and duplicate conversions, but
417 * at least we have all the pixels */
/* U and V step back by half of i_rewind: the chroma width is half the
 * luma width (see i_chroma_width). */
421 p_u -= i_rewind >> 1;
422 p_v -= i_rewind >> 1;
423 p_buffer -= i_rewind;
426 SSE2_INIT_16_UNALIGNED
429 SSE2_UNPACK_16_UNALIGNED
/* SCALE_HEIGHT is a macro defined outside this file.
 * NOTE(review): its second argument matches sizeof(*p_pic) (2 here),
 * presumably the output pixel size in bytes -- confirm. */
436 SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the luma line. */
438 p_y += i_source_margin;
441 p_u += i_source_margin_c;
442 p_v += i_source_margin_c;
444 p_buffer = b_hscale ? p_buffer_start : p_pic;
449 /* use slower SSE2 unaligned fetch and store */
450 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
453 p_buffer = b_hscale ? p_buffer_start : p_pic;
455 for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
458 SSE2_INIT_16_UNALIGNED
461 SSE2_UNPACK_16_UNALIGNED
468 /* Here we do some unaligned reads and duplicate conversions, but
469 * at least we have all the pixels */
473 p_u -= i_rewind >> 1;
474 p_v -= i_rewind >> 1;
475 p_buffer -= i_rewind;
478 SSE2_INIT_16_UNALIGNED
481 SSE2_UNPACK_16_UNALIGNED
488 SCALE_HEIGHT( 420, 2 );
490 p_y += i_source_margin;
493 p_u += i_source_margin_c;
494 p_v += i_source_margin_c;
496 p_buffer = b_hscale ? p_buffer_start : p_pic;
500 /* make sure all SSE2 stores are visible thereafter */
/* From here on pixels are handled in groups of 8 rather than 16 (mask 7,
 * width / 8).
 * NOTE(review): presumably the MMX variant of this function, given the
 * i420_rgb_mmx.h include and the FPU comment at the end -- confirm. */
505 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
507 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
510 p_buffer = b_hscale ? p_buffer_start : p_pic;
512 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
526 /* Here we do some unaligned reads and duplicate conversions, but
527 * at least we have all the pixels */
531 p_u -= i_rewind >> 1;
532 p_v -= i_rewind >> 1;
533 p_buffer -= i_rewind;
547 SCALE_HEIGHT( 420, 2 );
549 p_y += i_source_margin;
552 p_u += i_source_margin_c;
553 p_v += i_source_margin_c;
556 /* re-enable FPU registers */
/* I420_A8R8G8B8: converts the planar YUV picture p_src (Y, U and V planes)
 * into the 32-bit-per-pixel picture p_dest, using the input and output
 * sizes stored in p_filter.
 * NOTE(review): the function name and the SSE2_UNPACK_32_ARGB_* macros
 * suggest an A-R-G-B channel order; those macros are defined elsewhere --
 * confirm there. */
563 void I420_A8R8G8B8( filter_t *p_filter, picture_t *p_src,
566 /* We got this one from the old arguments */
567 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
568 uint8_t *p_y = p_src->Y_PIXELS;
569 uint8_t *p_u = p_src->U_PIXELS;
570 uint8_t *p_v = p_src->V_PIXELS;
572 bool b_hscale; /* horizontal scaling type */
573 unsigned int i_vscale; /* vertical scaling type */
574 unsigned int i_x, i_y; /* horizontal and vertical indexes */
578 int i_scale_count; /* scale modulo counter */
579 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
580 uint32_t * p_pic_start; /* beginning of the current line for copy */
581 /* Conversion buffer pointer */
582 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
585 /* Offset array pointer */
586 int * p_offset_start = p_filter->p_sys->p_offset;
/* Per-line padding of the source planes: pitch minus visible pitch, for
 * plane 0 (added to p_y below) and plane 1 (added to p_u and p_v below). */
589 const int i_source_margin = p_src->p[0].i_pitch
590 - p_src->p[0].i_visible_pitch;
591 const int i_source_margin_c = p_src->p[1].i_pitch
592 - p_src->p[1].i_visible_pitch;
594 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
596 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
597 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
598 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
599 SetOffset( p_filter->fmt_in.video.i_width,
600 p_filter->fmt_in.video.i_height,
601 p_filter->fmt_out.video.i_width,
602 p_filter->fmt_out.video.i_height,
603 &b_hscale, &i_vscale, p_offset_start );
608 i_scale_count = ( i_vscale == 1 ) ?
609 p_filter->fmt_out.video.i_height :
610 p_filter->fmt_in.video.i_height;
/* i_scale_count (above) starts from the output height when i_vscale is 1
 * and from the input height otherwise.
 * i_rewind is how many pixels the width falls short of a multiple of 16
 * (0 when it already is one); the end-of-line code below moves p_u, p_v and
 * p_buffer back by it before converting one more group, so some pixels are
 * converted twice. */
614 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
617 ** SSE2 128 bits fetch/store instructions are faster
618 ** if memory access is 16 bytes aligned
621 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* p_buffer, the conversion target, is the intermediate buffer when b_hscale
 * is set and the destination picture otherwise.  The aligned path is taken
 * only if every value ORed together in the test below has its four low bits
 * clear, i.e. is a multiple of 16. */
622 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
625 ((intptr_t)p_buffer))) )
627 /* use faster SSE2 aligned fetch and store */
628 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
632 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
638 SSE2_UNPACK_32_ARGB_ALIGNED
646 /* Here we do some unaligned reads and duplicate conversions, but
647 * at least we have all the pixels */
/* U and V step back by half of i_rewind: the chroma width is half the
 * luma width (see i_chroma_width). */
651 p_u -= i_rewind >> 1;
652 p_v -= i_rewind >> 1;
653 p_buffer -= i_rewind;
655 SSE2_INIT_32_UNALIGNED
658 SSE2_UNPACK_32_ARGB_UNALIGNED
/* SCALE_HEIGHT is a macro defined outside this file.
 * NOTE(review): its second argument matches sizeof(*p_pic) (4 here),
 * presumably the output pixel size in bytes -- confirm. */
665 SCALE_HEIGHT( 420, 4 );
/* Skip the padding at the end of the luma line. */
667 p_y += i_source_margin;
670 p_u += i_source_margin_c;
671 p_v += i_source_margin_c;
673 p_buffer = b_hscale ? p_buffer_start : p_pic;
678 /* use slower SSE2 unaligned fetch and store */
679 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
682 p_buffer = b_hscale ? p_buffer_start : p_pic;
684 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
687 SSE2_INIT_32_UNALIGNED
690 SSE2_UNPACK_32_ARGB_UNALIGNED
698 /* Here we do some unaligned reads and duplicate conversions, but
699 * at least we have all the pixels */
703 p_u -= i_rewind >> 1;
704 p_v -= i_rewind >> 1;
705 p_buffer -= i_rewind;
707 SSE2_INIT_32_UNALIGNED
710 SSE2_UNPACK_32_ARGB_UNALIGNED
717 SCALE_HEIGHT( 420, 4 );
719 p_y += i_source_margin;
722 p_u += i_source_margin_c;
723 p_v += i_source_margin_c;
725 p_buffer = b_hscale ? p_buffer_start : p_pic;
729 /* make sure all SSE2 stores are visible thereafter */
/* From here on pixels are handled in groups of 8 rather than 16 (mask 7,
 * width / 8).
 * NOTE(review): presumably the MMX variant of this function, given the
 * i420_rgb_mmx.h include and the FPU comment at the end -- confirm. */
734 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
736 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
739 p_buffer = b_hscale ? p_buffer_start : p_pic;
741 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
755 /* Here we do some unaligned reads and duplicate conversions, but
756 * at least we have all the pixels */
760 p_u -= i_rewind >> 1;
761 p_v -= i_rewind >> 1;
762 p_buffer -= i_rewind;
775 SCALE_HEIGHT( 420, 4 );
777 p_y += i_source_margin;
780 p_u += i_source_margin_c;
781 p_v += i_source_margin_c;
785 /* re-enable FPU registers */
/* I420_R8G8B8A8: converts the planar YUV picture p_src (Y, U and V planes)
 * into the 32-bit-per-pixel picture p_dest, using the input and output
 * sizes stored in p_filter.
 * NOTE(review): the function name and the SSE2_UNPACK_32_RGBA_* macros
 * suggest an R-G-B-A channel order; those macros are defined elsewhere --
 * confirm there. */
792 void I420_R8G8B8A8( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
794 /* We got this one from the old arguments */
795 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
796 uint8_t *p_y = p_src->Y_PIXELS;
797 uint8_t *p_u = p_src->U_PIXELS;
798 uint8_t *p_v = p_src->V_PIXELS;
800 bool b_hscale; /* horizontal scaling type */
801 unsigned int i_vscale; /* vertical scaling type */
802 unsigned int i_x, i_y; /* horizontal and vertical indexes */
806 int i_scale_count; /* scale modulo counter */
807 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
808 uint32_t * p_pic_start; /* beginning of the current line for copy */
809 /* Conversion buffer pointer */
810 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
813 /* Offset array pointer */
814 int * p_offset_start = p_filter->p_sys->p_offset;
/* Per-line padding of the source planes: pitch minus visible pitch, for
 * plane 0 (added to p_y below) and plane 1 (added to p_u and p_v below). */
817 const int i_source_margin = p_src->p[0].i_pitch
818 - p_src->p[0].i_visible_pitch;
819 const int i_source_margin_c = p_src->p[1].i_pitch
820 - p_src->p[1].i_visible_pitch;
822 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
824 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
825 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
826 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
827 SetOffset( p_filter->fmt_in.video.i_width,
828 p_filter->fmt_in.video.i_height,
829 p_filter->fmt_out.video.i_width,
830 p_filter->fmt_out.video.i_height,
831 &b_hscale, &i_vscale, p_offset_start );
836 i_scale_count = ( i_vscale == 1 ) ?
837 p_filter->fmt_out.video.i_height :
838 p_filter->fmt_in.video.i_height;
/* i_scale_count (above) starts from the output height when i_vscale is 1
 * and from the input height otherwise.
 * i_rewind is how many pixels the width falls short of a multiple of 16
 * (0 when it already is one); the end-of-line code below moves p_u, p_v and
 * p_buffer back by it before converting one more group, so some pixels are
 * converted twice. */
842 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
845 ** SSE2 128 bits fetch/store instructions are faster
846 ** if memory access is 16 bytes aligned
849 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* p_buffer, the conversion target, is the intermediate buffer when b_hscale
 * is set and the destination picture otherwise.  The aligned path is taken
 * only if every value ORed together in the test below has its four low bits
 * clear, i.e. is a multiple of 16. */
850 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
853 ((intptr_t)p_buffer))) )
855 /* use faster SSE2 aligned fetch and store */
856 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
860 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
866 SSE2_UNPACK_32_RGBA_ALIGNED
874 /* Here we do some unaligned reads and duplicate conversions, but
875 * at least we have all the pixels */
/* U and V step back by half of i_rewind: the chroma width is half the
 * luma width (see i_chroma_width). */
879 p_u -= i_rewind >> 1;
880 p_v -= i_rewind >> 1;
881 p_buffer -= i_rewind;
883 SSE2_INIT_32_UNALIGNED
886 SSE2_UNPACK_32_RGBA_UNALIGNED
/* SCALE_HEIGHT is a macro defined outside this file.
 * NOTE(review): its second argument matches sizeof(*p_pic) (4 here),
 * presumably the output pixel size in bytes -- confirm. */
893 SCALE_HEIGHT( 420, 4 );
/* Skip the padding at the end of the luma line. */
895 p_y += i_source_margin;
898 p_u += i_source_margin_c;
899 p_v += i_source_margin_c;
901 p_buffer = b_hscale ? p_buffer_start : p_pic;
906 /* use slower SSE2 unaligned fetch and store */
907 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
910 p_buffer = b_hscale ? p_buffer_start : p_pic;
912 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
915 SSE2_INIT_32_UNALIGNED
918 SSE2_UNPACK_32_RGBA_UNALIGNED
926 /* Here we do some unaligned reads and duplicate conversions, but
927 * at least we have all the pixels */
931 p_u -= i_rewind >> 1;
932 p_v -= i_rewind >> 1;
933 p_buffer -= i_rewind;
935 SSE2_INIT_32_UNALIGNED
938 SSE2_UNPACK_32_RGBA_UNALIGNED
945 SCALE_HEIGHT( 420, 4 );
947 p_y += i_source_margin;
950 p_u += i_source_margin_c;
951 p_v += i_source_margin_c;
953 p_buffer = b_hscale ? p_buffer_start : p_pic;
957 /* make sure all SSE2 stores are visible thereafter */
/* From here on pixels are handled in groups of 8 rather than 16 (mask 7,
 * width / 8).
 * NOTE(review): presumably the MMX variant of this function, given the
 * i420_rgb_mmx.h include and the FPU comment at the end -- confirm. */
962 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
964 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
967 p_buffer = b_hscale ? p_buffer_start : p_pic;
969 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
983 /* Here we do some unaligned reads and duplicate conversions, but
984 * at least we have all the pixels */
988 p_u -= i_rewind >> 1;
989 p_v -= i_rewind >> 1;
990 p_buffer -= i_rewind;
1003 SCALE_HEIGHT( 420, 4 );
1005 p_y += i_source_margin;
1008 p_u += i_source_margin_c;
1009 p_v += i_source_margin_c;
1013 /* re-enable FPU registers */
/* I420_B8G8R8A8: converts the planar YUV picture p_src (Y, U and V planes)
 * into the 32-bit-per-pixel picture p_dest, using the input and output
 * sizes stored in p_filter.
 * NOTE(review): the function name and the SSE2_UNPACK_32_BGRA_* macros
 * suggest a B-G-R-A channel order; those macros are defined elsewhere --
 * confirm there. */
1020 void I420_B8G8R8A8( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
1022 /* We got this one from the old arguments */
1023 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1024 uint8_t *p_y = p_src->Y_PIXELS;
1025 uint8_t *p_u = p_src->U_PIXELS;
1026 uint8_t *p_v = p_src->V_PIXELS;
1028 bool b_hscale; /* horizontal scaling type */
1029 unsigned int i_vscale; /* vertical scaling type */
1030 unsigned int i_x, i_y; /* horizontal and vertical indexes */
1034 int i_scale_count; /* scale modulo counter */
1035 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
1036 uint32_t * p_pic_start; /* beginning of the current line for copy */
1037 /* Conversion buffer pointer */
1038 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
1039 uint32_t * p_buffer;
1041 /* Offset array pointer */
1042 int * p_offset_start = p_filter->p_sys->p_offset;
/* Per-line padding of the source planes: pitch minus visible pitch, for
 * plane 0 (added to p_y below) and plane 1 (added to p_u and p_v below). */
1045 const int i_source_margin = p_src->p[0].i_pitch
1046 - p_src->p[0].i_visible_pitch;
1047 const int i_source_margin_c = p_src->p[1].i_pitch
1048 - p_src->p[1].i_visible_pitch;
1050 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1052 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1053 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1054 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1055 SetOffset( p_filter->fmt_in.video.i_width,
1056 p_filter->fmt_in.video.i_height,
1057 p_filter->fmt_out.video.i_width,
1058 p_filter->fmt_out.video.i_height,
1059 &b_hscale, &i_vscale, p_offset_start );
1062 * Perform conversion
1064 i_scale_count = ( i_vscale == 1 ) ?
1065 p_filter->fmt_out.video.i_height :
1066 p_filter->fmt_in.video.i_height;
/* i_scale_count (above) starts from the output height when i_vscale is 1
 * and from the input height otherwise.
 * i_rewind is how many pixels the width falls short of a multiple of 16
 * (0 when it already is one); the end-of-line code below moves p_u, p_v and
 * p_buffer back by it before converting one more group, so some pixels are
 * converted twice. */
1070 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
1073 ** SSE2 128 bits fetch/store instructions are faster
1074 ** if memory access is 16 bytes aligned
1077 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* p_buffer, the conversion target, is the intermediate buffer when b_hscale
 * is set and the destination picture otherwise.  The aligned path is taken
 * only if every value ORed together in the test below has its four low bits
 * clear, i.e. is a multiple of 16. */
1078 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1081 ((intptr_t)p_buffer))) )
1083 /* use faster SSE2 aligned fetch and store */
1084 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1086 p_pic_start = p_pic;
1088 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1091 SSE2_INIT_32_ALIGNED
1094 SSE2_UNPACK_32_BGRA_ALIGNED
1102 /* Here we do some unaligned reads and duplicate conversions, but
1103 * at least we have all the pixels */
/* U and V step back by half of i_rewind: the chroma width is half the
 * luma width (see i_chroma_width). */
1107 p_u -= i_rewind >> 1;
1108 p_v -= i_rewind >> 1;
1109 p_buffer -= i_rewind;
1111 SSE2_INIT_32_UNALIGNED
1114 SSE2_UNPACK_32_BGRA_UNALIGNED
/* SCALE_HEIGHT is a macro defined outside this file.
 * NOTE(review): its second argument matches sizeof(*p_pic) (4 here),
 * presumably the output pixel size in bytes -- confirm. */
1121 SCALE_HEIGHT( 420, 4 );
/* Skip the padding at the end of the luma line. */
1123 p_y += i_source_margin;
1126 p_u += i_source_margin_c;
1127 p_v += i_source_margin_c;
1129 p_buffer = b_hscale ? p_buffer_start : p_pic;
1134 /* use slower SSE2 unaligned fetch and store */
1135 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1137 p_pic_start = p_pic;
1138 p_buffer = b_hscale ? p_buffer_start : p_pic;
1140 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1143 SSE2_INIT_32_UNALIGNED
1146 SSE2_UNPACK_32_BGRA_UNALIGNED
1154 /* Here we do some unaligned reads and duplicate conversions, but
1155 * at least we have all the pixels */
1159 p_u -= i_rewind >> 1;
1160 p_v -= i_rewind >> 1;
1161 p_buffer -= i_rewind;
1163 SSE2_INIT_32_UNALIGNED
1166 SSE2_UNPACK_32_BGRA_UNALIGNED
1173 SCALE_HEIGHT( 420, 4 );
1175 p_y += i_source_margin;
1178 p_u += i_source_margin_c;
1179 p_v += i_source_margin_c;
1181 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* From here on pixels are handled in groups of 8 rather than 16 (mask 7,
 * width / 8).
 * NOTE(review): presumably the MMX variant of this function, given the
 * i420_rgb_mmx.h include and the FPU comment at the end -- confirm. */
1187 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
1189 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1191 p_pic_start = p_pic;
1192 p_buffer = b_hscale ? p_buffer_start : p_pic;
1194 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1208 /* Here we do some unaligned reads and duplicate conversions, but
1209 * at least we have all the pixels */
1213 p_u -= i_rewind >> 1;
1214 p_v -= i_rewind >> 1;
1215 p_buffer -= i_rewind;
1228 SCALE_HEIGHT( 420, 4 );
1230 p_y += i_source_margin;
1233 p_u += i_source_margin_c;
1234 p_v += i_source_margin_c;
1238 /* re-enable FPU registers */
/* I420_A8B8G8R8: converts the planar YUV picture p_src (Y, U and V planes)
 * into the 32-bit-per-pixel picture p_dest, using the input and output
 * sizes stored in p_filter.
 * NOTE(review): the function name and the SSE2_UNPACK_32_ABGR_* macros
 * suggest an A-B-G-R channel order; those macros are defined elsewhere --
 * confirm there. */
1245 void I420_A8B8G8R8( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
1247 /* We got this one from the old arguments */
1248 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1249 uint8_t *p_y = p_src->Y_PIXELS;
1250 uint8_t *p_u = p_src->U_PIXELS;
1251 uint8_t *p_v = p_src->V_PIXELS;
1253 bool b_hscale; /* horizontal scaling type */
1254 unsigned int i_vscale; /* vertical scaling type */
1255 unsigned int i_x, i_y; /* horizontal and vertical indexes */
1259 int i_scale_count; /* scale modulo counter */
1260 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
1261 uint32_t * p_pic_start; /* beginning of the current line for copy */
1262 /* Conversion buffer pointer */
1263 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
1264 uint32_t * p_buffer;
1266 /* Offset array pointer */
1267 int * p_offset_start = p_filter->p_sys->p_offset;
/* Per-line padding of the source planes: pitch minus visible pitch, for
 * plane 0 (added to p_y below) and plane 1 (added to p_u and p_v below). */
1270 const int i_source_margin = p_src->p[0].i_pitch
1271 - p_src->p[0].i_visible_pitch;
1272 const int i_source_margin_c = p_src->p[1].i_pitch
1273 - p_src->p[1].i_visible_pitch;
1275 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1277 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1278 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1279 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1280 SetOffset( p_filter->fmt_in.video.i_width,
1281 p_filter->fmt_in.video.i_height,
1282 p_filter->fmt_out.video.i_width,
1283 p_filter->fmt_out.video.i_height,
1284 &b_hscale, &i_vscale, p_offset_start );
1287 * Perform conversion
1289 i_scale_count = ( i_vscale == 1 ) ?
1290 p_filter->fmt_out.video.i_height :
1291 p_filter->fmt_in.video.i_height;
/* i_scale_count (above) starts from the output height when i_vscale is 1
 * and from the input height otherwise.
 * i_rewind is how many pixels the width falls short of a multiple of 16
 * (0 when it already is one); the end-of-line code below moves p_u, p_v and
 * p_buffer back by it before converting one more group, so some pixels are
 * converted twice. */
1295 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
1298 ** SSE2 128 bits fetch/store instructions are faster
1299 ** if memory access is 16 bytes aligned
1302 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* p_buffer, the conversion target, is the intermediate buffer when b_hscale
 * is set and the destination picture otherwise.  The aligned path is taken
 * only if every value ORed together in the test below has its four low bits
 * clear, i.e. is a multiple of 16. */
1303 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1306 ((intptr_t)p_buffer))) )
1308 /* use faster SSE2 aligned fetch and store */
1309 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1311 p_pic_start = p_pic;
1313 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1316 SSE2_INIT_32_ALIGNED
1319 SSE2_UNPACK_32_ABGR_ALIGNED
1327 /* Here we do some unaligned reads and duplicate conversions, but
1328 * at least we have all the pixels */
/* U and V step back by half of i_rewind: the chroma width is half the
 * luma width (see i_chroma_width). */
1332 p_u -= i_rewind >> 1;
1333 p_v -= i_rewind >> 1;
1334 p_buffer -= i_rewind;
1336 SSE2_INIT_32_UNALIGNED
1339 SSE2_UNPACK_32_ABGR_UNALIGNED
/* SCALE_HEIGHT is a macro defined outside this file.
 * NOTE(review): its second argument matches sizeof(*p_pic) (4 here),
 * presumably the output pixel size in bytes -- confirm. */
1346 SCALE_HEIGHT( 420, 4 );
/* Skip the padding at the end of the luma line. */
1348 p_y += i_source_margin;
1351 p_u += i_source_margin_c;
1352 p_v += i_source_margin_c;
1354 p_buffer = b_hscale ? p_buffer_start : p_pic;
1359 /* use slower SSE2 unaligned fetch and store */
1360 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1362 p_pic_start = p_pic;
1363 p_buffer = b_hscale ? p_buffer_start : p_pic;
1365 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1368 SSE2_INIT_32_UNALIGNED
1371 SSE2_UNPACK_32_ABGR_UNALIGNED
1379 /* Here we do some unaligned reads and duplicate conversions, but
1380 * at least we have all the pixels */
1384 p_u -= i_rewind >> 1;
1385 p_v -= i_rewind >> 1;
1386 p_buffer -= i_rewind;
1388 SSE2_INIT_32_UNALIGNED
1391 SSE2_UNPACK_32_ABGR_UNALIGNED
1398 SCALE_HEIGHT( 420, 4 );
1400 p_y += i_source_margin;
1403 p_u += i_source_margin_c;
1404 p_v += i_source_margin_c;
1406 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* From here on pixels are handled in groups of 8 rather than 16 (mask 7,
 * width / 8).
 * NOTE(review): presumably the MMX variant of this function, given the
 * i420_rgb_mmx.h include and the FPU comment at the end -- confirm. */
1412 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
1414 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1416 p_pic_start = p_pic;
1417 p_buffer = b_hscale ? p_buffer_start : p_pic;
1419 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1433 /* Here we do some unaligned reads and duplicate conversions, but
1434 * at least we have all the pixels */
1438 p_u -= i_rewind >> 1;
1439 p_v -= i_rewind >> 1;
1440 p_buffer -= i_rewind;
1453 SCALE_HEIGHT( 420, 4 );
1455 p_y += i_source_margin;
1458 p_u += i_source_margin_c;
1459 p_v += i_source_margin_c;
1463 /* re-enable FPU registers */