1 /*****************************************************************************
2 * i422_yuy2.h : YUV to YUV conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2002 the VideoLAN team
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damienf@videolan.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 #ifdef MODULE_NAME_IS_i422_yuy2_mmx
27 #if defined(CAN_COMPILE_MMX)
/*
 * MMX_CALL( MMX_INSTRUCTIONS ): run one MMX kernel and step the pointers.
 *
 * The macro takes a single argument; p_line, p_y, p_u and p_v are not
 * parameters, so they must be pointer variables with exactly those names at
 * the expansion site.  They are handed to the asm statement as register
 * inputs in the order p_line, p_y, p_u, p_v (operands %0..%3).  After the asm
 * statement p_line is advanced by 16 and p_y by 8.
 *
 * NOTE(review): this view of the macro is incomplete.  The asm template that
 * should consume MMX_INSTRUCTIONS, the output-operand separator, and whatever
 * follows the last line continuation (presumably the p_u / p_v increments)
 * are not visible here -- confirm against the full file before relying on it.
 */
31 #define MMX_CALL(MMX_INSTRUCTIONS) \
33 __asm__ __volatile__( \
37 : "r" (p_line), "r" (p_y), \
38 "r" (p_u), "r" (p_v) ); \
39 p_line += 16; p_y += 8; \
/*
 * MMX_END: issue the x86 `emms` instruction as a volatile asm statement.
 * Per the Intel instruction reference, `emms` marks the x87 register stack
 * as empty again, which is required before floating-point code runs after
 * MMX code.  The expansion carries no trailing semicolon; the caller adds it.
 */
43 #define MMX_END __asm__ __volatile__ ( "emms" )
/*
 * MMX_YUV422_YUYV: MMX instruction text packing planar 4:2:2 into YUYV.
 *
 * This is a bare string meant to be used as the template of a GCC extended
 * asm statement (it uses %N operands and %%-escaped register names).  The
 * user supplies the operand list:
 *   %0 = destination line, %1 = Y plane, %2 = Cb (U) plane, %3 = Cr (V) plane
 *
 * One pass reads 8 Y, 4 Cb and 4 Cr bytes and stores 16 bytes laid out in
 * memory as Y0 U0 Y1 V0 Y2 U1 Y3 V1 ... Y7 V3.  It overwrites mm0, mm1 and
 * mm2.  The register diagrams in the asm comments list the most significant
 * byte first.
 *
 * The literal is closed on its own line and the final line carries no
 * continuation, so the definition ends here and cannot absorb the next line.
 */
#define MMX_YUV422_YUYV "                                                 \n\
movq       (%1), %%mm0  # Load 8 Y            y7 y6 y5 y4 y3 y2 y1 y0     \n\
movd       (%2), %%mm1  # Load 4 Cb           00 00 00 00 u3 u2 u1 u0     \n\
movd       (%3), %%mm2  # Load 4 Cr           00 00 00 00 v3 v2 v1 v0     \n\
punpcklbw %%mm2, %%mm1  #                     v3 u3 v2 u2 v1 u1 v0 u0     \n\
movq      %%mm0, %%mm2  #                     y7 y6 y5 y4 y3 y2 y1 y0     \n\
punpcklbw %%mm1, %%mm2  #                     v1 y3 u1 y2 v0 y1 u0 y0     \n\
movq      %%mm2, (%0)   # Store low YUYV                                  \n\
punpckhbw %%mm1, %%mm0  #                     v3 y7 u3 y6 v2 y5 u2 y4     \n\
movq      %%mm0, 8(%0)  # Store high YUYV                                 \n\
"
/*
 * MMX_YUV422_YVYU: MMX instruction text packing planar 4:2:2 into YVYU.
 *
 * This is a bare string meant to be used as the template of a GCC extended
 * asm statement (it uses %N operands and %%-escaped register names).  The
 * user supplies the operand list:
 *   %0 = destination line, %1 = Y plane, %2 = Cb (U) plane, %3 = Cr (V) plane
 *
 * Compared with the YUYV kernel the two chroma loads target swapped
 * registers (Cb -> mm2, Cr -> mm1), so V lands ahead of U when interleaved.
 * One pass reads 8 Y, 4 Cb and 4 Cr bytes and stores 16 bytes laid out in
 * memory as Y0 V0 Y1 U0 Y2 V1 Y3 U1 ... Y7 U3.  It overwrites mm0, mm1 and
 * mm2.  The register diagrams in the asm comments list the most significant
 * byte first.
 *
 * The literal is closed on its own line and the final line carries no
 * continuation, so the definition ends here and cannot absorb the next line.
 */
#define MMX_YUV422_YVYU "                                                 \n\
movq       (%1), %%mm0  # Load 8 Y            y7 y6 y5 y4 y3 y2 y1 y0     \n\
movd       (%2), %%mm2  # Load 4 Cb           00 00 00 00 u3 u2 u1 u0     \n\
movd       (%3), %%mm1  # Load 4 Cr           00 00 00 00 v3 v2 v1 v0     \n\
punpcklbw %%mm2, %%mm1  #                     u3 v3 u2 v2 u1 v1 u0 v0     \n\
movq      %%mm0, %%mm2  #                     y7 y6 y5 y4 y3 y2 y1 y0     \n\
punpcklbw %%mm1, %%mm2  #                     u1 y3 v1 y2 u0 y1 v0 y0     \n\
movq      %%mm2, (%0)   # Store low YVYU                                  \n\
punpckhbw %%mm1, %%mm0  #                     u3 y7 v3 y6 u2 y5 v2 y4     \n\
movq      %%mm0, 8(%0)  # Store high YVYU                                 \n\
"
/*
 * MMX_YUV422_UYVY: MMX instruction text packing planar 4:2:2 into UYVY.
 *
 * This is a bare string meant to be used as the template of a GCC extended
 * asm statement (it uses %N operands and %%-escaped register names).  The
 * user supplies the operand list:
 *   %0 = destination line, %1 = Y plane, %2 = Cb (U) plane, %3 = Cr (V) plane
 *
 * Here the interleaved chroma is the unpack destination and luma the source,
 * so each chroma byte precedes its luma byte.  One pass reads 8 Y, 4 Cb and
 * 4 Cr bytes and stores 16 bytes laid out in memory as
 * U0 Y0 V0 Y1 U1 Y2 V1 Y3 ... V3 Y7.  It overwrites mm1 and mm2 (mm0 is only
 * loaded and read).  The register diagrams in the asm comments list the most
 * significant byte first.
 *
 * The literal is closed on its own line and the final line carries no
 * continuation, so the definition ends here and cannot absorb the next line.
 */
#define MMX_YUV422_UYVY "                                                 \n\
movq       (%1), %%mm0  # Load 8 Y            y7 y6 y5 y4 y3 y2 y1 y0     \n\
movd       (%2), %%mm1  # Load 4 Cb           00 00 00 00 u3 u2 u1 u0     \n\
movd       (%3), %%mm2  # Load 4 Cr           00 00 00 00 v3 v2 v1 v0     \n\
punpcklbw %%mm2, %%mm1  #                     v3 u3 v2 u2 v1 u1 v0 u0     \n\
movq      %%mm1, %%mm2  #                     v3 u3 v2 u2 v1 u1 v0 u0     \n\
punpcklbw %%mm0, %%mm2  #                     y3 v1 y2 u1 y1 v0 y0 u0     \n\
movq      %%mm2, (%0)   # Store low UYVY                                  \n\
punpckhbw %%mm0, %%mm1  #                     y7 v3 y6 u3 y5 v2 y4 u2     \n\
movq      %%mm1, 8(%0)  # Store high UYVY                                 \n\
"
/*
 * MMX_YUV422_Y211: instruction string for the Y211 output format.
 * NOTE(review): only the opening line of this string literal is visible in
 * this view -- no instructions and no closing quote.  Whether the full file
 * provides a real kernel here or an empty placeholder must be confirmed.
 */
81 #define MMX_YUV422_Y211 " \n\
84 #elif defined(HAVE_MMX_INTRINSICS)
/*
 * Intrinsics build of the MMX module: MMX_END expands to a call to
 * _mm_empty().  NOTE(review): the header declaring _mm_empty() and the rest
 * of this branch are not visible in this view.
 */
90 #define MMX_END _mm_empty()
94 #elif defined( MODULE_NAME_IS_i422_yuy2_sse2 )
96 #if defined(CAN_COMPILE_SSE2)
/*
 * Inline-asm build of the SSE2 module: SSE2_END issues `sfence`.  The
 * "memory" clobber tells the compiler the statement may touch memory, so it
 * will not cache memory values in registers or reorder memory accesses
 * across it.  NOTE(review): presumably paired with non-temporal stores in
 * the SSE2 kernels, which are not visible here -- confirm.
 */
100 #define SSE2_END __asm__ __volatile__ ( "sfence" ::: "memory" )
102 #elif defined(HAVE_SSE2_INTRINSICS)
104 /* SSE2 intrinsics */
106 #include <emmintrin.h>
/* Intrinsics build of the SSE2 module: SSE2_END expands to _mm_sfence(). */
109 #define SSE2_END _mm_sfence()
/*
 * Plain C build (no SIMD): the C_YUV422_* macros follow.
 * NOTE(review): the #endif lines closing the nested #if blocks above are not
 * visible in this view; verify the conditional nesting in the full file.
 */
113 #elif defined (MODULE_NAME_IS_i422_yuy2)
/*
 * C_YUV422_YUYV: pack two pixels of planar 4:2:2 into one YUYV group.
 *
 * p_line  destination byte pointer; 4 bytes are written, pointer advanced by 4
 * p_y     luma source pointer; 2 bytes are read, pointer advanced by 2
 * p_u     U source pointer; 1 byte is read, pointer advanced by 1
 * p_v     V source pointer; 1 byte is read, pointer advanced by 1
 *
 * Output order: Y0 U0 Y1 V0.  Every argument must be a modifiable pointer
 * lvalue, because the macro increments it in place.
 *
 * The body is wrapped in do { } while( 0 ) so the expansion is one statement:
 * it stays intact under an unbraced if/else/for, and the caller terminates it
 * with a semicolon.  The last line has no continuation, so the definition
 * cannot absorb the line that follows it.
 */
#define C_YUV422_YUYV( p_line, p_y, p_u, p_v )      \
    do {                                            \
        *(p_line)++ = *(p_y)++;                     \
        *(p_line)++ = *(p_u)++;                     \
        *(p_line)++ = *(p_y)++;                     \
        *(p_line)++ = *(p_v)++;                     \
    } while( 0 )
/*
 * C_YUV422_YVYU: pack two pixels of planar 4:2:2 into one YVYU group.
 *
 * p_line  destination byte pointer; 4 bytes are written, pointer advanced by 4
 * p_y     luma source pointer; 2 bytes are read, pointer advanced by 2
 * p_u     U source pointer; 1 byte is read, pointer advanced by 1
 * p_v     V source pointer; 1 byte is read, pointer advanced by 1
 *
 * Output order: Y0 V0 Y1 U0.  Every argument must be a modifiable pointer
 * lvalue, because the macro increments it in place.
 *
 * The body is wrapped in do { } while( 0 ) so the expansion is one statement:
 * it stays intact under an unbraced if/else/for, and the caller terminates it
 * with a semicolon.  The last line has no continuation, so the definition
 * cannot absorb the line that follows it.
 */
#define C_YUV422_YVYU( p_line, p_y, p_u, p_v )      \
    do {                                            \
        *(p_line)++ = *(p_y)++;                     \
        *(p_line)++ = *(p_v)++;                     \
        *(p_line)++ = *(p_y)++;                     \
        *(p_line)++ = *(p_u)++;                     \
    } while( 0 )
/*
 * C_YUV422_UYVY: pack two pixels of planar 4:2:2 into one UYVY group.
 *
 * p_line  destination byte pointer; 4 bytes are written, pointer advanced by 4
 * p_y     luma source pointer; 2 bytes are read, pointer advanced by 2
 * p_u     U source pointer; 1 byte is read, pointer advanced by 1
 * p_v     V source pointer; 1 byte is read, pointer advanced by 1
 *
 * Output order: U0 Y0 V0 Y1.  Every argument must be a modifiable pointer
 * lvalue, because the macro increments it in place.
 *
 * The body is wrapped in do { } while( 0 ) so the expansion is one statement:
 * it stays intact under an unbraced if/else/for, and the caller terminates it
 * with a semicolon.  The last line has no continuation, so the definition
 * cannot absorb the line that follows it.
 */
#define C_YUV422_UYVY( p_line, p_y, p_u, p_v )      \
    do {                                            \
        *(p_line)++ = *(p_u)++;                     \
        *(p_line)++ = *(p_y)++;                     \
        *(p_line)++ = *(p_v)++;                     \
        *(p_line)++ = *(p_y)++;                     \
    } while( 0 )
/*
 * C_YUV422_Y211: emit one 4-byte Y211 group.
 *
 * Each expansion stores 4 bytes through p_line (advancing it by 4) in the
 * order Y, U - 0x80, Y, V - 0x80.  Luma is taken from every other sample:
 * p_y moves by 2 after each of the two reads, 4 in total.  p_u and p_v are
 * each read once and then moved by 2, so every other chroma sample is
 * skipped as well.
 *
 * NOTE(review): p_y, p_u and p_v are used unparenthesised in the `+= 2`
 * statements and the expansion is several statements, so pass plain pointer
 * variables and use the macro only inside a braced block.
 * NOTE(review): the last visible line ends with a line continuation; what
 * follows it lies outside this view.
 */
133 #define C_YUV422_Y211( p_line, p_y, p_u, p_v ) \
134 *(p_line)++ = *(p_y); p_y += 2; \
135 *(p_line)++ = *(p_u) - 0x80; p_u += 2; \
136 *(p_line)++ = *(p_y); p_y += 2; \
137 *(p_line)++ = *(p_v) - 0x80; p_v += 2; \