/*****************************************************************************
 * i422_yuy2.h : YUV to YUV conversion module for vlc
 *****************************************************************************
 * Copyright (C) 2002 VLC authors and VideoLAN
 * $Id$
 *
 * Authors: Samuel Hocevar <sam@zoy.org>
 *          Damien Fouilleul <damienf@videolan.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
 *****************************************************************************/

#ifdef MODULE_NAME_IS_i422_yuy2_mmx

#if defined(CAN_COMPILE_MMX)

/* MMX assembly */

#define MMX_CALL(MMX_INSTRUCTIONS)          \
    do {                                    \
    __asm__ __volatile__(                   \
        ".p2align 3 \n\t"                   \
        MMX_INSTRUCTIONS                    \
        :                                   \
        : "r" (p_line), "r" (p_y),          \
          "r" (p_u), "r" (p_v)              \
        : "mm0", "mm1", "mm2" );            \
        p_line += 16; p_y += 8;             \
        p_u += 4; p_v += 4;                 \
    } while(0)

#define MMX_END __asm__ __volatile__ ( "emms" )

#define MMX_YUV422_YUYV "                                                 \n\
movq       (%1), %%mm0  # Load 8 Y            y7 y6 y5 y4 y3 y2 y1 y0     \n\
movd       (%2), %%mm1  # Load 4 Cb           00 00 00 00 u3 u2 u1 u0     \n\
movd       (%3), %%mm2  # Load 4 Cr           00 00 00 00 v3 v2 v1 v0     \n\
punpcklbw %%mm2, %%mm1  #                     v3 u3 v2 u2 v1 u1 v0 u0     \n\
movq      %%mm0, %%mm2  #                     y7 y6 y5 y4 y3 y2 y1 y0     \n\
punpcklbw %%mm1, %%mm2  #                     v1 y3 u1 y2 v0 y1 u0 y0     \n\
movq      %%mm2, (%0)   # Store low YUYV                                  \n\
punpckhbw %%mm1, %%mm0  #                     v3 y7 u3 y6 v2 y5 u2 y4     \n\
movq      %%mm0, 8(%0)  # Store high YUYV                                 \n\
"

#define MMX_YUV422_YVYU "                                                 \n\
movq       (%1), %%mm0  # Load 8 Y            y7 y6 y5 y4 y3 y2 y1 y0     \n\
movd       (%2), %%mm2  # Load 4 Cb           00 00 00 00 u3 u2 u1 u0     \n\
movd       (%3), %%mm1  # Load 4 Cr           00 00 00 00 v3 v2 v1 v0     \n\
punpcklbw %%mm2, %%mm1  #                     u3 v3 u2 v2 u1 v1 u0 v0     \n\
movq      %%mm0, %%mm2  #                     y7 y6 y5 y4 y3 y2 y1 y0     \n\
punpcklbw %%mm1, %%mm2  #                     u1 y3 v1 y2 u0 y1 v0 y0     \n\
movq      %%mm2, (%0)   # Store low YVYU                                  \n\
punpckhbw %%mm1, %%mm0  #                     u3 y7 v3 y6 u2 y5 v2 y4     \n\
movq      %%mm0, 8(%0)  # Store high YVYU                                 \n\
"

#define MMX_YUV422_UYVY "                                                 \n\
movq       (%1), %%mm0  # Load 8 Y            y7 y6 y5 y4 y3 y2 y1 y0     \n\
movd       (%2), %%mm1  # Load 4 Cb           00 00 00 00 u3 u2 u1 u0     \n\
movd       (%3), %%mm2  # Load 4 Cr           00 00 00 00 v3 v2 v1 v0     \n\
punpcklbw %%mm2, %%mm1  #                     v3 u3 v2 u2 v1 u1 v0 u0     \n\
movq      %%mm1, %%mm2  #                     v3 u3 v2 u2 v1 u1 v0 u0     \n\
punpcklbw %%mm0, %%mm2  #                     y3 v1 y2 u1 y1 v0 y0 u0     \n\
movq      %%mm2, (%0)   # Store low UYVY                                  \n\
punpckhbw %%mm0, %%mm1  #                     y7 v3 y6 u3 y5 v2 y4 u2     \n\
movq      %%mm1, 8(%0)  # Store high UYVY                                 \n\
"

#define MMX_YUV422_Y211 "                                                 \n\
"

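/*
 * Usage sketch (illustrative; i_x and i_width are placeholders): the
 * companion converter is expected to drive these macros through MMX_CALL
 * with the pointer names they hard-code (p_line, p_y, p_u, p_v), packing
 * 8 Y plus 4 Cb/Cr samples into 16 output bytes per iteration:
 *
 *     for( i_x = i_width / 8 ; i_x-- ; )
 *         MMX_CALL( MMX_YUV422_YUYV );
 *     MMX_END;
 *
 * MMX_END (emms) must be issued before any subsequent x87 floating point
 * code.  Widths that are not a multiple of 8 are assumed to be finished
 * off with the C_YUV422_* macros defined at the end of this header.
 */
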
#elif defined(HAVE_MMX_INTRINSICS)

/* MMX intrinsics */

#include <mmintrin.h>

#define MMX_CALL(MMX_INSTRUCTIONS)  \
    do {                            \
        __m64 mm0, mm1, mm2;        \
        MMX_INSTRUCTIONS            \
        p_line += 16; p_y += 8;     \
        p_u += 4; p_v += 4;         \
    } while(0)

#define MMX_END _mm_empty()

#define MMX_YUV422_YUYV                     \
    mm0 = (__m64)*(uint64_t*)p_y;           \
    mm1 = _mm_cvtsi32_si64(*(int*)p_u);     \
    mm2 = _mm_cvtsi32_si64(*(int*)p_v);     \
    mm1 = _mm_unpacklo_pi8(mm1, mm2);       \
    mm2 = mm0;                              \
    mm2 = _mm_unpacklo_pi8(mm2, mm1);       \
    *(uint64_t*)p_line = (uint64_t)mm2;     \
    mm0 = _mm_unpackhi_pi8(mm0, mm1);       \
    *(uint64_t*)(p_line+8) = (uint64_t)mm0;

#define MMX_YUV422_YVYU                     \
    mm0 = (__m64)*(uint64_t*)p_y;           \
    mm2 = _mm_cvtsi32_si64(*(int*)p_u);     \
    mm1 = _mm_cvtsi32_si64(*(int*)p_v);     \
    mm1 = _mm_unpacklo_pi8(mm1, mm2);       \
    mm2 = mm0;                              \
    mm2 = _mm_unpacklo_pi8(mm2, mm1);       \
    *(uint64_t*)p_line = (uint64_t)mm2;     \
    mm0 = _mm_unpackhi_pi8(mm0, mm1);       \
    *(uint64_t*)(p_line+8) = (uint64_t)mm0;

#define MMX_YUV422_UYVY                     \
    mm0 = (__m64)*(uint64_t*)p_y;           \
    mm1 = _mm_cvtsi32_si64(*(int*)p_u);     \
    mm2 = _mm_cvtsi32_si64(*(int*)p_v);     \
    mm1 = _mm_unpacklo_pi8(mm1, mm2);       \
    mm2 = mm1;                              \
    mm2 = _mm_unpacklo_pi8(mm2, mm0);       \
    *(uint64_t*)p_line = (uint64_t)mm2;     \
    mm1 = _mm_unpackhi_pi8(mm1, mm0);       \
    *(uint64_t*)(p_line+8) = (uint64_t)mm1;

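/*
 * The intrinsic variants above are drop-in replacements for the
 * inline-assembly strings: MMX_CALL() expands them as ordinary statements
 * on the local __m64 temporaries, so the calling loop and the trailing
 * MMX_END (_mm_empty) stay the same; only the code generation path differs.
 */
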
#endif

#elif defined( MODULE_NAME_IS_i422_yuy2_sse2 )

#if defined(CAN_COMPILE_SSE2)

/* SSE2 assembly */

#define SSE2_CALL(SSE2_INSTRUCTIONS)        \
    do {                                    \
    __asm__ __volatile__(                   \
        ".p2align 3 \n\t"                   \
        SSE2_INSTRUCTIONS                   \
        :                                   \
        : "r" (p_line), "r" (p_y),          \
          "r" (p_u), "r" (p_v)              \
        : "xmm0", "xmm1", "xmm2" );         \
        p_line += 32; p_y += 16;            \
        p_u += 8; p_v += 8;                 \
    } while(0)

#define SSE2_END  __asm__ __volatile__ ( "sfence" ::: "memory" )

#define SSE2_YUV422_YUYV_ALIGNED "                                        \n\
movdqa      (%1), %%xmm0  # Load 16 Y           y7 y6 y5 y4 y3 y2 y1 y0   \n\
movq        (%2), %%xmm1  # Load 8 Cb           00 00 00 00 u3 u2 u1 u0   \n\
movq        (%3), %%xmm2  # Load 8 Cr           00 00 00 00 v3 v2 v1 v0   \n\
punpcklbw %%xmm2, %%xmm1  #                     v3 u3 v2 u2 v1 u1 v0 u0   \n\
movdqa    %%xmm0, %%xmm2  #                     y7 y6 y5 y4 y3 y2 y1 y0   \n\
punpcklbw %%xmm1, %%xmm2  #                     v1 y3 u1 y2 v0 y1 u0 y0   \n\
movntdq   %%xmm2, (%0)    # Store low YUYV                                \n\
punpckhbw %%xmm1, %%xmm0  #                     v3 y7 u3 y6 v2 y5 u2 y4   \n\
movntdq   %%xmm0, 16(%0)  # Store high YUYV                               \n\
"

#define SSE2_YUV422_YUYV_UNALIGNED "                                      \n\
movdqu      (%1), %%xmm0  # Load 16 Y           y7 y6 y5 y4 y3 y2 y1 y0   \n\
movq        (%2), %%xmm1  # Load 8 Cb           00 00 00 00 u3 u2 u1 u0   \n\
movq        (%3), %%xmm2  # Load 8 Cr           00 00 00 00 v3 v2 v1 v0   \n\
prefetchnta (%0)          # Tell CPU not to cache output YUYV data        \n\
punpcklbw %%xmm2, %%xmm1  #                     v3 u3 v2 u2 v1 u1 v0 u0   \n\
movdqa    %%xmm0, %%xmm2  #                     y7 y6 y5 y4 y3 y2 y1 y0   \n\
punpcklbw %%xmm1, %%xmm2  #                     v1 y3 u1 y2 v0 y1 u0 y0   \n\
movdqu    %%xmm2, (%0)    # Store low YUYV                                \n\
punpckhbw %%xmm1, %%xmm0  #                     v3 y7 u3 y6 v2 y5 u2 y4   \n\
movdqu    %%xmm0, 16(%0)  # Store high YUYV                               \n\
"

#define SSE2_YUV422_YVYU_ALIGNED "                                        \n\
movdqa      (%1), %%xmm0  # Load 16 Y           y7 y6 y5 y4 y3 y2 y1 y0   \n\
movq        (%2), %%xmm2  # Load 8 Cb           00 00 00 00 u3 u2 u1 u0   \n\
movq        (%3), %%xmm1  # Load 8 Cr           00 00 00 00 v3 v2 v1 v0   \n\
punpcklbw %%xmm2, %%xmm1  #                     u3 v3 u2 v2 u1 v1 u0 v0   \n\
movdqa    %%xmm0, %%xmm2  #                     y7 y6 y5 y4 y3 y2 y1 y0   \n\
punpcklbw %%xmm1, %%xmm2  #                     u1 y3 v1 y2 u0 y1 v0 y0   \n\
movntdq   %%xmm2, (%0)    # Store low YVYU                                \n\
punpckhbw %%xmm1, %%xmm0  #                     u3 y7 v3 y6 u2 y5 v2 y4   \n\
movntdq   %%xmm0, 16(%0)  # Store high YVYU                               \n\
"

#define SSE2_YUV422_YVYU_UNALIGNED "                                      \n\
movdqu      (%1), %%xmm0  # Load 16 Y           y7 y6 y5 y4 y3 y2 y1 y0   \n\
movq        (%2), %%xmm2  # Load 8 Cb           00 00 00 00 u3 u2 u1 u0   \n\
movq        (%3), %%xmm1  # Load 8 Cr           00 00 00 00 v3 v2 v1 v0   \n\
prefetchnta (%0)          # Tell CPU not to cache output YVYU data        \n\
punpcklbw %%xmm2, %%xmm1  #                     u3 v3 u2 v2 u1 v1 u0 v0   \n\
movdqa    %%xmm0, %%xmm2  #                     y7 y6 y5 y4 y3 y2 y1 y0   \n\
punpcklbw %%xmm1, %%xmm2  #                     u1 y3 v1 y2 u0 y1 v0 y0   \n\
movdqu    %%xmm2, (%0)    # Store low YVYU                                \n\
punpckhbw %%xmm1, %%xmm0  #                     u3 y7 v3 y6 u2 y5 v2 y4   \n\
movdqu    %%xmm0, 16(%0)  # Store high YVYU                               \n\
"

#define SSE2_YUV422_UYVY_ALIGNED "                                        \n\
movdqa      (%1), %%xmm0  # Load 16 Y           y7 y6 y5 y4 y3 y2 y1 y0   \n\
movq        (%2), %%xmm1  # Load 8 Cb           00 00 00 00 u3 u2 u1 u0   \n\
movq        (%3), %%xmm2  # Load 8 Cr           00 00 00 00 v3 v2 v1 v0   \n\
punpcklbw %%xmm2, %%xmm1  #                     v3 u3 v2 u2 v1 u1 v0 u0   \n\
movdqa    %%xmm1, %%xmm2  #                     v3 u3 v2 u2 v1 u1 v0 u0   \n\
punpcklbw %%xmm0, %%xmm2  #                     y3 v1 y2 u1 y1 v0 y0 u0   \n\
movntdq   %%xmm2, (%0)    # Store low UYVY                                \n\
punpckhbw %%xmm0, %%xmm1  #                     y7 v3 y6 u3 y5 v2 y4 u2   \n\
movntdq   %%xmm1, 16(%0)  # Store high UYVY                               \n\
"

#define SSE2_YUV422_UYVY_UNALIGNED "                                      \n\
movdqu      (%1), %%xmm0  # Load 16 Y           y7 y6 y5 y4 y3 y2 y1 y0   \n\
movq        (%2), %%xmm1  # Load 8 Cb           00 00 00 00 u3 u2 u1 u0   \n\
movq        (%3), %%xmm2  # Load 8 Cr           00 00 00 00 v3 v2 v1 v0   \n\
prefetchnta (%0)          # Tell CPU not to cache output UYVY data        \n\
punpcklbw %%xmm2, %%xmm1  #                     v3 u3 v2 u2 v1 u1 v0 u0   \n\
movdqa    %%xmm1, %%xmm2  #                     v3 u3 v2 u2 v1 u1 v0 u0   \n\
punpcklbw %%xmm0, %%xmm2  #                     y3 v1 y2 u1 y1 v0 y0 u0   \n\
movdqu    %%xmm2, (%0)    # Store low UYVY                                \n\
punpckhbw %%xmm0, %%xmm1  #                     y7 v3 y6 u3 y5 v2 y4 u2   \n\
movdqu    %%xmm1, 16(%0)  # Store high UYVY                               \n\
"

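/*
 * Usage sketch (illustrative; i_x and i_width are placeholders): the
 * _ALIGNED variants rely on movdqa loads and movntdq non-temporal stores,
 * so the caller is expected to check 16-byte alignment of the source and
 * destination pointers before picking a variant, e.g. for YUYV output:
 *
 *     if( !( 0x0f & ( (intptr_t)p_line | (intptr_t)p_y ) ) )
 *         for( i_x = i_width / 16 ; i_x-- ; )
 *             SSE2_CALL( SSE2_YUV422_YUYV_ALIGNED );
 *     else
 *         for( i_x = i_width / 16 ; i_x-- ; )
 *             SSE2_CALL( SSE2_YUV422_YUYV_UNALIGNED );
 *     SSE2_END;
 *
 * Each iteration consumes 16 Y and 8 Cb/Cr samples and writes 32 output
 * bytes; SSE2_END (sfence) makes the non-temporal stores globally visible
 * before the picture is handed on.
 */
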
#elif defined(HAVE_SSE2_INTRINSICS)

/* SSE2 intrinsics */

#include <emmintrin.h>

#define SSE2_CALL(SSE2_INSTRUCTIONS)    \
    do {                                \
        __m128i xmm0, xmm1, xmm2;       \
        SSE2_INSTRUCTIONS               \
        p_line += 32; p_y += 16;        \
        p_u += 8; p_v += 8;             \
    } while(0)

#define SSE2_END  _mm_sfence()

#define SSE2_YUV422_YUYV_ALIGNED                \
    xmm0 = _mm_load_si128((__m128i *)p_y);      \
    xmm1 = _mm_loadl_epi64((__m128i *)p_u);     \
    xmm2 = _mm_loadl_epi64((__m128i *)p_v);     \
    xmm1 = _mm_unpacklo_epi8(xmm1, xmm2);       \
    xmm2 = xmm0;                                \
    xmm2 = _mm_unpacklo_epi8(xmm2, xmm1);       \
    _mm_stream_si128((__m128i*)(p_line), xmm2); \
    xmm0 = _mm_unpackhi_epi8(xmm0, xmm1);       \
    _mm_stream_si128((__m128i*)(p_line+16), xmm0);

#define SSE2_YUV422_YUYV_UNALIGNED              \
    xmm0 = _mm_loadu_si128((__m128i *)p_y);     \
    xmm1 = _mm_loadl_epi64((__m128i *)p_u);     \
    xmm2 = _mm_loadl_epi64((__m128i *)p_v);     \
    xmm1 = _mm_unpacklo_epi8(xmm1, xmm2);       \
    xmm2 = xmm0;                                \
    xmm2 = _mm_unpacklo_epi8(xmm2, xmm1);       \
    _mm_storeu_si128((__m128i*)(p_line), xmm2); \
    xmm0 = _mm_unpackhi_epi8(xmm0, xmm1);       \
    _mm_storeu_si128((__m128i*)(p_line+16), xmm0);

#define SSE2_YUV422_YVYU_ALIGNED                \
    xmm0 = _mm_load_si128((__m128i *)p_y);      \
    xmm2 = _mm_loadl_epi64((__m128i *)p_u);     \
    xmm1 = _mm_loadl_epi64((__m128i *)p_v);     \
    xmm1 = _mm_unpacklo_epi8(xmm1, xmm2);       \
    xmm2 = xmm0;                                \
    xmm2 = _mm_unpacklo_epi8(xmm2, xmm1);       \
    _mm_stream_si128((__m128i*)(p_line), xmm2); \
    xmm0 = _mm_unpackhi_epi8(xmm0, xmm1);       \
    _mm_stream_si128((__m128i*)(p_line+16), xmm0);

#define SSE2_YUV422_YVYU_UNALIGNED              \
    xmm0 = _mm_loadu_si128((__m128i *)p_y);     \
    xmm2 = _mm_loadl_epi64((__m128i *)p_u);     \
    xmm1 = _mm_loadl_epi64((__m128i *)p_v);     \
    xmm1 = _mm_unpacklo_epi8(xmm1, xmm2);       \
    xmm2 = xmm0;                                \
    xmm2 = _mm_unpacklo_epi8(xmm2, xmm1);       \
    _mm_storeu_si128((__m128i*)(p_line), xmm2); \
    xmm0 = _mm_unpackhi_epi8(xmm0, xmm1);       \
    _mm_storeu_si128((__m128i*)(p_line+16), xmm0);

#define SSE2_YUV422_UYVY_ALIGNED                \
    xmm0 = _mm_load_si128((__m128i *)p_y);      \
    xmm1 = _mm_loadl_epi64((__m128i *)p_u);     \
    xmm2 = _mm_loadl_epi64((__m128i *)p_v);     \
    xmm1 = _mm_unpacklo_epi8(xmm1, xmm2);       \
    xmm2 = xmm1;                                \
    xmm2 = _mm_unpacklo_epi8(xmm2, xmm0);       \
    _mm_stream_si128((__m128i*)(p_line), xmm2); \
    xmm1 = _mm_unpackhi_epi8(xmm1, xmm0);       \
    _mm_stream_si128((__m128i*)(p_line+16), xmm1);

#define SSE2_YUV422_UYVY_UNALIGNED              \
    xmm0 = _mm_loadu_si128((__m128i *)p_y);     \
    xmm1 = _mm_loadl_epi64((__m128i *)p_u);     \
    xmm2 = _mm_loadl_epi64((__m128i *)p_v);     \
    xmm1 = _mm_unpacklo_epi8(xmm1, xmm2);       \
    xmm2 = xmm1;                                \
    xmm2 = _mm_unpacklo_epi8(xmm2, xmm0);       \
    _mm_storeu_si128((__m128i*)(p_line), xmm2); \
    xmm1 = _mm_unpackhi_epi8(xmm1, xmm0);       \
    _mm_storeu_si128((__m128i*)(p_line+16), xmm1);

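/*
 * As in the MMX case, these intrinsic variants mirror the assembly strings
 * one for one: _mm_stream_si128 stands in for movntdq in the _ALIGNED
 * versions and _mm_storeu_si128 for movdqu in the _UNALIGNED ones, so the
 * same alignment test, loop structure and trailing SSE2_END (_mm_sfence)
 * apply.
 */
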
#endif

#endif

#define C_YUV422_YUYV( p_line, p_y, p_u, p_v )                              \
    *(p_line)++ = *(p_y)++;                                                 \
    *(p_line)++ = *(p_u)++;                                                 \
    *(p_line)++ = *(p_y)++;                                                 \
    *(p_line)++ = *(p_v)++;                                                 \

#define C_YUV422_YVYU( p_line, p_y, p_u, p_v )                              \
    *(p_line)++ = *(p_y)++;                                                 \
    *(p_line)++ = *(p_v)++;                                                 \
    *(p_line)++ = *(p_y)++;                                                 \
    *(p_line)++ = *(p_u)++;                                                 \

#define C_YUV422_UYVY( p_line, p_y, p_u, p_v )                              \
    *(p_line)++ = *(p_u)++;                                                 \
    *(p_line)++ = *(p_y)++;                                                 \
    *(p_line)++ = *(p_v)++;                                                 \
    *(p_line)++ = *(p_y)++;                                                 \

#define C_YUV422_Y211( p_line, p_y, p_u, p_v )                              \
    *(p_line)++ = *(p_y); p_y += 2;                                         \
    *(p_line)++ = *(p_u) - 0x80; p_u += 2;                                  \
    *(p_line)++ = *(p_y); p_y += 2;                                         \
    *(p_line)++ = *(p_v) - 0x80; p_v += 2;                                  \

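/*
 * Usage sketch (illustrative; i_x and i_width are placeholders): the plain
 * C macros are the portable fallback and also handle the pixels left over
 * when the width is not a multiple of the SIMD block size, e.g. for YUYV:
 *
 *     for( i_x = i_width / 2 ; i_x-- ; )
 *         C_YUV422_YUYV( p_line, p_y, p_u, p_v );
 *
 * Each expansion emits one horizontal pixel pair (two Y, one Cb, one Cr,
 * i.e. four output bytes).  C_YUV422_Y211 instead keeps every second Y and
 * every second chroma sample and subtracts 0x80 from Cb/Cr, converting the
 * chroma to the signed representation used by the Y211 layout.
 */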