1 /*****************************************************************************
2 * vdec_motion_inner_mmxext.c : motion compensation inner routines optimized
4 *****************************************************************************
5 * Copyright (C) 1999, 2000 VideoLAN
6 * $Id: vdec_motion_inner_mmxext.c,v 1.1 2001/01/18 05:13:22 sam Exp $
8 * Authors: Christophe Massiot <massiot@via.ecp.fr>, largely inspired by the
9 * work done by the livid project <http://www.linuxvideo.org/>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
24 *****************************************************************************/
26 /*****************************************************************************
28 *****************************************************************************/
38 #include "attributes.h"
41 /* OK, I know, this code has been taken from livid's mpeg2dec --Meuuh */
/* Constant with 0x01 in each of its eight bytes. The X_Y routines AND it into
 * mm7 so that the value they subtract from each averaged byte is 0 or 1. */
43 static mmx_t mask_one = {0x0101010101010101LL};
/*
 * Byte-average helpers used by the averaging routines in this file. They
 * simply forward to the pavgb_* macros: _r2r takes two registers, _m2r takes
 * a memory operand and a register; the result lands in `dest`.
 *
 * NOTE(review): both expansions already end in ';', so a call written as
 * `pavg_r2r (a, b);` expands to the call followed by an empty statement.
 * That is harmless inside a block, but it would break an unbraced
 * `if (...) pavg_r2r (a, b); else ...`.
 */
49 #define pavg_r2r(src,dest) pavgb_r2r (src, dest);
50 #define pavg_m2r(src,dest) pavgb_m2r (src, dest);
/*
 * Generator for MotionComponent_x_y_copy_<width>_<height>().
 *
 * For each of `height` rows the generated function loads the bytes at p_src
 * and p_src + 8 into mm0/mm1 and stores them unchanged at p_dest and
 * p_dest + 8: a plain copy. In the sibling macros an uppercase X adds an
 * average with the byte at offset +1 and an uppercase Y an average with the
 * row at +i_stride; this variant does neither.
 */
52 #define __MotionComponent_x_y_copy(width,height) \
53 void MotionComponent_x_y_copy_##width##_##height(yuv_data_t * p_src, \
54 yuv_data_t * p_dest, \
59 pxor_r2r (mm0, mm0); /* xor each MMX register with itself (zeroes it) */ \
60 pxor_r2r (mm1, mm1); \
61 pxor_r2r (mm2, mm2); \
62 pxor_r2r (mm3, mm3); \
63 pxor_r2r (mm4, mm4); \
64 pxor_r2r (mm5, mm5); \
65 pxor_r2r (mm6, mm6); \
66 pxor_r2r (mm7, mm7); \
68 for( i_y = 0; i_y < height; i_y ++ ) \
70 movq_m2r( *p_src, mm0 ); /* load 8 ref bytes */ \
72 movq_m2r( *(p_src + 8), mm1 ); /* load the following 8 ref bytes */ \
75 movq_r2m( mm0, *p_dest ); /* store 8 bytes at curr */ \
77 movq_r2m( mm1, *(p_dest + 8) ); /* store the following 8 bytes */ \
/*
 * Generator for MotionComponent_X_y_copy_<width>_<height>().
 *
 * For each of `height` rows, every output byte is pavg(p_src[x], p_src[x+1]),
 * i.e. the byte average of a source byte and its right-hand neighbour on the
 * same row. The result is written to p_dest, overwriting what was there.
 */
82 #define __MotionComponent_X_y_copy(width,height) \
83 void MotionComponent_X_y_copy_##width##_##height(yuv_data_t * p_src, \
84 yuv_data_t * p_dest, \
89 for( i_y = 0; i_y < height; i_y ++ ) \
91 movq_m2r (*p_src, mm0); /* bytes 0..7 of the row */ \
93 movq_m2r (*(p_src + 8), mm1); /* bytes 8..15 of the row */ \
94 pavg_m2r (*(p_src + 1), mm0); /* average with bytes 1..8 */ \
96 pavg_m2r (*(p_src + 9), mm1); /* average with bytes 9..16 */ \
97 movq_r2m (mm0, *p_dest); \
100 movq_r2m (mm1, *(p_dest + 8)); \
101 p_dest += i_stride; /* next destination row */ \
/*
 * Generator for MotionComponent_x_Y_copy_<width>_<height>().
 *
 * For each of `height` rows, every output byte is
 * pavg(p_src[x], p_next_src[x]), where p_next_src starts one stride below
 * p_src: the byte average of a source byte and the byte directly beneath it.
 * The result is written to p_dest, overwriting what was there.
 */
105 #define __MotionComponent_x_Y_copy(width,height) \
106 void MotionComponent_x_Y_copy_##width##_##height(yuv_data_t * p_src, \
107 yuv_data_t * p_dest, \
111 yuv_data_t * p_next_src = p_src + i_stride; /* row below p_src */ \
113 for( i_y = 0; i_y < height; i_y ++ ) \
115 movq_m2r (*p_src, mm0); /* bytes 0..7 of the current row */ \
117 movq_m2r (*(p_src + 8), mm1); /* bytes 8..15 of the current row */ \
118 pavg_m2r (*(p_next_src), mm0); /* average with the row below */ \
120 pavg_m2r (*(p_next_src + 8), mm1); \
121 movq_r2m (mm0, *p_dest); \
123 p_next_src += i_stride; \
125 movq_r2m (mm1, *(p_dest + 8)); \
126 p_dest += i_stride; /* next destination row */ \
/*
 * Generator for MotionComponent_X_Y_copy_<width>_<height>().
 *
 * Each output byte is built from four source bytes:
 *     A = p_src[x]             B = p_src[x + 1]
 *     C = p_src[x + i_stride]  D = p_src[x + i_stride + 1]
 *
 * First loop (writes 16 bytes per row, as two 8-byte halves):
 *     out = pavg(pavg(A,D), pavg(B,C)) - corr        (unsigned saturating)
 *     corr = ((A^D) | (B^C)) & (pavg(A,D) ^ pavg(B,C)) & 0x01
 *
 * Second loop (writes 8 bytes per row): same scheme, but the pairs are
 * horizontal, (A,B) and (C,D). The average and xor computed for the row
 * loaded in one iteration are kept in mm0/mm7 and reused by the next
 * iteration instead of being recomputed.
 *
 * NOTE(review): corr presumably compensates for the nested pavg rounding up
 * twice -- confirm against the pavgb definition.
 */
130 #define __MotionComponent_X_Y_copy(width,height) \
131 void MotionComponent_X_Y_copy_##width##_##height(yuv_data_t * p_src, \
132 yuv_data_t * p_dest, \
139 for( i_y = 0; i_y < height; i_y ++ ) \
141 movq_m2r (*p_src, mm0); /* mm0 = A */ \
142 movq_m2r (*(p_src+i_stride+1), mm1); /* mm1 = D */ \
143 movq_r2r (mm0, mm7); \
144 movq_m2r (*(p_src+1), mm2); /* mm2 = B */ \
145 pxor_r2r (mm1, mm7); /* mm7 = A ^ D */ \
146 movq_m2r (*(p_src + i_stride), mm3); /* mm3 = C */ \
147 movq_r2r (mm2, mm6); \
148 pxor_r2r (mm3, mm6); /* mm6 = B ^ C */ \
149 pavg_r2r (mm1, mm0); /* mm0 = pavg(A, D) */ \
150 pavg_r2r (mm3, mm2); /* mm2 = pavg(B, C) */ \
151 por_r2r (mm6, mm7); /* mm7 = (A^D) | (B^C) */ \
152 movq_r2r (mm0, mm6); \
153 pxor_r2r (mm2, mm6); /* mm6 = pavg(A,D) ^ pavg(B,C) */ \
154 pand_r2r (mm6, mm7); \
155 pand_m2r (mask_one, mm7); /* keep bit 0 of each byte: corr */ \
156 pavg_r2r (mm2, mm0); /* mm0 = pavg of the two averages */ \
157 psubusb_r2r (mm7, mm0); /* subtract corr, saturating */ \
158 movq_r2m (mm0, *p_dest); /* store bytes 0..7 */ \
160 movq_m2r (*(p_src+8), mm0); /* same sequence for bytes 8..15 */ \
161 movq_m2r (*(p_src+i_stride+9), mm1); \
162 movq_r2r (mm0, mm7); \
163 movq_m2r (*(p_src+9), mm2); \
164 pxor_r2r (mm1, mm7); \
165 movq_m2r (*(p_src+i_stride+8), mm3); \
166 movq_r2r (mm2, mm6); \
167 pxor_r2r (mm3, mm6); \
168 pavg_r2r (mm1, mm0); \
169 pavg_r2r (mm3, mm2); \
170 por_r2r (mm6, mm7); \
171 movq_r2r (mm0, mm6); \
172 pxor_r2r (mm2, mm6); \
173 pand_r2r (mm6, mm7); \
174 pand_m2r (mask_one, mm7); \
175 pavg_r2r (mm2, mm0); \
176 psubusb_r2r (mm7, mm0); \
178 movq_r2m (mm0, *(p_dest+8)); /* store bytes 8..15 */ \
179 p_dest += i_stride; /* next destination row */ \
184 movq_m2r (*p_src, mm0); /* prologue: mm0 = A */ \
185 movq_m2r (*(p_src+1), mm1); /* mm1 = B */ \
186 movq_r2r (mm0, mm7); \
187 pxor_r2r (mm1, mm7); /* mm7 = A ^ B */ \
188 pavg_r2r (mm1, mm0); /* mm0 = pavg(A, B) */ \
191 for( i_y = 0; i_y < height; i_y ++ ) \
193 movq_m2r (*p_src, mm2); /* bytes x of the row now at p_src */ \
194 movq_r2r (mm0, mm5); /* mm5 = average carried in mm0 */ \
195 movq_m2r (*(p_src+1), mm3); /* bytes x+1 of that row */ \
196 movq_r2r (mm2, mm6); \
197 pxor_r2r (mm3, mm6); /* mm6 = xor of the pair just loaded */ \
198 pavg_r2r (mm3, mm2); /* mm2 = average of the pair just loaded */ \
199 por_r2r (mm6, mm7); /* mm7 = carried xor | new xor */ \
200 pxor_r2r (mm2, mm5); /* mm5 = carried average ^ new average */ \
201 pand_r2r (mm5, mm7); \
202 pavg_r2r (mm2, mm0); /* mm0 = pavg of the two averages */ \
203 pand_m2r (mask_one, mm7); /* keep bit 0 of each byte: corr */ \
204 psubusb_r2r (mm7, mm0); /* subtract corr, saturating */ \
206 movq_r2m (mm0, *p_dest); \
207 p_dest += i_stride; /* next destination row */ \
208 movq_r2r (mm6, mm7); /* carry the new xor to the next iteration */ \
209 movq_r2r (mm2, mm0); /* carry the new average as well */ \
/*
 * Generator for MotionComponent_x_y_avg_<width>_<height>().
 *
 * For each of `height` rows, every output byte is pavg(p_src[x], p_dest[x]):
 * the source bytes are averaged with the bytes already in the destination,
 * and the result is written back to p_dest.
 */
214 #define __MotionComponent_x_y_avg(width,height) \
215 void MotionComponent_x_y_avg_##width##_##height(yuv_data_t * p_src, \
216 yuv_data_t * p_dest, \
221 for( i_y = 0; i_y < height; i_y ++ ) \
223 movq_m2r( *p_src, mm0 ); /* source bytes 0..7 */ \
225 movq_m2r( *(p_src + 8), mm1 ); /* source bytes 8..15 */ \
226 pavg_m2r( *p_dest, mm0 ); /* average with current destination */ \
228 pavg_m2r( *(p_dest + 8), mm1 ); \
229 movq_r2m( mm0, *p_dest ); /* write the blend back */ \
232 movq_r2m( mm1, *(p_dest + 8) ); \
233 p_dest += i_stride; /* next destination row */ \
/*
 * Generator for MotionComponent_X_y_avg_<width>_<height>().
 *
 * For each of `height` rows, every output byte is
 * pavg(pavg(p_src[x], p_src[x+1]), p_dest[x]): the source is first averaged
 * with its right-hand neighbour, then with the bytes already in the
 * destination, and the result is written back to p_dest.
 */
237 #define __MotionComponent_X_y_avg(width,height) \
238 void MotionComponent_X_y_avg_##width##_##height(yuv_data_t * p_src, \
239 yuv_data_t * p_dest, \
244 for( i_y = 0; i_y < height; i_y ++ ) \
246 movq_m2r (*p_src, mm0); /* source bytes 0..7 */ \
248 movq_m2r (*(p_src + 8), mm1); /* source bytes 8..15 */ \
249 pavg_m2r (*(p_src + 1), mm0); /* average with bytes 1..8 */ \
251 pavg_m2r (*(p_src + 9), mm1); /* average with bytes 9..16 */ \
252 pavg_m2r (*p_dest, mm0); /* then average with current destination */ \
254 pavg_m2r (*(p_dest + 8), mm1); \
256 movq_r2m (mm0, *p_dest); /* write the blend back */ \
258 movq_r2m (mm1, *(p_dest + 8)); \
259 p_dest += i_stride; /* next destination row */ \
/*
 * Generator for MotionComponent_x_Y_avg_<width>_<height>().
 *
 * For each of `height` rows, every output byte is
 * pavg(pavg(p_src[x], p_next_src[x]), p_dest[x]), where p_next_src starts one
 * stride below p_src: the source is first averaged with the byte directly
 * beneath it, then with the bytes already in the destination, and the result
 * is written back to p_dest.
 */
263 #define __MotionComponent_x_Y_avg(width,height) \
264 void MotionComponent_x_Y_avg_##width##_##height(yuv_data_t * p_src, \
265 yuv_data_t * p_dest, \
269 yuv_data_t * p_next_src = p_src + i_stride; /* row below p_src */ \
271 for( i_y = 0; i_y < height; i_y ++ ) \
273 movq_m2r (*p_src, mm0); /* bytes 0..7 of the current row */ \
275 movq_m2r (*(p_src + 8), mm1); /* bytes 8..15 of the current row */ \
276 pavg_m2r (*(p_next_src), mm0); /* average with the row below */ \
278 pavg_m2r (*(p_next_src + 8), mm1); \
279 pavg_m2r (*p_dest, mm0); /* then average with current destination */ \
281 pavg_m2r (*(p_dest + 8), mm1); \
283 p_next_src += i_stride; \
284 movq_r2m (mm0, *p_dest); /* write the blend back */ \
286 movq_r2m (mm1, *(p_dest + 8)); \
287 p_dest += i_stride; /* next destination row */ \
/*
 * Generator for MotionComponent_X_Y_avg_<width>_<height>().
 *
 * Each interpolated byte is built from four source bytes:
 *     A = p_src[x]             B = p_src[x + 1]
 *     C = p_src[x + i_stride]  D = p_src[x + i_stride + 1]
 *
 *     interp = pavg(pavg(A,D), pavg(B,C)) - corr     (unsigned saturating)
 *     corr   = ((A^D) | (B^C)) & (pavg(A,D) ^ pavg(B,C)) & 0x01
 *     out    = pavg(interp, p_dest[x])
 *
 * Two loops are visible: the first writes 16 bytes per row (two 8-byte
 * halves), the second runs the same sequence for 8 bytes per row.
 *
 * NOTE(review): corr presumably compensates for the nested pavg rounding up
 * twice -- confirm against the pavgb definition.
 */
291 #define __MotionComponent_X_Y_avg(width,height) \
292 void MotionComponent_X_Y_avg_##width##_##height(yuv_data_t * p_src, \
293 yuv_data_t * p_dest, \
300 for( i_y = 0; i_y < height; i_y ++ ) \
302 movq_m2r (*p_src, mm0); /* mm0 = A */ \
303 movq_m2r (*(p_src+i_stride+1), mm1); /* mm1 = D */ \
304 movq_r2r (mm0, mm7); \
305 movq_m2r (*(p_src+1), mm2); /* mm2 = B */ \
306 pxor_r2r (mm1, mm7); /* mm7 = A ^ D */ \
307 movq_m2r (*(p_src+i_stride), mm3); /* mm3 = C */ \
308 movq_r2r (mm2, mm6); \
309 pxor_r2r (mm3, mm6); /* mm6 = B ^ C */ \
310 pavg_r2r (mm1, mm0); /* mm0 = pavg(A, D) */ \
311 pavg_r2r (mm3, mm2); /* mm2 = pavg(B, C) */ \
312 por_r2r (mm6, mm7); /* mm7 = (A^D) | (B^C) */ \
313 movq_r2r (mm0, mm6); \
314 pxor_r2r (mm2, mm6); /* mm6 = pavg(A,D) ^ pavg(B,C) */ \
315 pand_r2r (mm6, mm7); \
316 pand_m2r (mask_one, mm7); /* keep bit 0 of each byte: corr */ \
317 pavg_r2r (mm2, mm0); /* mm0 = pavg of the two averages */ \
318 psubusb_r2r (mm7, mm0); /* subtract corr, saturating */ \
319 movq_m2r (*p_dest, mm1); /* current destination bytes 0..7 */ \
320 pavg_r2r (mm1, mm0); /* blend with the destination */ \
321 movq_r2m (mm0, *p_dest); \
323 movq_m2r (*(p_src+8), mm0); /* same sequence for bytes 8..15 */ \
324 movq_m2r (*(p_src+i_stride+9), mm1); \
325 movq_r2r (mm0, mm7); \
326 movq_m2r (*(p_src+9), mm2); \
327 pxor_r2r (mm1, mm7); \
328 movq_m2r (*(p_src+i_stride+8), mm3); \
329 movq_r2r (mm2, mm6); \
330 pxor_r2r (mm3, mm6); \
331 pavg_r2r (mm1, mm0); \
332 pavg_r2r (mm3, mm2); \
333 por_r2r (mm6, mm7); \
334 movq_r2r (mm0, mm6); \
335 pxor_r2r (mm2, mm6); \
336 pand_r2r (mm6, mm7); \
337 pand_m2r (mask_one, mm7); \
338 pavg_r2r (mm2, mm0); \
339 psubusb_r2r (mm7, mm0); \
340 movq_m2r (*(p_dest+8), mm1); /* current destination bytes 8..15 */ \
341 pavg_r2r (mm1, mm0); \
343 movq_r2m (mm0, *(p_dest+8)); \
344 p_dest += i_stride; /* next destination row */ \
349 for( i_y = 0; i_y < height; i_y ++ ) \
351 movq_m2r (*p_src, mm0); /* second loop: 8 bytes per row, mm0 = A */ \
352 movq_m2r (*(p_src+i_stride+1), mm1); /* mm1 = D */ \
353 movq_r2r (mm0, mm7); \
354 movq_m2r (*(p_src+1), mm2); /* mm2 = B */ \
355 pxor_r2r (mm1, mm7); \
356 movq_m2r (*(p_src+i_stride), mm3); /* mm3 = C */ \
357 movq_r2r (mm2, mm6); \
358 pxor_r2r (mm3, mm6); \
359 pavg_r2r (mm1, mm0); \
360 pavg_r2r (mm3, mm2); \
361 por_r2r (mm6, mm7); \
362 movq_r2r (mm0, mm6); \
363 pxor_r2r (mm2, mm6); \
364 pand_r2r (mm6, mm7); \
365 pand_m2r (mask_one, mm7); \
366 pavg_r2r (mm2, mm0); \
367 psubusb_r2r (mm7, mm0); \
368 movq_m2r (*p_dest, mm1); /* current destination bytes */ \
369 pavg_r2r (mm1, mm0); /* blend with the destination */ \
371 movq_r2m (mm0, *p_dest); \
372 p_dest += i_stride; /* next destination row */ \
/*
 * Expands all eight generator macros for one (width, height) pair: the four
 * x/X, y/Y combinations in their `copy` form followed by the same four in
 * their `avg` form. Each expansion defines one
 * MotionComponent_<variant>_<copy|avg>_<width>_<height>() function.
 */
377 #define __MotionComponents(width,height) \
378 __MotionComponent_x_y_copy(width,height) \
379 __MotionComponent_X_y_copy(width,height) \
380 __MotionComponent_x_Y_copy(width,height) \
381 __MotionComponent_X_Y_copy(width,height) \
382 __MotionComponent_x_y_avg(width,height) \
383 __MotionComponent_X_y_avg(width,height) \
384 __MotionComponent_x_Y_avg(width,height) \
385 __MotionComponent_X_Y_avg(width,height)
/*
 * Instantiate the eight routines for every supported block size, given as
 * (width, height). The trailing comments are the original author's notes;
 * presumably they list the chroma formats that use each size -- verify
 * against the callers.
 */
387 __MotionComponents (16,16) /* 444, 422, 420 */
388 __MotionComponents (16,8) /* 444, 422, 420 */
389 __MotionComponents (8,8) /* 422, 420 */
390 __MotionComponents (8,4) /* 420 */
392 __MotionComponents (8,16) /* 422 */