1 /*****************************************************************************
2 * vdec_motion_inner_mmxext.c : motion compensation inner routines optimized
4 *****************************************************************************
5 * Copyright (C) 1999, 2000 VideoLAN
6 * $Id: vdec_motion_inner_mmxext.c,v 1.3 2001/06/07 22:14:55 sam Exp $
8 * Authors: Christophe Massiot <massiot@via.ecp.fr>, largely inspired by the
9 * work done by the livid project <http://www.linuxvideo.org/>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
24 *****************************************************************************/
26 #define MODULE_NAME motionmmxext
27 #include "modules_inner.h"
29 /*****************************************************************************
31 *****************************************************************************/
41 #include "attributes.h"
44 /* OK, I know, this code has been taken from livid's mpeg2dec --Meuuh */
/* 64-bit constant with every byte equal to 0x01. The X_Y macros below AND it
 * into mm7 (pand_m2r) so that only the least significant bit of each byte is
 * kept before the saturating subtract. */
46 static mmx_t mask_one = {0x0101010101010101LL};
/* Generic "average" names mapped onto the byte forms pavgb_r2r / pavgb_m2r
 * (defined elsewhere; presumably wrappers for the PAVGB instruction - confirm).
 * Argument order is (source, destination). Note that each expansion already
 * ends with a semicolon, so a call followed by ';' yields an extra empty
 * statement. */
52 #define pavg_r2r(src,dest) pavgb_r2r (src, dest);
53 #define pavg_m2r(src,dest) pavgb_m2r (src, dest);
/*****************************************************************************
 * __MotionComponent_x_y_copy: generate the plain copy routine
 *****************************************************************************
 * Expands to the definition of _M(MotionComponent_x_y_copy_<width>_<height>).
 * The visible body first clears mm0 to mm7 (each register is XORed with
 * itself), then loops `height` times: 8 bytes are loaded from p_src into mm0
 * and 8 more from p_src + 8 into mm1, and both are stored unchanged to p_dest
 * and p_dest + 8.
 * NOTE(review): in this copy of the file the parameter list is cut off after
 * p_dest, and neither the braces, the declaration of i_y, nor any pointer
 * advance is visible. Presumably the "+ 8" accesses only apply when
 * width == 16 - confirm against the complete file.
 *****************************************************************************/
55 #define __MotionComponent_x_y_copy(width,height) \
56 void _M(MotionComponent_x_y_copy_##width##_##height)(yuv_data_t * p_src, \
57 yuv_data_t * p_dest, \
62 pxor_r2r (mm0, mm0); \
63 pxor_r2r (mm1, mm1); \
64 pxor_r2r (mm2, mm2); \
65 pxor_r2r (mm3, mm3); \
66 pxor_r2r (mm4, mm4); \
67 pxor_r2r (mm5, mm5); \
68 pxor_r2r (mm6, mm6); \
69 pxor_r2r (mm7, mm7); \
71 for( i_y = 0; i_y < height; i_y ++ ) \
73 movq_m2r( *p_src, mm0 ); /* load 8 ref bytes */ \
75 movq_m2r( *(p_src + 8), mm1 ); \
78 movq_r2m( mm0, *p_dest ); /* store 8 bytes at curr */ \
80 movq_r2m( mm1, *(p_dest + 8) ); \
/*****************************************************************************
 * __MotionComponent_X_y_copy: generate the horizontally averaged copy routine
 *****************************************************************************
 * Expands to the definition of _M(MotionComponent_X_y_copy_<width>_<height>).
 * For each of `height` rows, the 8 bytes at p_src are averaged (pavg_m2r)
 * with the 8 bytes at p_src + 1, and likewise p_src + 8 with p_src + 9; the
 * results are stored to p_dest and p_dest + 8, then p_dest advances by
 * i_stride. Presumably this is horizontal half-pixel interpolation - confirm.
 * NOTE(review): the rest of the parameter list, the braces, the p_src advance
 * and any width test guarding the second 8 bytes are not visible in this
 * copy of the file.
 *****************************************************************************/
85 #define __MotionComponent_X_y_copy(width,height) \
86 void _M(MotionComponent_X_y_copy_##width##_##height)(yuv_data_t * p_src, \
87 yuv_data_t * p_dest, \
92 for( i_y = 0; i_y < height; i_y ++ ) \
94 movq_m2r (*p_src, mm0); \
96 movq_m2r (*(p_src + 8), mm1); \
97 pavg_m2r (*(p_src + 1), mm0); \
99 pavg_m2r (*(p_src + 9), mm1); \
100 movq_r2m (mm0, *p_dest); \
103 movq_r2m (mm1, *(p_dest + 8)); \
104 p_dest += i_stride; \
/*****************************************************************************
 * __MotionComponent_x_Y_copy: generate the vertically averaged copy routine
 *****************************************************************************
 * Expands to the definition of _M(MotionComponent_x_Y_copy_<width>_<height>).
 * p_next_src starts at p_src + i_stride, i.e. one stride further on. For
 * each of `height` rows, the 8 bytes at p_src (and at p_src + 8) are averaged
 * with the bytes at the same offsets from p_next_src and stored to p_dest
 * (and p_dest + 8); p_next_src and p_dest then advance by i_stride.
 * Presumably this is vertical half-pixel interpolation - confirm.
 * NOTE(review): the rest of the parameter list, the braces, the p_src advance
 * and any width test guarding the second 8 bytes are not visible in this
 * copy of the file.
 *****************************************************************************/
108 #define __MotionComponent_x_Y_copy(width,height) \
109 void _M(MotionComponent_x_Y_copy_##width##_##height)(yuv_data_t * p_src, \
110 yuv_data_t * p_dest, \
114 yuv_data_t * p_next_src = p_src + i_stride; \
116 for( i_y = 0; i_y < height; i_y ++ ) \
118 movq_m2r (*p_src, mm0); \
120 movq_m2r (*(p_src + 8), mm1); \
121 pavg_m2r (*(p_next_src), mm0); \
123 pavg_m2r (*(p_next_src + 8), mm1); \
124 movq_r2m (mm0, *p_dest); \
126 p_next_src += i_stride; \
128 movq_r2m (mm1, *(p_dest + 8)); \
129 p_dest += i_stride; \
/*****************************************************************************
 * __MotionComponent_X_Y_copy: generate the 4-point averaged copy routine
 *****************************************************************************
 * Expands to the definition of _M(MotionComponent_X_Y_copy_<width>_<height>).
 * Two loop variants are visible.
 *
 * First loop (handles bytes 0-7, then bytes 8-15 of each row). With
 *   a = p_src[0], b = p_src[1], c = p_src[i_stride], d = p_src[i_stride + 1]
 * it computes, per byte:
 *   mm0 = pavg(a, d), mm2 = pavg(b, c)
 *   mm7 = ((a ^ d) | (b ^ c)) & (mm0 ^ mm2) & 0x01     (mask_one)
 *   mm0 = pavg(mm0, mm2) - mm7                         (psubusb, saturating)
 * and stores mm0 to p_dest; the same sequence is repeated at offset + 8
 * before p_dest advances by i_stride. The subtraction of the masked bit is
 * presumably a rounding correction for averaging two averages - confirm.
 *
 * Second variant (8 bytes per row). Before the loop, mm0 is preloaded with
 * pavg(p_src[0], p_src[1]) and mm7 with p_src[0] ^ p_src[1]. Each iteration
 * computes the same pair for the current p_src into mm2 / mm6, combines it
 * with the carried mm0 / mm7 using the same mask-and-subtract scheme, stores
 * the result, then copies mm6 to mm7 and mm2 to mm0 so the current row's
 * values are reused by the next iteration.
 *
 * NOTE(review): the rest of the parameter list, the braces, the p_src
 * advances and whatever condition selects between the two variants
 * (presumably width == 16 versus 8) are not visible in this copy of the file.
 *****************************************************************************/
133 #define __MotionComponent_X_Y_copy(width,height) \
134 void _M(MotionComponent_X_Y_copy_##width##_##height)(yuv_data_t * p_src, \
135 yuv_data_t * p_dest, \
142 for( i_y = 0; i_y < height; i_y ++ ) \
144 movq_m2r (*p_src, mm0); \
145 movq_m2r (*(p_src+i_stride+1), mm1); \
146 movq_r2r (mm0, mm7); \
147 movq_m2r (*(p_src+1), mm2); \
148 pxor_r2r (mm1, mm7); \
149 movq_m2r (*(p_src + i_stride), mm3); \
150 movq_r2r (mm2, mm6); \
151 pxor_r2r (mm3, mm6); \
152 pavg_r2r (mm1, mm0); \
153 pavg_r2r (mm3, mm2); \
154 por_r2r (mm6, mm7); \
155 movq_r2r (mm0, mm6); \
156 pxor_r2r (mm2, mm6); \
157 pand_r2r (mm6, mm7); \
158 pand_m2r (mask_one, mm7); \
159 pavg_r2r (mm2, mm0); \
160 psubusb_r2r (mm7, mm0); \
161 movq_r2m (mm0, *p_dest); \
163 movq_m2r (*(p_src+8), mm0); \
164 movq_m2r (*(p_src+i_stride+9), mm1); \
165 movq_r2r (mm0, mm7); \
166 movq_m2r (*(p_src+9), mm2); \
167 pxor_r2r (mm1, mm7); \
168 movq_m2r (*(p_src+i_stride+8), mm3); \
169 movq_r2r (mm2, mm6); \
170 pxor_r2r (mm3, mm6); \
171 pavg_r2r (mm1, mm0); \
172 pavg_r2r (mm3, mm2); \
173 por_r2r (mm6, mm7); \
174 movq_r2r (mm0, mm6); \
175 pxor_r2r (mm2, mm6); \
176 pand_r2r (mm6, mm7); \
177 pand_m2r (mask_one, mm7); \
178 pavg_r2r (mm2, mm0); \
179 psubusb_r2r (mm7, mm0); \
181 movq_r2m (mm0, *(p_dest+8)); \
182 p_dest += i_stride; \
187 movq_m2r (*p_src, mm0); \
188 movq_m2r (*(p_src+1), mm1); \
189 movq_r2r (mm0, mm7); \
190 pxor_r2r (mm1, mm7); \
191 pavg_r2r (mm1, mm0); \
194 for( i_y = 0; i_y < height; i_y ++ ) \
196 movq_m2r (*p_src, mm2); \
197 movq_r2r (mm0, mm5); \
198 movq_m2r (*(p_src+1), mm3); \
199 movq_r2r (mm2, mm6); \
200 pxor_r2r (mm3, mm6); \
201 pavg_r2r (mm3, mm2); \
202 por_r2r (mm6, mm7); \
203 pxor_r2r (mm2, mm5); \
204 pand_r2r (mm5, mm7); \
205 pavg_r2r (mm2, mm0); \
206 pand_m2r (mask_one, mm7); \
207 psubusb_r2r (mm7, mm0); \
209 movq_r2m (mm0, *p_dest); \
210 p_dest += i_stride; \
211 movq_r2r (mm6, mm7); \
212 movq_r2r (mm2, mm0); \
/*****************************************************************************
 * __MotionComponent_x_y_avg: generate the routine averaging source into dest
 *****************************************************************************
 * Expands to the definition of _M(MotionComponent_x_y_avg_<width>_<height>).
 * For each of `height` rows, the 8 bytes at p_src (and at p_src + 8) are
 * averaged (pavg_m2r) with the bytes already present at p_dest (and
 * p_dest + 8), and the result is written back to the same destination
 * locations; p_dest then advances by i_stride.
 * NOTE(review): the rest of the parameter list, the braces, the p_src advance
 * and any width test guarding the second 8 bytes are not visible in this
 * copy of the file.
 *****************************************************************************/
217 #define __MotionComponent_x_y_avg(width,height) \
218 void _M(MotionComponent_x_y_avg_##width##_##height)(yuv_data_t * p_src, \
219 yuv_data_t * p_dest, \
224 for( i_y = 0; i_y < height; i_y ++ ) \
226 movq_m2r( *p_src, mm0 ); \
228 movq_m2r( *(p_src + 8), mm1 ); \
229 pavg_m2r( *p_dest, mm0 ); \
231 pavg_m2r( *(p_dest + 8), mm1 ); \
232 movq_r2m( mm0, *p_dest ); \
235 movq_r2m( mm1, *(p_dest + 8) ); \
236 p_dest += i_stride; \
/*****************************************************************************
 * __MotionComponent_X_y_avg: horizontal average, then average with dest
 *****************************************************************************
 * Expands to the definition of _M(MotionComponent_X_y_avg_<width>_<height>).
 * For each of `height` rows, the 8 bytes at p_src are averaged with the 8
 * bytes at p_src + 1 (and p_src + 8 with p_src + 9); that result is averaged
 * again with the bytes already at p_dest (and p_dest + 8) and written back
 * there; p_dest then advances by i_stride.
 * NOTE(review): the rest of the parameter list, the braces, the p_src advance
 * and any width test guarding the second 8 bytes are not visible in this
 * copy of the file.
 *****************************************************************************/
240 #define __MotionComponent_X_y_avg(width,height) \
241 void _M(MotionComponent_X_y_avg_##width##_##height)(yuv_data_t * p_src, \
242 yuv_data_t * p_dest, \
247 for( i_y = 0; i_y < height; i_y ++ ) \
249 movq_m2r (*p_src, mm0); \
251 movq_m2r (*(p_src + 8), mm1); \
252 pavg_m2r (*(p_src + 1), mm0); \
254 pavg_m2r (*(p_src + 9), mm1); \
255 pavg_m2r (*p_dest, mm0); \
257 pavg_m2r (*(p_dest + 8), mm1); \
259 movq_r2m (mm0, *p_dest); \
261 movq_r2m (mm1, *(p_dest + 8)); \
262 p_dest += i_stride; \
/*****************************************************************************
 * __MotionComponent_x_Y_avg: vertical average, then average with dest
 *****************************************************************************
 * Expands to the definition of _M(MotionComponent_x_Y_avg_<width>_<height>).
 * p_next_src starts at p_src + i_stride. For each of `height` rows, the 8
 * bytes at p_src (and at p_src + 8) are averaged with the bytes at the same
 * offsets from p_next_src; that result is averaged again with the bytes
 * already at p_dest (and p_dest + 8) and written back there. p_next_src and
 * p_dest each advance by i_stride per iteration.
 * NOTE(review): the rest of the parameter list, the braces, the p_src advance
 * and any width test guarding the second 8 bytes are not visible in this
 * copy of the file.
 *****************************************************************************/
266 #define __MotionComponent_x_Y_avg(width,height) \
267 void _M(MotionComponent_x_Y_avg_##width##_##height)(yuv_data_t * p_src, \
268 yuv_data_t * p_dest, \
272 yuv_data_t * p_next_src = p_src + i_stride; \
274 for( i_y = 0; i_y < height; i_y ++ ) \
276 movq_m2r (*p_src, mm0); \
278 movq_m2r (*(p_src + 8), mm1); \
279 pavg_m2r (*(p_next_src), mm0); \
281 pavg_m2r (*(p_next_src + 8), mm1); \
282 pavg_m2r (*p_dest, mm0); \
284 pavg_m2r (*(p_dest + 8), mm1); \
286 p_next_src += i_stride; \
287 movq_r2m (mm0, *p_dest); \
289 movq_r2m (mm1, *(p_dest + 8)); \
290 p_dest += i_stride; \
/*****************************************************************************
 * __MotionComponent_X_Y_avg: 4-point average, then average with dest
 *****************************************************************************
 * Expands to the definition of _M(MotionComponent_X_Y_avg_<width>_<height>).
 * Two loops are visible; both use the same per-8-byte sequence. With
 *   a = p_src[0], b = p_src[1], c = p_src[i_stride], d = p_src[i_stride + 1]
 * it computes, per byte:
 *   mm0 = pavg(a, d), mm2 = pavg(b, c)
 *   mm7 = ((a ^ d) | (b ^ c)) & (mm0 ^ mm2) & 0x01     (mask_one)
 *   mm0 = pavg(mm0, mm2) - mm7                         (psubusb, saturating)
 *   mm0 = pavg(mm0, bytes already at p_dest)
 * and writes mm0 back to p_dest. The subtraction of the masked bit is
 * presumably a rounding correction for averaging two averages - confirm.
 *
 * The first loop applies the sequence at offset 0 and again at offset 8 of
 * each row; the second loop applies it at offset 0 only. Both advance p_dest
 * by i_stride per iteration.
 *
 * NOTE(review): the rest of the parameter list, the braces, the p_src
 * advances and whatever condition selects between the two loops (presumably
 * width == 16 versus 8) are not visible in this copy of the file.
 *****************************************************************************/
294 #define __MotionComponent_X_Y_avg(width,height) \
295 void _M(MotionComponent_X_Y_avg_##width##_##height)(yuv_data_t * p_src, \
296 yuv_data_t * p_dest, \
303 for( i_y = 0; i_y < height; i_y ++ ) \
305 movq_m2r (*p_src, mm0); \
306 movq_m2r (*(p_src+i_stride+1), mm1); \
307 movq_r2r (mm0, mm7); \
308 movq_m2r (*(p_src+1), mm2); \
309 pxor_r2r (mm1, mm7); \
310 movq_m2r (*(p_src+i_stride), mm3); \
311 movq_r2r (mm2, mm6); \
312 pxor_r2r (mm3, mm6); \
313 pavg_r2r (mm1, mm0); \
314 pavg_r2r (mm3, mm2); \
315 por_r2r (mm6, mm7); \
316 movq_r2r (mm0, mm6); \
317 pxor_r2r (mm2, mm6); \
318 pand_r2r (mm6, mm7); \
319 pand_m2r (mask_one, mm7); \
320 pavg_r2r (mm2, mm0); \
321 psubusb_r2r (mm7, mm0); \
322 movq_m2r (*p_dest, mm1); \
323 pavg_r2r (mm1, mm0); \
324 movq_r2m (mm0, *p_dest); \
326 movq_m2r (*(p_src+8), mm0); \
327 movq_m2r (*(p_src+i_stride+9), mm1); \
328 movq_r2r (mm0, mm7); \
329 movq_m2r (*(p_src+9), mm2); \
330 pxor_r2r (mm1, mm7); \
331 movq_m2r (*(p_src+i_stride+8), mm3); \
332 movq_r2r (mm2, mm6); \
333 pxor_r2r (mm3, mm6); \
334 pavg_r2r (mm1, mm0); \
335 pavg_r2r (mm3, mm2); \
336 por_r2r (mm6, mm7); \
337 movq_r2r (mm0, mm6); \
338 pxor_r2r (mm2, mm6); \
339 pand_r2r (mm6, mm7); \
340 pand_m2r (mask_one, mm7); \
341 pavg_r2r (mm2, mm0); \
342 psubusb_r2r (mm7, mm0); \
343 movq_m2r (*(p_dest+8), mm1); \
344 pavg_r2r (mm1, mm0); \
346 movq_r2m (mm0, *(p_dest+8)); \
347 p_dest += i_stride; \
352 for( i_y = 0; i_y < height; i_y ++ ) \
354 movq_m2r (*p_src, mm0); \
355 movq_m2r (*(p_src+i_stride+1), mm1); \
356 movq_r2r (mm0, mm7); \
357 movq_m2r (*(p_src+1), mm2); \
358 pxor_r2r (mm1, mm7); \
359 movq_m2r (*(p_src+i_stride), mm3); \
360 movq_r2r (mm2, mm6); \
361 pxor_r2r (mm3, mm6); \
362 pavg_r2r (mm1, mm0); \
363 pavg_r2r (mm3, mm2); \
364 por_r2r (mm6, mm7); \
365 movq_r2r (mm0, mm6); \
366 pxor_r2r (mm2, mm6); \
367 pand_r2r (mm6, mm7); \
368 pand_m2r (mask_one, mm7); \
369 pavg_r2r (mm2, mm0); \
370 psubusb_r2r (mm7, mm0); \
371 movq_m2r (*p_dest, mm1); \
372 pavg_r2r (mm1, mm0); \
374 movq_r2m (mm0, *p_dest); \
375 p_dest += i_stride; \
/*****************************************************************************
 * __MotionComponents: generate all eight routines for one block size
 *****************************************************************************
 * Expands, for the given (width, height), to the four *_copy generators
 * followed by the four *_avg generators, one for each of the x_y, X_y, x_Y
 * and X_Y variants.
 *****************************************************************************/
380 #define __MotionComponents(width,height) \
381 __MotionComponent_x_y_copy(width,height) \
382 __MotionComponent_X_y_copy(width,height) \
383 __MotionComponent_x_Y_copy(width,height) \
384 __MotionComponent_X_Y_copy(width,height) \
385 __MotionComponent_x_y_avg(width,height) \
386 __MotionComponent_X_y_avg(width,height) \
387 __MotionComponent_x_Y_avg(width,height) \
388 __MotionComponent_X_Y_avg(width,height)
/* Instantiate the eight routines for each supported (width, height) pair.
 * The trailing comments list chroma formats; presumably these are the formats
 * in which a block of that size occurs - confirm against the callers. */
390 __MotionComponents (16,16) /* 444, 422, 420 */
391 __MotionComponents (16,8) /* 444, 422, 420 */
392 __MotionComponents (8,8) /* 422, 420 */
393 __MotionComponents (8,4) /* 420 */
395 __MotionComponents (8,16) /* 422 */