1 /*****************************************************************************
2 * motionaltivec.c : Altivec motion compensation module for vlc
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: motionaltivec.c,v 1.1 2001/09/05 16:07:49 massiot Exp $
7 * Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
8 * Paul Mackerras <paulus@linuxcare.com.au>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 #define MODULE_NAME motionaltivec
26 #include "modules_inner.h"
28 /*****************************************************************************
30 *****************************************************************************/
#include <stdlib.h>                                      /* malloc(), free() */
#include <string.h>                                      /* memcpy() */

#include "common.h"                                      /* boolean_t, byte_t */
#include "modules_export.h"
44 /*****************************************************************************
45 * Local and extern prototypes.
46 *****************************************************************************/
47 static void motion_getfunctions( function_list_t * p_function_list );
49 /*****************************************************************************
50 * Build configuration tree.
51 *****************************************************************************/
/* Configuration tree for this module: a window with a single comment.
 * NOTE(review): the MODULE_CONFIG_START/STOP wrappers are not visible
 * in this excerpt. */
53 ADD_WINDOW( "Configuration for Altivec motion compensation module" )
54 ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
58 p_module->i_capabilities = MODULE_CAPABILITY_NULL
59 | MODULE_CAPABILITY_MOTION;
60 p_module->psz_longname = "MMX motion compensation module";
64 motion_getfunctions( &p_module->p_functions->motion );
/* Module deactivation: no resources were allocated, nothing to free. */
67 MODULE_DEACTIVATE_START
68 MODULE_DEACTIVATE_STOP
70 /*****************************************************************************
71 * motion_Probe: probe the CPU and return a score for this module
72 *****************************************************************************/
/*
 * motion_Probe: score this module for the plugin selector.
 * NOTE(review): body is only partially visible — the return statements
 * and closing brace are missing from this excerpt.
 */
73 static int motion_Probe( probedata_t *p_data )
/* Refuse to run on CPUs without AltiVec support. */
75 if( !TestCPU( CPU_CAPABILITY_ALTIVEC ) )
/* An explicit user request for this method presumably wins the probe
 * with a top score — TODO confirm against the elided return values. */
80 if( TestMethod( MOTION_METHOD_VAR, "motionaltivec" )
81 || TestMethod( MOTION_METHOD_VAR, "altivec" ) )
89 /*****************************************************************************
90 * Motion compensation in Altivec
91 *****************************************************************************/
/* Copy 8 / 16 contiguous bytes from s to d (used by the 8-wide kernels
 * to store the low half of a vector).
 * Fix: the old "*(long long *)(d) = *(long long *)(s)" form violated
 * strict aliasing and assumed suitable alignment (undefined behaviour);
 * memcpy with a constant size compiles to the same wide load/store. */
#define COPY_8(d, s) (memcpy((d), (s), 8))
#define COPY_16(d, s) (memcpy((d), (s), 16))
/*
 * MC_put_16_altivec: copy prediction for a 16-pixel-wide block.
 * Two aligned loads straddle the possibly-unaligned ref; vec_perm with
 * the vec_lvsl control merges them into one realigned 16-byte row.
 * NOTE(review): excerpt is incomplete — the row loop, the aligned-store
 * path and the closing braces are not visible here.
 */
98 MC_put_16_altivec(uint8_t * dest, uint8_t * ref, int stride, int height)
100 vector unsigned char rshift, refw0, refw1, d;
/* Permute control that realigns the unaligned ref stream. */
103 rshift = vec_lvsl(0, ref);
104 refw0 = vec_ld(0, ref);
105 refw1 = vec_ld(16, ref);
106 d = vec_perm(refw0, refw1, rshift);
107 if ((unsigned long)dest & 15) {
108 /* unaligned store, yuck */
109 vector unsigned char x = d;
/*
 * MC_avg_16_altivec: average a realigned 16-byte reference row into the
 * existing contents of dest (prediction averaging).
 * NOTE(review): excerpt is incomplete — the row loop, the aligned path
 * and the closing braces are not visible here.
 */
119 MC_avg_16_altivec(uint8_t * dest, uint8_t * ref, int stride, int height)
121 vector unsigned char rshift, refw0, refw1;
122 vector unsigned char r, d;
/* Realign the reference row from two aligned loads. */
125 rshift = vec_lvsl(0, ref);
126 refw0 = vec_ld(0, ref);
127 refw1 = vec_ld(16, ref);
128 r = vec_perm(refw0, refw1, rshift);
129 if ((unsigned long)dest & 15) {
130 /* unaligned load/store, yuck */
131 vector unsigned char dw0, dw1, dshift, mask;
/* Rotate the result into store position, then read-modify-write the two
 * aligned vectors covering dest: vec_sel with a 0/255 boundary mask
 * averages only the bytes that belong to this row. */
133 dshift = vec_lvsr(0, dest);
134 dw0 = vec_ld(0, dest);
135 dw1 = vec_ld(16, dest);
136 d = vec_perm(r, r, dshift);
137 mask = vec_perm((vector unsigned char)(0),
138 (vector unsigned char)(255), dshift);
139 dw0 = vec_sel(dw0, vec_avg(dw0, d), mask);
140 dw1 = vec_sel(vec_avg(dw1, d), dw1, mask);
141 vec_st(dw0, 0, dest);
142 vec_st(dw1, 16, dest);
/*
 * MC_put_x16_altivec: copy prediction, 16 wide, horizontal half-pel.
 * t0 = ref[i], t1 = ref[i+1] via two permute controls one byte apart.
 * NOTE(review): excerpt incomplete — `d` is used below but never
 * assigned in the visible lines; presumably d = vec_avg(t0, t1) on an
 * elided line. The row loop and closing braces are also missing.
 */
154 MC_put_x16_altivec(uint8_t * dest, uint8_t * ref, int stride, int height)
156 vector unsigned char rshift0, rshift1, refw0, refw1;
157 vector unsigned char t0, t1, d, one;
159 one = (vector unsigned char)(1);
161 rshift0 = vec_lvsl(0, ref);
162 rshift1 = vec_add(rshift0, one);
163 refw0 = vec_ld(0, ref);
164 refw1 = vec_ld(16, ref);
165 t0 = vec_perm(refw0, refw1, rshift0);
166 t1 = vec_perm(refw0, refw1, rshift1);
168 if ((unsigned long)dest & 15) {
169 /* unaligned store, yuck */
170 vector unsigned char x = d;
/*
 * MC_avg_x16_altivec: averaging prediction, 16 wide, horizontal
 * half-pel (t0/t1 are the row and the row shifted one pixel).
 * NOTE(review): excerpt incomplete — `r` is used below but never
 * assigned in the visible lines; presumably r = vec_avg(t0, t1) on an
 * elided line. Row loop, aligned path and closing braces also missing.
 */
180 MC_avg_x16_altivec(uint8_t * dest, uint8_t * ref, int stride, int height)
182 vector unsigned char rshift0, rshift1, refw0, refw1;
183 vector unsigned char t0, t1, r, d, one;
185 one = (vector unsigned char)(1);
187 rshift0 = vec_lvsl(0, ref);
188 rshift1 = vec_add(rshift0, one);
189 refw0 = vec_ld(0, ref);
190 refw1 = vec_ld(16, ref);
191 t0 = vec_perm(refw0, refw1, rshift0);
192 t1 = vec_perm(refw0, refw1, rshift1);
194 if ((unsigned long)dest & 15) {
195 /* unaligned load/store, yuck */
196 vector unsigned char dw0, dw1, dshift, mask;
/* Read-modify-write the two aligned vectors covering dest, averaging
 * only the bytes selected by the 0/255 boundary mask. */
198 dshift = vec_lvsr(0, dest);
199 dw0 = vec_ld(0, dest);
200 dw1 = vec_ld(16, dest);
201 d = vec_perm(r, r, dshift);
202 mask = vec_perm((vector unsigned char)(0),
203 (vector unsigned char)(255), dshift);
204 dw0 = vec_sel(dw0, vec_avg(dw0, d), mask);
205 dw1 = vec_sel(vec_avg(dw1, d), dw1, mask);
206 vec_st(dw0, 0, dest);
207 vec_st(dw1, 16, dest);
/*
 * MC_put_y16_altivec: copy prediction, 16 wide, vertical half-pel —
 * averages two consecutive reference rows r0 and r1.
 * NOTE(review): excerpt incomplete — ref is presumably advanced by
 * stride between the two loads, and d presumably = vec_avg(r0, r1), on
 * elided lines; the row loop and closing braces are also missing.
 */
219 MC_put_y16_altivec(uint8_t * dest, uint8_t * ref, int stride, int height)
221 vector unsigned char rshift, refw0, refw1;
222 vector unsigned char r0, r1, d;
224 rshift = vec_lvsl(0, ref);
225 refw0 = vec_ld(0, ref);
226 refw1 = vec_ld(16, ref);
227 r0 = vec_perm(refw0, refw1, rshift);
230 rshift = vec_lvsl(0, ref);
231 refw0 = vec_ld(0, ref);
232 refw1 = vec_ld(16, ref);
233 r1 = vec_perm(refw0, refw1, rshift);
236 if ((unsigned long)dest & 15) {
237 /* unaligned store, yuck */
238 vector unsigned char x = d;
/*
 * MC_avg_y16_altivec: averaging prediction, 16 wide, vertical half-pel.
 * NOTE(review): excerpt incomplete — ref is presumably advanced by
 * stride between the two loads and r presumably = vec_avg(r0, r1) on
 * elided lines; row loop, aligned path and closing braces also missing.
 */
247 MC_avg_y16_altivec(uint8_t * dest, uint8_t * ref, int stride, int height)
249 vector unsigned char rshift, refw0, refw1;
250 vector unsigned char r0, r1, r, d;
252 rshift = vec_lvsl(0, ref);
253 refw0 = vec_ld(0, ref);
254 refw1 = vec_ld(16, ref);
255 r0 = vec_perm(refw0, refw1, rshift);
258 rshift = vec_lvsl(0, ref);
259 refw0 = vec_ld(0, ref);
260 refw1 = vec_ld(16, ref);
261 r1 = vec_perm(refw0, refw1, rshift);
264 if ((unsigned long)dest & 15) {
265 /* unaligned load/store, yuck */
266 vector unsigned char dw0, dw1, dshift, mask;
/* Masked read-modify-write of the two aligned vectors covering dest. */
268 dshift = vec_lvsr(0, dest);
269 dw0 = vec_ld(0, dest);
270 dw1 = vec_ld(16, dest);
271 d = vec_perm(r, r, dshift);
272 mask = vec_perm((vector unsigned char)(0),
273 (vector unsigned char)(255), dshift);
274 dw0 = vec_sel(dw0, vec_avg(dw0, d), mask);
275 dw1 = vec_sel(vec_avg(dw1, d), dw1, mask);
276 vec_st(dw0, 0, dest);
277 vec_st(dw1, 16, dest);
/*
 * MC_put_xy16_altivec: copy prediction, 16 wide, diagonal half-pel.
 * Each row is horizontally averaged (r0, r1 = vec_avg of the row and
 * the row shifted one pixel); the two rows are then presumably averaged
 * into d on an elided line — TODO confirm.
 * NOTE(review): excerpt incomplete — the ref advance between rows, the
 * row loop and the closing braces are not visible here.
 */
288 MC_put_xy16_altivec(uint8_t * dest, uint8_t * ref, int stride, int height)
290 vector unsigned char rshift0, rshift1, refw0, refw1;
291 vector unsigned char t0, t1, r0, r1, d, one;
293 rshift0 = vec_lvsl(0, ref);
294 one = (vector unsigned char)(1);
295 rshift1 = vec_add(rshift0, one);
296 refw0 = vec_ld(0, ref);
297 refw1 = vec_ld(16, ref);
298 t0 = vec_perm(refw0, refw1, rshift0);
299 t1 = vec_perm(refw0, refw1, rshift1);
300 r0 = vec_avg(t0, t1);
303 rshift0 = vec_lvsl(0, ref);
304 rshift1 = vec_add(rshift0, one);
305 refw0 = vec_ld(0, ref);
306 refw1 = vec_ld(16, ref);
307 t0 = vec_perm(refw0, refw1, rshift0);
308 t1 = vec_perm(refw0, refw1, rshift1);
309 r1 = vec_avg(t0, t1);
312 if ((unsigned long)dest & 15) {
313 /* unaligned store, yuck */
314 vector unsigned char x = d;
/*
 * MC_avg_xy16_altivec: averaging prediction, 16 wide, diagonal
 * half-pel. r0/r1 are the horizontally-averaged rows; r presumably
 * = vec_avg(r0, r1) on an elided line — TODO confirm.
 * NOTE(review): excerpt incomplete — the ref advance between rows, the
 * row loop, the aligned path and closing braces are not visible here.
 */
323 MC_avg_xy16_altivec(uint8_t * dest, uint8_t * ref, int stride, int height)
325 vector unsigned char rshift0, rshift1, refw0, refw1;
326 vector unsigned char t0, t1, r0, r1, r, d, one;
328 rshift0 = vec_lvsl(0, ref);
329 one = (vector unsigned char)(1);
330 rshift1 = vec_add(rshift0, one);
331 refw0 = vec_ld(0, ref);
332 refw1 = vec_ld(16, ref);
333 t0 = vec_perm(refw0, refw1, rshift0);
334 t1 = vec_perm(refw0, refw1, rshift1);
335 r0 = vec_avg(t0, t1);
338 rshift0 = vec_lvsl(0, ref);
339 rshift1 = vec_add(rshift0, one);
340 refw0 = vec_ld(0, ref);
341 refw1 = vec_ld(16, ref);
342 t0 = vec_perm(refw0, refw1, rshift0);
343 t1 = vec_perm(refw0, refw1, rshift1);
344 r1 = vec_avg(t0, t1);
347 if ((unsigned long)dest & 15) {
348 /* unaligned load/store, yuck */
349 vector unsigned char dw0, dw1, dshift, mask;
/* Masked read-modify-write of the two aligned vectors covering dest. */
351 dshift = vec_lvsr(0, dest);
352 dw0 = vec_ld(0, dest);
353 dw1 = vec_ld(16, dest);
354 d = vec_perm(r, r, dshift);
355 mask = vec_perm((vector unsigned char)(0),
356 (vector unsigned char)(255), dshift);
357 dw0 = vec_sel(dw0, vec_avg(dw0, d), mask);
358 dw1 = vec_sel(vec_avg(dw1, d), dw1, mask);
359 vec_st(dw0, 0, dest);
360 vec_st(dw1, 16, dest);
/*
 * MC_put_8_altivec: copy prediction for an 8-pixel-wide block; only the
 * low 8 bytes of the realigned vector are relevant (presumably stored
 * via COPY_8 on elided lines — TODO confirm).
 * NOTE(review): excerpt incomplete — row loop and braces not visible.
 */
371 MC_put_8_altivec(uint8_t * dest, uint8_t * ref, int stride, int height)
373 vector unsigned char rshift, refw0, refw1, d;
376 rshift = vec_lvsl(0, ref);
377 refw0 = vec_ld(0, ref);
378 refw1 = vec_ld(16, ref);
379 d = vec_perm(refw0, refw1, rshift);
/*
 * MC_avg_8_altivec: averaging prediction for an 8-pixel-wide block.
 * NOTE(review): excerpt incomplete — the dest load/average/store, the
 * row loop and the braces are not visible here.
 */
387 MC_avg_8_altivec(uint8_t * dest, uint8_t * ref, int stride, int height)
389 vector unsigned char rshift, refw0, refw1;
390 vector unsigned char r, d;
393 rshift = vec_lvsl(0, ref);
394 refw0 = vec_ld(0, ref);
395 refw1 = vec_ld(16, ref);
396 r = vec_perm(refw0, refw1, rshift);
/*
 * MC_put_x8_altivec: copy prediction, 8 wide, horizontal half-pel
 * (t0 = row, t1 = row shifted one pixel; presumably averaged and the
 * low 8 bytes stored on elided lines — TODO confirm).
 * NOTE(review): excerpt incomplete — row loop and braces not visible.
 */
406 MC_put_x8_altivec(uint8_t * dest, uint8_t * ref, int stride, int height)
408 vector unsigned char rshift0, rshift1, refw0, refw1;
409 vector unsigned char t0, t1, d, one;
411 one = (vector unsigned char)(1);
413 rshift0 = vec_lvsl(0, ref);
414 rshift1 = vec_add(rshift0, one);
415 refw0 = vec_ld(0, ref);
416 refw1 = vec_ld(16, ref);
417 t0 = vec_perm(refw0, refw1, rshift0);
418 t1 = vec_perm(refw0, refw1, rshift1);
/*
 * MC_avg_x8_altivec: averaging prediction, 8 wide, horizontal half-pel.
 * NOTE(review): excerpt incomplete — the average with dest, the row
 * loop and the braces are not visible here.
 */
427 MC_avg_x8_altivec(uint8_t * dest, uint8_t * ref, int stride, int height)
429 vector unsigned char rshift0, rshift1, refw0, refw1;
430 vector unsigned char t0, t1, r, d, one;
432 one = (vector unsigned char)(1);
434 rshift0 = vec_lvsl(0, ref);
435 rshift1 = vec_add(rshift0, one);
436 refw0 = vec_ld(0, ref);
437 refw1 = vec_ld(16, ref);
438 t0 = vec_perm(refw0, refw1, rshift0);
439 t1 = vec_perm(refw0, refw1, rshift1);
/*
 * MC_put_y8_altivec: copy prediction, 8 wide, vertical half-pel —
 * averages rows r0 and r1 (ref presumably advanced by stride between
 * the two loads on an elided line — TODO confirm).
 * NOTE(review): excerpt incomplete — row loop and braces not visible.
 */
450 MC_put_y8_altivec(uint8_t * dest, uint8_t * ref, int stride, int height)
452 vector unsigned char rshift, refw0, refw1;
453 vector unsigned char r0, r1, d;
455 rshift = vec_lvsl(0, ref);
456 refw0 = vec_ld(0, ref);
457 refw1 = vec_ld(16, ref);
458 r0 = vec_perm(refw0, refw1, rshift);
461 rshift = vec_lvsl(0, ref);
462 refw0 = vec_ld(0, ref);
463 refw1 = vec_ld(16, ref);
464 r1 = vec_perm(refw0, refw1, rshift);
/*
 * MC_avg_y8_altivec: averaging prediction, 8 wide, vertical half-pel.
 * NOTE(review): excerpt incomplete — the ref advance between rows, the
 * average with dest, the row loop and the braces are not visible here.
 */
473 MC_avg_y8_altivec(uint8_t * dest, uint8_t * ref, int stride, int height)
475 vector unsigned char rshift, refw0, refw1;
476 vector unsigned char r0, r1, r, d;
478 rshift = vec_lvsl(0, ref);
479 refw0 = vec_ld(0, ref);
480 refw1 = vec_ld(16, ref);
481 r0 = vec_perm(refw0, refw1, rshift);
484 rshift = vec_lvsl(0, ref);
485 refw0 = vec_ld(0, ref);
486 refw1 = vec_ld(16, ref);
487 r1 = vec_perm(refw0, refw1, rshift);
/*
 * MC_put_xy8_altivec: copy prediction, 8 wide, diagonal half-pel.
 * r0/r1 are horizontally-averaged rows; presumably combined with
 * vec_avg and stored (low 8 bytes) on elided lines — TODO confirm.
 * NOTE(review): excerpt incomplete — row loop and braces not visible.
 */
498 MC_put_xy8_altivec(uint8_t * dest, uint8_t * ref, int stride, int height)
500 vector unsigned char rshift0, rshift1, refw0, refw1;
501 vector unsigned char t0, t1, r0, r1, d, one;
503 rshift0 = vec_lvsl(0, ref);
504 one = (vector unsigned char)(1);
505 rshift1 = vec_add(rshift0, one);
506 refw0 = vec_ld(0, ref);
507 refw1 = vec_ld(16, ref);
508 t0 = vec_perm(refw0, refw1, rshift0);
509 t1 = vec_perm(refw0, refw1, rshift1);
510 r0 = vec_avg(t0, t1);
513 rshift0 = vec_lvsl(0, ref);
514 rshift1 = vec_add(rshift0, one);
515 refw0 = vec_ld(0, ref);
516 refw1 = vec_ld(16, ref);
517 t0 = vec_perm(refw0, refw1, rshift0);
518 t1 = vec_perm(refw0, refw1, rshift1);
519 r1 = vec_avg(t0, t1);
/*
 * MC_avg_xy8_altivec: averaging prediction, 8 wide, diagonal half-pel.
 * NOTE(review): excerpt incomplete — the combination of r0/r1, the
 * average with dest, the row loop and the braces are not visible here.
 */
528 MC_avg_xy8_altivec(uint8_t * dest, uint8_t * ref, int stride, int height)
530 vector unsigned char rshift0, rshift1, refw0, refw1;
531 vector unsigned char t0, t1, r0, r1, r, d, one;
533 rshift0 = vec_lvsl(0, ref);
534 one = (vector unsigned char)(1);
535 rshift1 = vec_add(rshift0, one);
536 refw0 = vec_ld(0, ref);
537 refw1 = vec_ld(16, ref);
538 t0 = vec_perm(refw0, refw1, rshift0);
539 t1 = vec_perm(refw0, refw1, rshift1);
540 r0 = vec_avg(t0, t1);
543 rshift0 = vec_lvsl(0, ref);
544 rshift1 = vec_add(rshift0, one);
545 refw0 = vec_ld(0, ref);
546 refw1 = vec_ld(16, ref);
547 t0 = vec_perm(refw0, refw1, rshift0);
548 t1 = vec_perm(refw0, refw1, rshift1);
549 r1 = vec_avg(t0, t1);
559 /*****************************************************************************
560 * Functions exported as capabilities. They are declared as static so that
561 * we don't pollute the namespace too much.
562 *****************************************************************************/
/*
 * motion_getfunctions: fill in the exported function list.
 * The table is indexed [copy|avg][16|8][full, x, y, xy half-pel],
 * per the "Copying"/"Averaging" comments and the listing order.
 * NOTE(review): excerpt incomplete — array braces, the full parameter
 * list of the function-pointer type, the #undef for `list` and the
 * closing brace are not visible here.
 */
563 static void motion_getfunctions( function_list_t * p_function_list )
565 static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *,
569 /* Copying functions */
572 MC_put_16_altivec, MC_put_x16_altivec, MC_put_y16_altivec, MC_put_xy16_altivec
576 MC_put_8_altivec, MC_put_x8_altivec, MC_put_y8_altivec, MC_put_xy8_altivec
580 /* Averaging functions */
583 MC_avg_16_altivec, MC_avg_x16_altivec, MC_avg_y16_altivec, MC_avg_xy16_altivec
587 MC_avg_8_altivec, MC_avg_x8_altivec, MC_avg_y8_altivec, MC_avg_xy8_altivec
592 p_function_list->pf_probe = motion_Probe;
/* Copy all 2*2*4 = 16 function pointers into the exported list. */
594 #define list p_function_list->functions.motion
595 memcpy( list.ppppf_motion, ppppf_motion, sizeof( void * ) * 16 );