1 /*****************************************************************************
2 * motionaltivec.c : Altivec motion compensation module for vlc
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: motionaltivec.c,v 1.10 2002/02/15 13:32:53 sam Exp $
7 * Authors: Michel Lespinasse <walken@zoy.org>
8 * Paul Mackerras <paulus@linuxcare.com.au>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 #ifndef __BUILD_ALTIVEC_ASM__
27 /*****************************************************************************
29 *****************************************************************************/
30 #include <stdlib.h> /* malloc(), free() */
34 #include <videolan/vlc.h>
36 /*****************************************************************************
37 * Local and extern prototypes.
38 *****************************************************************************/
39 static void motion_getfunctions( function_list_t * p_function_list );
41 /*****************************************************************************
42 * Build configuration tree.
43 *****************************************************************************/
48 SET_DESCRIPTION( "Altivec motion compensation module" )
49 ADD_CAPABILITY( MOTION, 150 )
50 ADD_REQUIREMENT( ALTIVEC )
51 ADD_SHORTCUT( "altivec" )
52 ADD_SHORTCUT( "motionaltivec" )
56 motion_getfunctions( &p_module->p_functions->motion );
59 MODULE_DEACTIVATE_START
60 MODULE_DEACTIVATE_STOP
62 /*****************************************************************************
63 * Motion compensation in Altivec
64 *****************************************************************************/
66 #ifndef CAN_COMPILE_C_ALTIVEC
69 * The asm code is generated with:
71 * gcc-2.95 -fvec -D__BUILD_ALTIVEC_ASM__ -O9 -fomit-frame-pointer -mregnames -S
72 * motion_comp_altivec.c
74 * sed 's/.L/._L/g' motion_comp_altivec.s |
75 * awk '{args=""; len=split ($2, arg, ",");
76 * for (i=1; i<=len; i++) { a=arg[i]; if (i<len) a=a",";
77 * args = args sprintf ("%-6s", a) }
78 * printf ("\t\"\t%-16s%-24s\\n\"\n", $1, args) }' |
/* Put (plain copy, no interpolation) of a 16-byte-wide block.
 * Visible register use: %r3 = dest, %r4 = ref, %r5 = stride,
 * %r6 = height (halved then decremented: two rows per iteration);
 * lvx pair + vperm realigns the unaligned source, stvx stores 16 bytes.
 * NOTE(review): numbered listing with elided lines (asm() wrapper,
 * loop label/branch missing) -- consult the full file before editing. */
82 static void MC_put_o_16_altivec (uint8_t * dest, uint8_t * ref,
83 int stride, int height)
86 " srawi %r6, %r6, 1 \n"
88 " addi %r6, %r6, -1 \n"
89 " lvsl %v12, 0, %r4 \n"
92 " lvx %v0, %r9, %r4 \n"
93 " add %r0, %r5, %r5 \n"
94 " vperm %v13, %v1, %v0, %v12 \n"
95 " add %r4, %r4, %r5 \n"
99 " lvx %v0, %r9, %r4 \n"
100 " stvx %v13, 0, %r3 \n"
101 " vperm %v13, %v1, %v0, %v12 \n"
102 " add %r4, %r4, %r5 \n"
103 " lvx %v1, 0, %r4 \n"
104 " lvx %v0, %r9, %r4 \n"
105 " stvx %v13, %r5, %r3 \n"
106 " vperm %v13, %v1, %v0, %v12 \n"
107 " add %r4, %r4, %r5 \n"
108 " add %r3, %r3, %r0 \n"
110 " lvx %v0, %r9, %r4 \n"
111 " lvx %v1, 0, %r4 \n"
112 " stvx %v13, 0, %r3 \n"
113 " vperm %v13, %v1, %v0, %v12 \n"
114 " stvx %v13, %r5, %r3 \n"
/* Put (plain copy) of an 8-byte-wide block.
 * lvsl + vmrghb + vpkuhum builds an 8-byte realignment permute vector;
 * each row is written as two 4-byte stvewx word stores (offsets 0 and %r9).
 * NOTE(review): numbered listing with elided lines (asm() wrapper,
 * loop label/branch missing) -- consult the full file before editing. */
118 static void MC_put_o_8_altivec (uint8_t * dest, uint8_t * ref,
119 int stride, int height)
122 " lvsl %v12, 0, %r4 \n"
123 " lvsl %v1, %r5, %r4 \n"
124 " vmrghb %v12, %v12, %v12 \n"
125 " srawi %r6, %r6, 1 \n"
127 " vmrghb %v1, %v1, %v1 \n"
128 " addi %r6, %r6, -1 \n"
129 " vpkuhum %v10, %v12, %v12 \n"
130 " lvx %v13, 0, %r4 \n"
132 " vpkuhum %v11, %v1, %v1 \n"
133 " lvx %v0, %r9, %r4 \n"
134 " add %r4, %r4, %r5 \n"
135 " vperm %v12, %v13, %v0, %v10 \n"
138 " lvx %v0, %r9, %r4 \n"
139 " lvx %v13, 0, %r4 \n"
140 " stvewx %v12, 0, %r3 \n"
142 " vperm %v1, %v13, %v0, %v11 \n"
143 " stvewx %v12, %r9, %r3 \n"
144 " add %r4, %r4, %r5 \n"
146 " lvx %v0, %r9, %r4 \n"
147 " lvx %v13, 0, %r4 \n"
148 " add %r3, %r3, %r5 \n"
149 " stvewx %v1, 0, %r3 \n"
150 " vperm %v12, %v13, %v0, %v10 \n"
152 " stvewx %v1, %r9, %r3 \n"
153 " add %r4, %r4, %r5 \n"
154 " add %r3, %r3, %r5 \n"
157 " lvx %v0, %r9, %r4 \n"
158 " lvx %v13, 0, %r4 \n"
159 " stvewx %v12, 0, %r3 \n"
161 " vperm %v1, %v13, %v0, %v11 \n"
162 " stvewx %v12, %r9, %r3 \n"
163 " add %r3, %r3, %r5 \n"
164 " stvewx %v1, 0, %r3 \n"
165 " stvewx %v1, %r9, %r3 \n"
/* Put with horizontal half-pel interpolation, 16-byte-wide block:
 * two vperm selections (offsets p and p+1, via vaddubm with splat(1))
 * are averaged with vavgub before each stvx store.
 * NOTE(review): numbered listing with elided lines (asm() wrapper,
 * loop label/branch missing) -- consult the full file before editing. */
169 static void MC_put_x_16_altivec (uint8_t * dest, uint8_t * ref,
170 int stride, int height)
173 " lvsl %v11, 0, %r4 \n"
174 " vspltisb %v0, 1 \n"
176 " lvx %v12, 0, %r4 \n"
177 " vaddubm %v10, %v11, %v0 \n"
178 " lvx %v13, %r9, %r4 \n"
179 " srawi %r6, %r6, 1 \n"
180 " addi %r6, %r6, -1 \n"
181 " vperm %v1, %v12, %v13, %v10 \n"
182 " vperm %v0, %v12, %v13, %v11 \n"
184 " add %r0, %r5, %r5 \n"
185 " add %r4, %r4, %r5 \n"
186 " vavgub %v0, %v0, %v1 \n"
189 " lvx %v12, 0, %r4 \n"
190 " lvx %v13, %r9, %r4 \n"
191 " stvx %v0, 0, %r3 \n"
192 " vperm %v1, %v12, %v13, %v10 \n"
193 " add %r4, %r4, %r5 \n"
194 " vperm %v0, %v12, %v13, %v11 \n"
195 " lvx %v12, 0, %r4 \n"
196 " lvx %v13, %r9, %r4 \n"
197 " vavgub %v0, %v0, %v1 \n"
198 " stvx %v0, %r5, %r3 \n"
199 " vperm %v1, %v12, %v13, %v10 \n"
200 " add %r4, %r4, %r5 \n"
201 " vperm %v0, %v12, %v13, %v11 \n"
202 " add %r3, %r3, %r0 \n"
203 " vavgub %v0, %v0, %v1 \n"
205 " lvx %v13, %r9, %r4 \n"
206 " lvx %v12, 0, %r4 \n"
207 " stvx %v0, 0, %r3 \n"
208 " vperm %v1, %v12, %v13, %v10 \n"
209 " vperm %v0, %v12, %v13, %v11 \n"
210 " vavgub %v0, %v0, %v1 \n"
211 " stvx %v0, %r5, %r3 \n"
/* Put with horizontal half-pel interpolation, 8-byte-wide block:
 * packed 8-byte permutes (vmrghb/vpkuhum) at offsets p and p+1 are
 * averaged with vavgub; rows written via two stvewx word stores.
 * NOTE(review): numbered listing with elided lines (asm() wrapper,
 * loop label/branch missing) -- consult the full file before editing. */
215 static void MC_put_x_8_altivec (uint8_t * dest, uint8_t * ref,
216 int stride, int height)
219 " lvsl %v0, 0, %r4 \n"
220 " vspltisb %v13, 1 \n"
221 " lvsl %v10, %r5, %r4 \n"
222 " vmrghb %v0, %v0, %v0 \n"
224 " lvx %v11, 0, %r4 \n"
225 " vmrghb %v10, %v10, %v10 \n"
226 " vpkuhum %v8, %v0, %v0 \n"
227 " lvx %v12, %r9, %r4 \n"
228 " srawi %r6, %r6, 1 \n"
229 " vpkuhum %v9, %v10, %v10 \n"
230 " vaddubm %v7, %v8, %v13 \n"
231 " addi %r6, %r6, -1 \n"
232 " vperm %v1, %v11, %v12, %v8 \n"
234 " vaddubm %v13, %v9, %v13 \n"
235 " add %r4, %r4, %r5 \n"
236 " vperm %v0, %v11, %v12, %v7 \n"
237 " vavgub %v0, %v1, %v0 \n"
240 " lvx %v12, %r9, %r4 \n"
241 " lvx %v11, 0, %r4 \n"
242 " stvewx %v0, 0, %r3 \n"
244 " vperm %v1, %v11, %v12, %v13 \n"
245 " stvewx %v0, %r9, %r3 \n"
246 " vperm %v0, %v11, %v12, %v9 \n"
247 " add %r4, %r4, %r5 \n"
249 " lvx %v12, %r9, %r4 \n"
250 " vavgub %v10, %v0, %v1 \n"
251 " lvx %v11, 0, %r4 \n"
252 " add %r3, %r3, %r5 \n"
253 " stvewx %v10, 0, %r3 \n"
254 " vperm %v1, %v11, %v12, %v7 \n"
255 " vperm %v0, %v11, %v12, %v8 \n"
257 " stvewx %v10, %r9, %r3 \n"
258 " add %r4, %r4, %r5 \n"
259 " vavgub %v0, %v0, %v1 \n"
260 " add %r3, %r3, %r5 \n"
263 " lvx %v12, %r9, %r4 \n"
264 " lvx %v11, 0, %r4 \n"
265 " stvewx %v0, 0, %r3 \n"
267 " vperm %v1, %v11, %v12, %v13 \n"
268 " stvewx %v0, %r9, %r3 \n"
269 " vperm %v0, %v11, %v12, %v9 \n"
270 " add %r3, %r3, %r5 \n"
271 " vavgub %v10, %v0, %v1 \n"
272 " stvewx %v10, 0, %r3 \n"
273 " stvewx %v10, %r9, %r3 \n"
/* Put with vertical half-pel interpolation, 16-byte-wide block:
 * each output row is vavgub of two consecutive realigned source rows
 * (the previous row's vperm result is kept across iterations).
 * NOTE(review): numbered listing with elided lines (asm() wrapper,
 * loop label/branch missing) -- consult the full file before editing. */
277 static void MC_put_y_16_altivec (uint8_t * dest, uint8_t * ref,
278 int stride, int height)
282 " lvsl %v10, 0, %r4 \n"
283 " lvx %v13, 0, %r4 \n"
284 " lvx %v1, %r9, %r4 \n"
285 " add %r4, %r4, %r5 \n"
286 " vperm %v12, %v13, %v1, %v10 \n"
287 " srawi %r6, %r6, 1 \n"
288 " lvx %v13, 0, %r4 \n"
289 " lvx %v1, %r9, %r4 \n"
290 " addi %r6, %r6, -1 \n"
291 " vperm %v11, %v13, %v1, %v10 \n"
293 " add %r0, %r5, %r5 \n"
294 " add %r4, %r4, %r5 \n"
295 " vavgub %v0, %v12, %v11 \n"
298 " lvx %v13, 0, %r4 \n"
299 " lvx %v1, %r9, %r4 \n"
300 " stvx %v0, 0, %r3 \n"
301 " vperm %v12, %v13, %v1, %v10 \n"
302 " add %r4, %r4, %r5 \n"
303 " lvx %v13, 0, %r4 \n"
304 " lvx %v1, %r9, %r4 \n"
305 " vavgub %v0, %v12, %v11 \n"
306 " stvx %v0, %r5, %r3 \n"
307 " vperm %v11, %v13, %v1, %v10 \n"
308 " add %r4, %r4, %r5 \n"
309 " add %r3, %r3, %r0 \n"
310 " vavgub %v0, %v12, %v11 \n"
312 " lvx %v1, %r9, %r4 \n"
313 " lvx %v13, 0, %r4 \n"
314 " stvx %v0, 0, %r3 \n"
315 " vperm %v12, %v13, %v1, %v10 \n"
316 " vavgub %v0, %v12, %v11 \n"
317 " stvx %v0, %r5, %r3 \n"
/* Put with vertical half-pel interpolation, 8-byte-wide block:
 * vavgub of two consecutive packed (8-byte) realigned rows; rows are
 * written with two stvewx word stores.
 * NOTE(review): numbered listing with elided lines (asm() wrapper,
 * loop label/branch missing) -- consult the full file before editing. */
321 static void MC_put_y_8_altivec (uint8_t * dest, uint8_t * ref,
322 int stride, int height)
325 " lvsl %v13, 0, %r4 \n"
326 " lvsl %v11, %r5, %r4 \n"
327 " vmrghb %v13, %v13, %v13 \n"
329 " lvx %v12, 0, %r4 \n"
330 " vmrghb %v11, %v11, %v11 \n"
331 " lvx %v1, %r9, %r4 \n"
332 " vpkuhum %v9, %v13, %v13 \n"
333 " add %r4, %r4, %r5 \n"
334 " vpkuhum %v10, %v11, %v11 \n"
335 " vperm %v13, %v12, %v1, %v9 \n"
336 " srawi %r6, %r6, 1 \n"
337 " lvx %v12, 0, %r4 \n"
338 " lvx %v1, %r9, %r4 \n"
339 " addi %r6, %r6, -1 \n"
340 " vperm %v11, %v12, %v1, %v10 \n"
342 " add %r4, %r4, %r5 \n"
343 " vavgub %v0, %v13, %v11 \n"
346 " lvx %v1, %r9, %r4 \n"
347 " lvx %v12, 0, %r4 \n"
348 " stvewx %v0, 0, %r3 \n"
350 " vperm %v13, %v12, %v1, %v9 \n"
351 " stvewx %v0, %r9, %r3 \n"
352 " add %r4, %r4, %r5 \n"
353 " vavgub %v0, %v13, %v11 \n"
355 " lvx %v1, %r9, %r4 \n"
356 " lvx %v12, 0, %r4 \n"
357 " add %r3, %r3, %r5 \n"
358 " stvewx %v0, 0, %r3 \n"
359 " vperm %v11, %v12, %v1, %v10 \n"
361 " stvewx %v0, %r9, %r3 \n"
362 " vavgub %v0, %v13, %v11 \n"
363 " add %r4, %r4, %r5 \n"
364 " add %r3, %r3, %r5 \n"
367 " lvx %v1, %r9, %r4 \n"
368 " lvx %v12, 0, %r4 \n"
369 " stvewx %v0, 0, %r3 \n"
371 " vperm %v13, %v12, %v1, %v9 \n"
372 " stvewx %v0, %r9, %r3 \n"
373 " add %r3, %r3, %r5 \n"
374 " vavgub %v0, %v13, %v11 \n"
375 " stvewx %v0, 0, %r3 \n"
376 " stvewx %v0, %r9, %r3 \n"
/* Put with horizontal+vertical half-pel interpolation, 16-byte-wide:
 * averages the two per-row horizontal averages of adjacent rows; the
 * vxor/vor/vand-with-splat(1)/vsububm sequence presumably implements
 * the rounding-correction trick for an exact 4-point average -- verify
 * against the C intrinsic version in the full file.
 * NOTE(review): numbered listing with elided lines (asm() wrapper,
 * loop label/branch missing) -- consult the full file before editing. */
380 static void MC_put_xy_16_altivec (uint8_t * dest, uint8_t * ref,
381 int stride, int height)
384 " lvsl %v5, 0, %r4 \n"
385 " vspltisb %v3, 1 \n"
387 " lvx %v1, 0, %r4 \n"
388 " vaddubm %v4, %v5, %v3 \n"
389 " lvx %v0, %r9, %r4 \n"
390 " add %r4, %r4, %r5 \n"
391 " vperm %v10, %v1, %v0, %v4 \n"
392 " srawi %r6, %r6, 1 \n"
393 " vperm %v11, %v1, %v0, %v5 \n"
394 " addi %r6, %r6, -1 \n"
395 " lvx %v1, 0, %r4 \n"
397 " lvx %v0, %r9, %r4 \n"
398 " vavgub %v9, %v11, %v10 \n"
399 " vxor %v8, %v11, %v10 \n"
400 " add %r0, %r5, %r5 \n"
401 " vperm %v10, %v1, %v0, %v4 \n"
402 " add %r4, %r4, %r5 \n"
403 " vperm %v11, %v1, %v0, %v5 \n"
404 " vxor %v6, %v11, %v10 \n"
405 " vavgub %v7, %v11, %v10 \n"
406 " vor %v0, %v8, %v6 \n"
407 " vxor %v13, %v9, %v7 \n"
408 " vand %v0, %v3, %v0 \n"
409 " vavgub %v1, %v9, %v7 \n"
410 " vand %v0, %v0, %v13 \n"
411 " vsububm %v13, %v1, %v0 \n"
414 " lvx %v1, 0, %r4 \n"
415 " lvx %v0, %r9, %r4 \n"
416 " stvx %v13, 0, %r3 \n"
417 " vperm %v10, %v1, %v0, %v4 \n"
418 " add %r4, %r4, %r5 \n"
419 " vperm %v11, %v1, %v0, %v5 \n"
420 " lvx %v1, 0, %r4 \n"
421 " lvx %v0, %r9, %r4 \n"
422 " vavgub %v9, %v11, %v10 \n"
423 " vxor %v8, %v11, %v10 \n"
424 " add %r4, %r4, %r5 \n"
425 " vperm %v10, %v1, %v0, %v4 \n"
426 " vavgub %v12, %v9, %v7 \n"
427 " vperm %v11, %v1, %v0, %v5 \n"
428 " vor %v13, %v8, %v6 \n"
429 " vxor %v0, %v9, %v7 \n"
430 " vxor %v6, %v11, %v10 \n"
431 " vand %v13, %v3, %v13 \n"
432 " vavgub %v7, %v11, %v10 \n"
433 " vor %v1, %v8, %v6 \n"
434 " vand %v13, %v13, %v0 \n"
435 " vxor %v0, %v9, %v7 \n"
436 " vand %v1, %v3, %v1 \n"
437 " vsububm %v13, %v12, %v13 \n"
438 " vand %v1, %v1, %v0 \n"
439 " stvx %v13, %r5, %r3 \n"
440 " vavgub %v0, %v9, %v7 \n"
441 " add %r3, %r3, %r0 \n"
442 " vsububm %v13, %v0, %v1 \n"
444 " lvx %v0, %r9, %r4 \n"
445 " lvx %v1, 0, %r4 \n"
446 " stvx %v13, 0, %r3 \n"
447 " vperm %v10, %v1, %v0, %v4 \n"
448 " vperm %v11, %v1, %v0, %v5 \n"
449 " vxor %v8, %v11, %v10 \n"
450 " vavgub %v9, %v11, %v10 \n"
451 " vor %v0, %v8, %v6 \n"
452 " vxor %v13, %v9, %v7 \n"
453 " vand %v0, %v3, %v0 \n"
454 " vavgub %v1, %v9, %v7 \n"
455 " vand %v0, %v0, %v13 \n"
456 " vsububm %v13, %v1, %v0 \n"
457 " stvx %v13, %r5, %r3 \n"
/* Put with horizontal+vertical half-pel interpolation, 8-byte-wide:
 * same avg-of-averages with rounding correction as the 16-wide variant,
 * but using packed 8-byte permutes and stvewx word stores.
 * NOTE(review): numbered listing with elided lines (asm() wrapper,
 * loop label/branch missing) -- consult the full file before editing. */
461 static void MC_put_xy_8_altivec (uint8_t * dest, uint8_t * ref,
462 int stride, int height)
465 " lvsl %v4, 0, %r4 \n"
466 " vspltisb %v3, 1 \n"
467 " lvsl %v5, %r5, %r4 \n"
468 " vmrghb %v4, %v4, %v4 \n"
470 " vmrghb %v5, %v5, %v5 \n"
471 " lvx %v1, 0, %r4 \n"
472 " vpkuhum %v4, %v4, %v4 \n"
473 " lvx %v0, %r9, %r4 \n"
474 " vpkuhum %v5, %v5, %v5 \n"
475 " add %r4, %r4, %r5 \n"
476 " vaddubm %v2, %v4, %v3 \n"
477 " vperm %v11, %v1, %v0, %v4 \n"
478 " srawi %r6, %r6, 1 \n"
479 " vaddubm %v19, %v5, %v3 \n"
480 " addi %r6, %r6, -1 \n"
481 " vperm %v10, %v1, %v0, %v2 \n"
483 " lvx %v1, 0, %r4 \n"
484 " lvx %v0, %r9, %r4 \n"
485 " vavgub %v9, %v11, %v10 \n"
486 " vxor %v8, %v11, %v10 \n"
487 " add %r4, %r4, %r5 \n"
488 " vperm %v10, %v1, %v0, %v19 \n"
489 " vperm %v11, %v1, %v0, %v5 \n"
490 " vxor %v6, %v11, %v10 \n"
491 " vavgub %v7, %v11, %v10 \n"
492 " vor %v0, %v8, %v6 \n"
493 " vxor %v13, %v9, %v7 \n"
494 " vand %v0, %v3, %v0 \n"
495 " vavgub %v1, %v9, %v7 \n"
496 " vand %v0, %v0, %v13 \n"
497 " vsububm %v13, %v1, %v0 \n"
500 " lvx %v0, %r9, %r4 \n"
501 " lvx %v1, 0, %r4 \n"
502 " stvewx %v13, 0, %r3 \n"
504 " vperm %v10, %v1, %v0, %v2 \n"
505 " stvewx %v13, %r9, %r3 \n"
506 " vperm %v11, %v1, %v0, %v4 \n"
507 " add %r4, %r4, %r5 \n"
509 " vavgub %v9, %v11, %v10 \n"
510 " lvx %v0, %r9, %r4 \n"
511 " vxor %v8, %v11, %v10 \n"
512 " lvx %v1, 0, %r4 \n"
513 " vavgub %v12, %v9, %v7 \n"
514 " vor %v13, %v8, %v6 \n"
515 " add %r3, %r3, %r5 \n"
516 " vperm %v10, %v1, %v0, %v19 \n"
518 " vperm %v11, %v1, %v0, %v5 \n"
519 " vand %v13, %v3, %v13 \n"
520 " add %r4, %r4, %r5 \n"
521 " vxor %v0, %v9, %v7 \n"
522 " vxor %v6, %v11, %v10 \n"
523 " vavgub %v7, %v11, %v10 \n"
524 " vor %v1, %v8, %v6 \n"
525 " vand %v13, %v13, %v0 \n"
526 " vxor %v0, %v9, %v7 \n"
527 " vand %v1, %v3, %v1 \n"
528 " vsububm %v13, %v12, %v13 \n"
529 " vand %v1, %v1, %v0 \n"
530 " stvewx %v13, 0, %r3 \n"
531 " vavgub %v0, %v9, %v7 \n"
532 " stvewx %v13, %r9, %r3 \n"
533 " add %r3, %r3, %r5 \n"
534 " vsububm %v13, %v0, %v1 \n"
537 " lvx %v0, %r9, %r4 \n"
538 " lvx %v1, 0, %r4 \n"
539 " stvewx %v13, 0, %r3 \n"
540 " vperm %v10, %v1, %v0, %v2 \n"
542 " vperm %v11, %v1, %v0, %v4 \n"
543 " stvewx %v13, %r9, %r3 \n"
544 " add %r3, %r3, %r5 \n"
545 " vxor %v8, %v11, %v10 \n"
546 " vavgub %v9, %v11, %v10 \n"
547 " vor %v0, %v8, %v6 \n"
548 " vxor %v13, %v9, %v7 \n"
549 " vand %v0, %v3, %v0 \n"
550 " vavgub %v1, %v9, %v7 \n"
551 " vand %v0, %v0, %v13 \n"
552 " vsububm %v13, %v1, %v0 \n"
553 " stvewx %v13, 0, %r3 \n"
554 " stvewx %v13, %r9, %r3 \n"
/* Average-put, 16-byte-wide, no interpolation: the realigned source is
 * vavgub'd with the existing destination contents (lvx from %r3)
 * before being stored back -- used for bidirectional prediction.
 * NOTE(review): numbered listing with elided lines (asm() wrapper,
 * loop label/branch missing) -- consult the full file before editing. */
558 static void MC_avg_o_16_altivec (uint8_t * dest, uint8_t * ref,
559 int stride, int height)
563 " lvx %v0, %r9, %r4 \n"
564 " lvsl %v11, 0, %r4 \n"
565 " lvx %v1, 0, %r4 \n"
566 " srawi %r6, %r6, 1 \n"
567 " addi %r6, %r6, -1 \n"
568 " vperm %v0, %v1, %v0, %v11 \n"
569 " lvx %v13, 0, %r3 \n"
571 " add %r9, %r5, %r5 \n"
572 " vavgub %v12, %v13, %v0 \n"
573 " add %r4, %r4, %r5 \n"
576 " lvx %v1, 0, %r4 \n"
577 " lvx %v0, %r11, %r4 \n"
578 " lvx %v13, %r5, %r3 \n"
579 " vperm %v0, %v1, %v0, %v11 \n"
580 " stvx %v12, 0, %r3 \n"
581 " add %r4, %r4, %r5 \n"
582 " vavgub %v12, %v13, %v0 \n"
583 " lvx %v1, 0, %r4 \n"
584 " lvx %v0, %r11, %r4 \n"
585 " lvx %v13, %r9, %r3 \n"
586 " vperm %v0, %v1, %v0, %v11 \n"
587 " stvx %v12, %r5, %r3 \n"
588 " add %r4, %r4, %r5 \n"
589 " vavgub %v12, %v13, %v0 \n"
590 " add %r3, %r3, %r9 \n"
592 " lvx %v0, %r11, %r4 \n"
593 " lvx %v1, 0, %r4 \n"
594 " lvx %v13, %r5, %r3 \n"
595 " vperm %v0, %v1, %v0, %v11 \n"
596 " stvx %v12, 0, %r3 \n"
597 " vavgub %v12, %v13, %v0 \n"
598 " stvx %v12, %r5, %r3 \n"
/* Average-put, 8-byte-wide, no interpolation: packed realigned source
 * rows are vavgub'd with the current destination row before the
 * stvewx word stores.
 * NOTE(review): numbered listing with elided lines (asm() wrapper,
 * loop label/branch missing) -- consult the full file before editing. */
602 static void MC_avg_o_8_altivec (uint8_t * dest, uint8_t * ref,
603 int stride, int height)
606 " lvsl %v12, 0, %r4 \n"
608 " vmrghb %v12, %v12, %v12 \n"
609 " lvsl %v1, %r5, %r4 \n"
610 " lvx %v13, 0, %r4 \n"
611 " vpkuhum %v9, %v12, %v12 \n"
612 " lvx %v0, %r9, %r4 \n"
613 " srawi %r6, %r6, 1 \n"
614 " vmrghb %v1, %v1, %v1 \n"
615 " addi %r6, %r6, -1 \n"
616 " vperm %v0, %v13, %v0, %v9 \n"
617 " lvx %v11, 0, %r3 \n"
619 " vpkuhum %v10, %v1, %v1 \n"
620 " add %r4, %r4, %r5 \n"
621 " vavgub %v12, %v11, %v0 \n"
624 " lvx %v0, %r9, %r4 \n"
625 " lvx %v13, 0, %r4 \n"
626 " lvx %v11, %r5, %r3 \n"
627 " stvewx %v12, 0, %r3 \n"
628 " vperm %v0, %v13, %v0, %v10 \n"
630 " stvewx %v12, %r9, %r3 \n"
631 " vavgub %v1, %v11, %v0 \n"
632 " add %r4, %r4, %r5 \n"
634 " lvx %v0, %r9, %r4 \n"
635 " add %r3, %r3, %r5 \n"
636 " lvx %v13, 0, %r4 \n"
637 " lvx %v11, %r5, %r3 \n"
638 " stvewx %v1, 0, %r3 \n"
639 " vperm %v0, %v13, %v0, %v9 \n"
641 " stvewx %v1, %r9, %r3 \n"
642 " vavgub %v12, %v11, %v0 \n"
643 " add %r4, %r4, %r5 \n"
644 " add %r3, %r3, %r5 \n"
647 " lvx %v0, %r9, %r4 \n"
648 " lvx %v13, 0, %r4 \n"
649 " lvx %v11, %r5, %r3 \n"
650 " stvewx %v12, 0, %r3 \n"
651 " vperm %v0, %v13, %v0, %v10 \n"
653 " stvewx %v12, %r9, %r3 \n"
654 " vavgub %v1, %v11, %v0 \n"
655 " add %r3, %r3, %r5 \n"
656 " stvewx %v1, 0, %r3 \n"
657 " stvewx %v1, %r9, %r3 \n"
/* Average-put with horizontal half-pel interpolation, 16-byte-wide:
 * horizontal average (vavgub of offsets p and p+1) is then vavgub'd
 * with the current destination contents before storing.
 * NOTE(review): numbered listing with elided lines (asm() wrapper,
 * loop label/branch missing) -- consult the full file before editing. */
661 static void MC_avg_x_16_altivec (uint8_t * dest, uint8_t * ref,
662 int stride, int height)
665 " lvsl %v8, 0, %r4 \n"
666 " vspltisb %v0, 1 \n"
668 " lvx %v12, %r9, %r4 \n"
669 " vaddubm %v7, %v8, %v0 \n"
670 " lvx %v11, 0, %r4 \n"
671 " srawi %r6, %r6, 1 \n"
672 " vperm %v1, %v11, %v12, %v7 \n"
673 " addi %r6, %r6, -1 \n"
674 " vperm %v0, %v11, %v12, %v8 \n"
675 " lvx %v9, 0, %r3 \n"
677 " add %r9, %r5, %r5 \n"
678 " vavgub %v0, %v0, %v1 \n"
679 " add %r4, %r4, %r5 \n"
680 " vavgub %v10, %v9, %v0 \n"
683 " lvx %v11, 0, %r4 \n"
684 " lvx %v12, %r11, %r4 \n"
685 " lvx %v9, %r5, %r3 \n"
686 " stvx %v10, 0, %r3 \n"
687 " vperm %v0, %v11, %v12, %v7 \n"
688 " add %r4, %r4, %r5 \n"
689 " vperm %v1, %v11, %v12, %v8 \n"
690 " lvx %v11, 0, %r4 \n"
691 " lvx %v12, %r11, %r4 \n"
692 " vavgub %v1, %v1, %v0 \n"
693 " add %r4, %r4, %r5 \n"
694 " vperm %v13, %v11, %v12, %v7 \n"
695 " vavgub %v10, %v9, %v1 \n"
696 " vperm %v0, %v11, %v12, %v8 \n"
697 " lvx %v9, %r9, %r3 \n"
698 " stvx %v10, %r5, %r3 \n"
699 " vavgub %v0, %v0, %v13 \n"
700 " add %r3, %r3, %r9 \n"
701 " vavgub %v10, %v9, %v0 \n"
703 " lvx %v12, %r11, %r4 \n"
704 " lvx %v11, 0, %r4 \n"
705 " lvx %v9, %r5, %r3 \n"
706 " vperm %v1, %v11, %v12, %v7 \n"
707 " stvx %v10, 0, %r3 \n"
708 " vperm %v0, %v11, %v12, %v8 \n"
709 " vavgub %v0, %v0, %v1 \n"
710 " vavgub %v10, %v9, %v0 \n"
711 " stvx %v10, %r5, %r3 \n"
/* Average-put with horizontal half-pel interpolation, 8-byte-wide:
 * packed horizontal average is vavgub'd with the destination row,
 * then stored with two stvewx word stores.
 * NOTE(review): numbered listing with elided lines (asm() wrapper,
 * loop label/branch missing) -- consult the full file before editing. */
715 static void MC_avg_x_8_altivec (uint8_t * dest, uint8_t * ref,
716 int stride, int height)
719 " lvsl %v10, 0, %r4 \n"
720 " vspltisb %v13, 1 \n"
722 " vmrghb %v10, %v10, %v10 \n"
723 " lvx %v11, 0, %r4 \n"
724 " lvx %v12, %r9, %r4 \n"
725 " vpkuhum %v7, %v10, %v10 \n"
726 " srawi %r6, %r6, 1 \n"
727 " lvsl %v10, %r5, %r4 \n"
728 " vaddubm %v6, %v7, %v13 \n"
729 " vperm %v0, %v11, %v12, %v7 \n"
730 " addi %r6, %r6, -1 \n"
731 " vmrghb %v10, %v10, %v10 \n"
732 " lvx %v9, 0, %r3 \n"
734 " vperm %v1, %v11, %v12, %v6 \n"
735 " add %r4, %r4, %r5 \n"
736 " vpkuhum %v8, %v10, %v10 \n"
737 " vavgub %v0, %v0, %v1 \n"
738 " vaddubm %v13, %v8, %v13 \n"
739 " vavgub %v10, %v9, %v0 \n"
742 " lvx %v12, %r9, %r4 \n"
743 " lvx %v11, 0, %r4 \n"
744 " lvx %v9, %r5, %r3 \n"
745 " stvewx %v10, 0, %r3 \n"
746 " vperm %v1, %v11, %v12, %v13 \n"
747 " vperm %v0, %v11, %v12, %v8 \n"
749 " stvewx %v10, %r9, %r3 \n"
750 " add %r4, %r4, %r5 \n"
751 " vavgub %v0, %v0, %v1 \n"
753 " lvx %v12, %r9, %r4 \n"
754 " vavgub %v10, %v9, %v0 \n"
755 " lvx %v11, 0, %r4 \n"
756 " add %r3, %r3, %r5 \n"
757 " vperm %v1, %v11, %v12, %v6 \n"
758 " lvx %v9, %r5, %r3 \n"
759 " vperm %v0, %v11, %v12, %v7 \n"
760 " stvewx %v10, 0, %r3 \n"
762 " vavgub %v0, %v0, %v1 \n"
763 " stvewx %v10, %r9, %r3 \n"
764 " add %r4, %r4, %r5 \n"
765 " add %r3, %r3, %r5 \n"
766 " vavgub %v10, %v9, %v0 \n"
769 " lvx %v12, %r9, %r4 \n"
770 " lvx %v11, 0, %r4 \n"
771 " lvx %v9, %r5, %r3 \n"
772 " vperm %v1, %v11, %v12, %v13 \n"
773 " stvewx %v10, 0, %r3 \n"
774 " vperm %v0, %v11, %v12, %v8 \n"
776 " stvewx %v10, %r9, %r3 \n"
777 " vavgub %v0, %v0, %v1 \n"
778 " add %r3, %r3, %r5 \n"
779 " vavgub %v10, %v9, %v0 \n"
780 " stvewx %v10, 0, %r3 \n"
781 " stvewx %v10, %r9, %r3 \n"
/* Average-put with vertical half-pel interpolation, 16-byte-wide:
 * vavgub of two consecutive source rows, then vavgub with the current
 * destination row before storing.
 * NOTE(review): numbered listing with elided lines (asm() wrapper,
 * loop label/branch missing) -- consult the full file before editing. */
785 static void MC_avg_y_16_altivec (uint8_t * dest, uint8_t * ref,
786 int stride, int height)
790 " lvx %v1, %r9, %r4 \n"
791 " lvsl %v9, 0, %r4 \n"
792 " lvx %v13, 0, %r4 \n"
793 " add %r4, %r4, %r5 \n"
794 " vperm %v11, %v13, %v1, %v9 \n"
796 " lvx %v13, 0, %r4 \n"
797 " lvx %v1, %r11, %r4 \n"
798 " srawi %r6, %r6, 1 \n"
799 " vperm %v10, %v13, %v1, %v9 \n"
800 " addi %r6, %r6, -1 \n"
801 " lvx %v12, 0, %r3 \n"
803 " vavgub %v0, %v11, %v10 \n"
804 " add %r9, %r5, %r5 \n"
805 " add %r4, %r4, %r5 \n"
806 " vavgub %v0, %v12, %v0 \n"
809 " lvx %v13, 0, %r4 \n"
810 " lvx %v1, %r11, %r4 \n"
811 " lvx %v12, %r5, %r3 \n"
812 " vperm %v11, %v13, %v1, %v9 \n"
813 " stvx %v0, 0, %r3 \n"
814 " add %r4, %r4, %r5 \n"
815 " vavgub %v0, %v11, %v10 \n"
816 " lvx %v13, 0, %r4 \n"
817 " lvx %v1, %r11, %r4 \n"
818 " vavgub %v0, %v12, %v0 \n"
819 " add %r4, %r4, %r5 \n"
820 " lvx %v12, %r9, %r3 \n"
821 " vperm %v10, %v13, %v1, %v9 \n"
822 " stvx %v0, %r5, %r3 \n"
823 " vavgub %v0, %v11, %v10 \n"
824 " add %r3, %r3, %r9 \n"
825 " vavgub %v0, %v12, %v0 \n"
827 " lvx %v1, %r11, %r4 \n"
828 " lvx %v13, 0, %r4 \n"
829 " lvx %v12, %r5, %r3 \n"
830 " vperm %v11, %v13, %v1, %v9 \n"
831 " stvx %v0, 0, %r3 \n"
832 " vavgub %v0, %v11, %v10 \n"
833 " vavgub %v0, %v12, %v0 \n"
834 " stvx %v0, %r5, %r3 \n"
/* Average-put with vertical half-pel interpolation, 8-byte-wide:
 * packed row-pair average is vavgub'd with the current destination row;
 * rows are written with two stvewx word stores.
 * NOTE(review): numbered listing with elided lines (asm() wrapper,
 * loop label/branch missing) -- consult the full file before editing. */
838 static void MC_avg_y_8_altivec (uint8_t * dest, uint8_t * ref,
839 int stride, int height)
842 " lvsl %v12, 0, %r4 \n"
843 " lvsl %v9, %r5, %r4 \n"
844 " vmrghb %v12, %v12, %v12 \n"
846 " lvx %v11, 0, %r4 \n"
847 " vmrghb %v9, %v9, %v9 \n"
848 " lvx %v13, %r9, %r4 \n"
849 " vpkuhum %v7, %v12, %v12 \n"
850 " add %r4, %r4, %r5 \n"
851 " vpkuhum %v8, %v9, %v9 \n"
852 " vperm %v12, %v11, %v13, %v7 \n"
853 " srawi %r6, %r6, 1 \n"
854 " lvx %v11, 0, %r4 \n"
855 " lvx %v13, %r9, %r4 \n"
856 " addi %r6, %r6, -1 \n"
857 " vperm %v9, %v11, %v13, %v8 \n"
858 " lvx %v10, 0, %r3 \n"
860 " add %r4, %r4, %r5 \n"
861 " vavgub %v0, %v12, %v9 \n"
862 " vavgub %v1, %v10, %v0 \n"
865 " lvx %v13, %r9, %r4 \n"
866 " lvx %v11, 0, %r4 \n"
867 " lvx %v10, %r5, %r3 \n"
868 " stvewx %v1, 0, %r3 \n"
869 " vperm %v12, %v11, %v13, %v7 \n"
871 " stvewx %v1, %r9, %r3 \n"
872 " vavgub %v0, %v12, %v9 \n"
873 " add %r4, %r4, %r5 \n"
875 " vavgub %v1, %v10, %v0 \n"
876 " lvx %v13, %r9, %r4 \n"
877 " lvx %v11, 0, %r4 \n"
878 " add %r3, %r3, %r5 \n"
879 " vperm %v9, %v11, %v13, %v8 \n"
880 " lvx %v10, %r5, %r3 \n"
881 " stvewx %v1, 0, %r3 \n"
882 " vavgub %v0, %v12, %v9 \n"
884 " stvewx %v1, %r9, %r3 \n"
885 " add %r4, %r4, %r5 \n"
886 " vavgub %v1, %v10, %v0 \n"
887 " add %r3, %r3, %r5 \n"
890 " lvx %v13, %r9, %r4 \n"
891 " lvx %v11, 0, %r4 \n"
892 " lvx %v10, %r5, %r3 \n"
893 " vperm %v12, %v11, %v13, %v7 \n"
894 " stvewx %v1, 0, %r3 \n"
896 " vavgub %v0, %v12, %v9 \n"
897 " stvewx %v1, %r9, %r3 \n"
898 " add %r3, %r3, %r5 \n"
899 " vavgub %v1, %v10, %v0 \n"
900 " stvewx %v1, 0, %r3 \n"
901 " stvewx %v1, %r9, %r3 \n"
/* Average-put with horizontal+vertical half-pel interpolation, 16-wide:
 * the rounding-corrected 4-point average (vxor/vor/vand/vsububm
 * sequence, as in the put_xy variants) is additionally vavgub'd with
 * the current destination contents before storing.
 * NOTE(review): numbered listing with elided lines (asm() wrapper,
 * loop label/branch missing) -- consult the full file before editing. */
905 static void MC_avg_xy_16_altivec (uint8_t * dest, uint8_t * ref,
906 int stride, int height)
909 " lvsl %v4, 0, %r4 \n"
910 " vspltisb %v2, 1 \n"
912 " lvx %v1, %r9, %r4 \n"
913 " vaddubm %v3, %v4, %v2 \n"
914 " lvx %v13, 0, %r4 \n"
915 " add %r4, %r4, %r5 \n"
916 " vperm %v10, %v13, %v1, %v3 \n"
918 " vperm %v11, %v13, %v1, %v4 \n"
919 " srawi %r6, %r6, 1 \n"
920 " lvx %v13, 0, %r4 \n"
921 " lvx %v1, %r11, %r4 \n"
922 " vavgub %v9, %v11, %v10 \n"
923 " vxor %v8, %v11, %v10 \n"
924 " addi %r6, %r6, -1 \n"
925 " vperm %v10, %v13, %v1, %v3 \n"
926 " lvx %v6, 0, %r3 \n"
928 " vperm %v11, %v13, %v1, %v4 \n"
929 " add %r9, %r5, %r5 \n"
930 " add %r4, %r4, %r5 \n"
931 " vxor %v5, %v11, %v10 \n"
932 " vavgub %v7, %v11, %v10 \n"
933 " vor %v1, %v8, %v5 \n"
934 " vxor %v13, %v9, %v7 \n"
935 " vand %v1, %v2, %v1 \n"
936 " vavgub %v0, %v9, %v7 \n"
937 " vand %v1, %v1, %v13 \n"
938 " vsububm %v0, %v0, %v1 \n"
939 " vavgub %v12, %v6, %v0 \n"
942 " lvx %v13, 0, %r4 \n"
943 " lvx %v1, %r11, %r4 \n"
944 " lvx %v6, %r5, %r3 \n"
945 " stvx %v12, 0, %r3 \n"
946 " vperm %v10, %v13, %v1, %v3 \n"
947 " vperm %v11, %v13, %v1, %v4 \n"
948 " add %r4, %r4, %r5 \n"
949 " lvx %v13, 0, %r4 \n"
950 " lvx %v1, %r11, %r4 \n"
951 " vavgub %v9, %v11, %v10 \n"
952 " vxor %v8, %v11, %v10 \n"
953 " add %r4, %r4, %r5 \n"
954 " vperm %v10, %v13, %v1, %v3 \n"
955 " vavgub %v12, %v9, %v7 \n"
956 " vperm %v11, %v13, %v1, %v4 \n"
957 " vor %v0, %v8, %v5 \n"
958 " vxor %v13, %v9, %v7 \n"
959 " vxor %v5, %v11, %v10 \n"
960 " vand %v0, %v2, %v0 \n"
961 " vavgub %v7, %v11, %v10 \n"
962 " vor %v1, %v8, %v5 \n"
963 " vand %v0, %v0, %v13 \n"
964 " vand %v1, %v2, %v1 \n"
965 " vxor %v13, %v9, %v7 \n"
966 " vsububm %v12, %v12, %v0 \n"
967 " vand %v1, %v1, %v13 \n"
968 " vavgub %v0, %v9, %v7 \n"
969 " vavgub %v12, %v6, %v12 \n"
970 " lvx %v6, %r9, %r3 \n"
971 " vsububm %v0, %v0, %v1 \n"
972 " stvx %v12, %r5, %r3 \n"
973 " vavgub %v12, %v6, %v0 \n"
974 " add %r3, %r3, %r9 \n"
976 " lvx %v1, %r11, %r4 \n"
977 " lvx %v13, 0, %r4 \n"
978 " lvx %v6, %r5, %r3 \n"
979 " vperm %v10, %v13, %v1, %v3 \n"
980 " stvx %v12, 0, %r3 \n"
981 " vperm %v11, %v13, %v1, %v4 \n"
982 " vxor %v8, %v11, %v10 \n"
983 " vavgub %v9, %v11, %v10 \n"
984 " vor %v0, %v8, %v5 \n"
985 " vxor %v13, %v9, %v7 \n"
986 " vand %v0, %v2, %v0 \n"
987 " vavgub %v1, %v9, %v7 \n"
988 " vand %v0, %v0, %v13 \n"
989 " vsububm %v1, %v1, %v0 \n"
990 " vavgub %v12, %v6, %v1 \n"
991 " stvx %v12, %r5, %r3 \n"
/* Average-put with horizontal+vertical half-pel interpolation, 8-wide:
 * packed 8-byte variant of MC_avg_xy_16; the corrected 4-point average
 * is vavgub'd with the destination row before the stvewx word stores.
 * NOTE(review): numbered listing with elided lines (asm() wrapper,
 * loop label/branch missing) -- consult the full file before editing. */
995 static void MC_avg_xy_8_altivec (uint8_t * dest, uint8_t * ref,
996 int stride, int height)
999 " lvsl %v2, 0, %r4 \n"
1000 " vspltisb %v19, 1 \n"
1001 " lvsl %v3, %r5, %r4 \n"
1002 " vmrghb %v2, %v2, %v2 \n"
1004 " vmrghb %v3, %v3, %v3 \n"
1005 " lvx %v9, 0, %r4 \n"
1006 " vpkuhum %v2, %v2, %v2 \n"
1007 " lvx %v1, %r9, %r4 \n"
1008 " vpkuhum %v3, %v3, %v3 \n"
1009 " add %r4, %r4, %r5 \n"
1010 " vaddubm %v18, %v2, %v19 \n"
1011 " vperm %v11, %v9, %v1, %v2 \n"
1012 " srawi %r6, %r6, 1 \n"
1013 " vaddubm %v17, %v3, %v19 \n"
1014 " addi %r6, %r6, -1 \n"
1015 " vperm %v10, %v9, %v1, %v18 \n"
1016 " lvx %v4, 0, %r3 \n"
1018 " lvx %v1, %r9, %r4 \n"
1019 " lvx %v9, 0, %r4 \n"
1020 " vavgub %v8, %v11, %v10 \n"
1021 " vxor %v7, %v11, %v10 \n"
1022 " add %r4, %r4, %r5 \n"
1023 " vperm %v10, %v9, %v1, %v17 \n"
1024 " vperm %v11, %v9, %v1, %v3 \n"
1025 " vxor %v5, %v11, %v10 \n"
1026 " vavgub %v6, %v11, %v10 \n"
1027 " vor %v1, %v7, %v5 \n"
1028 " vxor %v13, %v8, %v6 \n"
1029 " vand %v1, %v19, %v1 \n"
1030 " vavgub %v0, %v8, %v6 \n"
1031 " vand %v1, %v1, %v13 \n"
1032 " vsububm %v0, %v0, %v1 \n"
1033 " vavgub %v13, %v4, %v0 \n"
1036 " lvx %v1, %r9, %r4 \n"
1037 " lvx %v9, 0, %r4 \n"
1038 " lvx %v4, %r5, %r3 \n"
1039 " stvewx %v13, 0, %r3 \n"
1040 " vperm %v10, %v9, %v1, %v18 \n"
1041 " vperm %v11, %v9, %v1, %v2 \n"
1043 " stvewx %v13, %r9, %r3 \n"
1044 " vxor %v7, %v11, %v10 \n"
1045 " add %r4, %r4, %r5 \n"
1047 " vavgub %v8, %v11, %v10 \n"
1048 " lvx %v1, %r9, %r4 \n"
1049 " vor %v0, %v7, %v5 \n"
1050 " lvx %v9, 0, %r4 \n"
1051 " vxor %v12, %v8, %v6 \n"
1052 " vand %v0, %v19, %v0 \n"
1053 " add %r3, %r3, %r5 \n"
1054 " vperm %v10, %v9, %v1, %v17 \n"
1055 " vavgub %v13, %v8, %v6 \n"
1057 " vperm %v11, %v9, %v1, %v3 \n"
1058 " vand %v0, %v0, %v12 \n"
1059 " add %r4, %r4, %r5 \n"
1060 " vxor %v5, %v11, %v10 \n"
1061 " vavgub %v6, %v11, %v10 \n"
1062 " vor %v1, %v7, %v5 \n"
1063 " vsububm %v13, %v13, %v0 \n"
1064 " vxor %v0, %v8, %v6 \n"
1065 " vand %v1, %v19, %v1 \n"
1066 " vavgub %v13, %v4, %v13 \n"
1067 " vand %v1, %v1, %v0 \n"
1068 " lvx %v4, %r5, %r3 \n"
1069 " vavgub %v0, %v8, %v6 \n"
1070 " stvewx %v13, 0, %r3 \n"
1071 " stvewx %v13, %r9, %r3 \n"
1072 " vsububm %v0, %v0, %v1 \n"
1073 " add %r3, %r3, %r5 \n"
1074 " vavgub %v13, %v4, %v0 \n"
1077 " lvx %v1, %r9, %r4 \n"
1078 " lvx %v9, 0, %r4 \n"
1079 " lvx %v4, %r5, %r3 \n"
1080 " vperm %v10, %v9, %v1, %v18 \n"
1081 " stvewx %v13, 0, %r3 \n"
1082 " vperm %v11, %v9, %v1, %v2 \n"
1084 " stvewx %v13, %r9, %r3 \n"
1085 " vxor %v7, %v11, %v10 \n"
1086 " add %r3, %r3, %r5 \n"
1087 " vavgub %v8, %v11, %v10 \n"
1088 " vor %v0, %v7, %v5 \n"
1089 " vxor %v13, %v8, %v6 \n"
1090 " vand %v0, %v19, %v0 \n"
1091 " vavgub %v1, %v8, %v6 \n"
1092 " vand %v0, %v0, %v13 \n"
1093 " vsububm %v1, %v1, %v0 \n"
1094 " vavgub %v13, %v4, %v1 \n"
1095 " stvewx %v13, 0, %r3 \n"
1096 " stvewx %v13, %r9, %r3 \n"
1100 #endif /* !CAN_COMPILE_C_ALTIVEC */
1101 #endif /* __BUILD_ALTIVEC_ASM__ */
1103 #if defined(CAN_COMPILE_C_ALTIVEC) || defined(__BUILD_ALTIVEC_ASM__)
1105 #define vector_s16_t vector signed short
1106 #define vector_u16_t vector unsigned short
1107 #define vector_s8_t vector signed char
1108 #define vector_u8_t vector unsigned char
1109 #define vector_s32_t vector signed int
1110 #define vector_u32_t vector unsigned int
/* C-intrinsic version of MC_put_o_16: plain copy of a 16-wide block.
 * vec_lvsl builds the realignment permute; each row is vec_ld(0)/
 * vec_ld(15) + vec_perm, stored with vec_st; height is halved because
 * two rows are handled per iteration.
 * NOTE(review): numbered listing with elided lines (braces, the
 * do/while loop and the ref/dest pointer advances are missing). */
1112 void MC_put_o_16_altivec (unsigned char * dest, unsigned char * ref,
1113 int stride, int height)
1115 vector_u8_t perm, ref0, ref1, tmp;
1117 perm = vec_lvsl (0, ref);
1119 height = (height >> 1) - 1;
1121 ref0 = vec_ld (0, ref);
1122 ref1 = vec_ld (15, ref);
1124 tmp = vec_perm (ref0, ref1, perm);
1127 ref0 = vec_ld (0, ref);
1128 ref1 = vec_ld (15, ref);
1130 vec_st (tmp, 0, dest);
1131 tmp = vec_perm (ref0, ref1, perm);
1133 ref0 = vec_ld (0, ref);
1134 ref1 = vec_ld (15, ref);
1136 vec_st (tmp, stride, dest);
1138 tmp = vec_perm (ref0, ref1, perm);
1141 ref0 = vec_ld (0, ref);
1142 ref1 = vec_ld (15, ref);
1143 vec_st (tmp, 0, dest);
1144 tmp = vec_perm (ref0, ref1, perm);
1145 vec_st (tmp, stride, dest);
/* C-intrinsic version of MC_put_o_8: plain copy of an 8-wide block.
 * vec_mergeh + vec_pack folds the 16-byte lvsl permute into an 8-byte
 * one; each row is written as two 4-byte vec_ste word stores (offsets
 * 0 and 4). Separate permutes for even/odd rows (ref vs ref+stride).
 * NOTE(review): numbered listing with elided lines (braces, do/while
 * loop and pointer advances are missing). */
1148 void MC_put_o_8_altivec (unsigned char * dest, unsigned char * ref,
1149 int stride, int height)
1151 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1;
1153 tmp0 = vec_lvsl (0, ref);
1154 tmp0 = vec_mergeh (tmp0, tmp0);
1155 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1156 tmp1 = vec_lvsl (stride, ref);
1157 tmp1 = vec_mergeh (tmp1, tmp1);
1158 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1160 height = (height >> 1) - 1;
1162 ref0 = vec_ld (0, ref);
1163 ref1 = vec_ld (7, ref);
1165 tmp0 = vec_perm (ref0, ref1, perm0);
1168 ref0 = vec_ld (0, ref);
1169 ref1 = vec_ld (7, ref);
1171 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1172 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1174 tmp1 = vec_perm (ref0, ref1, perm1);
1176 ref0 = vec_ld (0, ref);
1177 ref1 = vec_ld (7, ref);
1179 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1180 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1182 tmp0 = vec_perm (ref0, ref1, perm0);
1185 ref0 = vec_ld (0, ref);
1186 ref1 = vec_ld (7, ref);
1187 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1188 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1190 tmp1 = vec_perm (ref0, ref1, perm1);
1191 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1192 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
/* C-intrinsic version of MC_put_x_16: horizontal half-pel put.
 * vec_avg of two vec_perm selections at byte offsets p (permA) and
 * p+1 (permB = permA + splat(1)); note vec_ld(16,...) here vs
 * vec_ld(15,...) in the non-interpolating variant, since the p+1
 * selection can reach one byte further.
 * NOTE(review): numbered listing with elided lines (braces, do/while
 * loop and pointer advances are missing). */
1195 void MC_put_x_16_altivec (unsigned char * dest, unsigned char * ref,
1196 int stride, int height)
1198 vector_u8_t permA, permB, ref0, ref1, tmp;
1200 permA = vec_lvsl (0, ref);
1201 permB = vec_add (permA, vec_splat_u8 (1));
1203 height = (height >> 1) - 1;
1205 ref0 = vec_ld (0, ref);
1206 ref1 = vec_ld (16, ref);
1208 tmp = vec_avg (vec_perm (ref0, ref1, permA),
1209 vec_perm (ref0, ref1, permB));
1212 ref0 = vec_ld (0, ref);
1213 ref1 = vec_ld (16, ref);
1215 vec_st (tmp, 0, dest);
1216 tmp = vec_avg (vec_perm (ref0, ref1, permA),
1217 vec_perm (ref0, ref1, permB));
1219 ref0 = vec_ld (0, ref);
1220 ref1 = vec_ld (16, ref);
1222 vec_st (tmp, stride, dest);
1224 tmp = vec_avg (vec_perm (ref0, ref1, permA),
1225 vec_perm (ref0, ref1, permB));
1228 ref0 = vec_ld (0, ref);
1229 ref1 = vec_ld (16, ref);
1230 vec_st (tmp, 0, dest);
1231 tmp = vec_avg (vec_perm (ref0, ref1, permA),
1232 vec_perm (ref0, ref1, permB));
1233 vec_st (tmp, stride, dest);
/* C-intrinsic version of MC_put_x_8: horizontal half-pel put, 8-wide.
 * Packed 8-byte permutes for even/odd rows (perm0A/perm1A) plus their
 * +1 counterparts (perm0B/perm1B); vec_avg of the two selections,
 * written with two 4-byte vec_ste word stores per row.
 * NOTE(review): numbered listing with elided lines (braces, do/while
 * loop and pointer advances are missing). */
1236 void MC_put_x_8_altivec (unsigned char * dest, unsigned char * ref,
1237 int stride, int height)
1239 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
1241 ones = vec_splat_u8 (1);
1242 tmp0 = vec_lvsl (0, ref);
1243 tmp0 = vec_mergeh (tmp0, tmp0);
1244 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1245 perm0B = vec_add (perm0A, ones);
1246 tmp1 = vec_lvsl (stride, ref);
1247 tmp1 = vec_mergeh (tmp1, tmp1);
1248 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1249 perm1B = vec_add (perm1A, ones);
1251 height = (height >> 1) - 1;
1253 ref0 = vec_ld (0, ref);
1254 ref1 = vec_ld (8, ref);
1256 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
1257 vec_perm (ref0, ref1, perm0B));
1260 ref0 = vec_ld (0, ref);
1261 ref1 = vec_ld (8, ref);
1263 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1264 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1266 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
1267 vec_perm (ref0, ref1, perm1B));
1269 ref0 = vec_ld (0, ref);
1270 ref1 = vec_ld (8, ref);
1272 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1273 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1275 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
1276 vec_perm (ref0, ref1, perm0B));
1279 ref0 = vec_ld (0, ref);
1280 ref1 = vec_ld (8, ref);
1281 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1282 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1284 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
1285 vec_perm (ref0, ref1, perm1B));
1286 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1287 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1290 void MC_put_y_16_altivec (unsigned char * dest, unsigned char * ref,
1291 int stride, int height)
1293 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp;
1295 perm = vec_lvsl (0, ref);
1297 height = (height >> 1) - 1;
1299 ref0 = vec_ld (0, ref);
1300 ref1 = vec_ld (15, ref);
1302 tmp0 = vec_perm (ref0, ref1, perm);
1303 ref0 = vec_ld (0, ref);
1304 ref1 = vec_ld (15, ref);
1306 tmp1 = vec_perm (ref0, ref1, perm);
1307 tmp = vec_avg (tmp0, tmp1);
1310 ref0 = vec_ld (0, ref);
1311 ref1 = vec_ld (15, ref);
1313 vec_st (tmp, 0, dest);
1314 tmp0 = vec_perm (ref0, ref1, perm);
1315 tmp = vec_avg (tmp0, tmp1);
1317 ref0 = vec_ld (0, ref);
1318 ref1 = vec_ld (15, ref);
1320 vec_st (tmp, stride, dest);
1322 tmp1 = vec_perm (ref0, ref1, perm);
1323 tmp = vec_avg (tmp0, tmp1);
1326 ref0 = vec_ld (0, ref);
1327 ref1 = vec_ld (15, ref);
1328 vec_st (tmp, 0, dest);
1329 tmp0 = vec_perm (ref0, ref1, perm);
1330 tmp = vec_avg (tmp0, tmp1);
1331 vec_st (tmp, stride, dest);
1334 void MC_put_y_8_altivec (unsigned char * dest, unsigned char * ref,
1335 int stride, int height)
1337 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1;
1339 tmp0 = vec_lvsl (0, ref);
1340 tmp0 = vec_mergeh (tmp0, tmp0);
1341 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1342 tmp1 = vec_lvsl (stride, ref);
1343 tmp1 = vec_mergeh (tmp1, tmp1);
1344 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1346 height = (height >> 1) - 1;
1348 ref0 = vec_ld (0, ref);
1349 ref1 = vec_ld (7, ref);
1351 tmp0 = vec_perm (ref0, ref1, perm0);
1352 ref0 = vec_ld (0, ref);
1353 ref1 = vec_ld (7, ref);
1355 tmp1 = vec_perm (ref0, ref1, perm1);
1356 tmp = vec_avg (tmp0, tmp1);
1359 ref0 = vec_ld (0, ref);
1360 ref1 = vec_ld (7, ref);
1362 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1363 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1365 tmp0 = vec_perm (ref0, ref1, perm0);
1366 tmp = vec_avg (tmp0, tmp1);
1368 ref0 = vec_ld (0, ref);
1369 ref1 = vec_ld (7, ref);
1371 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1372 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1374 tmp1 = vec_perm (ref0, ref1, perm1);
1375 tmp = vec_avg (tmp0, tmp1);
1378 ref0 = vec_ld (0, ref);
1379 ref1 = vec_ld (7, ref);
1380 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1381 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1383 tmp0 = vec_perm (ref0, ref1, perm0);
1384 tmp = vec_avg (tmp0, tmp1);
1385 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1386 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1389 void MC_put_xy_16_altivec (unsigned char * dest, unsigned char * ref,
1390 int stride, int height)
1392 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
1395 ones = vec_splat_u8 (1);
1396 permA = vec_lvsl (0, ref);
1397 permB = vec_add (permA, ones);
1399 height = (height >> 1) - 1;
1401 ref0 = vec_ld (0, ref);
1402 ref1 = vec_ld (16, ref);
1404 A = vec_perm (ref0, ref1, permA);
1405 B = vec_perm (ref0, ref1, permB);
1406 avg0 = vec_avg (A, B);
1407 xor0 = vec_xor (A, B);
1409 ref0 = vec_ld (0, ref);
1410 ref1 = vec_ld (16, ref);
1412 A = vec_perm (ref0, ref1, permA);
1413 B = vec_perm (ref0, ref1, permB);
1414 avg1 = vec_avg (A, B);
1415 xor1 = vec_xor (A, B);
1416 tmp = vec_sub (vec_avg (avg0, avg1),
1417 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1418 vec_xor (avg0, avg1)));
1422 ref0 = vec_ld (0, ref);
1423 ref1 = vec_ld (16, ref);
1425 vec_st (tmp, 0, dest);
1426 A = vec_perm (ref0, ref1, permA);
1427 B = vec_perm (ref0, ref1, permB);
1428 avg0 = vec_avg (A, B);
1429 xor0 = vec_xor (A, B);
1430 tmp = vec_sub (vec_avg (avg0, avg1),
1431 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1432 vec_xor (avg0, avg1)));
1434 ref0 = vec_ld (0, ref);
1435 ref1 = vec_ld (16, ref);
1437 vec_st (tmp, stride, dest);
1439 A = vec_perm (ref0, ref1, permA);
1440 B = vec_perm (ref0, ref1, permB);
1441 avg1 = vec_avg (A, B);
1442 xor1 = vec_xor (A, B);
1443 tmp = vec_sub (vec_avg (avg0, avg1),
1444 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1445 vec_xor (avg0, avg1)));
1448 ref0 = vec_ld (0, ref);
1449 ref1 = vec_ld (16, ref);
1450 vec_st (tmp, 0, dest);
1451 A = vec_perm (ref0, ref1, permA);
1452 B = vec_perm (ref0, ref1, permB);
1453 avg0 = vec_avg (A, B);
1454 xor0 = vec_xor (A, B);
1455 tmp = vec_sub (vec_avg (avg0, avg1),
1456 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1457 vec_xor (avg0, avg1)));
1458 vec_st (tmp, stride, dest);
1461 void MC_put_xy_8_altivec (unsigned char * dest, unsigned char * ref,
1462 int stride, int height)
1464 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
1465 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones;
1467 ones = vec_splat_u8 (1);
1468 perm0A = vec_lvsl (0, ref);
1469 perm0A = vec_mergeh (perm0A, perm0A);
1470 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
1471 perm0B = vec_add (perm0A, ones);
1472 perm1A = vec_lvsl (stride, ref);
1473 perm1A = vec_mergeh (perm1A, perm1A);
1474 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
1475 perm1B = vec_add (perm1A, ones);
1477 height = (height >> 1) - 1;
1479 ref0 = vec_ld (0, ref);
1480 ref1 = vec_ld (16, ref);
1482 A = vec_perm (ref0, ref1, perm0A);
1483 B = vec_perm (ref0, ref1, perm0B);
1484 avg0 = vec_avg (A, B);
1485 xor0 = vec_xor (A, B);
1487 ref0 = vec_ld (0, ref);
1488 ref1 = vec_ld (16, ref);
1490 A = vec_perm (ref0, ref1, perm1A);
1491 B = vec_perm (ref0, ref1, perm1B);
1492 avg1 = vec_avg (A, B);
1493 xor1 = vec_xor (A, B);
1494 tmp = vec_sub (vec_avg (avg0, avg1),
1495 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1496 vec_xor (avg0, avg1)));
1500 ref0 = vec_ld (0, ref);
1501 ref1 = vec_ld (16, ref);
1503 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1504 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1506 A = vec_perm (ref0, ref1, perm0A);
1507 B = vec_perm (ref0, ref1, perm0B);
1508 avg0 = vec_avg (A, B);
1509 xor0 = vec_xor (A, B);
1510 tmp = vec_sub (vec_avg (avg0, avg1),
1511 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1512 vec_xor (avg0, avg1)));
1514 ref0 = vec_ld (0, ref);
1515 ref1 = vec_ld (16, ref);
1517 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1518 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1520 A = vec_perm (ref0, ref1, perm1A);
1521 B = vec_perm (ref0, ref1, perm1B);
1522 avg1 = vec_avg (A, B);
1523 xor1 = vec_xor (A, B);
1524 tmp = vec_sub (vec_avg (avg0, avg1),
1525 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1526 vec_xor (avg0, avg1)));
1529 ref0 = vec_ld (0, ref);
1530 ref1 = vec_ld (16, ref);
1531 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1532 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1534 A = vec_perm (ref0, ref1, perm0A);
1535 B = vec_perm (ref0, ref1, perm0B);
1536 avg0 = vec_avg (A, B);
1537 xor0 = vec_xor (A, B);
1538 tmp = vec_sub (vec_avg (avg0, avg1),
1539 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1540 vec_xor (avg0, avg1)));
1541 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1542 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1546 void MC_put_xy_8_altivec (unsigned char * dest, unsigned char * ref,
1547 int stride, int height)
1549 vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones;
1550 vector_u16_t splat2, temp;
1552 ones = vec_splat_u8 (1);
1553 permA = vec_lvsl (0, ref);
1554 permB = vec_add (permA, ones);
1556 zero = vec_splat_u8 (0);
1557 splat2 = vec_splat_u16 (2);
1560 ref0 = vec_ld (0, ref);
1561 ref1 = vec_ld (16, ref);
1563 A = vec_perm (ref0, ref1, permA);
1564 B = vec_perm (ref0, ref1, permB);
1565 ref0 = vec_ld (0, ref);
1566 ref1 = vec_ld (16, ref);
1567 C = vec_perm (ref0, ref1, permA);
1568 D = vec_perm (ref0, ref1, permB);
1570 temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A),
1571 (vector_u16_t)vec_mergeh (zero, B)),
1572 vec_add ((vector_u16_t)vec_mergeh (zero, C),
1573 (vector_u16_t)vec_mergeh (zero, D)));
1574 temp = vec_sr (vec_add (temp, splat2), splat2);
1575 tmp = vec_pack (temp, temp);
1577 vec_st (tmp, 0, dest);
1579 tmp = vec_avg (vec_perm (ref0, ref1, permA),
1580 vec_perm (ref0, ref1, permB));
1585 void MC_avg_o_16_altivec (unsigned char * dest, unsigned char * ref,
1586 int stride, int height)
1588 vector_u8_t perm, ref0, ref1, tmp, prev;
1590 perm = vec_lvsl (0, ref);
1592 height = (height >> 1) - 1;
1594 ref0 = vec_ld (0, ref);
1595 ref1 = vec_ld (15, ref);
1597 prev = vec_ld (0, dest);
1598 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
1601 ref0 = vec_ld (0, ref);
1602 ref1 = vec_ld (15, ref);
1604 prev = vec_ld (stride, dest);
1605 vec_st (tmp, 0, dest);
1606 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
1608 ref0 = vec_ld (0, ref);
1609 ref1 = vec_ld (15, ref);
1611 prev = vec_ld (2*stride, dest);
1612 vec_st (tmp, stride, dest);
1614 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
1617 ref0 = vec_ld (0, ref);
1618 ref1 = vec_ld (15, ref);
1619 prev = vec_ld (stride, dest);
1620 vec_st (tmp, 0, dest);
1621 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
1622 vec_st (tmp, stride, dest);
1625 void MC_avg_o_8_altivec (unsigned char * dest, unsigned char * ref,
1626 int stride, int height)
1628 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev;
1630 tmp0 = vec_lvsl (0, ref);
1631 tmp0 = vec_mergeh (tmp0, tmp0);
1632 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1633 tmp1 = vec_lvsl (stride, ref);
1634 tmp1 = vec_mergeh (tmp1, tmp1);
1635 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1637 height = (height >> 1) - 1;
1639 ref0 = vec_ld (0, ref);
1640 ref1 = vec_ld (7, ref);
1642 prev = vec_ld (0, dest);
1643 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
1646 ref0 = vec_ld (0, ref);
1647 ref1 = vec_ld (7, ref);
1649 prev = vec_ld (stride, dest);
1650 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1651 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1653 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
1655 ref0 = vec_ld (0, ref);
1656 ref1 = vec_ld (7, ref);
1658 prev = vec_ld (stride, dest);
1659 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1660 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1662 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
1665 ref0 = vec_ld (0, ref);
1666 ref1 = vec_ld (7, ref);
1667 prev = vec_ld (stride, dest);
1668 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1669 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1671 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
1672 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1673 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1676 void MC_avg_x_16_altivec (unsigned char * dest, unsigned char * ref,
1677 int stride, int height)
1679 vector_u8_t permA, permB, ref0, ref1, tmp, prev;
1681 permA = vec_lvsl (0, ref);
1682 permB = vec_add (permA, vec_splat_u8 (1));
1684 height = (height >> 1) - 1;
1686 ref0 = vec_ld (0, ref);
1687 ref1 = vec_ld (16, ref);
1688 prev = vec_ld (0, dest);
1690 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
1691 vec_perm (ref0, ref1, permB)));
1694 ref0 = vec_ld (0, ref);
1695 ref1 = vec_ld (16, ref);
1697 prev = vec_ld (stride, dest);
1698 vec_st (tmp, 0, dest);
1699 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
1700 vec_perm (ref0, ref1, permB)));
1702 ref0 = vec_ld (0, ref);
1703 ref1 = vec_ld (16, ref);
1705 prev = vec_ld (2*stride, dest);
1706 vec_st (tmp, stride, dest);
1708 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
1709 vec_perm (ref0, ref1, permB)));
1712 ref0 = vec_ld (0, ref);
1713 ref1 = vec_ld (16, ref);
1714 prev = vec_ld (stride, dest);
1715 vec_st (tmp, 0, dest);
1716 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
1717 vec_perm (ref0, ref1, permB)));
1718 vec_st (tmp, stride, dest);
1721 void MC_avg_x_8_altivec (unsigned char * dest, unsigned char * ref,
1722 int stride, int height)
1724 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
1727 ones = vec_splat_u8 (1);
1728 tmp0 = vec_lvsl (0, ref);
1729 tmp0 = vec_mergeh (tmp0, tmp0);
1730 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1731 perm0B = vec_add (perm0A, ones);
1732 tmp1 = vec_lvsl (stride, ref);
1733 tmp1 = vec_mergeh (tmp1, tmp1);
1734 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1735 perm1B = vec_add (perm1A, ones);
1737 height = (height >> 1) - 1;
1739 ref0 = vec_ld (0, ref);
1740 ref1 = vec_ld (8, ref);
1741 prev = vec_ld (0, dest);
1743 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
1744 vec_perm (ref0, ref1, perm0B)));
1747 ref0 = vec_ld (0, ref);
1748 ref1 = vec_ld (8, ref);
1750 prev = vec_ld (stride, dest);
1751 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1752 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1754 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
1755 vec_perm (ref0, ref1, perm1B)));
1757 ref0 = vec_ld (0, ref);
1758 ref1 = vec_ld (8, ref);
1760 prev = vec_ld (stride, dest);
1761 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1762 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1764 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
1765 vec_perm (ref0, ref1, perm0B)));
1768 ref0 = vec_ld (0, ref);
1769 ref1 = vec_ld (8, ref);
1770 prev = vec_ld (stride, dest);
1771 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1772 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1774 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
1775 vec_perm (ref0, ref1, perm1B)));
1776 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1777 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1780 void MC_avg_y_16_altivec (unsigned char * dest, unsigned char * ref,
1781 int stride, int height)
1783 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev;
1785 perm = vec_lvsl (0, ref);
1787 height = (height >> 1) - 1;
1789 ref0 = vec_ld (0, ref);
1790 ref1 = vec_ld (15, ref);
1792 tmp0 = vec_perm (ref0, ref1, perm);
1793 ref0 = vec_ld (0, ref);
1794 ref1 = vec_ld (15, ref);
1796 prev = vec_ld (0, dest);
1797 tmp1 = vec_perm (ref0, ref1, perm);
1798 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1801 ref0 = vec_ld (0, ref);
1802 ref1 = vec_ld (15, ref);
1804 prev = vec_ld (stride, dest);
1805 vec_st (tmp, 0, dest);
1806 tmp0 = vec_perm (ref0, ref1, perm);
1807 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1809 ref0 = vec_ld (0, ref);
1810 ref1 = vec_ld (15, ref);
1812 prev = vec_ld (2*stride, dest);
1813 vec_st (tmp, stride, dest);
1815 tmp1 = vec_perm (ref0, ref1, perm);
1816 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1819 ref0 = vec_ld (0, ref);
1820 ref1 = vec_ld (15, ref);
1821 prev = vec_ld (stride, dest);
1822 vec_st (tmp, 0, dest);
1823 tmp0 = vec_perm (ref0, ref1, perm);
1824 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1825 vec_st (tmp, stride, dest);
1828 void MC_avg_y_8_altivec (unsigned char * dest, unsigned char * ref,
1829 int stride, int height)
1831 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev;
1833 tmp0 = vec_lvsl (0, ref);
1834 tmp0 = vec_mergeh (tmp0, tmp0);
1835 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1836 tmp1 = vec_lvsl (stride, ref);
1837 tmp1 = vec_mergeh (tmp1, tmp1);
1838 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1840 height = (height >> 1) - 1;
1842 ref0 = vec_ld (0, ref);
1843 ref1 = vec_ld (7, ref);
1845 tmp0 = vec_perm (ref0, ref1, perm0);
1846 ref0 = vec_ld (0, ref);
1847 ref1 = vec_ld (7, ref);
1849 prev = vec_ld (0, dest);
1850 tmp1 = vec_perm (ref0, ref1, perm1);
1851 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1854 ref0 = vec_ld (0, ref);
1855 ref1 = vec_ld (7, ref);
1857 prev = vec_ld (stride, dest);
1858 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1859 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1861 tmp0 = vec_perm (ref0, ref1, perm0);
1862 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1864 ref0 = vec_ld (0, ref);
1865 ref1 = vec_ld (7, ref);
1867 prev = vec_ld (stride, dest);
1868 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1869 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1871 tmp1 = vec_perm (ref0, ref1, perm1);
1872 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1875 ref0 = vec_ld (0, ref);
1876 ref1 = vec_ld (7, ref);
1877 prev = vec_ld (stride, dest);
1878 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1879 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1881 tmp0 = vec_perm (ref0, ref1, perm0);
1882 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1883 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1884 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1887 void MC_avg_xy_16_altivec (unsigned char * dest, unsigned char * ref,
1888 int stride, int height)
1890 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
1891 vector_u8_t ones, prev;
1893 ones = vec_splat_u8 (1);
1894 permA = vec_lvsl (0, ref);
1895 permB = vec_add (permA, ones);
1897 height = (height >> 1) - 1;
1899 ref0 = vec_ld (0, ref);
1900 ref1 = vec_ld (16, ref);
1902 A = vec_perm (ref0, ref1, permA);
1903 B = vec_perm (ref0, ref1, permB);
1904 avg0 = vec_avg (A, B);
1905 xor0 = vec_xor (A, B);
1907 ref0 = vec_ld (0, ref);
1908 ref1 = vec_ld (16, ref);
1910 prev = vec_ld (0, dest);
1911 A = vec_perm (ref0, ref1, permA);
1912 B = vec_perm (ref0, ref1, permB);
1913 avg1 = vec_avg (A, B);
1914 xor1 = vec_xor (A, B);
1915 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
1916 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1917 vec_xor (avg0, avg1))));
1921 ref0 = vec_ld (0, ref);
1922 ref1 = vec_ld (16, ref);
1924 prev = vec_ld (stride, dest);
1925 vec_st (tmp, 0, dest);
1926 A = vec_perm (ref0, ref1, permA);
1927 B = vec_perm (ref0, ref1, permB);
1928 avg0 = vec_avg (A, B);
1929 xor0 = vec_xor (A, B);
1930 tmp = vec_avg (prev,
1931 vec_sub (vec_avg (avg0, avg1),
1932 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1933 vec_xor (avg0, avg1))));
1935 ref0 = vec_ld (0, ref);
1936 ref1 = vec_ld (16, ref);
1938 prev = vec_ld (2*stride, dest);
1939 vec_st (tmp, stride, dest);
1941 A = vec_perm (ref0, ref1, permA);
1942 B = vec_perm (ref0, ref1, permB);
1943 avg1 = vec_avg (A, B);
1944 xor1 = vec_xor (A, B);
1945 tmp = vec_avg (prev,
1946 vec_sub (vec_avg (avg0, avg1),
1947 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1948 vec_xor (avg0, avg1))));
1951 ref0 = vec_ld (0, ref);
1952 ref1 = vec_ld (16, ref);
1953 prev = vec_ld (stride, dest);
1954 vec_st (tmp, 0, dest);
1955 A = vec_perm (ref0, ref1, permA);
1956 B = vec_perm (ref0, ref1, permB);
1957 avg0 = vec_avg (A, B);
1958 xor0 = vec_xor (A, B);
1959 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
1960 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1961 vec_xor (avg0, avg1))));
1962 vec_st (tmp, stride, dest);
1965 void MC_avg_xy_8_altivec (unsigned char * dest, unsigned char * ref,
1966 int stride, int height)
1968 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
1969 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev;
1971 ones = vec_splat_u8 (1);
1972 perm0A = vec_lvsl (0, ref);
1973 perm0A = vec_mergeh (perm0A, perm0A);
1974 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
1975 perm0B = vec_add (perm0A, ones);
1976 perm1A = vec_lvsl (stride, ref);
1977 perm1A = vec_mergeh (perm1A, perm1A);
1978 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
1979 perm1B = vec_add (perm1A, ones);
1981 height = (height >> 1) - 1;
1983 ref0 = vec_ld (0, ref);
1984 ref1 = vec_ld (16, ref);
1986 A = vec_perm (ref0, ref1, perm0A);
1987 B = vec_perm (ref0, ref1, perm0B);
1988 avg0 = vec_avg (A, B);
1989 xor0 = vec_xor (A, B);
1991 ref0 = vec_ld (0, ref);
1992 ref1 = vec_ld (16, ref);
1994 prev = vec_ld (0, dest);
1995 A = vec_perm (ref0, ref1, perm1A);
1996 B = vec_perm (ref0, ref1, perm1B);
1997 avg1 = vec_avg (A, B);
1998 xor1 = vec_xor (A, B);
1999 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
2000 vec_and (vec_and (ones, vec_or (xor0, xor1)),
2001 vec_xor (avg0, avg1))));
2005 ref0 = vec_ld (0, ref);
2006 ref1 = vec_ld (16, ref);
2008 prev = vec_ld (stride, dest);
2009 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
2010 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
2012 A = vec_perm (ref0, ref1, perm0A);
2013 B = vec_perm (ref0, ref1, perm0B);
2014 avg0 = vec_avg (A, B);
2015 xor0 = vec_xor (A, B);
2016 tmp = vec_avg (prev,
2017 vec_sub (vec_avg (avg0, avg1),
2018 vec_and (vec_and (ones, vec_or (xor0, xor1)),
2019 vec_xor (avg0, avg1))));
2021 ref0 = vec_ld (0, ref);
2022 ref1 = vec_ld (16, ref);
2024 prev = vec_ld (stride, dest);
2025 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
2026 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
2028 A = vec_perm (ref0, ref1, perm1A);
2029 B = vec_perm (ref0, ref1, perm1B);
2030 avg1 = vec_avg (A, B);
2031 xor1 = vec_xor (A, B);
2032 tmp = vec_avg (prev,
2033 vec_sub (vec_avg (avg0, avg1),
2034 vec_and (vec_and (ones, vec_or (xor0, xor1)),
2035 vec_xor (avg0, avg1))));
2038 ref0 = vec_ld (0, ref);
2039 ref1 = vec_ld (16, ref);
2040 prev = vec_ld (stride, dest);
2041 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
2042 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
2044 A = vec_perm (ref0, ref1, perm0A);
2045 B = vec_perm (ref0, ref1, perm0B);
2046 avg0 = vec_avg (A, B);
2047 xor0 = vec_xor (A, B);
2048 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
2049 vec_and (vec_and (ones, vec_or (xor0, xor1)),
2050 vec_xor (avg0, avg1))));
2051 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
2052 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
2055 #endif /* CAN_COMPILE_C_ALTIVEC || __BUILD_ALTIVEC_ASM__ */
2056 #ifndef __BUILD_ALTIVEC_ASM__
2058 /*****************************************************************************
2059 * Functions exported as capabilities. They are declared as static so that
2060 * we don't pollute the namespace too much.
2061 *****************************************************************************/
2062 static void motion_getfunctions( function_list_t * p_function_list )
2064 static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *,
2068 /* Copying functions */
2071 MC_put_o_16_altivec, MC_put_x_16_altivec, MC_put_y_16_altivec, MC_put_xy_16_altivec
2075 MC_put_o_8_altivec, MC_put_x_8_altivec, MC_put_y_8_altivec, MC_put_xy_8_altivec
2079 /* Averaging functions */
2082 MC_avg_o_16_altivec, MC_avg_x_16_altivec, MC_avg_y_16_altivec, MC_avg_xy_16_altivec
2086 MC_avg_o_8_altivec, MC_avg_x_8_altivec, MC_avg_y_8_altivec, MC_avg_xy_8_altivec
2091 #define list p_function_list->functions.motion
2092 memcpy( list.ppppf_motion, ppppf_motion, sizeof( void * ) * 16 );
2098 #endif /* __BUILD_ALTIVEC_ASM__ */