1 /*****************************************************************************
2 * motionaltivec.c : AltiVec motion compensation module for vlc
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: motionaltivec.c,v 1.13 2002/06/01 12:32:00 sam Exp $
7 * Authors: Michel Lespinasse <walken@zoy.org>
8 * Paul Mackerras <paulus@linuxcare.com.au>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 #ifndef __BUILD_ALTIVEC_ASM__
27 /*****************************************************************************
29 *****************************************************************************/
30 #include <stdlib.h> /* malloc(), free() */
36 /*****************************************************************************
37 * Local and extern prototypes.
38 *****************************************************************************/
39 static void motion_getfunctions( function_list_t * p_function_list );
41 /*****************************************************************************
42 * Build configuration tree.
43 *****************************************************************************/
48 SET_DESCRIPTION( _("AltiVec motion compensation module") )
49 ADD_CAPABILITY( MOTION, 150 )
50 ADD_REQUIREMENT( ALTIVEC )
51 ADD_SHORTCUT( "altivec" )
55 motion_getfunctions( &p_module->p_functions->motion );
58 MODULE_DEACTIVATE_START
59 MODULE_DEACTIVATE_STOP
61 /*****************************************************************************
62 * Motion compensation in AltiVec
63 *****************************************************************************/
65 #ifndef CAN_COMPILE_C_ALTIVEC
68 * The asm code is generated with:
70 * gcc-2.95 -fvec -D__BUILD_ALTIVEC_ASM__ -O9 -fomit-frame-pointer -mregnames -S
71 * motion_comp_altivec.c
73 * sed 's/.L/._L/g' motion_comp_altivec.s |
74 * awk '{args=""; len=split ($2, arg, ",");
75 * for (i=1; i<=len; i++) { a=arg[i]; if (i<len) a=a",";
76 * args = args sprintf ("%-6s", a) }
77 * printf ("\t\"\t%-16s%-24s\\n\"\n", $1, args) }' |
/*
 * MC_put_o_16_altivec (asm version): "put" (plain copy, no half-pel
 * interpolation) of a 16-pixel-wide, `height`-row block from ref to dest.
 * Unaligned source rows are realigned with lvsl/vperm and stored with stvx.
 * Per the PowerPC SysV ABI: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * The asm was machine-generated from the C intrinsics version (see the
 * sed/awk recipe earlier in this file).
 * NOTE(review): this listing is truncated -- the asm() wrapper, the loop
 * labels and the bdnz branch are not visible here (numbering gaps), and the
 * setup of %r9 (presumably the offset 15 for the second load) is missing.
 */
81 static void MC_put_o_16_altivec (uint8_t * dest, uint8_t * ref,
82 int stride, int height)
/* loop counter = height/2 - 1: two rows are handled per iteration */
85 " srawi %r6, %r6, 1 \n"
87 " addi %r6, %r6, -1 \n"
/* permute vector encoding the misalignment of ref */
88 " lvsl %v12, 0, %r4 \n"
91 " lvx %v0, %r9, %r4 \n"
92 " add %r0, %r5, %r5 \n"
93 " vperm %v13, %v1, %v0, %v12 \n"
94 " add %r4, %r4, %r5 \n"
/* main loop body: load two rows ahead, store two realigned rows */
98 " lvx %v0, %r9, %r4 \n"
99 " stvx %v13, 0, %r3 \n"
100 " vperm %v13, %v1, %v0, %v12 \n"
101 " add %r4, %r4, %r5 \n"
102 " lvx %v1, 0, %r4 \n"
103 " lvx %v0, %r9, %r4 \n"
104 " stvx %v13, %r5, %r3 \n"
105 " vperm %v13, %v1, %v0, %v12 \n"
106 " add %r4, %r4, %r5 \n"
107 " add %r3, %r3, %r0 \n"
/* epilogue: final two rows */
109 " lvx %v0, %r9, %r4 \n"
110 " lvx %v1, 0, %r4 \n"
111 " stvx %v13, 0, %r3 \n"
112 " vperm %v13, %v1, %v0, %v12 \n"
113 " stvx %v13, %r5, %r3 \n"
/*
 * MC_put_o_8_altivec (asm version): "put" (plain copy) of an 8-pixel-wide,
 * `height`-row block from ref to dest.  Two alternating permute vectors
 * (v10/v11, built with vmrghb+vpkuhum from lvsl of row 0 and row 1) handle
 * the two possible alignments of successive 8-byte rows; results are stored
 * as two 4-byte elements per row with stvewx.
 * ABI: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): truncated listing -- asm() wrapper, loop labels/branches
 * and the setup of %r9 (second-load offset) are not visible.
 */
117 static void MC_put_o_8_altivec (uint8_t * dest, uint8_t * ref,
118 int stride, int height)
/* alignment permutes for even rows (v10) and odd rows (v11) */
121 " lvsl %v12, 0, %r4 \n"
122 " lvsl %v1, %r5, %r4 \n"
123 " vmrghb %v12, %v12, %v12 \n"
/* loop counter = height/2 - 1 */
124 " srawi %r6, %r6, 1 \n"
126 " vmrghb %v1, %v1, %v1 \n"
127 " addi %r6, %r6, -1 \n"
128 " vpkuhum %v10, %v12, %v12 \n"
129 " lvx %v13, 0, %r4 \n"
131 " vpkuhum %v11, %v1, %v1 \n"
132 " lvx %v0, %r9, %r4 \n"
133 " add %r4, %r4, %r5 \n"
134 " vperm %v12, %v13, %v0, %v10 \n"
/* main loop body: two rows per iteration, 2x stvewx per row */
137 " lvx %v0, %r9, %r4 \n"
138 " lvx %v13, 0, %r4 \n"
139 " stvewx %v12, 0, %r3 \n"
141 " vperm %v1, %v13, %v0, %v11 \n"
142 " stvewx %v12, %r9, %r3 \n"
143 " add %r4, %r4, %r5 \n"
145 " lvx %v0, %r9, %r4 \n"
146 " lvx %v13, 0, %r4 \n"
147 " add %r3, %r3, %r5 \n"
148 " stvewx %v1, 0, %r3 \n"
149 " vperm %v12, %v13, %v0, %v10 \n"
151 " stvewx %v1, %r9, %r3 \n"
152 " add %r4, %r4, %r5 \n"
153 " add %r3, %r3, %r5 \n"
/* epilogue: final two rows */
156 " lvx %v0, %r9, %r4 \n"
157 " lvx %v13, 0, %r4 \n"
158 " stvewx %v12, 0, %r3 \n"
160 " vperm %v1, %v13, %v0, %v11 \n"
161 " stvewx %v12, %r9, %r3 \n"
162 " add %r3, %r3, %r5 \n"
163 " stvewx %v1, 0, %r3 \n"
164 " stvewx %v1, %r9, %r3 \n"
/*
 * MC_put_x_16_altivec (asm version): "put" with horizontal half-pel
 * interpolation of a 16-pixel-wide block: each output pixel is the rounded
 * average (vavgub) of a source pixel and its right neighbour, selected with
 * two permute vectors that differ by 1 (v11 and v10 = v11 + splat(1)).
 * ABI: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): truncated listing -- asm() wrapper, loop labels/branches
 * and the setup of %r9 (second-load offset) are not visible.
 */
168 static void MC_put_x_16_altivec (uint8_t * dest, uint8_t * ref,
169 int stride, int height)
172 " lvsl %v11, 0, %r4 \n"
173 " vspltisb %v0, 1 \n"
175 " lvx %v12, 0, %r4 \n"
/* v10 = v11 + 1: selects the pixel one to the right */
176 " vaddubm %v10, %v11, %v0 \n"
177 " lvx %v13, %r9, %r4 \n"
/* loop counter = height/2 - 1 */
178 " srawi %r6, %r6, 1 \n"
179 " addi %r6, %r6, -1 \n"
180 " vperm %v1, %v12, %v13, %v10 \n"
181 " vperm %v0, %v12, %v13, %v11 \n"
183 " add %r0, %r5, %r5 \n"
184 " add %r4, %r4, %r5 \n"
185 " vavgub %v0, %v0, %v1 \n"
/* main loop body: two interpolated rows per iteration */
188 " lvx %v12, 0, %r4 \n"
189 " lvx %v13, %r9, %r4 \n"
190 " stvx %v0, 0, %r3 \n"
191 " vperm %v1, %v12, %v13, %v10 \n"
192 " add %r4, %r4, %r5 \n"
193 " vperm %v0, %v12, %v13, %v11 \n"
194 " lvx %v12, 0, %r4 \n"
195 " lvx %v13, %r9, %r4 \n"
196 " vavgub %v0, %v0, %v1 \n"
197 " stvx %v0, %r5, %r3 \n"
198 " vperm %v1, %v12, %v13, %v10 \n"
199 " add %r4, %r4, %r5 \n"
200 " vperm %v0, %v12, %v13, %v11 \n"
201 " add %r3, %r3, %r0 \n"
202 " vavgub %v0, %v0, %v1 \n"
/* epilogue: final two rows */
204 " lvx %v13, %r9, %r4 \n"
205 " lvx %v12, 0, %r4 \n"
206 " stvx %v0, 0, %r3 \n"
207 " vperm %v1, %v12, %v13, %v10 \n"
208 " vperm %v0, %v12, %v13, %v11 \n"
209 " vavgub %v0, %v0, %v1 \n"
210 " stvx %v0, %r5, %r3 \n"
/*
 * MC_put_x_8_altivec (asm version): "put" with horizontal half-pel
 * interpolation of an 8-pixel-wide block.  Two alignment permutes (v8 for
 * even rows, v9 for odd rows, built with vmrghb+vpkuhum) each get a "+1"
 * companion (v7, v13) so vavgub can average horizontally adjacent pixels.
 * Rows are stored as two 4-byte elements via stvewx.
 * ABI: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): truncated listing -- asm() wrapper, loop labels/branches
 * and the setup of %r9 (second-load offset) are not visible.
 */
214 static void MC_put_x_8_altivec (uint8_t * dest, uint8_t * ref,
215 int stride, int height)
218 " lvsl %v0, 0, %r4 \n"
219 " vspltisb %v13, 1 \n"
220 " lvsl %v10, %r5, %r4 \n"
221 " vmrghb %v0, %v0, %v0 \n"
223 " lvx %v11, 0, %r4 \n"
224 " vmrghb %v10, %v10, %v10 \n"
225 " vpkuhum %v8, %v0, %v0 \n"
226 " lvx %v12, %r9, %r4 \n"
/* loop counter = height/2 - 1 */
227 " srawi %r6, %r6, 1 \n"
228 " vpkuhum %v9, %v10, %v10 \n"
/* "+1" permutes select the right-hand neighbour pixels */
229 " vaddubm %v7, %v8, %v13 \n"
230 " addi %r6, %r6, -1 \n"
231 " vperm %v1, %v11, %v12, %v8 \n"
233 " vaddubm %v13, %v9, %v13 \n"
234 " add %r4, %r4, %r5 \n"
235 " vperm %v0, %v11, %v12, %v7 \n"
236 " vavgub %v0, %v1, %v0 \n"
/* main loop body: two rows per iteration */
239 " lvx %v12, %r9, %r4 \n"
240 " lvx %v11, 0, %r4 \n"
241 " stvewx %v0, 0, %r3 \n"
243 " vperm %v1, %v11, %v12, %v13 \n"
244 " stvewx %v0, %r9, %r3 \n"
245 " vperm %v0, %v11, %v12, %v9 \n"
246 " add %r4, %r4, %r5 \n"
248 " lvx %v12, %r9, %r4 \n"
249 " vavgub %v10, %v0, %v1 \n"
250 " lvx %v11, 0, %r4 \n"
251 " add %r3, %r3, %r5 \n"
252 " stvewx %v10, 0, %r3 \n"
253 " vperm %v1, %v11, %v12, %v7 \n"
254 " vperm %v0, %v11, %v12, %v8 \n"
256 " stvewx %v10, %r9, %r3 \n"
257 " add %r4, %r4, %r5 \n"
258 " vavgub %v0, %v0, %v1 \n"
259 " add %r3, %r3, %r5 \n"
/* epilogue: final two rows */
262 " lvx %v12, %r9, %r4 \n"
263 " lvx %v11, 0, %r4 \n"
264 " stvewx %v0, 0, %r3 \n"
266 " vperm %v1, %v11, %v12, %v13 \n"
267 " stvewx %v0, %r9, %r3 \n"
268 " vperm %v0, %v11, %v12, %v9 \n"
269 " add %r3, %r3, %r5 \n"
270 " vavgub %v10, %v0, %v1 \n"
271 " stvewx %v10, 0, %r3 \n"
272 " stvewx %v10, %r9, %r3 \n"
/*
 * MC_put_y_16_altivec (asm version): "put" with vertical half-pel
 * interpolation of a 16-pixel-wide block: each output row is the rounded
 * average (vavgub) of two vertically adjacent source rows, so the realigned
 * previous row (v12/v11 alternating) is kept across iterations.
 * ABI: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): truncated listing -- asm() wrapper, loop labels/branches
 * and the setup of %r9 (second-load offset) are not visible.
 */
276 static void MC_put_y_16_altivec (uint8_t * dest, uint8_t * ref,
277 int stride, int height)
281 " lvsl %v10, 0, %r4 \n"
282 " lvx %v13, 0, %r4 \n"
283 " lvx %v1, %r9, %r4 \n"
284 " add %r4, %r4, %r5 \n"
/* v12 = realigned row 0 */
285 " vperm %v12, %v13, %v1, %v10 \n"
/* loop counter = height/2 - 1 */
286 " srawi %r6, %r6, 1 \n"
287 " lvx %v13, 0, %r4 \n"
288 " lvx %v1, %r9, %r4 \n"
289 " addi %r6, %r6, -1 \n"
/* v11 = realigned row 1 */
290 " vperm %v11, %v13, %v1, %v10 \n"
292 " add %r0, %r5, %r5 \n"
293 " add %r4, %r4, %r5 \n"
294 " vavgub %v0, %v12, %v11 \n"
/* main loop body: two averaged rows per iteration */
297 " lvx %v13, 0, %r4 \n"
298 " lvx %v1, %r9, %r4 \n"
299 " stvx %v0, 0, %r3 \n"
300 " vperm %v12, %v13, %v1, %v10 \n"
301 " add %r4, %r4, %r5 \n"
302 " lvx %v13, 0, %r4 \n"
303 " lvx %v1, %r9, %r4 \n"
304 " vavgub %v0, %v12, %v11 \n"
305 " stvx %v0, %r5, %r3 \n"
306 " vperm %v11, %v13, %v1, %v10 \n"
307 " add %r4, %r4, %r5 \n"
308 " add %r3, %r3, %r0 \n"
309 " vavgub %v0, %v12, %v11 \n"
/* epilogue: final two rows */
311 " lvx %v1, %r9, %r4 \n"
312 " lvx %v13, 0, %r4 \n"
313 " stvx %v0, 0, %r3 \n"
314 " vperm %v12, %v13, %v1, %v10 \n"
315 " vavgub %v0, %v12, %v11 \n"
316 " stvx %v0, %r5, %r3 \n"
/*
 * MC_put_y_8_altivec (asm version): "put" with vertical half-pel
 * interpolation of an 8-pixel-wide block.  Alternating alignment permutes
 * v9/v10 handle even/odd rows; each output row is vavgub of two adjacent
 * realigned rows (v13 and v11 carried across iterations); stores use
 * stvewx (two 4-byte elements per row).
 * ABI: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): truncated listing -- asm() wrapper, loop labels/branches
 * and the setup of %r9 (second-load offset) are not visible.
 */
320 static void MC_put_y_8_altivec (uint8_t * dest, uint8_t * ref,
321 int stride, int height)
324 " lvsl %v13, 0, %r4 \n"
325 " lvsl %v11, %r5, %r4 \n"
326 " vmrghb %v13, %v13, %v13 \n"
328 " lvx %v12, 0, %r4 \n"
329 " vmrghb %v11, %v11, %v11 \n"
330 " lvx %v1, %r9, %r4 \n"
331 " vpkuhum %v9, %v13, %v13 \n"
332 " add %r4, %r4, %r5 \n"
333 " vpkuhum %v10, %v11, %v11 \n"
334 " vperm %v13, %v12, %v1, %v9 \n"
/* loop counter = height/2 - 1 */
335 " srawi %r6, %r6, 1 \n"
336 " lvx %v12, 0, %r4 \n"
337 " lvx %v1, %r9, %r4 \n"
338 " addi %r6, %r6, -1 \n"
339 " vperm %v11, %v12, %v1, %v10 \n"
341 " add %r4, %r4, %r5 \n"
342 " vavgub %v0, %v13, %v11 \n"
/* main loop body: two rows per iteration */
345 " lvx %v1, %r9, %r4 \n"
346 " lvx %v12, 0, %r4 \n"
347 " stvewx %v0, 0, %r3 \n"
349 " vperm %v13, %v12, %v1, %v9 \n"
350 " stvewx %v0, %r9, %r3 \n"
351 " add %r4, %r4, %r5 \n"
352 " vavgub %v0, %v13, %v11 \n"
354 " lvx %v1, %r9, %r4 \n"
355 " lvx %v12, 0, %r4 \n"
356 " add %r3, %r3, %r5 \n"
357 " stvewx %v0, 0, %r3 \n"
358 " vperm %v11, %v12, %v1, %v10 \n"
360 " stvewx %v0, %r9, %r3 \n"
361 " vavgub %v0, %v13, %v11 \n"
362 " add %r4, %r4, %r5 \n"
363 " add %r3, %r3, %r5 \n"
/* epilogue: final two rows */
366 " lvx %v1, %r9, %r4 \n"
367 " lvx %v12, 0, %r4 \n"
368 " stvewx %v0, 0, %r3 \n"
370 " vperm %v13, %v12, %v1, %v9 \n"
371 " stvewx %v0, %r9, %r3 \n"
372 " add %r3, %r3, %r5 \n"
373 " vavgub %v0, %v13, %v11 \n"
374 " stvewx %v0, 0, %r3 \n"
375 " stvewx %v0, %r9, %r3 \n"
/*
 * MC_put_xy_16_altivec (asm version): "put" with 2-D (horizontal+vertical)
 * half-pel interpolation of a 16-pixel-wide block.  Each source row yields
 * a horizontal pair average (vavgub of permutes v5 and v4 = v5+1); two such
 * row averages are then combined with the classic vavg rounding-correction
 * trick: avg(a,b) - (((a^b) | carry-bits) & 1 & (avg_a^avg_b)) -- the
 * vxor/vor/vand/vsububm sequence -- so the result matches
 * (p00+p01+p10+p11+2)>>2 without widening to 16 bits.
 * ABI: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): truncated listing -- asm() wrapper, loop labels/branches
 * and the setup of %r9 (second-load offset) are not visible.
 */
379 static void MC_put_xy_16_altivec (uint8_t * dest, uint8_t * ref,
380 int stride, int height)
383 " lvsl %v5, 0, %r4 \n"
384 " vspltisb %v3, 1 \n"
386 " lvx %v1, 0, %r4 \n"
/* v4 = v5 + 1: selects the right-hand neighbour */
387 " vaddubm %v4, %v5, %v3 \n"
388 " lvx %v0, %r9, %r4 \n"
389 " add %r4, %r4, %r5 \n"
390 " vperm %v10, %v1, %v0, %v4 \n"
/* loop counter = height/2 - 1 */
391 " srawi %r6, %r6, 1 \n"
392 " vperm %v11, %v1, %v0, %v5 \n"
393 " addi %r6, %r6, -1 \n"
394 " lvx %v1, 0, %r4 \n"
396 " lvx %v0, %r9, %r4 \n"
/* row 0: horizontal average v9 and its lost-carry bits v8 */
397 " vavgub %v9, %v11, %v10 \n"
398 " vxor %v8, %v11, %v10 \n"
399 " add %r0, %r5, %r5 \n"
400 " vperm %v10, %v1, %v0, %v4 \n"
401 " add %r4, %r4, %r5 \n"
402 " vperm %v11, %v1, %v0, %v5 \n"
/* row 1: horizontal average v7 / carry bits v6 */
403 " vxor %v6, %v11, %v10 \n"
404 " vavgub %v7, %v11, %v10 \n"
/* vertical combine with rounding correction */
405 " vor %v0, %v8, %v6 \n"
406 " vxor %v13, %v9, %v7 \n"
407 " vand %v0, %v3, %v0 \n"
408 " vavgub %v1, %v9, %v7 \n"
409 " vand %v0, %v0, %v13 \n"
410 " vsububm %v13, %v1, %v0 \n"
/* main loop body: two output rows per iteration */
413 " lvx %v1, 0, %r4 \n"
414 " lvx %v0, %r9, %r4 \n"
415 " stvx %v13, 0, %r3 \n"
416 " vperm %v10, %v1, %v0, %v4 \n"
417 " add %r4, %r4, %r5 \n"
418 " vperm %v11, %v1, %v0, %v5 \n"
419 " lvx %v1, 0, %r4 \n"
420 " lvx %v0, %r9, %r4 \n"
421 " vavgub %v9, %v11, %v10 \n"
422 " vxor %v8, %v11, %v10 \n"
423 " add %r4, %r4, %r5 \n"
424 " vperm %v10, %v1, %v0, %v4 \n"
425 " vavgub %v12, %v9, %v7 \n"
426 " vperm %v11, %v1, %v0, %v5 \n"
427 " vor %v13, %v8, %v6 \n"
428 " vxor %v0, %v9, %v7 \n"
429 " vxor %v6, %v11, %v10 \n"
430 " vand %v13, %v3, %v13 \n"
431 " vavgub %v7, %v11, %v10 \n"
432 " vor %v1, %v8, %v6 \n"
433 " vand %v13, %v13, %v0 \n"
434 " vxor %v0, %v9, %v7 \n"
435 " vand %v1, %v3, %v1 \n"
436 " vsububm %v13, %v12, %v13 \n"
437 " vand %v1, %v1, %v0 \n"
438 " stvx %v13, %r5, %r3 \n"
439 " vavgub %v0, %v9, %v7 \n"
440 " add %r3, %r3, %r0 \n"
441 " vsububm %v13, %v0, %v1 \n"
/* epilogue: final two rows */
443 " lvx %v0, %r9, %r4 \n"
444 " lvx %v1, 0, %r4 \n"
445 " stvx %v13, 0, %r3 \n"
446 " vperm %v10, %v1, %v0, %v4 \n"
447 " vperm %v11, %v1, %v0, %v5 \n"
448 " vxor %v8, %v11, %v10 \n"
449 " vavgub %v9, %v11, %v10 \n"
450 " vor %v0, %v8, %v6 \n"
451 " vxor %v13, %v9, %v7 \n"
452 " vand %v0, %v3, %v0 \n"
453 " vavgub %v1, %v9, %v7 \n"
454 " vand %v0, %v0, %v13 \n"
455 " vsububm %v13, %v1, %v0 \n"
456 " stvx %v13, %r5, %r3 \n"
/*
 * MC_put_xy_8_altivec (asm version): "put" with 2-D half-pel interpolation
 * of an 8-pixel-wide block.  Same rounding-corrected average-of-four scheme
 * as MC_put_xy_16_altivec (vavgub plus vxor/vor/vand/vsububm correction),
 * but with per-row-parity alignment permutes v4/v5 (and their "+1"
 * companions v2/v19) and stvewx stores of two 4-byte elements per row.
 * ABI: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): truncated listing -- asm() wrapper, loop labels/branches
 * and the setup of %r9 (second-load offset) are not visible.
 */
460 static void MC_put_xy_8_altivec (uint8_t * dest, uint8_t * ref,
461 int stride, int height)
464 " lvsl %v4, 0, %r4 \n"
465 " vspltisb %v3, 1 \n"
466 " lvsl %v5, %r5, %r4 \n"
467 " vmrghb %v4, %v4, %v4 \n"
469 " vmrghb %v5, %v5, %v5 \n"
470 " lvx %v1, 0, %r4 \n"
471 " vpkuhum %v4, %v4, %v4 \n"
472 " lvx %v0, %r9, %r4 \n"
473 " vpkuhum %v5, %v5, %v5 \n"
474 " add %r4, %r4, %r5 \n"
/* "+1" permutes select the right-hand neighbour pixels */
475 " vaddubm %v2, %v4, %v3 \n"
476 " vperm %v11, %v1, %v0, %v4 \n"
/* loop counter = height/2 - 1 */
477 " srawi %r6, %r6, 1 \n"
478 " vaddubm %v19, %v5, %v3 \n"
479 " addi %r6, %r6, -1 \n"
480 " vperm %v10, %v1, %v0, %v2 \n"
482 " lvx %v1, 0, %r4 \n"
483 " lvx %v0, %r9, %r4 \n"
/* horizontal averages and lost-carry bits for the first two rows */
484 " vavgub %v9, %v11, %v10 \n"
485 " vxor %v8, %v11, %v10 \n"
486 " add %r4, %r4, %r5 \n"
487 " vperm %v10, %v1, %v0, %v19 \n"
488 " vperm %v11, %v1, %v0, %v5 \n"
489 " vxor %v6, %v11, %v10 \n"
490 " vavgub %v7, %v11, %v10 \n"
/* vertical combine with rounding correction */
491 " vor %v0, %v8, %v6 \n"
492 " vxor %v13, %v9, %v7 \n"
493 " vand %v0, %v3, %v0 \n"
494 " vavgub %v1, %v9, %v7 \n"
495 " vand %v0, %v0, %v13 \n"
496 " vsububm %v13, %v1, %v0 \n"
/* main loop body: two output rows per iteration */
499 " lvx %v0, %r9, %r4 \n"
500 " lvx %v1, 0, %r4 \n"
501 " stvewx %v13, 0, %r3 \n"
503 " vperm %v10, %v1, %v0, %v2 \n"
504 " stvewx %v13, %r9, %r3 \n"
505 " vperm %v11, %v1, %v0, %v4 \n"
506 " add %r4, %r4, %r5 \n"
508 " vavgub %v9, %v11, %v10 \n"
509 " lvx %v0, %r9, %r4 \n"
510 " vxor %v8, %v11, %v10 \n"
511 " lvx %v1, 0, %r4 \n"
512 " vavgub %v12, %v9, %v7 \n"
513 " vor %v13, %v8, %v6 \n"
514 " add %r3, %r3, %r5 \n"
515 " vperm %v10, %v1, %v0, %v19 \n"
517 " vperm %v11, %v1, %v0, %v5 \n"
518 " vand %v13, %v3, %v13 \n"
519 " add %r4, %r4, %r5 \n"
520 " vxor %v0, %v9, %v7 \n"
521 " vxor %v6, %v11, %v10 \n"
522 " vavgub %v7, %v11, %v10 \n"
523 " vor %v1, %v8, %v6 \n"
524 " vand %v13, %v13, %v0 \n"
525 " vxor %v0, %v9, %v7 \n"
526 " vand %v1, %v3, %v1 \n"
527 " vsububm %v13, %v12, %v13 \n"
528 " vand %v1, %v1, %v0 \n"
529 " stvewx %v13, 0, %r3 \n"
530 " vavgub %v0, %v9, %v7 \n"
531 " stvewx %v13, %r9, %r3 \n"
532 " add %r3, %r3, %r5 \n"
533 " vsububm %v13, %v0, %v1 \n"
/* epilogue: final two rows */
536 " lvx %v0, %r9, %r4 \n"
537 " lvx %v1, 0, %r4 \n"
538 " stvewx %v13, 0, %r3 \n"
539 " vperm %v10, %v1, %v0, %v2 \n"
541 " vperm %v11, %v1, %v0, %v4 \n"
542 " stvewx %v13, %r9, %r3 \n"
543 " add %r3, %r3, %r5 \n"
544 " vxor %v8, %v11, %v10 \n"
545 " vavgub %v9, %v11, %v10 \n"
546 " vor %v0, %v8, %v6 \n"
547 " vxor %v13, %v9, %v7 \n"
548 " vand %v0, %v3, %v0 \n"
549 " vavgub %v1, %v9, %v7 \n"
550 " vand %v0, %v0, %v13 \n"
551 " vsububm %v13, %v1, %v0 \n"
552 " stvewx %v13, 0, %r3 \n"
553 " stvewx %v13, %r9, %r3 \n"
/*
 * MC_avg_o_16_altivec (asm version): "avg" variant of the 16-wide copy --
 * the realigned prediction row is averaged (vavgub) with the pixels already
 * in dest (loaded with lvx from %r3) before being stored back, as used for
 * B-frame bidirectional prediction.  No half-pel interpolation.
 * ABI: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): truncated listing -- asm() wrapper and loop labels/branches
 * are not visible; %r9 is reassigned to 2*stride below, after which %r11 is
 * used as the second-load offset (its setup instruction is missing here).
 */
557 static void MC_avg_o_16_altivec (uint8_t * dest, uint8_t * ref,
558 int stride, int height)
562 " lvx %v0, %r9, %r4 \n"
563 " lvsl %v11, 0, %r4 \n"
564 " lvx %v1, 0, %r4 \n"
/* loop counter = height/2 - 1 */
565 " srawi %r6, %r6, 1 \n"
566 " addi %r6, %r6, -1 \n"
567 " vperm %v0, %v1, %v0, %v11 \n"
/* load the current dest row for the averaging */
568 " lvx %v13, 0, %r3 \n"
570 " add %r9, %r5, %r5 \n"
571 " vavgub %v12, %v13, %v0 \n"
572 " add %r4, %r4, %r5 \n"
/* main loop body: two averaged rows per iteration */
575 " lvx %v1, 0, %r4 \n"
576 " lvx %v0, %r11, %r4 \n"
577 " lvx %v13, %r5, %r3 \n"
578 " vperm %v0, %v1, %v0, %v11 \n"
579 " stvx %v12, 0, %r3 \n"
580 " add %r4, %r4, %r5 \n"
581 " vavgub %v12, %v13, %v0 \n"
582 " lvx %v1, 0, %r4 \n"
583 " lvx %v0, %r11, %r4 \n"
584 " lvx %v13, %r9, %r3 \n"
585 " vperm %v0, %v1, %v0, %v11 \n"
586 " stvx %v12, %r5, %r3 \n"
587 " add %r4, %r4, %r5 \n"
588 " vavgub %v12, %v13, %v0 \n"
589 " add %r3, %r3, %r9 \n"
/* epilogue: final two rows */
591 " lvx %v0, %r11, %r4 \n"
592 " lvx %v1, 0, %r4 \n"
593 " lvx %v13, %r5, %r3 \n"
594 " vperm %v0, %v1, %v0, %v11 \n"
595 " stvx %v12, 0, %r3 \n"
596 " vavgub %v12, %v13, %v0 \n"
597 " stvx %v12, %r5, %r3 \n"
/*
 * MC_avg_o_8_altivec (asm version): "avg" variant of the 8-wide copy --
 * each realigned prediction row is averaged (vavgub) with the existing dest
 * pixels (lvx from %r3) before the stvewx stores.  Alternating alignment
 * permutes v9/v10 handle even/odd rows.  No half-pel interpolation.
 * ABI: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): truncated listing -- asm() wrapper, loop labels/branches
 * and the setup of %r9 (second-load offset) are not visible.
 */
601 static void MC_avg_o_8_altivec (uint8_t * dest, uint8_t * ref,
602 int stride, int height)
605 " lvsl %v12, 0, %r4 \n"
607 " vmrghb %v12, %v12, %v12 \n"
608 " lvsl %v1, %r5, %r4 \n"
609 " lvx %v13, 0, %r4 \n"
610 " vpkuhum %v9, %v12, %v12 \n"
611 " lvx %v0, %r9, %r4 \n"
/* loop counter = height/2 - 1 */
612 " srawi %r6, %r6, 1 \n"
613 " vmrghb %v1, %v1, %v1 \n"
614 " addi %r6, %r6, -1 \n"
615 " vperm %v0, %v13, %v0, %v9 \n"
/* load the current dest row for the averaging */
616 " lvx %v11, 0, %r3 \n"
618 " vpkuhum %v10, %v1, %v1 \n"
619 " add %r4, %r4, %r5 \n"
620 " vavgub %v12, %v11, %v0 \n"
/* main loop body: two averaged rows per iteration */
623 " lvx %v0, %r9, %r4 \n"
624 " lvx %v13, 0, %r4 \n"
625 " lvx %v11, %r5, %r3 \n"
626 " stvewx %v12, 0, %r3 \n"
627 " vperm %v0, %v13, %v0, %v10 \n"
629 " stvewx %v12, %r9, %r3 \n"
630 " vavgub %v1, %v11, %v0 \n"
631 " add %r4, %r4, %r5 \n"
633 " lvx %v0, %r9, %r4 \n"
634 " add %r3, %r3, %r5 \n"
635 " lvx %v13, 0, %r4 \n"
636 " lvx %v11, %r5, %r3 \n"
637 " stvewx %v1, 0, %r3 \n"
638 " vperm %v0, %v13, %v0, %v9 \n"
640 " stvewx %v1, %r9, %r3 \n"
641 " vavgub %v12, %v11, %v0 \n"
642 " add %r4, %r4, %r5 \n"
643 " add %r3, %r3, %r5 \n"
/* epilogue: final two rows */
646 " lvx %v0, %r9, %r4 \n"
647 " lvx %v13, 0, %r4 \n"
648 " lvx %v11, %r5, %r3 \n"
649 " stvewx %v12, 0, %r3 \n"
650 " vperm %v0, %v13, %v0, %v10 \n"
652 " stvewx %v12, %r9, %r3 \n"
653 " vavgub %v1, %v11, %v0 \n"
654 " add %r3, %r3, %r5 \n"
655 " stvewx %v1, 0, %r3 \n"
656 " stvewx %v1, %r9, %r3 \n"
/*
 * MC_avg_x_16_altivec (asm version): "avg" variant of the 16-wide
 * horizontal half-pel filter: the horizontally interpolated row (vavgub of
 * permutes v8 and v7 = v8+1) is additionally averaged with the existing
 * dest pixels (lvx from %r3) before being stored.
 * ABI: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): truncated listing -- asm() wrapper and loop labels/branches
 * are not visible; %r9 is reassigned to 2*stride below, after which %r11 is
 * used as the second-load offset (its setup instruction is missing here).
 */
660 static void MC_avg_x_16_altivec (uint8_t * dest, uint8_t * ref,
661 int stride, int height)
664 " lvsl %v8, 0, %r4 \n"
665 " vspltisb %v0, 1 \n"
/* v7 = v8 + 1: selects the right-hand neighbour */
667 " lvx %v12, %r9, %r4 \n"
668 " vaddubm %v7, %v8, %v0 \n"
669 " lvx %v11, 0, %r4 \n"
/* loop counter = height/2 - 1 */
670 " srawi %r6, %r6, 1 \n"
671 " vperm %v1, %v11, %v12, %v7 \n"
672 " addi %r6, %r6, -1 \n"
673 " vperm %v0, %v11, %v12, %v8 \n"
/* load the current dest row for the final averaging */
674 " lvx %v9, 0, %r3 \n"
676 " add %r9, %r5, %r5 \n"
677 " vavgub %v0, %v0, %v1 \n"
678 " add %r4, %r4, %r5 \n"
679 " vavgub %v10, %v9, %v0 \n"
/* main loop body: two rows per iteration */
682 " lvx %v11, 0, %r4 \n"
683 " lvx %v12, %r11, %r4 \n"
684 " lvx %v9, %r5, %r3 \n"
685 " stvx %v10, 0, %r3 \n"
686 " vperm %v0, %v11, %v12, %v7 \n"
687 " add %r4, %r4, %r5 \n"
688 " vperm %v1, %v11, %v12, %v8 \n"
689 " lvx %v11, 0, %r4 \n"
690 " lvx %v12, %r11, %r4 \n"
691 " vavgub %v1, %v1, %v0 \n"
692 " add %r4, %r4, %r5 \n"
693 " vperm %v13, %v11, %v12, %v7 \n"
694 " vavgub %v10, %v9, %v1 \n"
695 " vperm %v0, %v11, %v12, %v8 \n"
696 " lvx %v9, %r9, %r3 \n"
697 " stvx %v10, %r5, %r3 \n"
698 " vavgub %v0, %v0, %v13 \n"
699 " add %r3, %r3, %r9 \n"
700 " vavgub %v10, %v9, %v0 \n"
/* epilogue: final two rows */
702 " lvx %v12, %r11, %r4 \n"
703 " lvx %v11, 0, %r4 \n"
704 " lvx %v9, %r5, %r3 \n"
705 " vperm %v1, %v11, %v12, %v7 \n"
706 " stvx %v10, 0, %r3 \n"
707 " vperm %v0, %v11, %v12, %v8 \n"
708 " vavgub %v0, %v0, %v1 \n"
709 " vavgub %v10, %v9, %v0 \n"
710 " stvx %v10, %r5, %r3 \n"
/*
 * MC_avg_x_8_altivec (asm version): "avg" variant of the 8-wide horizontal
 * half-pel filter.  Per-row-parity alignment permutes v7/v8 with "+1"
 * companions v6/v13 give the horizontal average; the result is then
 * averaged with the existing dest pixels (lvx from %r3) and stored via
 * stvewx (two 4-byte elements per row).
 * ABI: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): truncated listing -- asm() wrapper, loop labels/branches
 * and the setup of %r9 (second-load offset) are not visible.
 */
714 static void MC_avg_x_8_altivec (uint8_t * dest, uint8_t * ref,
715 int stride, int height)
718 " lvsl %v10, 0, %r4 \n"
719 " vspltisb %v13, 1 \n"
721 " vmrghb %v10, %v10, %v10 \n"
722 " lvx %v11, 0, %r4 \n"
723 " lvx %v12, %r9, %r4 \n"
724 " vpkuhum %v7, %v10, %v10 \n"
/* loop counter = height/2 - 1 */
725 " srawi %r6, %r6, 1 \n"
726 " lvsl %v10, %r5, %r4 \n"
/* "+1" permutes select the right-hand neighbour pixels */
727 " vaddubm %v6, %v7, %v13 \n"
728 " vperm %v0, %v11, %v12, %v7 \n"
729 " addi %r6, %r6, -1 \n"
730 " vmrghb %v10, %v10, %v10 \n"
/* load the current dest row for the final averaging */
731 " lvx %v9, 0, %r3 \n"
733 " vperm %v1, %v11, %v12, %v6 \n"
734 " add %r4, %r4, %r5 \n"
735 " vpkuhum %v8, %v10, %v10 \n"
736 " vavgub %v0, %v0, %v1 \n"
737 " vaddubm %v13, %v8, %v13 \n"
738 " vavgub %v10, %v9, %v0 \n"
/* main loop body: two rows per iteration */
741 " lvx %v12, %r9, %r4 \n"
742 " lvx %v11, 0, %r4 \n"
743 " lvx %v9, %r5, %r3 \n"
744 " stvewx %v10, 0, %r3 \n"
745 " vperm %v1, %v11, %v12, %v13 \n"
746 " vperm %v0, %v11, %v12, %v8 \n"
748 " stvewx %v10, %r9, %r3 \n"
749 " add %r4, %r4, %r5 \n"
750 " vavgub %v0, %v0, %v1 \n"
752 " lvx %v12, %r9, %r4 \n"
753 " vavgub %v10, %v9, %v0 \n"
754 " lvx %v11, 0, %r4 \n"
755 " add %r3, %r3, %r5 \n"
756 " vperm %v1, %v11, %v12, %v6 \n"
757 " lvx %v9, %r5, %r3 \n"
758 " vperm %v0, %v11, %v12, %v7 \n"
759 " stvewx %v10, 0, %r3 \n"
761 " vavgub %v0, %v0, %v1 \n"
762 " stvewx %v10, %r9, %r3 \n"
763 " add %r4, %r4, %r5 \n"
764 " add %r3, %r3, %r5 \n"
765 " vavgub %v10, %v9, %v0 \n"
/* epilogue: final two rows */
768 " lvx %v12, %r9, %r4 \n"
769 " lvx %v11, 0, %r4 \n"
770 " lvx %v9, %r5, %r3 \n"
771 " vperm %v1, %v11, %v12, %v13 \n"
772 " stvewx %v10, 0, %r3 \n"
773 " vperm %v0, %v11, %v12, %v8 \n"
775 " stvewx %v10, %r9, %r3 \n"
776 " vavgub %v0, %v0, %v1 \n"
777 " add %r3, %r3, %r5 \n"
778 " vavgub %v10, %v9, %v0 \n"
779 " stvewx %v10, 0, %r3 \n"
780 " stvewx %v10, %r9, %r3 \n"
/*
 * MC_avg_y_16_altivec (asm version): "avg" variant of the 16-wide vertical
 * half-pel filter: two vertically adjacent realigned rows are averaged
 * (vavgub), and the result is averaged again with the existing dest pixels
 * (lvx from %r3) before the store.
 * ABI: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): truncated listing -- asm() wrapper and loop labels/branches
 * are not visible; %r9 is reassigned to 2*stride below, after which %r11 is
 * used as the second-load offset (its setup instruction is missing here).
 */
784 static void MC_avg_y_16_altivec (uint8_t * dest, uint8_t * ref,
785 int stride, int height)
789 " lvx %v1, %r9, %r4 \n"
790 " lvsl %v9, 0, %r4 \n"
791 " lvx %v13, 0, %r4 \n"
792 " add %r4, %r4, %r5 \n"
/* v11 = realigned row 0 */
793 " vperm %v11, %v13, %v1, %v9 \n"
795 " lvx %v13, 0, %r4 \n"
796 " lvx %v1, %r11, %r4 \n"
/* loop counter = height/2 - 1 */
797 " srawi %r6, %r6, 1 \n"
/* v10 = realigned row 1 */
798 " vperm %v10, %v13, %v1, %v9 \n"
799 " addi %r6, %r6, -1 \n"
/* load the current dest row for the final averaging */
800 " lvx %v12, 0, %r3 \n"
802 " vavgub %v0, %v11, %v10 \n"
803 " add %r9, %r5, %r5 \n"
804 " add %r4, %r4, %r5 \n"
805 " vavgub %v0, %v12, %v0 \n"
/* main loop body: two rows per iteration */
808 " lvx %v13, 0, %r4 \n"
809 " lvx %v1, %r11, %r4 \n"
810 " lvx %v12, %r5, %r3 \n"
811 " vperm %v11, %v13, %v1, %v9 \n"
812 " stvx %v0, 0, %r3 \n"
813 " add %r4, %r4, %r5 \n"
814 " vavgub %v0, %v11, %v10 \n"
815 " lvx %v13, 0, %r4 \n"
816 " lvx %v1, %r11, %r4 \n"
817 " vavgub %v0, %v12, %v0 \n"
818 " add %r4, %r4, %r5 \n"
819 " lvx %v12, %r9, %r3 \n"
820 " vperm %v10, %v13, %v1, %v9 \n"
821 " stvx %v0, %r5, %r3 \n"
822 " vavgub %v0, %v11, %v10 \n"
823 " add %r3, %r3, %r9 \n"
824 " vavgub %v0, %v12, %v0 \n"
/* epilogue: final two rows */
826 " lvx %v1, %r11, %r4 \n"
827 " lvx %v13, 0, %r4 \n"
828 " lvx %v12, %r5, %r3 \n"
829 " vperm %v11, %v13, %v1, %v9 \n"
830 " stvx %v0, 0, %r3 \n"
831 " vavgub %v0, %v11, %v10 \n"
832 " vavgub %v0, %v12, %v0 \n"
833 " stvx %v0, %r5, %r3 \n"
/*
 * MC_avg_y_8_altivec (asm version): "avg" variant of the 8-wide vertical
 * half-pel filter.  Per-row-parity alignment permutes v7/v8 realign the
 * rows; each output row is vavgub of two adjacent rows, averaged again with
 * the existing dest pixels (lvx from %r3), stored via stvewx.
 * ABI: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): truncated listing -- asm() wrapper, loop labels/branches
 * and the setup of %r9 (second-load offset) are not visible.
 */
837 static void MC_avg_y_8_altivec (uint8_t * dest, uint8_t * ref,
838 int stride, int height)
841 " lvsl %v12, 0, %r4 \n"
842 " lvsl %v9, %r5, %r4 \n"
843 " vmrghb %v12, %v12, %v12 \n"
845 " lvx %v11, 0, %r4 \n"
846 " vmrghb %v9, %v9, %v9 \n"
847 " lvx %v13, %r9, %r4 \n"
848 " vpkuhum %v7, %v12, %v12 \n"
849 " add %r4, %r4, %r5 \n"
850 " vpkuhum %v8, %v9, %v9 \n"
851 " vperm %v12, %v11, %v13, %v7 \n"
/* loop counter = height/2 - 1 */
852 " srawi %r6, %r6, 1 \n"
853 " lvx %v11, 0, %r4 \n"
854 " lvx %v13, %r9, %r4 \n"
855 " addi %r6, %r6, -1 \n"
856 " vperm %v9, %v11, %v13, %v8 \n"
/* load the current dest row for the final averaging */
857 " lvx %v10, 0, %r3 \n"
859 " add %r4, %r4, %r5 \n"
860 " vavgub %v0, %v12, %v9 \n"
861 " vavgub %v1, %v10, %v0 \n"
/* main loop body: two rows per iteration */
864 " lvx %v13, %r9, %r4 \n"
865 " lvx %v11, 0, %r4 \n"
866 " lvx %v10, %r5, %r3 \n"
867 " stvewx %v1, 0, %r3 \n"
868 " vperm %v12, %v11, %v13, %v7 \n"
870 " stvewx %v1, %r9, %r3 \n"
871 " vavgub %v0, %v12, %v9 \n"
872 " add %r4, %r4, %r5 \n"
874 " vavgub %v1, %v10, %v0 \n"
875 " lvx %v13, %r9, %r4 \n"
876 " lvx %v11, 0, %r4 \n"
877 " add %r3, %r3, %r5 \n"
878 " vperm %v9, %v11, %v13, %v8 \n"
879 " lvx %v10, %r5, %r3 \n"
880 " stvewx %v1, 0, %r3 \n"
881 " vavgub %v0, %v12, %v9 \n"
883 " stvewx %v1, %r9, %r3 \n"
884 " add %r4, %r4, %r5 \n"
885 " vavgub %v1, %v10, %v0 \n"
886 " add %r3, %r3, %r5 \n"
/* epilogue: final two rows */
889 " lvx %v13, %r9, %r4 \n"
890 " lvx %v11, 0, %r4 \n"
891 " lvx %v10, %r5, %r3 \n"
892 " vperm %v12, %v11, %v13, %v7 \n"
893 " stvewx %v1, 0, %r3 \n"
895 " vavgub %v0, %v12, %v9 \n"
896 " stvewx %v1, %r9, %r3 \n"
897 " add %r3, %r3, %r5 \n"
898 " vavgub %v1, %v10, %v0 \n"
899 " stvewx %v1, 0, %r3 \n"
900 " stvewx %v1, %r9, %r3 \n"
/*
 * MC_avg_xy_16_altivec (asm version): "avg" variant of the 16-wide 2-D
 * half-pel filter: the rounding-corrected average-of-four interpolation
 * (same vavgub + vxor/vor/vand/vsububm trick as MC_put_xy_16_altivec) is
 * additionally averaged with the existing dest pixels (lvx from %r3)
 * before each store.
 * ABI: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): truncated listing -- asm() wrapper and loop labels/branches
 * are not visible; %r9 is reassigned to 2*stride below, after which %r11 is
 * used as the second-load offset (its setup instruction is missing here).
 */
904 static void MC_avg_xy_16_altivec (uint8_t * dest, uint8_t * ref,
905 int stride, int height)
908 " lvsl %v4, 0, %r4 \n"
909 " vspltisb %v2, 1 \n"
/* v3 = v4 + 1: selects the right-hand neighbour */
911 " lvx %v1, %r9, %r4 \n"
912 " vaddubm %v3, %v4, %v2 \n"
913 " lvx %v13, 0, %r4 \n"
914 " add %r4, %r4, %r5 \n"
915 " vperm %v10, %v13, %v1, %v3 \n"
917 " vperm %v11, %v13, %v1, %v4 \n"
/* loop counter = height/2 - 1 */
918 " srawi %r6, %r6, 1 \n"
919 " lvx %v13, 0, %r4 \n"
920 " lvx %v1, %r11, %r4 \n"
/* row 0: horizontal average v9 / lost-carry bits v8 */
921 " vavgub %v9, %v11, %v10 \n"
922 " vxor %v8, %v11, %v10 \n"
923 " addi %r6, %r6, -1 \n"
924 " vperm %v10, %v13, %v1, %v3 \n"
/* load the current dest row for the final averaging */
925 " lvx %v6, 0, %r3 \n"
927 " vperm %v11, %v13, %v1, %v4 \n"
928 " add %r9, %r5, %r5 \n"
929 " add %r4, %r4, %r5 \n"
930 " vxor %v5, %v11, %v10 \n"
931 " vavgub %v7, %v11, %v10 \n"
/* vertical combine with rounding correction, then avg with dest */
932 " vor %v1, %v8, %v5 \n"
933 " vxor %v13, %v9, %v7 \n"
934 " vand %v1, %v2, %v1 \n"
935 " vavgub %v0, %v9, %v7 \n"
936 " vand %v1, %v1, %v13 \n"
937 " vsububm %v0, %v0, %v1 \n"
938 " vavgub %v12, %v6, %v0 \n"
/* main loop body: two output rows per iteration */
941 " lvx %v13, 0, %r4 \n"
942 " lvx %v1, %r11, %r4 \n"
943 " lvx %v6, %r5, %r3 \n"
944 " stvx %v12, 0, %r3 \n"
945 " vperm %v10, %v13, %v1, %v3 \n"
946 " vperm %v11, %v13, %v1, %v4 \n"
947 " add %r4, %r4, %r5 \n"
948 " lvx %v13, 0, %r4 \n"
949 " lvx %v1, %r11, %r4 \n"
950 " vavgub %v9, %v11, %v10 \n"
951 " vxor %v8, %v11, %v10 \n"
952 " add %r4, %r4, %r5 \n"
953 " vperm %v10, %v13, %v1, %v3 \n"
954 " vavgub %v12, %v9, %v7 \n"
955 " vperm %v11, %v13, %v1, %v4 \n"
956 " vor %v0, %v8, %v5 \n"
957 " vxor %v13, %v9, %v7 \n"
958 " vxor %v5, %v11, %v10 \n"
959 " vand %v0, %v2, %v0 \n"
960 " vavgub %v7, %v11, %v10 \n"
961 " vor %v1, %v8, %v5 \n"
962 " vand %v0, %v0, %v13 \n"
963 " vand %v1, %v2, %v1 \n"
964 " vxor %v13, %v9, %v7 \n"
965 " vsububm %v12, %v12, %v0 \n"
966 " vand %v1, %v1, %v13 \n"
967 " vavgub %v0, %v9, %v7 \n"
968 " vavgub %v12, %v6, %v12 \n"
969 " lvx %v6, %r9, %r3 \n"
970 " vsububm %v0, %v0, %v1 \n"
971 " stvx %v12, %r5, %r3 \n"
972 " vavgub %v12, %v6, %v0 \n"
973 " add %r3, %r3, %r9 \n"
/* epilogue: final two rows */
975 " lvx %v1, %r11, %r4 \n"
976 " lvx %v13, 0, %r4 \n"
977 " lvx %v6, %r5, %r3 \n"
978 " vperm %v10, %v13, %v1, %v3 \n"
979 " stvx %v12, 0, %r3 \n"
980 " vperm %v11, %v13, %v1, %v4 \n"
981 " vxor %v8, %v11, %v10 \n"
982 " vavgub %v9, %v11, %v10 \n"
983 " vor %v0, %v8, %v5 \n"
984 " vxor %v13, %v9, %v7 \n"
985 " vand %v0, %v2, %v0 \n"
986 " vavgub %v1, %v9, %v7 \n"
987 " vand %v0, %v0, %v13 \n"
988 " vsububm %v1, %v1, %v0 \n"
989 " vavgub %v12, %v6, %v1 \n"
990 " stvx %v12, %r5, %r3 \n"
/*
 * MC_avg_xy_8_altivec (asm version): "avg" variant of the 8-wide 2-D
 * half-pel filter.  Same rounding-corrected average-of-four scheme as the
 * xy "put" versions, with per-row-parity alignment permutes v2/v3 and "+1"
 * companions v18/v17; the interpolated row is averaged with the existing
 * dest pixels (lvx from %r3) and stored via stvewx.
 * ABI: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): truncated listing -- asm() wrapper, loop labels/branches
 * and the setup of %r9 (second-load offset) are not visible.
 */
994 static void MC_avg_xy_8_altivec (uint8_t * dest, uint8_t * ref,
995 int stride, int height)
998 " lvsl %v2, 0, %r4 \n"
999 " vspltisb %v19, 1 \n"
1000 " lvsl %v3, %r5, %r4 \n"
1001 " vmrghb %v2, %v2, %v2 \n"
1003 " vmrghb %v3, %v3, %v3 \n"
1004 " lvx %v9, 0, %r4 \n"
1005 " vpkuhum %v2, %v2, %v2 \n"
1006 " lvx %v1, %r9, %r4 \n"
1007 " vpkuhum %v3, %v3, %v3 \n"
1008 " add %r4, %r4, %r5 \n"
/* "+1" permutes select the right-hand neighbour pixels */
1009 " vaddubm %v18, %v2, %v19 \n"
1010 " vperm %v11, %v9, %v1, %v2 \n"
/* loop counter = height/2 - 1 */
1011 " srawi %r6, %r6, 1 \n"
1012 " vaddubm %v17, %v3, %v19 \n"
1013 " addi %r6, %r6, -1 \n"
1014 " vperm %v10, %v9, %v1, %v18 \n"
/* load the current dest row for the final averaging */
1015 " lvx %v4, 0, %r3 \n"
1017 " lvx %v1, %r9, %r4 \n"
1018 " lvx %v9, 0, %r4 \n"
/* horizontal averages and lost-carry bits for the first two rows */
1019 " vavgub %v8, %v11, %v10 \n"
1020 " vxor %v7, %v11, %v10 \n"
1021 " add %r4, %r4, %r5 \n"
1022 " vperm %v10, %v9, %v1, %v17 \n"
1023 " vperm %v11, %v9, %v1, %v3 \n"
1024 " vxor %v5, %v11, %v10 \n"
1025 " vavgub %v6, %v11, %v10 \n"
/* vertical combine with rounding correction, then avg with dest */
1026 " vor %v1, %v7, %v5 \n"
1027 " vxor %v13, %v8, %v6 \n"
1028 " vand %v1, %v19, %v1 \n"
1029 " vavgub %v0, %v8, %v6 \n"
1030 " vand %v1, %v1, %v13 \n"
1031 " vsububm %v0, %v0, %v1 \n"
1032 " vavgub %v13, %v4, %v0 \n"
/* main loop body: two output rows per iteration */
1035 " lvx %v1, %r9, %r4 \n"
1036 " lvx %v9, 0, %r4 \n"
1037 " lvx %v4, %r5, %r3 \n"
1038 " stvewx %v13, 0, %r3 \n"
1039 " vperm %v10, %v9, %v1, %v18 \n"
1040 " vperm %v11, %v9, %v1, %v2 \n"
1042 " stvewx %v13, %r9, %r3 \n"
1043 " vxor %v7, %v11, %v10 \n"
1044 " add %r4, %r4, %r5 \n"
1046 " vavgub %v8, %v11, %v10 \n"
1047 " lvx %v1, %r9, %r4 \n"
1048 " vor %v0, %v7, %v5 \n"
1049 " lvx %v9, 0, %r4 \n"
1050 " vxor %v12, %v8, %v6 \n"
1051 " vand %v0, %v19, %v0 \n"
1052 " add %r3, %r3, %r5 \n"
1053 " vperm %v10, %v9, %v1, %v17 \n"
1054 " vavgub %v13, %v8, %v6 \n"
1056 " vperm %v11, %v9, %v1, %v3 \n"
1057 " vand %v0, %v0, %v12 \n"
1058 " add %r4, %r4, %r5 \n"
1059 " vxor %v5, %v11, %v10 \n"
1060 " vavgub %v6, %v11, %v10 \n"
1061 " vor %v1, %v7, %v5 \n"
1062 " vsububm %v13, %v13, %v0 \n"
1063 " vxor %v0, %v8, %v6 \n"
1064 " vand %v1, %v19, %v1 \n"
1065 " vavgub %v13, %v4, %v13 \n"
1066 " vand %v1, %v1, %v0 \n"
1067 " lvx %v4, %r5, %r3 \n"
1068 " vavgub %v0, %v8, %v6 \n"
1069 " stvewx %v13, 0, %r3 \n"
1070 " stvewx %v13, %r9, %r3 \n"
1071 " vsububm %v0, %v0, %v1 \n"
1072 " add %r3, %r3, %r5 \n"
1073 " vavgub %v13, %v4, %v0 \n"
/* epilogue: final two rows */
1076 " lvx %v1, %r9, %r4 \n"
1077 " lvx %v9, 0, %r4 \n"
1078 " lvx %v4, %r5, %r3 \n"
1079 " vperm %v10, %v9, %v1, %v18 \n"
1080 " stvewx %v13, 0, %r3 \n"
1081 " vperm %v11, %v9, %v1, %v2 \n"
1083 " stvewx %v13, %r9, %r3 \n"
1084 " vxor %v7, %v11, %v10 \n"
1085 " add %r3, %r3, %r5 \n"
1086 " vavgub %v8, %v11, %v10 \n"
1087 " vor %v0, %v7, %v5 \n"
1088 " vxor %v13, %v8, %v6 \n"
1089 " vand %v0, %v19, %v0 \n"
1090 " vavgub %v1, %v8, %v6 \n"
1091 " vand %v0, %v0, %v13 \n"
1092 " vsububm %v1, %v1, %v0 \n"
1093 " vavgub %v13, %v4, %v1 \n"
1094 " stvewx %v13, 0, %r3 \n"
1095 " stvewx %v13, %r9, %r3 \n"
1099 #endif /* !CAN_COMPILE_C_ALTIVEC */
1100 #endif /* __BUILD_ALTIVEC_ASM__ */
1102 #if defined(CAN_COMPILE_C_ALTIVEC) || defined(__BUILD_ALTIVEC_ASM__)
1104 #define vector_s16_t vector signed short
1105 #define vector_u16_t vector unsigned short
1106 #define vector_s8_t vector signed char
1107 #define vector_u8_t vector unsigned char
1108 #define vector_s32_t vector signed int
1109 #define vector_u32_t vector unsigned int
/*
 * MC_put_o_16_altivec (C intrinsics version, compiled when
 * CAN_COMPILE_C_ALTIVEC; also the template the asm above was generated
 * from): copy a 16-pixel-wide block of `height` rows from ref to dest with
 * no interpolation.  ref may be unaligned, so each row is rebuilt from two
 * aligned loads (offsets 0 and 15) with vec_perm; dest is assumed
 * 16-byte-aligned for vec_st.
 * NOTE(review): this listing is missing lines (numbering gaps) -- the
 * function braces, the loop statement and the `ref += stride; dest +=
 * stride;` pointer updates are not visible here.
 */
1111 void MC_put_o_16_altivec (unsigned char * dest, unsigned char * ref,
1112 int stride, int height)
1114 vector_u8_t perm, ref0, ref1, tmp;
/* permute vector encoding ref's misalignment */
1116 perm = vec_lvsl (0, ref);
/* two rows per loop iteration; last pair handled after the loop */
1118 height = (height >> 1) - 1;
1120 ref0 = vec_ld (0, ref);
1121 ref1 = vec_ld (15, ref);
1123 tmp = vec_perm (ref0, ref1, perm);
/* loop body: store previous row, realign the next */
1126 ref0 = vec_ld (0, ref);
1127 ref1 = vec_ld (15, ref);
1129 vec_st (tmp, 0, dest);
1130 tmp = vec_perm (ref0, ref1, perm);
1132 ref0 = vec_ld (0, ref);
1133 ref1 = vec_ld (15, ref);
1135 vec_st (tmp, stride, dest);
1137 tmp = vec_perm (ref0, ref1, perm);
/* epilogue: final two rows */
1140 ref0 = vec_ld (0, ref);
1141 ref1 = vec_ld (15, ref);
1142 vec_st (tmp, 0, dest);
1143 tmp = vec_perm (ref0, ref1, perm);
1144 vec_st (tmp, stride, dest);
/*
 * MC_put_o_8_altivec (C intrinsics version): copy an 8-pixel-wide block of
 * `height` rows from ref to dest with no interpolation.  Because 8-byte
 * rows alternate between two alignments, two permute vectors are built
 * (perm0 for even rows, perm1 for odd rows) via the mergeh+pack trick that
 * duplicates the low half of the lvsl result.  Each 8-byte row is stored
 * as two 4-byte vec_ste element stores (dest assumed 4-byte-aligned).
 * NOTE(review): this listing is missing lines (numbering gaps) -- the
 * function braces, the loop statement and the pointer-advance statements
 * are not visible here.
 */
1147 void MC_put_o_8_altivec (unsigned char * dest, unsigned char * ref,
1148 int stride, int height)
1150 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1;
/* alignment permutes for even (perm0) and odd (perm1) rows */
1152 tmp0 = vec_lvsl (0, ref);
1153 tmp0 = vec_mergeh (tmp0, tmp0);
1154 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1155 tmp1 = vec_lvsl (stride, ref);
1156 tmp1 = vec_mergeh (tmp1, tmp1);
1157 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
/* two rows per loop iteration; last pair handled after the loop */
1159 height = (height >> 1) - 1;
1161 ref0 = vec_ld (0, ref);
1162 ref1 = vec_ld (7, ref);
1164 tmp0 = vec_perm (ref0, ref1, perm0);
/* loop body */
1167 ref0 = vec_ld (0, ref);
1168 ref1 = vec_ld (7, ref);
1170 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1171 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1173 tmp1 = vec_perm (ref0, ref1, perm1);
1175 ref0 = vec_ld (0, ref);
1176 ref1 = vec_ld (7, ref);
1178 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1179 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1181 tmp0 = vec_perm (ref0, ref1, perm0);
/* epilogue: final two rows */
1184 ref0 = vec_ld (0, ref);
1185 ref1 = vec_ld (7, ref);
1186 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1187 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1189 tmp1 = vec_perm (ref0, ref1, perm1);
1190 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1191 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
/*
 * MC_put_x_16_altivec (C intrinsics version): 16-pixel-wide "put" with
 * horizontal half-pel interpolation: each output pixel is vec_avg of a
 * source pixel (permA) and its right neighbour (permB = permA + 1).  Loads
 * use offset 16 (not 15) so permB can reach the 17th source byte.
 * NOTE(review): this listing is missing lines (numbering gaps) -- the
 * function braces, the loop statement and the pointer-advance statements
 * are not visible here.
 */
1194 void MC_put_x_16_altivec (unsigned char * dest, unsigned char * ref,
1195 int stride, int height)
1197 vector_u8_t permA, permB, ref0, ref1, tmp;
1199 permA = vec_lvsl (0, ref);
/* permB selects the pixel one to the right of permA */
1200 permB = vec_add (permA, vec_splat_u8 (1));
/* two rows per loop iteration; last pair handled after the loop */
1202 height = (height >> 1) - 1;
1204 ref0 = vec_ld (0, ref);
1205 ref1 = vec_ld (16, ref);
1207 tmp = vec_avg (vec_perm (ref0, ref1, permA),
1208 vec_perm (ref0, ref1, permB));
/* loop body */
1211 ref0 = vec_ld (0, ref);
1212 ref1 = vec_ld (16, ref);
1214 vec_st (tmp, 0, dest);
1215 tmp = vec_avg (vec_perm (ref0, ref1, permA),
1216 vec_perm (ref0, ref1, permB));
1218 ref0 = vec_ld (0, ref);
1219 ref1 = vec_ld (16, ref);
1221 vec_st (tmp, stride, dest);
1223 tmp = vec_avg (vec_perm (ref0, ref1, permA),
1224 vec_perm (ref0, ref1, permB));
/* epilogue: final two rows */
1227 ref0 = vec_ld (0, ref);
1228 ref1 = vec_ld (16, ref);
1229 vec_st (tmp, 0, dest);
1230 tmp = vec_avg (vec_perm (ref0, ref1, permA),
1231 vec_perm (ref0, ref1, permB));
1232 vec_st (tmp, stride, dest);
/*
 * MC_put_x_8_altivec (C intrinsics version): 8-pixel-wide "put" with
 * horizontal half-pel interpolation.  Alternating alignment permutes
 * (perm0A for even rows, perm1A for odd rows, built with the mergeh+pack
 * trick) each get a "+1" companion (perm0B/perm1B) so vec_avg averages
 * horizontally adjacent pixels; loads use offset 8 to reach the 9th byte.
 * Rows are stored as two 4-byte vec_ste element stores.
 * NOTE(review): this listing is missing lines (numbering gaps) -- the
 * function braces, the loop statement and the pointer-advance statements
 * are not visible here.
 */
1235 void MC_put_x_8_altivec (unsigned char * dest, unsigned char * ref,
1236 int stride, int height)
1238 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
1240 ones = vec_splat_u8 (1);
/* alignment permutes for even rows, plus right-neighbour variant */
1241 tmp0 = vec_lvsl (0, ref);
1242 tmp0 = vec_mergeh (tmp0, tmp0);
1243 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1244 perm0B = vec_add (perm0A, ones);
/* same pair for odd rows */
1245 tmp1 = vec_lvsl (stride, ref);
1246 tmp1 = vec_mergeh (tmp1, tmp1);
1247 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1248 perm1B = vec_add (perm1A, ones);
/* two rows per loop iteration; last pair handled after the loop */
1250 height = (height >> 1) - 1;
1252 ref0 = vec_ld (0, ref);
1253 ref1 = vec_ld (8, ref);
1255 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
1256 vec_perm (ref0, ref1, perm0B));
/* loop body */
1259 ref0 = vec_ld (0, ref);
1260 ref1 = vec_ld (8, ref);
1262 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1263 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1265 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
1266 vec_perm (ref0, ref1, perm1B));
1268 ref0 = vec_ld (0, ref);
1269 ref1 = vec_ld (8, ref);
1271 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1272 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1274 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
1275 vec_perm (ref0, ref1, perm0B));
/* epilogue: final two rows */
1278 ref0 = vec_ld (0, ref);
1279 ref1 = vec_ld (8, ref);
1280 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1281 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1283 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
1284 vec_perm (ref0, ref1, perm1B));
1285 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1286 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1289 void MC_put_y_16_altivec (unsigned char * dest, unsigned char * ref,
1290 int stride, int height)
1292 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp;
1294 perm = vec_lvsl (0, ref);
1296 height = (height >> 1) - 1;
1298 ref0 = vec_ld (0, ref);
1299 ref1 = vec_ld (15, ref);
1301 tmp0 = vec_perm (ref0, ref1, perm);
1302 ref0 = vec_ld (0, ref);
1303 ref1 = vec_ld (15, ref);
1305 tmp1 = vec_perm (ref0, ref1, perm);
1306 tmp = vec_avg (tmp0, tmp1);
1309 ref0 = vec_ld (0, ref);
1310 ref1 = vec_ld (15, ref);
1312 vec_st (tmp, 0, dest);
1313 tmp0 = vec_perm (ref0, ref1, perm);
1314 tmp = vec_avg (tmp0, tmp1);
1316 ref0 = vec_ld (0, ref);
1317 ref1 = vec_ld (15, ref);
1319 vec_st (tmp, stride, dest);
1321 tmp1 = vec_perm (ref0, ref1, perm);
1322 tmp = vec_avg (tmp0, tmp1);
1325 ref0 = vec_ld (0, ref);
1326 ref1 = vec_ld (15, ref);
1327 vec_st (tmp, 0, dest);
1328 tmp0 = vec_perm (ref0, ref1, perm);
1329 tmp = vec_avg (tmp0, tmp1);
1330 vec_st (tmp, stride, dest);
1333 void MC_put_y_8_altivec (unsigned char * dest, unsigned char * ref,
1334 int stride, int height)
1336 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1;
1338 tmp0 = vec_lvsl (0, ref);
1339 tmp0 = vec_mergeh (tmp0, tmp0);
1340 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1341 tmp1 = vec_lvsl (stride, ref);
1342 tmp1 = vec_mergeh (tmp1, tmp1);
1343 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1345 height = (height >> 1) - 1;
1347 ref0 = vec_ld (0, ref);
1348 ref1 = vec_ld (7, ref);
1350 tmp0 = vec_perm (ref0, ref1, perm0);
1351 ref0 = vec_ld (0, ref);
1352 ref1 = vec_ld (7, ref);
1354 tmp1 = vec_perm (ref0, ref1, perm1);
1355 tmp = vec_avg (tmp0, tmp1);
1358 ref0 = vec_ld (0, ref);
1359 ref1 = vec_ld (7, ref);
1361 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1362 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1364 tmp0 = vec_perm (ref0, ref1, perm0);
1365 tmp = vec_avg (tmp0, tmp1);
1367 ref0 = vec_ld (0, ref);
1368 ref1 = vec_ld (7, ref);
1370 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1371 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1373 tmp1 = vec_perm (ref0, ref1, perm1);
1374 tmp = vec_avg (tmp0, tmp1);
1377 ref0 = vec_ld (0, ref);
1378 ref1 = vec_ld (7, ref);
1379 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1380 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1382 tmp0 = vec_perm (ref0, ref1, perm0);
1383 tmp = vec_avg (tmp0, tmp1);
1384 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1385 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1388 void MC_put_xy_16_altivec (unsigned char * dest, unsigned char * ref,
1389 int stride, int height)
1391 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
1394 ones = vec_splat_u8 (1);
1395 permA = vec_lvsl (0, ref);
1396 permB = vec_add (permA, ones);
1398 height = (height >> 1) - 1;
1400 ref0 = vec_ld (0, ref);
1401 ref1 = vec_ld (16, ref);
1403 A = vec_perm (ref0, ref1, permA);
1404 B = vec_perm (ref0, ref1, permB);
1405 avg0 = vec_avg (A, B);
1406 xor0 = vec_xor (A, B);
1408 ref0 = vec_ld (0, ref);
1409 ref1 = vec_ld (16, ref);
1411 A = vec_perm (ref0, ref1, permA);
1412 B = vec_perm (ref0, ref1, permB);
1413 avg1 = vec_avg (A, B);
1414 xor1 = vec_xor (A, B);
1415 tmp = vec_sub (vec_avg (avg0, avg1),
1416 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1417 vec_xor (avg0, avg1)));
1420 ref0 = vec_ld (0, ref);
1421 ref1 = vec_ld (16, ref);
1423 vec_st (tmp, 0, dest);
1424 A = vec_perm (ref0, ref1, permA);
1425 B = vec_perm (ref0, ref1, permB);
1426 avg0 = vec_avg (A, B);
1427 xor0 = vec_xor (A, B);
1428 tmp = vec_sub (vec_avg (avg0, avg1),
1429 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1430 vec_xor (avg0, avg1)));
1432 ref0 = vec_ld (0, ref);
1433 ref1 = vec_ld (16, ref);
1435 vec_st (tmp, stride, dest);
1437 A = vec_perm (ref0, ref1, permA);
1438 B = vec_perm (ref0, ref1, permB);
1439 avg1 = vec_avg (A, B);
1440 xor1 = vec_xor (A, B);
1441 tmp = vec_sub (vec_avg (avg0, avg1),
1442 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1443 vec_xor (avg0, avg1)));
1446 ref0 = vec_ld (0, ref);
1447 ref1 = vec_ld (16, ref);
1448 vec_st (tmp, 0, dest);
1449 A = vec_perm (ref0, ref1, permA);
1450 B = vec_perm (ref0, ref1, permB);
1451 avg0 = vec_avg (A, B);
1452 xor0 = vec_xor (A, B);
1453 tmp = vec_sub (vec_avg (avg0, avg1),
1454 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1455 vec_xor (avg0, avg1)));
1456 vec_st (tmp, stride, dest);
1459 void MC_put_xy_8_altivec (unsigned char * dest, unsigned char * ref,
1460 int stride, int height)
1462 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
1463 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones;
1465 ones = vec_splat_u8 (1);
1466 perm0A = vec_lvsl (0, ref);
1467 perm0A = vec_mergeh (perm0A, perm0A);
1468 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
1469 perm0B = vec_add (perm0A, ones);
1470 perm1A = vec_lvsl (stride, ref);
1471 perm1A = vec_mergeh (perm1A, perm1A);
1472 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
1473 perm1B = vec_add (perm1A, ones);
1475 height = (height >> 1) - 1;
1477 ref0 = vec_ld (0, ref);
1478 ref1 = vec_ld (8, ref);
1480 A = vec_perm (ref0, ref1, perm0A);
1481 B = vec_perm (ref0, ref1, perm0B);
1482 avg0 = vec_avg (A, B);
1483 xor0 = vec_xor (A, B);
1485 ref0 = vec_ld (0, ref);
1486 ref1 = vec_ld (8, ref);
1488 A = vec_perm (ref0, ref1, perm1A);
1489 B = vec_perm (ref0, ref1, perm1B);
1490 avg1 = vec_avg (A, B);
1491 xor1 = vec_xor (A, B);
1492 tmp = vec_sub (vec_avg (avg0, avg1),
1493 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1494 vec_xor (avg0, avg1)));
1497 ref0 = vec_ld (0, ref);
1498 ref1 = vec_ld (8, ref);
1500 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1501 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1503 A = vec_perm (ref0, ref1, perm0A);
1504 B = vec_perm (ref0, ref1, perm0B);
1505 avg0 = vec_avg (A, B);
1506 xor0 = vec_xor (A, B);
1507 tmp = vec_sub (vec_avg (avg0, avg1),
1508 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1509 vec_xor (avg0, avg1)));
1511 ref0 = vec_ld (0, ref);
1512 ref1 = vec_ld (8, ref);
1514 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1515 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1517 A = vec_perm (ref0, ref1, perm1A);
1518 B = vec_perm (ref0, ref1, perm1B);
1519 avg1 = vec_avg (A, B);
1520 xor1 = vec_xor (A, B);
1521 tmp = vec_sub (vec_avg (avg0, avg1),
1522 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1523 vec_xor (avg0, avg1)));
1526 ref0 = vec_ld (0, ref);
1527 ref1 = vec_ld (8, ref);
1528 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1529 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1531 A = vec_perm (ref0, ref1, perm0A);
1532 B = vec_perm (ref0, ref1, perm0B);
1533 avg0 = vec_avg (A, B);
1534 xor0 = vec_xor (A, B);
1535 tmp = vec_sub (vec_avg (avg0, avg1),
1536 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1537 vec_xor (avg0, avg1)));
1538 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1539 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
#if 0
/* Disabled reference implementation of MC_put_xy_8_altivec.
 * This is a second definition of the same symbol and must not be
 * compiled (duplicate-definition error otherwise). It computes the
 * diagonal average exactly via 16-bit widening arithmetic:
 * (a+b+c+d+2)>>2, instead of the avg/xor correction trick above.
 * NOTE(review): fragment is unfinished (only the mergeh half of each
 * row is packed/stored) -- kept only for documentation. */
void MC_put_xy_8_altivec (unsigned char * dest, unsigned char * ref,
                          int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones;
    vector_u16_t splat2, temp;

    ones = vec_splat_u8 (1);
    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, ones);

    zero = vec_splat_u8 (0);
    splat2 = vec_splat_u16 (2);

    do {
        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (8, ref);
        ref += stride;
        A = vec_perm (ref0, ref1, permA);
        B = vec_perm (ref0, ref1, permB);
        ref0 = vec_ld (0, ref);
        ref1 = vec_ld (8, ref);
        C = vec_perm (ref0, ref1, permA);
        D = vec_perm (ref0, ref1, permB);

        /* Widen to 16 bits, sum the four taps, add 2 and shift for
         * exact round-to-nearest. */
        temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A),
                                 (vector_u16_t)vec_mergeh (zero, B)),
                        vec_add ((vector_u16_t)vec_mergeh (zero, C),
                                 (vector_u16_t)vec_mergeh (zero, D)));
        temp = vec_sr (vec_add (temp, splat2), splat2);
        tmp = vec_pack (temp, temp);

        vec_st (tmp, 0, dest);
        dest += stride;
        tmp = vec_avg (vec_perm (ref0, ref1, permA),
                       vec_perm (ref0, ref1, permB));
    } while (--height);
}
#endif
1582 void MC_avg_o_16_altivec (unsigned char * dest, unsigned char * ref,
1583 int stride, int height)
1585 vector_u8_t perm, ref0, ref1, tmp, prev;
1587 perm = vec_lvsl (0, ref);
1589 height = (height >> 1) - 1;
1591 ref0 = vec_ld (0, ref);
1592 ref1 = vec_ld (15, ref);
1594 prev = vec_ld (0, dest);
1595 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
1598 ref0 = vec_ld (0, ref);
1599 ref1 = vec_ld (15, ref);
1601 prev = vec_ld (stride, dest);
1602 vec_st (tmp, 0, dest);
1603 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
1605 ref0 = vec_ld (0, ref);
1606 ref1 = vec_ld (15, ref);
1608 prev = vec_ld (2*stride, dest);
1609 vec_st (tmp, stride, dest);
1611 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
1614 ref0 = vec_ld (0, ref);
1615 ref1 = vec_ld (15, ref);
1616 prev = vec_ld (stride, dest);
1617 vec_st (tmp, 0, dest);
1618 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
1619 vec_st (tmp, stride, dest);
1622 void MC_avg_o_8_altivec (unsigned char * dest, unsigned char * ref,
1623 int stride, int height)
1625 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev;
1627 tmp0 = vec_lvsl (0, ref);
1628 tmp0 = vec_mergeh (tmp0, tmp0);
1629 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1630 tmp1 = vec_lvsl (stride, ref);
1631 tmp1 = vec_mergeh (tmp1, tmp1);
1632 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1634 height = (height >> 1) - 1;
1636 ref0 = vec_ld (0, ref);
1637 ref1 = vec_ld (7, ref);
1639 prev = vec_ld (0, dest);
1640 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
1643 ref0 = vec_ld (0, ref);
1644 ref1 = vec_ld (7, ref);
1646 prev = vec_ld (stride, dest);
1647 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1648 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1650 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
1652 ref0 = vec_ld (0, ref);
1653 ref1 = vec_ld (7, ref);
1655 prev = vec_ld (stride, dest);
1656 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1657 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1659 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
1662 ref0 = vec_ld (0, ref);
1663 ref1 = vec_ld (7, ref);
1664 prev = vec_ld (stride, dest);
1665 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1666 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1668 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
1669 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1670 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1673 void MC_avg_x_16_altivec (unsigned char * dest, unsigned char * ref,
1674 int stride, int height)
1676 vector_u8_t permA, permB, ref0, ref1, tmp, prev;
1678 permA = vec_lvsl (0, ref);
1679 permB = vec_add (permA, vec_splat_u8 (1));
1681 height = (height >> 1) - 1;
1683 ref0 = vec_ld (0, ref);
1684 ref1 = vec_ld (16, ref);
1685 prev = vec_ld (0, dest);
1687 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
1688 vec_perm (ref0, ref1, permB)));
1691 ref0 = vec_ld (0, ref);
1692 ref1 = vec_ld (16, ref);
1694 prev = vec_ld (stride, dest);
1695 vec_st (tmp, 0, dest);
1696 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
1697 vec_perm (ref0, ref1, permB)));
1699 ref0 = vec_ld (0, ref);
1700 ref1 = vec_ld (16, ref);
1702 prev = vec_ld (2*stride, dest);
1703 vec_st (tmp, stride, dest);
1705 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
1706 vec_perm (ref0, ref1, permB)));
1709 ref0 = vec_ld (0, ref);
1710 ref1 = vec_ld (16, ref);
1711 prev = vec_ld (stride, dest);
1712 vec_st (tmp, 0, dest);
1713 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
1714 vec_perm (ref0, ref1, permB)));
1715 vec_st (tmp, stride, dest);
1718 void MC_avg_x_8_altivec (unsigned char * dest, unsigned char * ref,
1719 int stride, int height)
1721 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
1724 ones = vec_splat_u8 (1);
1725 tmp0 = vec_lvsl (0, ref);
1726 tmp0 = vec_mergeh (tmp0, tmp0);
1727 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1728 perm0B = vec_add (perm0A, ones);
1729 tmp1 = vec_lvsl (stride, ref);
1730 tmp1 = vec_mergeh (tmp1, tmp1);
1731 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1732 perm1B = vec_add (perm1A, ones);
1734 height = (height >> 1) - 1;
1736 ref0 = vec_ld (0, ref);
1737 ref1 = vec_ld (8, ref);
1738 prev = vec_ld (0, dest);
1740 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
1741 vec_perm (ref0, ref1, perm0B)));
1744 ref0 = vec_ld (0, ref);
1745 ref1 = vec_ld (8, ref);
1747 prev = vec_ld (stride, dest);
1748 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1749 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1751 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
1752 vec_perm (ref0, ref1, perm1B)));
1754 ref0 = vec_ld (0, ref);
1755 ref1 = vec_ld (8, ref);
1757 prev = vec_ld (stride, dest);
1758 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1759 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1761 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
1762 vec_perm (ref0, ref1, perm0B)));
1765 ref0 = vec_ld (0, ref);
1766 ref1 = vec_ld (8, ref);
1767 prev = vec_ld (stride, dest);
1768 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1769 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1771 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
1772 vec_perm (ref0, ref1, perm1B)));
1773 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1774 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1777 void MC_avg_y_16_altivec (unsigned char * dest, unsigned char * ref,
1778 int stride, int height)
1780 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev;
1782 perm = vec_lvsl (0, ref);
1784 height = (height >> 1) - 1;
1786 ref0 = vec_ld (0, ref);
1787 ref1 = vec_ld (15, ref);
1789 tmp0 = vec_perm (ref0, ref1, perm);
1790 ref0 = vec_ld (0, ref);
1791 ref1 = vec_ld (15, ref);
1793 prev = vec_ld (0, dest);
1794 tmp1 = vec_perm (ref0, ref1, perm);
1795 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1798 ref0 = vec_ld (0, ref);
1799 ref1 = vec_ld (15, ref);
1801 prev = vec_ld (stride, dest);
1802 vec_st (tmp, 0, dest);
1803 tmp0 = vec_perm (ref0, ref1, perm);
1804 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1806 ref0 = vec_ld (0, ref);
1807 ref1 = vec_ld (15, ref);
1809 prev = vec_ld (2*stride, dest);
1810 vec_st (tmp, stride, dest);
1812 tmp1 = vec_perm (ref0, ref1, perm);
1813 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1816 ref0 = vec_ld (0, ref);
1817 ref1 = vec_ld (15, ref);
1818 prev = vec_ld (stride, dest);
1819 vec_st (tmp, 0, dest);
1820 tmp0 = vec_perm (ref0, ref1, perm);
1821 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1822 vec_st (tmp, stride, dest);
1825 void MC_avg_y_8_altivec (unsigned char * dest, unsigned char * ref,
1826 int stride, int height)
1828 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev;
1830 tmp0 = vec_lvsl (0, ref);
1831 tmp0 = vec_mergeh (tmp0, tmp0);
1832 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1833 tmp1 = vec_lvsl (stride, ref);
1834 tmp1 = vec_mergeh (tmp1, tmp1);
1835 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1837 height = (height >> 1) - 1;
1839 ref0 = vec_ld (0, ref);
1840 ref1 = vec_ld (7, ref);
1842 tmp0 = vec_perm (ref0, ref1, perm0);
1843 ref0 = vec_ld (0, ref);
1844 ref1 = vec_ld (7, ref);
1846 prev = vec_ld (0, dest);
1847 tmp1 = vec_perm (ref0, ref1, perm1);
1848 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1851 ref0 = vec_ld (0, ref);
1852 ref1 = vec_ld (7, ref);
1854 prev = vec_ld (stride, dest);
1855 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1856 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1858 tmp0 = vec_perm (ref0, ref1, perm0);
1859 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1861 ref0 = vec_ld (0, ref);
1862 ref1 = vec_ld (7, ref);
1864 prev = vec_ld (stride, dest);
1865 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1866 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1868 tmp1 = vec_perm (ref0, ref1, perm1);
1869 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1872 ref0 = vec_ld (0, ref);
1873 ref1 = vec_ld (7, ref);
1874 prev = vec_ld (stride, dest);
1875 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1876 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1878 tmp0 = vec_perm (ref0, ref1, perm0);
1879 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1880 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1881 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1884 void MC_avg_xy_16_altivec (unsigned char * dest, unsigned char * ref,
1885 int stride, int height)
1887 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
1888 vector_u8_t ones, prev;
1890 ones = vec_splat_u8 (1);
1891 permA = vec_lvsl (0, ref);
1892 permB = vec_add (permA, ones);
1894 height = (height >> 1) - 1;
1896 ref0 = vec_ld (0, ref);
1897 ref1 = vec_ld (16, ref);
1899 A = vec_perm (ref0, ref1, permA);
1900 B = vec_perm (ref0, ref1, permB);
1901 avg0 = vec_avg (A, B);
1902 xor0 = vec_xor (A, B);
1904 ref0 = vec_ld (0, ref);
1905 ref1 = vec_ld (16, ref);
1907 prev = vec_ld (0, dest);
1908 A = vec_perm (ref0, ref1, permA);
1909 B = vec_perm (ref0, ref1, permB);
1910 avg1 = vec_avg (A, B);
1911 xor1 = vec_xor (A, B);
1912 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
1913 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1914 vec_xor (avg0, avg1))));
1917 ref0 = vec_ld (0, ref);
1918 ref1 = vec_ld (16, ref);
1920 prev = vec_ld (stride, dest);
1921 vec_st (tmp, 0, dest);
1922 A = vec_perm (ref0, ref1, permA);
1923 B = vec_perm (ref0, ref1, permB);
1924 avg0 = vec_avg (A, B);
1925 xor0 = vec_xor (A, B);
1926 tmp = vec_avg (prev,
1927 vec_sub (vec_avg (avg0, avg1),
1928 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1929 vec_xor (avg0, avg1))));
1931 ref0 = vec_ld (0, ref);
1932 ref1 = vec_ld (16, ref);
1934 prev = vec_ld (2*stride, dest);
1935 vec_st (tmp, stride, dest);
1937 A = vec_perm (ref0, ref1, permA);
1938 B = vec_perm (ref0, ref1, permB);
1939 avg1 = vec_avg (A, B);
1940 xor1 = vec_xor (A, B);
1941 tmp = vec_avg (prev,
1942 vec_sub (vec_avg (avg0, avg1),
1943 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1944 vec_xor (avg0, avg1))));
1947 ref0 = vec_ld (0, ref);
1948 ref1 = vec_ld (16, ref);
1949 prev = vec_ld (stride, dest);
1950 vec_st (tmp, 0, dest);
1951 A = vec_perm (ref0, ref1, permA);
1952 B = vec_perm (ref0, ref1, permB);
1953 avg0 = vec_avg (A, B);
1954 xor0 = vec_xor (A, B);
1955 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
1956 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1957 vec_xor (avg0, avg1))));
1958 vec_st (tmp, stride, dest);
1961 void MC_avg_xy_8_altivec (unsigned char * dest, unsigned char * ref,
1962 int stride, int height)
1964 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
1965 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev;
1967 ones = vec_splat_u8 (1);
1968 perm0A = vec_lvsl (0, ref);
1969 perm0A = vec_mergeh (perm0A, perm0A);
1970 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
1971 perm0B = vec_add (perm0A, ones);
1972 perm1A = vec_lvsl (stride, ref);
1973 perm1A = vec_mergeh (perm1A, perm1A);
1974 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
1975 perm1B = vec_add (perm1A, ones);
1977 height = (height >> 1) - 1;
1979 ref0 = vec_ld (0, ref);
1980 ref1 = vec_ld (8, ref);
1982 A = vec_perm (ref0, ref1, perm0A);
1983 B = vec_perm (ref0, ref1, perm0B);
1984 avg0 = vec_avg (A, B);
1985 xor0 = vec_xor (A, B);
1987 ref0 = vec_ld (0, ref);
1988 ref1 = vec_ld (8, ref);
1990 prev = vec_ld (0, dest);
1991 A = vec_perm (ref0, ref1, perm1A);
1992 B = vec_perm (ref0, ref1, perm1B);
1993 avg1 = vec_avg (A, B);
1994 xor1 = vec_xor (A, B);
1995 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
1996 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1997 vec_xor (avg0, avg1))));
2000 ref0 = vec_ld (0, ref);
2001 ref1 = vec_ld (8, ref);
2003 prev = vec_ld (stride, dest);
2004 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
2005 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
2007 A = vec_perm (ref0, ref1, perm0A);
2008 B = vec_perm (ref0, ref1, perm0B);
2009 avg0 = vec_avg (A, B);
2010 xor0 = vec_xor (A, B);
2011 tmp = vec_avg (prev,
2012 vec_sub (vec_avg (avg0, avg1),
2013 vec_and (vec_and (ones, vec_or (xor0, xor1)),
2014 vec_xor (avg0, avg1))));
2016 ref0 = vec_ld (0, ref);
2017 ref1 = vec_ld (8, ref);
2019 prev = vec_ld (stride, dest);
2020 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
2021 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
2023 A = vec_perm (ref0, ref1, perm1A);
2024 B = vec_perm (ref0, ref1, perm1B);
2025 avg1 = vec_avg (A, B);
2026 xor1 = vec_xor (A, B);
2027 tmp = vec_avg (prev,
2028 vec_sub (vec_avg (avg0, avg1),
2029 vec_and (vec_and (ones, vec_or (xor0, xor1)),
2030 vec_xor (avg0, avg1))));
2033 ref0 = vec_ld (0, ref);
2034 ref1 = vec_ld (8, ref);
2035 prev = vec_ld (stride, dest);
2036 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
2037 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
2039 A = vec_perm (ref0, ref1, perm0A);
2040 B = vec_perm (ref0, ref1, perm0B);
2041 avg0 = vec_avg (A, B);
2042 xor0 = vec_xor (A, B);
2043 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
2044 vec_and (vec_and (ones, vec_or (xor0, xor1)),
2045 vec_xor (avg0, avg1))));
2046 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
2047 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
2050 #endif /* CAN_COMPILE_C_ALTIVEC || __BUILD_ALTIVEC_ASM__ */
2051 #ifndef __BUILD_ALTIVEC_ASM__
2053 /*****************************************************************************
2054 * Functions exported as capabilities. They are declared as static so that
2055 * we don't pollute the namespace too much.
2056 *****************************************************************************/
2057 static void motion_getfunctions( function_list_t * p_function_list )
2059 static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *,
2063 /* Copying functions */
2066 MC_put_o_16_altivec, MC_put_x_16_altivec, MC_put_y_16_altivec, MC_put_xy_16_altivec
2070 MC_put_o_8_altivec, MC_put_x_8_altivec, MC_put_y_8_altivec, MC_put_xy_8_altivec
2074 /* Averaging functions */
2077 MC_avg_o_16_altivec, MC_avg_x_16_altivec, MC_avg_y_16_altivec, MC_avg_xy_16_altivec
2081 MC_avg_o_8_altivec, MC_avg_x_8_altivec, MC_avg_y_8_altivec, MC_avg_xy_8_altivec
2086 #define list p_function_list->functions.motion
2087 memcpy( list.ppppf_motion, ppppf_motion, sizeof( void * ) * 16 );
2093 #endif /* __BUILD_ALTIVEC_ASM__ */