1 /*****************************************************************************
2 * motionaltivec.c : AltiVec motion compensation module for vlc
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: motionaltivec.c,v 1.14 2002/06/01 16:45:34 sam Exp $
7 * Authors: Michel Lespinasse <walken@zoy.org>
8 * Paul Mackerras <paulus@linuxcare.com.au>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 #ifndef __BUILD_ALTIVEC_ASM__
27 /*****************************************************************************
29 *****************************************************************************/
32 #include <stdlib.h> /* malloc(), free() */
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h> /* int16_t .. */
38 /*****************************************************************************
39 * Local and extern prototypes.
40 *****************************************************************************/
41 static void motion_getfunctions( function_list_t * p_function_list );
43 /*****************************************************************************
44 * Build configuration tree.
45 *****************************************************************************/
50 SET_DESCRIPTION( _("AltiVec motion compensation module") )
51 ADD_CAPABILITY( MOTION, 150 )
52 ADD_REQUIREMENT( ALTIVEC )
53 ADD_SHORTCUT( "altivec" )
57 motion_getfunctions( &p_module->p_functions->motion );
60 MODULE_DEACTIVATE_START
61 MODULE_DEACTIVATE_STOP
63 /*****************************************************************************
64 * Motion compensation in AltiVec
65 *****************************************************************************/
67 #ifndef CAN_COMPILE_C_ALTIVEC
70 * The asm code is generated with:
72 * gcc-2.95 -fvec -D__BUILD_ALTIVEC_ASM__ -O9 -fomit-frame-pointer -mregnames -S
73 * motion_comp_altivec.c
75 * sed 's/.L/._L/g' motion_comp_altivec.s |
76 * awk '{args=""; len=split ($2, arg, ",");
77 * for (i=1; i<=len; i++) { a=arg[i]; if (i<len) a=a",";
78 * args = args sprintf ("%-6s", a) }
79 * printf ("\t\"\t%-16s%-24s\\n\"\n", $1, args) }' |
/*
 * MC_put_o_16_altivec: copy a 16-pixel-wide, `height`-row block from `ref`
 * to `dest` with no half-pel interpolation ("o" = no offset).  Each source
 * row may be unaligned, so it is loaded as two vectors and realigned with
 * lvsl/vperm; two rows are produced per loop pass (height is halved).
 * Register use observed: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): the asm() wrapper, loop labels/branches and the li setting
 * %r9 are not visible in this extract — confirm against the full file.
 */
83 static void MC_put_o_16_altivec (uint8_t * dest, uint8_t * ref,
84                                  int stride, int height)
87     " srawi %r6, %r6, 1 \n"
89     " addi %r6, %r6, -1 \n"
90     " lvsl %v12, 0, %r4 \n"
93     " lvx %v0, %r9, %r4 \n"
94     " add %r0, %r5, %r5 \n"
95     " vperm %v13, %v1, %v0, %v12 \n"
96     " add %r4, %r4, %r5 \n"
100    " lvx %v0, %r9, %r4 \n"
101    " stvx %v13, 0, %r3 \n"
102    " vperm %v13, %v1, %v0, %v12 \n"
103    " add %r4, %r4, %r5 \n"
104    " lvx %v1, 0, %r4 \n"
105    " lvx %v0, %r9, %r4 \n"
106    " stvx %v13, %r5, %r3 \n"
107    " vperm %v13, %v1, %v0, %v12 \n"
108    " add %r4, %r4, %r5 \n"
109    " add %r3, %r3, %r0 \n"
111    " lvx %v0, %r9, %r4 \n"
112    " lvx %v1, 0, %r4 \n"
113    " stvx %v13, 0, %r3 \n"
114    " vperm %v13, %v1, %v0, %v12 \n"
115    " stvx %v13, %r5, %r3 \n"
/*
 * MC_put_o_8_altivec: copy an 8-pixel-wide block from `ref` to `dest`,
 * no interpolation.  The 8-byte permute vectors are built from lvsl via
 * vmrghb + vpkuhum; results are stored as two 4-byte element stores
 * (stvewx) per row.  Two rows are handled per loop pass.
 * Register use observed: r3 = dest, r4 = ref, r5 = stride, r6 = height;
 * r9 holds a small store/load offset set up outside this extract.
 * NOTE(review): asm() wrapper and loop labels/branches are missing here.
 */
119 static void MC_put_o_8_altivec (uint8_t * dest, uint8_t * ref,
120                                 int stride, int height)
123    " lvsl %v12, 0, %r4 \n"
124    " lvsl %v1, %r5, %r4 \n"
125    " vmrghb %v12, %v12, %v12 \n"
126    " srawi %r6, %r6, 1 \n"
128    " vmrghb %v1, %v1, %v1 \n"
129    " addi %r6, %r6, -1 \n"
130    " vpkuhum %v10, %v12, %v12 \n"
131    " lvx %v13, 0, %r4 \n"
133    " vpkuhum %v11, %v1, %v1 \n"
134    " lvx %v0, %r9, %r4 \n"
135    " add %r4, %r4, %r5 \n"
136    " vperm %v12, %v13, %v0, %v10 \n"
139    " lvx %v0, %r9, %r4 \n"
140    " lvx %v13, 0, %r4 \n"
141    " stvewx %v12, 0, %r3 \n"
143    " vperm %v1, %v13, %v0, %v11 \n"
144    " stvewx %v12, %r9, %r3 \n"
145    " add %r4, %r4, %r5 \n"
147    " lvx %v0, %r9, %r4 \n"
148    " lvx %v13, 0, %r4 \n"
149    " add %r3, %r3, %r5 \n"
150    " stvewx %v1, 0, %r3 \n"
151    " vperm %v12, %v13, %v0, %v10 \n"
153    " stvewx %v1, %r9, %r3 \n"
154    " add %r4, %r4, %r5 \n"
155    " add %r3, %r3, %r5 \n"
158    " lvx %v0, %r9, %r4 \n"
159    " lvx %v13, 0, %r4 \n"
160    " stvewx %v12, 0, %r3 \n"
162    " vperm %v1, %v13, %v0, %v11 \n"
163    " stvewx %v12, %r9, %r3 \n"
164    " add %r3, %r3, %r5 \n"
165    " stvewx %v1, 0, %r3 \n"
166    " stvewx %v1, %r9, %r3 \n"
/*
 * MC_put_x_16_altivec: 16-wide copy with horizontal half-pel
 * interpolation: each output byte is vavgub (rounded average) of a pixel
 * and its right neighbour — the neighbour is obtained by permuting with
 * lvsl+1 (vaddubm of the lvsl mask and splat(1)).  Two rows per pass.
 * Register use observed: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): asm() wrapper, loop labels/branches and the li for %r9
 * are not visible in this extract.
 */
170 static void MC_put_x_16_altivec (uint8_t * dest, uint8_t * ref,
171                                  int stride, int height)
174    " lvsl %v11, 0, %r4 \n"
175    " vspltisb %v0, 1 \n"
177    " lvx %v12, 0, %r4 \n"
178    " vaddubm %v10, %v11, %v0 \n"
179    " lvx %v13, %r9, %r4 \n"
180    " srawi %r6, %r6, 1 \n"
181    " addi %r6, %r6, -1 \n"
182    " vperm %v1, %v12, %v13, %v10 \n"
183    " vperm %v0, %v12, %v13, %v11 \n"
185    " add %r0, %r5, %r5 \n"
186    " add %r4, %r4, %r5 \n"
187    " vavgub %v0, %v0, %v1 \n"
190    " lvx %v12, 0, %r4 \n"
191    " lvx %v13, %r9, %r4 \n"
192    " stvx %v0, 0, %r3 \n"
193    " vperm %v1, %v12, %v13, %v10 \n"
194    " add %r4, %r4, %r5 \n"
195    " vperm %v0, %v12, %v13, %v11 \n"
196    " lvx %v12, 0, %r4 \n"
197    " lvx %v13, %r9, %r4 \n"
198    " vavgub %v0, %v0, %v1 \n"
199    " stvx %v0, %r5, %r3 \n"
200    " vperm %v1, %v12, %v13, %v10 \n"
201    " add %r4, %r4, %r5 \n"
202    " vperm %v0, %v12, %v13, %v11 \n"
203    " add %r3, %r3, %r0 \n"
204    " vavgub %v0, %v0, %v1 \n"
206    " lvx %v13, %r9, %r4 \n"
207    " lvx %v12, 0, %r4 \n"
208    " stvx %v0, 0, %r3 \n"
209    " vperm %v1, %v12, %v13, %v10 \n"
210    " vperm %v0, %v12, %v13, %v11 \n"
211    " vavgub %v0, %v0, %v1 \n"
212    " stvx %v0, %r5, %r3 \n"
/*
 * MC_put_x_8_altivec: 8-wide horizontal half-pel copy.  Two permute pairs
 * (row-aligned and +1-shifted, built with vmrghb/vpkuhum/vaddubm) select
 * adjacent pixels which are then combined with vavgub; results go out as
 * 4-byte stvewx stores.  Two rows per loop pass.
 * Register use observed: r3 = dest, r4 = ref, r5 = stride, r6 = height;
 * r9 is an offset set up outside this extract.
 * NOTE(review): asm() wrapper and loop labels/branches are missing here.
 */
216 static void MC_put_x_8_altivec (uint8_t * dest, uint8_t * ref,
217                                 int stride, int height)
220    " lvsl %v0, 0, %r4 \n"
221    " vspltisb %v13, 1 \n"
222    " lvsl %v10, %r5, %r4 \n"
223    " vmrghb %v0, %v0, %v0 \n"
225    " lvx %v11, 0, %r4 \n"
226    " vmrghb %v10, %v10, %v10 \n"
227    " vpkuhum %v8, %v0, %v0 \n"
228    " lvx %v12, %r9, %r4 \n"
229    " srawi %r6, %r6, 1 \n"
230    " vpkuhum %v9, %v10, %v10 \n"
231    " vaddubm %v7, %v8, %v13 \n"
232    " addi %r6, %r6, -1 \n"
233    " vperm %v1, %v11, %v12, %v8 \n"
235    " vaddubm %v13, %v9, %v13 \n"
236    " add %r4, %r4, %r5 \n"
237    " vperm %v0, %v11, %v12, %v7 \n"
238    " vavgub %v0, %v1, %v0 \n"
241    " lvx %v12, %r9, %r4 \n"
242    " lvx %v11, 0, %r4 \n"
243    " stvewx %v0, 0, %r3 \n"
245    " vperm %v1, %v11, %v12, %v13 \n"
246    " stvewx %v0, %r9, %r3 \n"
247    " vperm %v0, %v11, %v12, %v9 \n"
248    " add %r4, %r4, %r5 \n"
250    " lvx %v12, %r9, %r4 \n"
251    " vavgub %v10, %v0, %v1 \n"
252    " lvx %v11, 0, %r4 \n"
253    " add %r3, %r3, %r5 \n"
254    " stvewx %v10, 0, %r3 \n"
255    " vperm %v1, %v11, %v12, %v7 \n"
256    " vperm %v0, %v11, %v12, %v8 \n"
258    " stvewx %v10, %r9, %r3 \n"
259    " add %r4, %r4, %r5 \n"
260    " vavgub %v0, %v0, %v1 \n"
261    " add %r3, %r3, %r5 \n"
264    " lvx %v12, %r9, %r4 \n"
265    " lvx %v11, 0, %r4 \n"
266    " stvewx %v0, 0, %r3 \n"
268    " vperm %v1, %v11, %v12, %v13 \n"
269    " stvewx %v0, %r9, %r3 \n"
270    " vperm %v0, %v11, %v12, %v9 \n"
271    " add %r3, %r3, %r5 \n"
272    " vavgub %v10, %v0, %v1 \n"
273    " stvewx %v10, 0, %r3 \n"
274    " stvewx %v10, %r9, %r3 \n"
/*
 * MC_put_y_16_altivec: 16-wide copy with vertical half-pel interpolation:
 * each output row is vavgub (rounded average) of two consecutive source
 * rows.  Rows are realigned with lvsl/vperm; the previous row's permuted
 * vector is kept live across iterations so each row is loaded only once.
 * Register use observed: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): asm() wrapper, loop labels/branches and the li for %r9
 * are not visible in this extract.
 */
278 static void MC_put_y_16_altivec (uint8_t * dest, uint8_t * ref,
279                                  int stride, int height)
283    " lvsl %v10, 0, %r4 \n"
284    " lvx %v13, 0, %r4 \n"
285    " lvx %v1, %r9, %r4 \n"
286    " add %r4, %r4, %r5 \n"
287    " vperm %v12, %v13, %v1, %v10 \n"
288    " srawi %r6, %r6, 1 \n"
289    " lvx %v13, 0, %r4 \n"
290    " lvx %v1, %r9, %r4 \n"
291    " addi %r6, %r6, -1 \n"
292    " vperm %v11, %v13, %v1, %v10 \n"
294    " add %r0, %r5, %r5 \n"
295    " add %r4, %r4, %r5 \n"
296    " vavgub %v0, %v12, %v11 \n"
299    " lvx %v13, 0, %r4 \n"
300    " lvx %v1, %r9, %r4 \n"
301    " stvx %v0, 0, %r3 \n"
302    " vperm %v12, %v13, %v1, %v10 \n"
303    " add %r4, %r4, %r5 \n"
304    " lvx %v13, 0, %r4 \n"
305    " lvx %v1, %r9, %r4 \n"
306    " vavgub %v0, %v12, %v11 \n"
307    " stvx %v0, %r5, %r3 \n"
308    " vperm %v11, %v13, %v1, %v10 \n"
309    " add %r4, %r4, %r5 \n"
310    " add %r3, %r3, %r0 \n"
311    " vavgub %v0, %v12, %v11 \n"
313    " lvx %v1, %r9, %r4 \n"
314    " lvx %v13, 0, %r4 \n"
315    " stvx %v0, 0, %r3 \n"
316    " vperm %v12, %v13, %v1, %v10 \n"
317    " vavgub %v0, %v12, %v11 \n"
318    " stvx %v0, %r5, %r3 \n"
/*
 * MC_put_y_8_altivec: 8-wide vertical half-pel copy — vavgub of two
 * consecutive source rows, with 8-byte permutes built via
 * vmrghb/vpkuhum and output through stvewx word stores.
 * Register use observed: r3 = dest, r4 = ref, r5 = stride, r6 = height;
 * r9 is an offset set up outside this extract.
 * NOTE(review): asm() wrapper and loop labels/branches are missing here.
 */
322 static void MC_put_y_8_altivec (uint8_t * dest, uint8_t * ref,
323                                 int stride, int height)
326    " lvsl %v13, 0, %r4 \n"
327    " lvsl %v11, %r5, %r4 \n"
328    " vmrghb %v13, %v13, %v13 \n"
330    " lvx %v12, 0, %r4 \n"
331    " vmrghb %v11, %v11, %v11 \n"
332    " lvx %v1, %r9, %r4 \n"
333    " vpkuhum %v9, %v13, %v13 \n"
334    " add %r4, %r4, %r5 \n"
335    " vpkuhum %v10, %v11, %v11 \n"
336    " vperm %v13, %v12, %v1, %v9 \n"
337    " srawi %r6, %r6, 1 \n"
338    " lvx %v12, 0, %r4 \n"
339    " lvx %v1, %r9, %r4 \n"
340    " addi %r6, %r6, -1 \n"
341    " vperm %v11, %v12, %v1, %v10 \n"
343    " add %r4, %r4, %r5 \n"
344    " vavgub %v0, %v13, %v11 \n"
347    " lvx %v1, %r9, %r4 \n"
348    " lvx %v12, 0, %r4 \n"
349    " stvewx %v0, 0, %r3 \n"
351    " vperm %v13, %v12, %v1, %v9 \n"
352    " stvewx %v0, %r9, %r3 \n"
353    " add %r4, %r4, %r5 \n"
354    " vavgub %v0, %v13, %v11 \n"
356    " lvx %v1, %r9, %r4 \n"
357    " lvx %v12, 0, %r4 \n"
358    " add %r3, %r3, %r5 \n"
359    " stvewx %v0, 0, %r3 \n"
360    " vperm %v11, %v12, %v1, %v10 \n"
362    " stvewx %v0, %r9, %r3 \n"
363    " vavgub %v0, %v13, %v11 \n"
364    " add %r4, %r4, %r5 \n"
365    " add %r3, %r3, %r5 \n"
368    " lvx %v1, %r9, %r4 \n"
369    " lvx %v12, 0, %r4 \n"
370    " stvewx %v0, 0, %r3 \n"
372    " vperm %v13, %v12, %v1, %v9 \n"
373    " stvewx %v0, %r9, %r3 \n"
374    " add %r3, %r3, %r5 \n"
375    " vavgub %v0, %v13, %v11 \n"
376    " stvewx %v0, 0, %r3 \n"
377    " stvewx %v0, %r9, %r3 \n"
/*
 * MC_put_xy_16_altivec: 16-wide copy with 2-D (diagonal) half-pel
 * interpolation.  Each row first averages horizontally adjacent pixels
 * (vavgub of perm and perm+1), then two such rows are averaged; the
 * vxor/vor/vand sequence with splat(1) computes a correction term that
 * is subtracted (vsububm) so the cascaded vavgub rounding matches the
 * required four-pixel rounded average.  Two rows per loop pass.
 * Register use observed: r3 = dest, r4 = ref, r5 = stride, r6 = height.
 * NOTE(review): asm() wrapper, loop labels/branches and the li for %r9
 * are not visible in this extract.
 */
381 static void MC_put_xy_16_altivec (uint8_t * dest, uint8_t * ref,
382                                   int stride, int height)
385    " lvsl %v5, 0, %r4 \n"
386    " vspltisb %v3, 1 \n"
388    " lvx %v1, 0, %r4 \n"
389    " vaddubm %v4, %v5, %v3 \n"
390    " lvx %v0, %r9, %r4 \n"
391    " add %r4, %r4, %r5 \n"
392    " vperm %v10, %v1, %v0, %v4 \n"
393    " srawi %r6, %r6, 1 \n"
394    " vperm %v11, %v1, %v0, %v5 \n"
395    " addi %r6, %r6, -1 \n"
396    " lvx %v1, 0, %r4 \n"
398    " lvx %v0, %r9, %r4 \n"
399    " vavgub %v9, %v11, %v10 \n"
400    " vxor %v8, %v11, %v10 \n"
401    " add %r0, %r5, %r5 \n"
402    " vperm %v10, %v1, %v0, %v4 \n"
403    " add %r4, %r4, %r5 \n"
404    " vperm %v11, %v1, %v0, %v5 \n"
405    " vxor %v6, %v11, %v10 \n"
406    " vavgub %v7, %v11, %v10 \n"
407    " vor %v0, %v8, %v6 \n"
408    " vxor %v13, %v9, %v7 \n"
409    " vand %v0, %v3, %v0 \n"
410    " vavgub %v1, %v9, %v7 \n"
411    " vand %v0, %v0, %v13 \n"
412    " vsububm %v13, %v1, %v0 \n"
415    " lvx %v1, 0, %r4 \n"
416    " lvx %v0, %r9, %r4 \n"
417    " stvx %v13, 0, %r3 \n"
418    " vperm %v10, %v1, %v0, %v4 \n"
419    " add %r4, %r4, %r5 \n"
420    " vperm %v11, %v1, %v0, %v5 \n"
421    " lvx %v1, 0, %r4 \n"
422    " lvx %v0, %r9, %r4 \n"
423    " vavgub %v9, %v11, %v10 \n"
424    " vxor %v8, %v11, %v10 \n"
425    " add %r4, %r4, %r5 \n"
426    " vperm %v10, %v1, %v0, %v4 \n"
427    " vavgub %v12, %v9, %v7 \n"
428    " vperm %v11, %v1, %v0, %v5 \n"
429    " vor %v13, %v8, %v6 \n"
430    " vxor %v0, %v9, %v7 \n"
431    " vxor %v6, %v11, %v10 \n"
432    " vand %v13, %v3, %v13 \n"
433    " vavgub %v7, %v11, %v10 \n"
434    " vor %v1, %v8, %v6 \n"
435    " vand %v13, %v13, %v0 \n"
436    " vxor %v0, %v9, %v7 \n"
437    " vand %v1, %v3, %v1 \n"
438    " vsububm %v13, %v12, %v13 \n"
439    " vand %v1, %v1, %v0 \n"
440    " stvx %v13, %r5, %r3 \n"
441    " vavgub %v0, %v9, %v7 \n"
442    " add %r3, %r3, %r0 \n"
443    " vsububm %v13, %v0, %v1 \n"
445    " lvx %v0, %r9, %r4 \n"
446    " lvx %v1, 0, %r4 \n"
447    " stvx %v13, 0, %r3 \n"
448    " vperm %v10, %v1, %v0, %v4 \n"
449    " vperm %v11, %v1, %v0, %v5 \n"
450    " vxor %v8, %v11, %v10 \n"
451    " vavgub %v9, %v11, %v10 \n"
452    " vor %v0, %v8, %v6 \n"
453    " vxor %v13, %v9, %v7 \n"
454    " vand %v0, %v3, %v0 \n"
455    " vavgub %v1, %v9, %v7 \n"
456    " vand %v0, %v0, %v13 \n"
457    " vsububm %v13, %v1, %v0 \n"
458    " stvx %v13, %r5, %r3 \n"
/*
 * MC_put_xy_8_altivec: 8-wide 2-D (diagonal) half-pel copy.  Same
 * xor/or/and rounding-correction scheme as MC_put_xy_16_altivec, with
 * the 8-byte permute masks built via vmrghb/vpkuhum and results stored
 * through stvewx word stores.  Two rows per loop pass.
 * Register use observed: r3 = dest, r4 = ref, r5 = stride, r6 = height;
 * r9 is an offset set up outside this extract.
 * NOTE(review): asm() wrapper and loop labels/branches are missing here.
 */
462 static void MC_put_xy_8_altivec (uint8_t * dest, uint8_t * ref,
463                                  int stride, int height)
466    " lvsl %v4, 0, %r4 \n"
467    " vspltisb %v3, 1 \n"
468    " lvsl %v5, %r5, %r4 \n"
469    " vmrghb %v4, %v4, %v4 \n"
471    " vmrghb %v5, %v5, %v5 \n"
472    " lvx %v1, 0, %r4 \n"
473    " vpkuhum %v4, %v4, %v4 \n"
474    " lvx %v0, %r9, %r4 \n"
475    " vpkuhum %v5, %v5, %v5 \n"
476    " add %r4, %r4, %r5 \n"
477    " vaddubm %v2, %v4, %v3 \n"
478    " vperm %v11, %v1, %v0, %v4 \n"
479    " srawi %r6, %r6, 1 \n"
480    " vaddubm %v19, %v5, %v3 \n"
481    " addi %r6, %r6, -1 \n"
482    " vperm %v10, %v1, %v0, %v2 \n"
484    " lvx %v1, 0, %r4 \n"
485    " lvx %v0, %r9, %r4 \n"
486    " vavgub %v9, %v11, %v10 \n"
487    " vxor %v8, %v11, %v10 \n"
488    " add %r4, %r4, %r5 \n"
489    " vperm %v10, %v1, %v0, %v19 \n"
490    " vperm %v11, %v1, %v0, %v5 \n"
491    " vxor %v6, %v11, %v10 \n"
492    " vavgub %v7, %v11, %v10 \n"
493    " vor %v0, %v8, %v6 \n"
494    " vxor %v13, %v9, %v7 \n"
495    " vand %v0, %v3, %v0 \n"
496    " vavgub %v1, %v9, %v7 \n"
497    " vand %v0, %v0, %v13 \n"
498    " vsububm %v13, %v1, %v0 \n"
501    " lvx %v0, %r9, %r4 \n"
502    " lvx %v1, 0, %r4 \n"
503    " stvewx %v13, 0, %r3 \n"
505    " vperm %v10, %v1, %v0, %v2 \n"
506    " stvewx %v13, %r9, %r3 \n"
507    " vperm %v11, %v1, %v0, %v4 \n"
508    " add %r4, %r4, %r5 \n"
510    " vavgub %v9, %v11, %v10 \n"
511    " lvx %v0, %r9, %r4 \n"
512    " vxor %v8, %v11, %v10 \n"
513    " lvx %v1, 0, %r4 \n"
514    " vavgub %v12, %v9, %v7 \n"
515    " vor %v13, %v8, %v6 \n"
516    " add %r3, %r3, %r5 \n"
517    " vperm %v10, %v1, %v0, %v19 \n"
519    " vperm %v11, %v1, %v0, %v5 \n"
520    " vand %v13, %v3, %v13 \n"
521    " add %r4, %r4, %r5 \n"
522    " vxor %v0, %v9, %v7 \n"
523    " vxor %v6, %v11, %v10 \n"
524    " vavgub %v7, %v11, %v10 \n"
525    " vor %v1, %v8, %v6 \n"
526    " vand %v13, %v13, %v0 \n"
527    " vxor %v0, %v9, %v7 \n"
528    " vand %v1, %v3, %v1 \n"
529    " vsububm %v13, %v12, %v13 \n"
530    " vand %v1, %v1, %v0 \n"
531    " stvewx %v13, 0, %r3 \n"
532    " vavgub %v0, %v9, %v7 \n"
533    " stvewx %v13, %r9, %r3 \n"
534    " add %r3, %r3, %r5 \n"
535    " vsububm %v13, %v0, %v1 \n"
538    " lvx %v0, %r9, %r4 \n"
539    " lvx %v1, 0, %r4 \n"
540    " stvewx %v13, 0, %r3 \n"
541    " vperm %v10, %v1, %v0, %v2 \n"
543    " vperm %v11, %v1, %v0, %v4 \n"
544    " stvewx %v13, %r9, %r3 \n"
545    " add %r3, %r3, %r5 \n"
546    " vxor %v8, %v11, %v10 \n"
547    " vavgub %v9, %v11, %v10 \n"
548    " vor %v0, %v8, %v6 \n"
549    " vxor %v13, %v9, %v7 \n"
550    " vand %v0, %v3, %v0 \n"
551    " vavgub %v1, %v9, %v7 \n"
552    " vand %v0, %v0, %v13 \n"
553    " vsububm %v13, %v1, %v0 \n"
554    " stvewx %v13, 0, %r3 \n"
555    " stvewx %v13, %r9, %r3 \n"
/*
 * MC_avg_o_16_altivec: like MC_put_o_16_altivec, but the realigned
 * reference row is averaged (vavgub) with the data already in `dest`
 * (loaded via lvx from r3) before being stored — the "avg" prediction
 * mode.  Two rows per loop pass.
 * Register use observed: r3 = dest, r4 = ref, r5 = stride, r6 = height;
 * r9 is reused as 2*stride after setup; r11 holds a load offset set up
 * outside this extract.
 * NOTE(review): asm() wrapper and loop labels/branches are missing here.
 */
559 static void MC_avg_o_16_altivec (uint8_t * dest, uint8_t * ref,
560                                  int stride, int height)
564    " lvx %v0, %r9, %r4 \n"
565    " lvsl %v11, 0, %r4 \n"
566    " lvx %v1, 0, %r4 \n"
567    " srawi %r6, %r6, 1 \n"
568    " addi %r6, %r6, -1 \n"
569    " vperm %v0, %v1, %v0, %v11 \n"
570    " lvx %v13, 0, %r3 \n"
572    " add %r9, %r5, %r5 \n"
573    " vavgub %v12, %v13, %v0 \n"
574    " add %r4, %r4, %r5 \n"
577    " lvx %v1, 0, %r4 \n"
578    " lvx %v0, %r11, %r4 \n"
579    " lvx %v13, %r5, %r3 \n"
580    " vperm %v0, %v1, %v0, %v11 \n"
581    " stvx %v12, 0, %r3 \n"
582    " add %r4, %r4, %r5 \n"
583    " vavgub %v12, %v13, %v0 \n"
584    " lvx %v1, 0, %r4 \n"
585    " lvx %v0, %r11, %r4 \n"
586    " lvx %v13, %r9, %r3 \n"
587    " vperm %v0, %v1, %v0, %v11 \n"
588    " stvx %v12, %r5, %r3 \n"
589    " add %r4, %r4, %r5 \n"
590    " vavgub %v12, %v13, %v0 \n"
591    " add %r3, %r3, %r9 \n"
593    " lvx %v0, %r11, %r4 \n"
594    " lvx %v1, 0, %r4 \n"
595    " lvx %v13, %r5, %r3 \n"
596    " vperm %v0, %v1, %v0, %v11 \n"
597    " stvx %v12, 0, %r3 \n"
598    " vavgub %v12, %v13, %v0 \n"
599    " stvx %v12, %r5, %r3 \n"
/*
 * MC_avg_o_8_altivec: 8-wide no-interpolation prediction averaged with
 * the existing contents of `dest` (lvx from r3, then vavgub) before the
 * stvewx word stores.  Permute masks built via vmrghb/vpkuhum.
 * Register use observed: r3 = dest, r4 = ref, r5 = stride, r6 = height;
 * r9 is an offset set up outside this extract.
 * NOTE(review): asm() wrapper and loop labels/branches are missing here.
 */
603 static void MC_avg_o_8_altivec (uint8_t * dest, uint8_t * ref,
604                                 int stride, int height)
607    " lvsl %v12, 0, %r4 \n"
609    " vmrghb %v12, %v12, %v12 \n"
610    " lvsl %v1, %r5, %r4 \n"
611    " lvx %v13, 0, %r4 \n"
612    " vpkuhum %v9, %v12, %v12 \n"
613    " lvx %v0, %r9, %r4 \n"
614    " srawi %r6, %r6, 1 \n"
615    " vmrghb %v1, %v1, %v1 \n"
616    " addi %r6, %r6, -1 \n"
617    " vperm %v0, %v13, %v0, %v9 \n"
618    " lvx %v11, 0, %r3 \n"
620    " vpkuhum %v10, %v1, %v1 \n"
621    " add %r4, %r4, %r5 \n"
622    " vavgub %v12, %v11, %v0 \n"
625    " lvx %v0, %r9, %r4 \n"
626    " lvx %v13, 0, %r4 \n"
627    " lvx %v11, %r5, %r3 \n"
628    " stvewx %v12, 0, %r3 \n"
629    " vperm %v0, %v13, %v0, %v10 \n"
631    " stvewx %v12, %r9, %r3 \n"
632    " vavgub %v1, %v11, %v0 \n"
633    " add %r4, %r4, %r5 \n"
635    " lvx %v0, %r9, %r4 \n"
636    " add %r3, %r3, %r5 \n"
637    " lvx %v13, 0, %r4 \n"
638    " lvx %v11, %r5, %r3 \n"
639    " stvewx %v1, 0, %r3 \n"
640    " vperm %v0, %v13, %v0, %v9 \n"
642    " stvewx %v1, %r9, %r3 \n"
643    " vavgub %v12, %v11, %v0 \n"
644    " add %r4, %r4, %r5 \n"
645    " add %r3, %r3, %r5 \n"
648    " lvx %v0, %r9, %r4 \n"
649    " lvx %v13, 0, %r4 \n"
650    " lvx %v11, %r5, %r3 \n"
651    " stvewx %v12, 0, %r3 \n"
652    " vperm %v0, %v13, %v0, %v10 \n"
654    " stvewx %v12, %r9, %r3 \n"
655    " vavgub %v1, %v11, %v0 \n"
656    " add %r3, %r3, %r5 \n"
657    " stvewx %v1, 0, %r3 \n"
658    " stvewx %v1, %r9, %r3 \n"
/*
 * MC_avg_x_16_altivec: 16-wide horizontal half-pel interpolation
 * (vavgub of perm and perm+1) whose result is then averaged with the
 * existing `dest` contents before the store.  Two rows per loop pass.
 * Register use observed: r3 = dest, r4 = ref, r5 = stride, r6 = height;
 * r9 becomes 2*stride after setup; r11 holds a load offset set up
 * outside this extract.
 * NOTE(review): asm() wrapper and loop labels/branches are missing here.
 */
662 static void MC_avg_x_16_altivec (uint8_t * dest, uint8_t * ref,
663                                  int stride, int height)
666    " lvsl %v8, 0, %r4 \n"
667    " vspltisb %v0, 1 \n"
669    " lvx %v12, %r9, %r4 \n"
670    " vaddubm %v7, %v8, %v0 \n"
671    " lvx %v11, 0, %r4 \n"
672    " srawi %r6, %r6, 1 \n"
673    " vperm %v1, %v11, %v12, %v7 \n"
674    " addi %r6, %r6, -1 \n"
675    " vperm %v0, %v11, %v12, %v8 \n"
676    " lvx %v9, 0, %r3 \n"
678    " add %r9, %r5, %r5 \n"
679    " vavgub %v0, %v0, %v1 \n"
680    " add %r4, %r4, %r5 \n"
681    " vavgub %v10, %v9, %v0 \n"
684    " lvx %v11, 0, %r4 \n"
685    " lvx %v12, %r11, %r4 \n"
686    " lvx %v9, %r5, %r3 \n"
687    " stvx %v10, 0, %r3 \n"
688    " vperm %v0, %v11, %v12, %v7 \n"
689    " add %r4, %r4, %r5 \n"
690    " vperm %v1, %v11, %v12, %v8 \n"
691    " lvx %v11, 0, %r4 \n"
692    " lvx %v12, %r11, %r4 \n"
693    " vavgub %v1, %v1, %v0 \n"
694    " add %r4, %r4, %r5 \n"
695    " vperm %v13, %v11, %v12, %v7 \n"
696    " vavgub %v10, %v9, %v1 \n"
697    " vperm %v0, %v11, %v12, %v8 \n"
698    " lvx %v9, %r9, %r3 \n"
699    " stvx %v10, %r5, %r3 \n"
700    " vavgub %v0, %v0, %v13 \n"
701    " add %r3, %r3, %r9 \n"
702    " vavgub %v10, %v9, %v0 \n"
704    " lvx %v12, %r11, %r4 \n"
705    " lvx %v11, 0, %r4 \n"
706    " lvx %v9, %r5, %r3 \n"
707    " vperm %v1, %v11, %v12, %v7 \n"
708    " stvx %v10, 0, %r3 \n"
709    " vperm %v0, %v11, %v12, %v8 \n"
710    " vavgub %v0, %v0, %v1 \n"
711    " vavgub %v10, %v9, %v0 \n"
712    " stvx %v10, %r5, %r3 \n"
/*
 * MC_avg_x_8_altivec: 8-wide horizontal half-pel interpolation averaged
 * with the existing `dest` contents (lvx from r3, vavgub) before the
 * stvewx word stores.  Permute pairs built via vmrghb/vpkuhum/vaddubm.
 * Register use observed: r3 = dest, r4 = ref, r5 = stride, r6 = height;
 * r9 is an offset set up outside this extract.
 * NOTE(review): asm() wrapper and loop labels/branches are missing here.
 */
716 static void MC_avg_x_8_altivec (uint8_t * dest, uint8_t * ref,
717                                 int stride, int height)
720    " lvsl %v10, 0, %r4 \n"
721    " vspltisb %v13, 1 \n"
723    " vmrghb %v10, %v10, %v10 \n"
724    " lvx %v11, 0, %r4 \n"
725    " lvx %v12, %r9, %r4 \n"
726    " vpkuhum %v7, %v10, %v10 \n"
727    " srawi %r6, %r6, 1 \n"
728    " lvsl %v10, %r5, %r4 \n"
729    " vaddubm %v6, %v7, %v13 \n"
730    " vperm %v0, %v11, %v12, %v7 \n"
731    " addi %r6, %r6, -1 \n"
732    " vmrghb %v10, %v10, %v10 \n"
733    " lvx %v9, 0, %r3 \n"
735    " vperm %v1, %v11, %v12, %v6 \n"
736    " add %r4, %r4, %r5 \n"
737    " vpkuhum %v8, %v10, %v10 \n"
738    " vavgub %v0, %v0, %v1 \n"
739    " vaddubm %v13, %v8, %v13 \n"
740    " vavgub %v10, %v9, %v0 \n"
743    " lvx %v12, %r9, %r4 \n"
744    " lvx %v11, 0, %r4 \n"
745    " lvx %v9, %r5, %r3 \n"
746    " stvewx %v10, 0, %r3 \n"
747    " vperm %v1, %v11, %v12, %v13 \n"
748    " vperm %v0, %v11, %v12, %v8 \n"
750    " stvewx %v10, %r9, %r3 \n"
751    " add %r4, %r4, %r5 \n"
752    " vavgub %v0, %v0, %v1 \n"
754    " lvx %v12, %r9, %r4 \n"
755    " vavgub %v10, %v9, %v0 \n"
756    " lvx %v11, 0, %r4 \n"
757    " add %r3, %r3, %r5 \n"
758    " vperm %v1, %v11, %v12, %v6 \n"
759    " lvx %v9, %r5, %r3 \n"
760    " vperm %v0, %v11, %v12, %v7 \n"
761    " stvewx %v10, 0, %r3 \n"
763    " vavgub %v0, %v0, %v1 \n"
764    " stvewx %v10, %r9, %r3 \n"
765    " add %r4, %r4, %r5 \n"
766    " add %r3, %r3, %r5 \n"
767    " vavgub %v10, %v9, %v0 \n"
770    " lvx %v12, %r9, %r4 \n"
771    " lvx %v11, 0, %r4 \n"
772    " lvx %v9, %r5, %r3 \n"
773    " vperm %v1, %v11, %v12, %v13 \n"
774    " stvewx %v10, 0, %r3 \n"
775    " vperm %v0, %v11, %v12, %v8 \n"
777    " stvewx %v10, %r9, %r3 \n"
778    " vavgub %v0, %v0, %v1 \n"
779    " add %r3, %r3, %r5 \n"
780    " vavgub %v10, %v9, %v0 \n"
781    " stvewx %v10, 0, %r3 \n"
782    " stvewx %v10, %r9, %r3 \n"
/*
 * MC_avg_y_16_altivec: 16-wide vertical half-pel interpolation (vavgub
 * of two consecutive source rows) averaged with the existing `dest`
 * contents before the store.  Two rows per loop pass.
 * Register use observed: r3 = dest, r4 = ref, r5 = stride, r6 = height;
 * r9 becomes 2*stride after setup; r11 holds a load offset set up
 * outside this extract.
 * NOTE(review): asm() wrapper and loop labels/branches are missing here.
 */
786 static void MC_avg_y_16_altivec (uint8_t * dest, uint8_t * ref,
787                                  int stride, int height)
791    " lvx %v1, %r9, %r4 \n"
792    " lvsl %v9, 0, %r4 \n"
793    " lvx %v13, 0, %r4 \n"
794    " add %r4, %r4, %r5 \n"
795    " vperm %v11, %v13, %v1, %v9 \n"
797    " lvx %v13, 0, %r4 \n"
798    " lvx %v1, %r11, %r4 \n"
799    " srawi %r6, %r6, 1 \n"
800    " vperm %v10, %v13, %v1, %v9 \n"
801    " addi %r6, %r6, -1 \n"
802    " lvx %v12, 0, %r3 \n"
804    " vavgub %v0, %v11, %v10 \n"
805    " add %r9, %r5, %r5 \n"
806    " add %r4, %r4, %r5 \n"
807    " vavgub %v0, %v12, %v0 \n"
810    " lvx %v13, 0, %r4 \n"
811    " lvx %v1, %r11, %r4 \n"
812    " lvx %v12, %r5, %r3 \n"
813    " vperm %v11, %v13, %v1, %v9 \n"
814    " stvx %v0, 0, %r3 \n"
815    " add %r4, %r4, %r5 \n"
816    " vavgub %v0, %v11, %v10 \n"
817    " lvx %v13, 0, %r4 \n"
818    " lvx %v1, %r11, %r4 \n"
819    " vavgub %v0, %v12, %v0 \n"
820    " add %r4, %r4, %r5 \n"
821    " lvx %v12, %r9, %r3 \n"
822    " vperm %v10, %v13, %v1, %v9 \n"
823    " stvx %v0, %r5, %r3 \n"
824    " vavgub %v0, %v11, %v10 \n"
825    " add %r3, %r3, %r9 \n"
826    " vavgub %v0, %v12, %v0 \n"
828    " lvx %v1, %r11, %r4 \n"
829    " lvx %v13, 0, %r4 \n"
830    " lvx %v12, %r5, %r3 \n"
831    " vperm %v11, %v13, %v1, %v9 \n"
832    " stvx %v0, 0, %r3 \n"
833    " vavgub %v0, %v11, %v10 \n"
834    " vavgub %v0, %v12, %v0 \n"
835    " stvx %v0, %r5, %r3 \n"
/*
 * MC_avg_y_8_altivec: 8-wide vertical half-pel interpolation averaged
 * with the existing `dest` contents (lvx from r3, vavgub) before the
 * stvewx word stores.  Permute masks built via vmrghb/vpkuhum.
 * Register use observed: r3 = dest, r4 = ref, r5 = stride, r6 = height;
 * r9 is an offset set up outside this extract.
 * NOTE(review): asm() wrapper and loop labels/branches are missing here.
 */
839 static void MC_avg_y_8_altivec (uint8_t * dest, uint8_t * ref,
840                                 int stride, int height)
843    " lvsl %v12, 0, %r4 \n"
844    " lvsl %v9, %r5, %r4 \n"
845    " vmrghb %v12, %v12, %v12 \n"
847    " lvx %v11, 0, %r4 \n"
848    " vmrghb %v9, %v9, %v9 \n"
849    " lvx %v13, %r9, %r4 \n"
850    " vpkuhum %v7, %v12, %v12 \n"
851    " add %r4, %r4, %r5 \n"
852    " vpkuhum %v8, %v9, %v9 \n"
853    " vperm %v12, %v11, %v13, %v7 \n"
854    " srawi %r6, %r6, 1 \n"
855    " lvx %v11, 0, %r4 \n"
856    " lvx %v13, %r9, %r4 \n"
857    " addi %r6, %r6, -1 \n"
858    " vperm %v9, %v11, %v13, %v8 \n"
859    " lvx %v10, 0, %r3 \n"
861    " add %r4, %r4, %r5 \n"
862    " vavgub %v0, %v12, %v9 \n"
863    " vavgub %v1, %v10, %v0 \n"
866    " lvx %v13, %r9, %r4 \n"
867    " lvx %v11, 0, %r4 \n"
868    " lvx %v10, %r5, %r3 \n"
869    " stvewx %v1, 0, %r3 \n"
870    " vperm %v12, %v11, %v13, %v7 \n"
872    " stvewx %v1, %r9, %r3 \n"
873    " vavgub %v0, %v12, %v9 \n"
874    " add %r4, %r4, %r5 \n"
876    " vavgub %v1, %v10, %v0 \n"
877    " lvx %v13, %r9, %r4 \n"
878    " lvx %v11, 0, %r4 \n"
879    " add %r3, %r3, %r5 \n"
880    " vperm %v9, %v11, %v13, %v8 \n"
881    " lvx %v10, %r5, %r3 \n"
882    " stvewx %v1, 0, %r3 \n"
883    " vavgub %v0, %v12, %v9 \n"
885    " stvewx %v1, %r9, %r3 \n"
886    " add %r4, %r4, %r5 \n"
887    " vavgub %v1, %v10, %v0 \n"
888    " add %r3, %r3, %r5 \n"
891    " lvx %v13, %r9, %r4 \n"
892    " lvx %v11, 0, %r4 \n"
893    " lvx %v10, %r5, %r3 \n"
894    " vperm %v12, %v11, %v13, %v7 \n"
895    " stvewx %v1, 0, %r3 \n"
897    " vavgub %v0, %v12, %v9 \n"
898    " stvewx %v1, %r9, %r3 \n"
899    " add %r3, %r3, %r5 \n"
900    " vavgub %v1, %v10, %v0 \n"
901    " stvewx %v1, 0, %r3 \n"
902    " stvewx %v1, %r9, %r3 \n"
/*
 * MC_avg_xy_16_altivec: 16-wide 2-D (diagonal) half-pel interpolation —
 * same xor/or/and rounding-correction scheme as MC_put_xy_16_altivec —
 * with the corrected result additionally averaged against the existing
 * `dest` contents (lvx from r3, vavgub) before each store.
 * Register use observed: r3 = dest, r4 = ref, r5 = stride, r6 = height;
 * r9 becomes 2*stride after setup; r11 holds a load offset set up
 * outside this extract.
 * NOTE(review): asm() wrapper and loop labels/branches are missing here.
 */
906 static void MC_avg_xy_16_altivec (uint8_t * dest, uint8_t * ref,
907                                   int stride, int height)
910    " lvsl %v4, 0, %r4 \n"
911    " vspltisb %v2, 1 \n"
913    " lvx %v1, %r9, %r4 \n"
914    " vaddubm %v3, %v4, %v2 \n"
915    " lvx %v13, 0, %r4 \n"
916    " add %r4, %r4, %r5 \n"
917    " vperm %v10, %v13, %v1, %v3 \n"
919    " vperm %v11, %v13, %v1, %v4 \n"
920    " srawi %r6, %r6, 1 \n"
921    " lvx %v13, 0, %r4 \n"
922    " lvx %v1, %r11, %r4 \n"
923    " vavgub %v9, %v11, %v10 \n"
924    " vxor %v8, %v11, %v10 \n"
925    " addi %r6, %r6, -1 \n"
926    " vperm %v10, %v13, %v1, %v3 \n"
927    " lvx %v6, 0, %r3 \n"
929    " vperm %v11, %v13, %v1, %v4 \n"
930    " add %r9, %r5, %r5 \n"
931    " add %r4, %r4, %r5 \n"
932    " vxor %v5, %v11, %v10 \n"
933    " vavgub %v7, %v11, %v10 \n"
934    " vor %v1, %v8, %v5 \n"
935    " vxor %v13, %v9, %v7 \n"
936    " vand %v1, %v2, %v1 \n"
937    " vavgub %v0, %v9, %v7 \n"
938    " vand %v1, %v1, %v13 \n"
939    " vsububm %v0, %v0, %v1 \n"
940    " vavgub %v12, %v6, %v0 \n"
943    " lvx %v13, 0, %r4 \n"
944    " lvx %v1, %r11, %r4 \n"
945    " lvx %v6, %r5, %r3 \n"
946    " stvx %v12, 0, %r3 \n"
947    " vperm %v10, %v13, %v1, %v3 \n"
948    " vperm %v11, %v13, %v1, %v4 \n"
949    " add %r4, %r4, %r5 \n"
950    " lvx %v13, 0, %r4 \n"
951    " lvx %v1, %r11, %r4 \n"
952    " vavgub %v9, %v11, %v10 \n"
953    " vxor %v8, %v11, %v10 \n"
954    " add %r4, %r4, %r5 \n"
955    " vperm %v10, %v13, %v1, %v3 \n"
956    " vavgub %v12, %v9, %v7 \n"
957    " vperm %v11, %v13, %v1, %v4 \n"
958    " vor %v0, %v8, %v5 \n"
959    " vxor %v13, %v9, %v7 \n"
960    " vxor %v5, %v11, %v10 \n"
961    " vand %v0, %v2, %v0 \n"
962    " vavgub %v7, %v11, %v10 \n"
963    " vor %v1, %v8, %v5 \n"
964    " vand %v0, %v0, %v13 \n"
965    " vand %v1, %v2, %v1 \n"
966    " vxor %v13, %v9, %v7 \n"
967    " vsububm %v12, %v12, %v0 \n"
968    " vand %v1, %v1, %v13 \n"
969    " vavgub %v0, %v9, %v7 \n"
970    " vavgub %v12, %v6, %v12 \n"
971    " lvx %v6, %r9, %r3 \n"
972    " vsububm %v0, %v0, %v1 \n"
973    " stvx %v12, %r5, %r3 \n"
974    " vavgub %v12, %v6, %v0 \n"
975    " add %r3, %r3, %r9 \n"
977    " lvx %v1, %r11, %r4 \n"
978    " lvx %v13, 0, %r4 \n"
979    " lvx %v6, %r5, %r3 \n"
980    " vperm %v10, %v13, %v1, %v3 \n"
981    " stvx %v12, 0, %r3 \n"
982    " vperm %v11, %v13, %v1, %v4 \n"
983    " vxor %v8, %v11, %v10 \n"
984    " vavgub %v9, %v11, %v10 \n"
985    " vor %v0, %v8, %v5 \n"
986    " vxor %v13, %v9, %v7 \n"
987    " vand %v0, %v2, %v0 \n"
988    " vavgub %v1, %v9, %v7 \n"
989    " vand %v0, %v0, %v13 \n"
990    " vsububm %v1, %v1, %v0 \n"
991    " vavgub %v12, %v6, %v1 \n"
992    " stvx %v12, %r5, %r3 \n"
/*
 * MC_avg_xy_8_altivec: 8-wide 2-D (diagonal) half-pel interpolation with
 * the xor/or/and rounding correction (cf. MC_put_xy_8_altivec), then a
 * final vavgub against the existing `dest` contents before the stvewx
 * word stores.  Permute masks built via vmrghb/vpkuhum.
 * Register use observed: r3 = dest, r4 = ref, r5 = stride, r6 = height;
 * r9 is an offset set up outside this extract.
 * NOTE(review): asm() wrapper and loop labels/branches are missing here.
 */
996 static void MC_avg_xy_8_altivec (uint8_t * dest, uint8_t * ref,
997                                  int stride, int height)
1000   " lvsl %v2, 0, %r4 \n"
1001   " vspltisb %v19, 1 \n"
1002   " lvsl %v3, %r5, %r4 \n"
1003   " vmrghb %v2, %v2, %v2 \n"
1005   " vmrghb %v3, %v3, %v3 \n"
1006   " lvx %v9, 0, %r4 \n"
1007   " vpkuhum %v2, %v2, %v2 \n"
1008   " lvx %v1, %r9, %r4 \n"
1009   " vpkuhum %v3, %v3, %v3 \n"
1010   " add %r4, %r4, %r5 \n"
1011   " vaddubm %v18, %v2, %v19 \n"
1012   " vperm %v11, %v9, %v1, %v2 \n"
1013   " srawi %r6, %r6, 1 \n"
1014   " vaddubm %v17, %v3, %v19 \n"
1015   " addi %r6, %r6, -1 \n"
1016   " vperm %v10, %v9, %v1, %v18 \n"
1017   " lvx %v4, 0, %r3 \n"
1019   " lvx %v1, %r9, %r4 \n"
1020   " lvx %v9, 0, %r4 \n"
1021   " vavgub %v8, %v11, %v10 \n"
1022   " vxor %v7, %v11, %v10 \n"
1023   " add %r4, %r4, %r5 \n"
1024   " vperm %v10, %v9, %v1, %v17 \n"
1025   " vperm %v11, %v9, %v1, %v3 \n"
1026   " vxor %v5, %v11, %v10 \n"
1027   " vavgub %v6, %v11, %v10 \n"
1028   " vor %v1, %v7, %v5 \n"
1029   " vxor %v13, %v8, %v6 \n"
1030   " vand %v1, %v19, %v1 \n"
1031   " vavgub %v0, %v8, %v6 \n"
1032   " vand %v1, %v1, %v13 \n"
1033   " vsububm %v0, %v0, %v1 \n"
1034   " vavgub %v13, %v4, %v0 \n"
1037   " lvx %v1, %r9, %r4 \n"
1038   " lvx %v9, 0, %r4 \n"
1039   " lvx %v4, %r5, %r3 \n"
1040   " stvewx %v13, 0, %r3 \n"
1041   " vperm %v10, %v9, %v1, %v18 \n"
1042   " vperm %v11, %v9, %v1, %v2 \n"
1044   " stvewx %v13, %r9, %r3 \n"
1045   " vxor %v7, %v11, %v10 \n"
1046   " add %r4, %r4, %r5 \n"
1048   " vavgub %v8, %v11, %v10 \n"
1049   " lvx %v1, %r9, %r4 \n"
1050   " vor %v0, %v7, %v5 \n"
1051   " lvx %v9, 0, %r4 \n"
1052   " vxor %v12, %v8, %v6 \n"
1053   " vand %v0, %v19, %v0 \n"
1054   " add %r3, %r3, %r5 \n"
1055   " vperm %v10, %v9, %v1, %v17 \n"
1056   " vavgub %v13, %v8, %v6 \n"
1058   " vperm %v11, %v9, %v1, %v3 \n"
1059   " vand %v0, %v0, %v12 \n"
1060   " add %r4, %r4, %r5 \n"
1061   " vxor %v5, %v11, %v10 \n"
1062   " vavgub %v6, %v11, %v10 \n"
1063   " vor %v1, %v7, %v5 \n"
1064   " vsububm %v13, %v13, %v0 \n"
1065   " vxor %v0, %v8, %v6 \n"
1066   " vand %v1, %v19, %v1 \n"
1067   " vavgub %v13, %v4, %v13 \n"
1068   " vand %v1, %v1, %v0 \n"
1069   " lvx %v4, %r5, %r3 \n"
1070   " vavgub %v0, %v8, %v6 \n"
1071   " stvewx %v13, 0, %r3 \n"
1072   " stvewx %v13, %r9, %r3 \n"
1073   " vsububm %v0, %v0, %v1 \n"
1074   " add %r3, %r3, %r5 \n"
1075   " vavgub %v13, %v4, %v0 \n"
1078   " lvx %v1, %r9, %r4 \n"
1079   " lvx %v9, 0, %r4 \n"
1080   " lvx %v4, %r5, %r3 \n"
1081   " vperm %v10, %v9, %v1, %v18 \n"
1082   " stvewx %v13, 0, %r3 \n"
1083   " vperm %v11, %v9, %v1, %v2 \n"
1085   " stvewx %v13, %r9, %r3 \n"
1086   " vxor %v7, %v11, %v10 \n"
1087   " add %r3, %r3, %r5 \n"
1088   " vavgub %v8, %v11, %v10 \n"
1089   " vor %v0, %v7, %v5 \n"
1090   " vxor %v13, %v8, %v6 \n"
1091   " vand %v0, %v19, %v0 \n"
1092   " vavgub %v1, %v8, %v6 \n"
1093   " vand %v0, %v0, %v13 \n"
1094   " vsububm %v1, %v1, %v0 \n"
1095   " vavgub %v13, %v4, %v1 \n"
1096   " stvewx %v13, 0, %r3 \n"
1097   " stvewx %v13, %r9, %r3 \n"
1101 #endif /* !CAN_COMPILE_C_ALTIVEC */
1102 #endif /* __BUILD_ALTIVEC_ASM__ */
1104 #if defined(CAN_COMPILE_C_ALTIVEC) || defined(__BUILD_ALTIVEC_ASM__)
/* Shorthand aliases for the AltiVec vector types used by the C-intrinsic
 * implementations below (kept as macros, not typedefs, because the
 * `vector` keyword is only available under AltiVec-enabled compilers). */
1106 #define vector_s16_t vector signed short
1107 #define vector_u16_t vector unsigned short
1108 #define vector_s8_t vector signed char
1109 #define vector_u8_t vector unsigned char
1110 #define vector_s32_t vector signed int
1111 #define vector_u32_t vector unsigned int
/*
 * C-intrinsic version of MC_put_o_16_altivec: copy a 16-wide block with
 * no interpolation.  vec_lvsl/vec_perm realign the possibly-unaligned
 * source (the second vec_ld at offset 15 fetches the adjacent quadword);
 * two rows are emitted per loop pass.
 * NOTE(review): braces, `ref += stride;`/`dest += stride;` advances and
 * the do/while loop lines are not visible in this extract — the logic
 * below mirrors the asm version above.
 */
1113 void MC_put_o_16_altivec (unsigned char * dest, unsigned char * ref,
1114                           int stride, int height)
1116     vector_u8_t perm, ref0, ref1, tmp;
1118     perm = vec_lvsl (0, ref);
1120     height = (height >> 1) - 1;
1122     ref0 = vec_ld (0, ref);
1123     ref1 = vec_ld (15, ref);
1125     tmp = vec_perm (ref0, ref1, perm);
1128     ref0 = vec_ld (0, ref);
1129     ref1 = vec_ld (15, ref);
1131     vec_st (tmp, 0, dest);
1132     tmp = vec_perm (ref0, ref1, perm);
1134     ref0 = vec_ld (0, ref);
1135     ref1 = vec_ld (15, ref);
1137     vec_st (tmp, stride, dest);
1139     tmp = vec_perm (ref0, ref1, perm);
1142     ref0 = vec_ld (0, ref);
1143     ref1 = vec_ld (15, ref);
1144     vec_st (tmp, 0, dest);
1145     tmp = vec_perm (ref0, ref1, perm);
1146     vec_st (tmp, stride, dest);
/*
 * C-intrinsic version of MC_put_o_8_altivec: copy an 8-wide block, no
 * interpolation.  The two permute vectors (one per row parity) are built
 * with vec_mergeh + vec_pack so the 8 wanted bytes land in the word
 * lanes that the two 4-byte vec_ste stores (offsets 0 and 4) write out.
 * NOTE(review): braces, pointer advances and the do/while loop lines are
 * not visible in this extract.
 */
1149 void MC_put_o_8_altivec (unsigned char * dest, unsigned char * ref,
1150                          int stride, int height)
1152     vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1;
1154     tmp0 = vec_lvsl (0, ref);
1155     tmp0 = vec_mergeh (tmp0, tmp0);
1156     perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1157     tmp1 = vec_lvsl (stride, ref);
1158     tmp1 = vec_mergeh (tmp1, tmp1);
1159     perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1161     height = (height >> 1) - 1;
1163     ref0 = vec_ld (0, ref);
1164     ref1 = vec_ld (7, ref);
1166     tmp0 = vec_perm (ref0, ref1, perm0);
1169     ref0 = vec_ld (0, ref);
1170     ref1 = vec_ld (7, ref);
1172     vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1173     vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1175     tmp1 = vec_perm (ref0, ref1, perm1);
1177     ref0 = vec_ld (0, ref);
1178     ref1 = vec_ld (7, ref);
1180     vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1181     vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1183     tmp0 = vec_perm (ref0, ref1, perm0);
1186     ref0 = vec_ld (0, ref);
1187     ref1 = vec_ld (7, ref);
1188     vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1189     vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1191     tmp1 = vec_perm (ref0, ref1, perm1);
1192     vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1193     vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
/*
 * C-intrinsic version of MC_put_x_16_altivec: 16-wide copy with
 * horizontal half-pel interpolation — vec_avg of the row permuted with
 * permA and with permB = permA + 1 (the right-neighbour byte).  The
 * second load at offset 16 covers the extra byte a misaligned +1
 * permute can index.  Two rows per loop pass.
 * NOTE(review): braces, pointer advances and the do/while loop lines are
 * not visible in this extract.
 */
1196 void MC_put_x_16_altivec (unsigned char * dest, unsigned char * ref,
1197                           int stride, int height)
1199     vector_u8_t permA, permB, ref0, ref1, tmp;
1201     permA = vec_lvsl (0, ref);
1202     permB = vec_add (permA, vec_splat_u8 (1));
1204     height = (height >> 1) - 1;
1206     ref0 = vec_ld (0, ref);
1207     ref1 = vec_ld (16, ref);
1209     tmp = vec_avg (vec_perm (ref0, ref1, permA),
1210                    vec_perm (ref0, ref1, permB));
1213     ref0 = vec_ld (0, ref);
1214     ref1 = vec_ld (16, ref);
1216     vec_st (tmp, 0, dest);
1217     tmp = vec_avg (vec_perm (ref0, ref1, permA),
1218                    vec_perm (ref0, ref1, permB));
1220     ref0 = vec_ld (0, ref);
1221     ref1 = vec_ld (16, ref);
1223     vec_st (tmp, stride, dest);
1225     tmp = vec_avg (vec_perm (ref0, ref1, permA),
1226                    vec_perm (ref0, ref1, permB));
1229     ref0 = vec_ld (0, ref);
1230     ref1 = vec_ld (16, ref);
1231     vec_st (tmp, 0, dest);
1232     tmp = vec_avg (vec_perm (ref0, ref1, permA),
1233                    vec_perm (ref0, ref1, permB));
1234     vec_st (tmp, stride, dest);
/*
 * C-intrinsic version of MC_put_x_8_altivec: 8-wide horizontal half-pel
 * copy.  Two permute pairs (one per row parity, each with a +1-shifted
 * partner built via vec_add with splat(1)) select adjacent pixels which
 * vec_avg combines; results go out as two 4-byte vec_ste stores.
 * NOTE(review): braces, pointer advances and the do/while loop lines are
 * not visible in this extract.
 */
1237 void MC_put_x_8_altivec (unsigned char * dest, unsigned char * ref,
1238                          int stride, int height)
1240     vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
1242     ones = vec_splat_u8 (1);
1243     tmp0 = vec_lvsl (0, ref);
1244     tmp0 = vec_mergeh (tmp0, tmp0);
1245     perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1246     perm0B = vec_add (perm0A, ones);
1247     tmp1 = vec_lvsl (stride, ref);
1248     tmp1 = vec_mergeh (tmp1, tmp1);
1249     perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1250     perm1B = vec_add (perm1A, ones);
1252     height = (height >> 1) - 1;
1254     ref0 = vec_ld (0, ref);
1255     ref1 = vec_ld (8, ref);
1257     tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
1258                     vec_perm (ref0, ref1, perm0B));
1261     ref0 = vec_ld (0, ref);
1262     ref1 = vec_ld (8, ref);
1264     vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1265     vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1267     tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
1268                     vec_perm (ref0, ref1, perm1B));
1270     ref0 = vec_ld (0, ref);
1271     ref1 = vec_ld (8, ref);
1273     vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1274     vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1276     tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
1277                     vec_perm (ref0, ref1, perm0B));
1280     ref0 = vec_ld (0, ref);
1281     ref1 = vec_ld (8, ref);
1282     vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1283     vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1285     tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
1286                     vec_perm (ref0, ref1, perm1B));
1287     vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1288     vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
/* MC_put_y_16_altivec: copy a 16-byte-wide block with vertical half-pel
 * interpolation -- each output row is the rounded average of two
 * consecutive source rows.  tmp0/tmp1 alternate as "current" and
 * "previous" row so each source row is loaded only once.
 * NOTE(review): loop braces and ref/dest stride advances appear to have
 * been lost in this extraction. */
1291 void MC_put_y_16_altivec (unsigned char * dest, unsigned char * ref,
1292 int stride, int height)
1294 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp;
1296 perm = vec_lvsl (0, ref);
1298 height = (height >> 1) - 1;
1300 ref0 = vec_ld (0, ref);
/* Offset 15 (not 16): only 16 source bytes are needed per row, so the
 * second load must not read past the last required byte. */
1301 ref1 = vec_ld (15, ref);
1303 tmp0 = vec_perm (ref0, ref1, perm);
1304 ref0 = vec_ld (0, ref);
1305 ref1 = vec_ld (15, ref);
1307 tmp1 = vec_perm (ref0, ref1, perm);
/* Vertical average of the two aligned rows. */
1308 tmp = vec_avg (tmp0, tmp1);
1311 ref0 = vec_ld (0, ref);
1312 ref1 = vec_ld (15, ref);
1314 vec_st (tmp, 0, dest);
1315 tmp0 = vec_perm (ref0, ref1, perm);
1316 tmp = vec_avg (tmp0, tmp1);
1318 ref0 = vec_ld (0, ref);
1319 ref1 = vec_ld (15, ref);
1321 vec_st (tmp, stride, dest);
1323 tmp1 = vec_perm (ref0, ref1, perm);
1324 tmp = vec_avg (tmp0, tmp1);
/* Epilogue: final pair of rows. */
1327 ref0 = vec_ld (0, ref);
1328 ref1 = vec_ld (15, ref);
1329 vec_st (tmp, 0, dest);
1330 tmp0 = vec_perm (ref0, ref1, perm);
1331 tmp = vec_avg (tmp0, tmp1);
1332 vec_st (tmp, stride, dest);
/* MC_put_y_8_altivec: copy an 8-byte-wide block with vertical half-pel
 * interpolation (average of two consecutive source rows).  Two alignment
 * permutations are prepared because even (offset 0) and odd (offset
 * stride) rows may be aligned differently.
 * NOTE(review): loop braces and ref/dest stride advances appear to have
 * been lost in this extraction. */
1335 void MC_put_y_8_altivec (unsigned char * dest, unsigned char * ref,
1336 int stride, int height)
1338 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1;
/* 8-byte alignment permutations via the mergeh/pack narrowing trick. */
1340 tmp0 = vec_lvsl (0, ref);
1341 tmp0 = vec_mergeh (tmp0, tmp0);
1342 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1343 tmp1 = vec_lvsl (stride, ref);
1344 tmp1 = vec_mergeh (tmp1, tmp1);
1345 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1347 height = (height >> 1) - 1;
1349 ref0 = vec_ld (0, ref);
/* Offset 7: only 8 source bytes are needed per row. */
1350 ref1 = vec_ld (7, ref);
1352 tmp0 = vec_perm (ref0, ref1, perm0);
1353 ref0 = vec_ld (0, ref);
1354 ref1 = vec_ld (7, ref);
1356 tmp1 = vec_perm (ref0, ref1, perm1);
/* Vertical average of the two rows; tmp0/tmp1 alternate roles below. */
1357 tmp = vec_avg (tmp0, tmp1);
1360 ref0 = vec_ld (0, ref);
1361 ref1 = vec_ld (7, ref);
/* 8-byte store as two 4-byte element stores.
 * NOTE(review): assumes dest is 4-byte aligned -- confirm with callers. */
1363 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1364 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1366 tmp0 = vec_perm (ref0, ref1, perm0);
1367 tmp = vec_avg (tmp0, tmp1);
1369 ref0 = vec_ld (0, ref);
1370 ref1 = vec_ld (7, ref);
1372 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1373 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1375 tmp1 = vec_perm (ref0, ref1, perm1);
1376 tmp = vec_avg (tmp0, tmp1);
/* Epilogue: final pair of rows. */
1379 ref0 = vec_ld (0, ref);
1380 ref1 = vec_ld (7, ref);
1381 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1382 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1384 tmp0 = vec_perm (ref0, ref1, perm0);
1385 tmp = vec_avg (tmp0, tmp1);
1386 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1387 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
/* MC_put_xy_16_altivec: copy a 16-byte-wide block with half-pel
 * interpolation in both directions.  Each row's horizontal average and
 * xor are kept (avg0/xor0 vs avg1/xor1 for alternating rows); the
 * vec_sub(vec_avg(...), and-term) expression corrects the avg-of-avgs
 * so the result equals the correctly rounded 4-pixel average
 * (a+b+c+d+2)>>2.
 * NOTE(review): the declaration of `ones`, the loop braces and the
 * ref/dest stride advances appear to have been lost in this extraction. */
1390 void MC_put_xy_16_altivec (unsigned char * dest, unsigned char * ref,
1391 int stride, int height)
1393 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
1396 ones = vec_splat_u8 (1);
1397 permA = vec_lvsl (0, ref);
/* permB = permA + 1: right-shifted copy for the horizontal average. */
1398 permB = vec_add (permA, ones);
1400 height = (height >> 1) - 1;
1402 ref0 = vec_ld (0, ref);
1403 ref1 = vec_ld (16, ref);
1405 A = vec_perm (ref0, ref1, permA);
1406 B = vec_perm (ref0, ref1, permB);
/* Horizontal average and xor of the current row; the xor carries the
 * lost low bit needed for exact rounding below. */
1407 avg0 = vec_avg (A, B);
1408 xor0 = vec_xor (A, B);
1410 ref0 = vec_ld (0, ref);
1411 ref1 = vec_ld (16, ref);
1413 A = vec_perm (ref0, ref1, permA);
1414 B = vec_perm (ref0, ref1, permB);
1415 avg1 = vec_avg (A, B);
1416 xor1 = vec_xor (A, B);
/* Rounding-corrected vertical combine of the two row averages. */
1417 tmp = vec_sub (vec_avg (avg0, avg1),
1418 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1419 vec_xor (avg0, avg1)));
1422 ref0 = vec_ld (0, ref);
1423 ref1 = vec_ld (16, ref);
1425 vec_st (tmp, 0, dest);
1426 A = vec_perm (ref0, ref1, permA);
1427 B = vec_perm (ref0, ref1, permB);
1428 avg0 = vec_avg (A, B);
1429 xor0 = vec_xor (A, B);
1430 tmp = vec_sub (vec_avg (avg0, avg1),
1431 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1432 vec_xor (avg0, avg1)));
1434 ref0 = vec_ld (0, ref);
1435 ref1 = vec_ld (16, ref);
1437 vec_st (tmp, stride, dest);
1439 A = vec_perm (ref0, ref1, permA);
1440 B = vec_perm (ref0, ref1, permB);
1441 avg1 = vec_avg (A, B);
1442 xor1 = vec_xor (A, B);
1443 tmp = vec_sub (vec_avg (avg0, avg1),
1444 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1445 vec_xor (avg0, avg1)));
/* Epilogue: final pair of rows. */
1448 ref0 = vec_ld (0, ref);
1449 ref1 = vec_ld (16, ref);
1450 vec_st (tmp, 0, dest);
1451 A = vec_perm (ref0, ref1, permA);
1452 B = vec_perm (ref0, ref1, permB);
1453 avg0 = vec_avg (A, B);
1454 xor0 = vec_xor (A, B);
1455 tmp = vec_sub (vec_avg (avg0, avg1),
1456 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1457 vec_xor (avg0, avg1)));
1458 vec_st (tmp, stride, dest);
/* MC_put_xy_8_altivec: copy an 8-byte-wide block with half-pel
 * interpolation in both directions.  Same rounding-corrected
 * avg-of-avgs scheme as the 16-wide version, with per-row alignment
 * permutations (perm0A/B for even rows, perm1A/B for odd rows).
 * NOTE(review): loop braces and ref/dest stride advances appear to have
 * been lost in this extraction. */
1461 void MC_put_xy_8_altivec (unsigned char * dest, unsigned char * ref,
1462 int stride, int height)
1464 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
1465 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones;
1467 ones = vec_splat_u8 (1);
/* 8-byte alignment permutations via mergeh/pack; +1 variants select the
 * right-shifted copy. */
1468 perm0A = vec_lvsl (0, ref);
1469 perm0A = vec_mergeh (perm0A, perm0A);
1470 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
1471 perm0B = vec_add (perm0A, ones);
1472 perm1A = vec_lvsl (stride, ref);
1473 perm1A = vec_mergeh (perm1A, perm1A);
1474 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
1475 perm1B = vec_add (perm1A, ones);
1477 height = (height >> 1) - 1;
1479 ref0 = vec_ld (0, ref);
1480 ref1 = vec_ld (8, ref);
1482 A = vec_perm (ref0, ref1, perm0A);
1483 B = vec_perm (ref0, ref1, perm0B);
/* Horizontal average and xor (carries the rounding bit). */
1484 avg0 = vec_avg (A, B);
1485 xor0 = vec_xor (A, B);
1487 ref0 = vec_ld (0, ref);
1488 ref1 = vec_ld (8, ref);
1490 A = vec_perm (ref0, ref1, perm1A);
1491 B = vec_perm (ref0, ref1, perm1B);
1492 avg1 = vec_avg (A, B);
1493 xor1 = vec_xor (A, B);
/* Rounding-corrected combine: equals (a+b+c+d+2)>>2 per byte. */
1494 tmp = vec_sub (vec_avg (avg0, avg1),
1495 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1496 vec_xor (avg0, avg1)));
1499 ref0 = vec_ld (0, ref);
1500 ref1 = vec_ld (8, ref);
/* 8-byte store as two 4-byte element stores.
 * NOTE(review): assumes dest is 4-byte aligned -- confirm with callers. */
1502 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1503 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1505 A = vec_perm (ref0, ref1, perm0A);
1506 B = vec_perm (ref0, ref1, perm0B);
1507 avg0 = vec_avg (A, B);
1508 xor0 = vec_xor (A, B);
1509 tmp = vec_sub (vec_avg (avg0, avg1),
1510 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1511 vec_xor (avg0, avg1)));
1513 ref0 = vec_ld (0, ref);
1514 ref1 = vec_ld (8, ref);
1516 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1517 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1519 A = vec_perm (ref0, ref1, perm1A);
1520 B = vec_perm (ref0, ref1, perm1B);
1521 avg1 = vec_avg (A, B);
1522 xor1 = vec_xor (A, B);
1523 tmp = vec_sub (vec_avg (avg0, avg1),
1524 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1525 vec_xor (avg0, avg1)));
/* Epilogue: final pair of rows. */
1528 ref0 = vec_ld (0, ref);
1529 ref1 = vec_ld (8, ref);
1530 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1531 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1533 A = vec_perm (ref0, ref1, perm0A);
1534 B = vec_perm (ref0, ref1, perm0B);
1535 avg0 = vec_avg (A, B);
1536 xor0 = vec_xor (A, B);
1537 tmp = vec_sub (vec_avg (avg0, avg1),
1538 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1539 vec_xor (avg0, avg1)));
1540 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1541 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
/* NOTE(review): DUPLICATE DEFINITION -- MC_put_xy_8_altivec is already
 * defined above (original line 1461).  As visible here this would be a
 * redefinition error at compile time.  This variant computes the 4-pixel
 * average via widening to 16-bit lanes ((A+B+C+D+2)>>2 using splat2)
 * instead of the avg/xor trick, and looks unfinished.  Most likely it
 * was disabled with a preprocessor guard (#if 0 or similar) that was
 * lost in this extraction -- verify against the original file before
 * touching it. */
1545 void MC_put_xy_8_altivec (unsigned char * dest, unsigned char * ref,
1546 int stride, int height)
1548 vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones;
1549 vector_u16_t splat2, temp;
1551 ones = vec_splat_u8 (1);
1552 permA = vec_lvsl (0, ref);
1553 permB = vec_add (permA, ones);
1555 zero = vec_splat_u8 (0);
/* Shift/round constant: +2 then >>2 gives the rounded 4-pixel average. */
1556 splat2 = vec_splat_u16 (2);
1559 ref0 = vec_ld (0, ref);
1560 ref1 = vec_ld (8, ref);
/* A,B: current row and its right-shifted copy; C,D: same for next row. */
1562 A = vec_perm (ref0, ref1, permA);
1563 B = vec_perm (ref0, ref1, permB);
1564 ref0 = vec_ld (0, ref);
1565 ref1 = vec_ld (8, ref);
1566 C = vec_perm (ref0, ref1, permA);
1567 D = vec_perm (ref0, ref1, permB);
/* Widen bytes to u16 (mergeh with zero), sum all four taps exactly. */
1569 temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A),
1570 (vector_u16_t)vec_mergeh (zero, B)),
1571 vec_add ((vector_u16_t)vec_mergeh (zero, C),
1572 (vector_u16_t)vec_mergeh (zero, D)));
1573 temp = vec_sr (vec_add (temp, splat2), splat2);
/* Narrow back to bytes. */
1574 tmp = vec_pack (temp, temp);
1576 vec_st (tmp, 0, dest);
/* NOTE(review): trailing horizontal-only average; looks like leftover /
 * incomplete code rather than part of the xy computation. */
1578 tmp = vec_avg (vec_perm (ref0, ref1, permA),
1579 vec_perm (ref0, ref1, permB));
/* MC_avg_o_16_altivec: average a 16-byte-wide reference block (no
 * sub-pel offset) into dest -- each output byte is the rounded average
 * of the aligned reference row and the existing dest row (prev).
 * NOTE(review): loop braces and ref/dest stride advances appear to have
 * been lost in this extraction. */
1584 void MC_avg_o_16_altivec (unsigned char * dest, unsigned char * ref,
1585 int stride, int height)
1587 vector_u8_t perm, ref0, ref1, tmp, prev;
1589 perm = vec_lvsl (0, ref);
1591 height = (height >> 1) - 1;
1593 ref0 = vec_ld (0, ref);
/* Offset 15: only 16 source bytes are needed per row. */
1594 ref1 = vec_ld (15, ref);
/* prev: current dest contents to be averaged with the reference. */
1596 prev = vec_ld (0, dest);
1597 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
1600 ref0 = vec_ld (0, ref);
1601 ref1 = vec_ld (15, ref);
1603 prev = vec_ld (stride, dest);
1604 vec_st (tmp, 0, dest);
1605 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
1607 ref0 = vec_ld (0, ref);
1608 ref1 = vec_ld (15, ref);
/* Load two rows ahead before dest is advanced (pipelined). */
1610 prev = vec_ld (2*stride, dest);
1611 vec_st (tmp, stride, dest);
1613 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
/* Epilogue: final pair of rows. */
1616 ref0 = vec_ld (0, ref);
1617 ref1 = vec_ld (15, ref);
1618 prev = vec_ld (stride, dest);
1619 vec_st (tmp, 0, dest);
1620 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
1621 vec_st (tmp, stride, dest);
/* MC_avg_o_8_altivec: average an 8-byte-wide reference block (no
 * sub-pel offset) into dest.  Per-row alignment permutations perm0
 * (even rows) and perm1 (odd rows).
 * NOTE(review): loop braces and ref/dest stride advances appear to have
 * been lost in this extraction. */
1624 void MC_avg_o_8_altivec (unsigned char * dest, unsigned char * ref,
1625 int stride, int height)
1627 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev;
/* 8-byte alignment permutations via the mergeh/pack narrowing trick. */
1629 tmp0 = vec_lvsl (0, ref);
1630 tmp0 = vec_mergeh (tmp0, tmp0);
1631 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1632 tmp1 = vec_lvsl (stride, ref);
1633 tmp1 = vec_mergeh (tmp1, tmp1);
1634 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1636 height = (height >> 1) - 1;
1638 ref0 = vec_ld (0, ref);
1639 ref1 = vec_ld (7, ref);
/* prev: current dest contents to be averaged with the reference. */
1641 prev = vec_ld (0, dest);
1642 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
1645 ref0 = vec_ld (0, ref);
1646 ref1 = vec_ld (7, ref);
1648 prev = vec_ld (stride, dest);
/* 8-byte store as two 4-byte element stores.
 * NOTE(review): assumes dest is 4-byte aligned -- confirm with callers. */
1649 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1650 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1652 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
1654 ref0 = vec_ld (0, ref);
1655 ref1 = vec_ld (7, ref);
1657 prev = vec_ld (stride, dest);
1658 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1659 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1661 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
/* Epilogue: final pair of rows. */
1664 ref0 = vec_ld (0, ref);
1665 ref1 = vec_ld (7, ref);
1666 prev = vec_ld (stride, dest);
1667 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1668 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1670 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
1671 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1672 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
/* MC_avg_x_16_altivec: horizontal half-pel interpolation (average with
 * right neighbour, permB = permA + 1), then averaged into the existing
 * dest contents (prev).
 * NOTE(review): loop braces and ref/dest stride advances appear to have
 * been lost in this extraction. */
1675 void MC_avg_x_16_altivec (unsigned char * dest, unsigned char * ref,
1676 int stride, int height)
1678 vector_u8_t permA, permB, ref0, ref1, tmp, prev;
1680 permA = vec_lvsl (0, ref);
1681 permB = vec_add (permA, vec_splat_u8 (1));
1683 height = (height >> 1) - 1;
1685 ref0 = vec_ld (0, ref);
1686 ref1 = vec_ld (16, ref);
1687 prev = vec_ld (0, dest);
/* Outer avg folds in dest; inner avg is the horizontal half-pel. */
1689 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
1690 vec_perm (ref0, ref1, permB)));
1693 ref0 = vec_ld (0, ref);
1694 ref1 = vec_ld (16, ref);
1696 prev = vec_ld (stride, dest);
1697 vec_st (tmp, 0, dest);
1698 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
1699 vec_perm (ref0, ref1, permB)));
1701 ref0 = vec_ld (0, ref);
1702 ref1 = vec_ld (16, ref);
/* Load two rows ahead before dest is advanced (pipelined). */
1704 prev = vec_ld (2*stride, dest);
1705 vec_st (tmp, stride, dest);
1707 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
1708 vec_perm (ref0, ref1, permB)));
/* Epilogue: final pair of rows. */
1711 ref0 = vec_ld (0, ref);
1712 ref1 = vec_ld (16, ref);
1713 prev = vec_ld (stride, dest);
1714 vec_st (tmp, 0, dest);
1715 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
1716 vec_perm (ref0, ref1, permB)));
1717 vec_st (tmp, stride, dest);
/* MC_avg_x_8_altivec: 8-byte-wide horizontal half-pel interpolation,
 * averaged into existing dest contents (prev).  Per-row alignment
 * permutations; +1 variants select the right-shifted copy.
 * NOTE(review): `prev` is used below but absent from the visible
 * declaration list -- a declaration line (and the loop/stride lines)
 * appears to have been lost in this extraction. */
1720 void MC_avg_x_8_altivec (unsigned char * dest, unsigned char * ref,
1721 int stride, int height)
1723 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
1726 ones = vec_splat_u8 (1);
/* 8-byte alignment permutations via the mergeh/pack narrowing trick. */
1727 tmp0 = vec_lvsl (0, ref);
1728 tmp0 = vec_mergeh (tmp0, tmp0);
1729 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1730 perm0B = vec_add (perm0A, ones);
1731 tmp1 = vec_lvsl (stride, ref);
1732 tmp1 = vec_mergeh (tmp1, tmp1);
1733 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1734 perm1B = vec_add (perm1A, ones);
1736 height = (height >> 1) - 1;
1738 ref0 = vec_ld (0, ref);
1739 ref1 = vec_ld (8, ref);
1740 prev = vec_ld (0, dest);
/* Outer avg folds in dest; inner avg is the horizontal half-pel. */
1742 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
1743 vec_perm (ref0, ref1, perm0B)));
1746 ref0 = vec_ld (0, ref);
1747 ref1 = vec_ld (8, ref);
1749 prev = vec_ld (stride, dest);
/* 8-byte store as two 4-byte element stores.
 * NOTE(review): assumes dest is 4-byte aligned -- confirm with callers. */
1750 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1751 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1753 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
1754 vec_perm (ref0, ref1, perm1B)));
1756 ref0 = vec_ld (0, ref);
1757 ref1 = vec_ld (8, ref);
1759 prev = vec_ld (stride, dest);
1760 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1761 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1763 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
1764 vec_perm (ref0, ref1, perm0B)));
/* Epilogue: final pair of rows. */
1767 ref0 = vec_ld (0, ref);
1768 ref1 = vec_ld (8, ref);
1769 prev = vec_ld (stride, dest);
1770 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1771 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1773 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
1774 vec_perm (ref0, ref1, perm1B)));
1775 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1776 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
/* MC_avg_y_16_altivec: vertical half-pel interpolation (average of two
 * consecutive source rows, tmp0/tmp1 alternating), then averaged into
 * the existing dest contents (prev).
 * NOTE(review): loop braces and ref/dest stride advances appear to have
 * been lost in this extraction. */
1779 void MC_avg_y_16_altivec (unsigned char * dest, unsigned char * ref,
1780 int stride, int height)
1782 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev;
1784 perm = vec_lvsl (0, ref);
1786 height = (height >> 1) - 1;
1788 ref0 = vec_ld (0, ref);
/* Offset 15: only 16 source bytes are needed per row. */
1789 ref1 = vec_ld (15, ref);
1791 tmp0 = vec_perm (ref0, ref1, perm);
1792 ref0 = vec_ld (0, ref);
1793 ref1 = vec_ld (15, ref);
1795 prev = vec_ld (0, dest);
1796 tmp1 = vec_perm (ref0, ref1, perm);
/* Outer avg folds in dest; inner avg is the vertical half-pel. */
1797 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1800 ref0 = vec_ld (0, ref);
1801 ref1 = vec_ld (15, ref);
1803 prev = vec_ld (stride, dest);
1804 vec_st (tmp, 0, dest);
1805 tmp0 = vec_perm (ref0, ref1, perm);
1806 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1808 ref0 = vec_ld (0, ref);
1809 ref1 = vec_ld (15, ref);
/* Load two rows ahead before dest is advanced (pipelined). */
1811 prev = vec_ld (2*stride, dest);
1812 vec_st (tmp, stride, dest);
1814 tmp1 = vec_perm (ref0, ref1, perm);
1815 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
/* Epilogue: final pair of rows. */
1818 ref0 = vec_ld (0, ref);
1819 ref1 = vec_ld (15, ref);
1820 prev = vec_ld (stride, dest);
1821 vec_st (tmp, 0, dest);
1822 tmp0 = vec_perm (ref0, ref1, perm);
1823 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1824 vec_st (tmp, stride, dest);
/* MC_avg_y_8_altivec: 8-byte-wide vertical half-pel interpolation,
 * averaged into the existing dest contents (prev).  Per-row alignment
 * permutations perm0 (even rows) and perm1 (odd rows).
 * NOTE(review): loop braces and ref/dest stride advances appear to have
 * been lost in this extraction. */
1827 void MC_avg_y_8_altivec (unsigned char * dest, unsigned char * ref,
1828 int stride, int height)
1830 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev;
/* 8-byte alignment permutations via the mergeh/pack narrowing trick. */
1832 tmp0 = vec_lvsl (0, ref);
1833 tmp0 = vec_mergeh (tmp0, tmp0);
1834 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1835 tmp1 = vec_lvsl (stride, ref);
1836 tmp1 = vec_mergeh (tmp1, tmp1);
1837 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1839 height = (height >> 1) - 1;
1841 ref0 = vec_ld (0, ref);
1842 ref1 = vec_ld (7, ref);
1844 tmp0 = vec_perm (ref0, ref1, perm0);
1845 ref0 = vec_ld (0, ref);
1846 ref1 = vec_ld (7, ref);
1848 prev = vec_ld (0, dest);
1849 tmp1 = vec_perm (ref0, ref1, perm1);
/* Outer avg folds in dest; inner avg is the vertical half-pel. */
1850 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1853 ref0 = vec_ld (0, ref);
1854 ref1 = vec_ld (7, ref);
1856 prev = vec_ld (stride, dest);
/* 8-byte store as two 4-byte element stores.
 * NOTE(review): assumes dest is 4-byte aligned -- confirm with callers. */
1857 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1858 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1860 tmp0 = vec_perm (ref0, ref1, perm0);
1861 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1863 ref0 = vec_ld (0, ref);
1864 ref1 = vec_ld (7, ref);
1866 prev = vec_ld (stride, dest);
1867 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1868 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1870 tmp1 = vec_perm (ref0, ref1, perm1);
1871 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
/* Epilogue: final pair of rows. */
1874 ref0 = vec_ld (0, ref);
1875 ref1 = vec_ld (7, ref);
1876 prev = vec_ld (stride, dest);
1877 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1878 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1880 tmp0 = vec_perm (ref0, ref1, perm0);
1881 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1882 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1883 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
/* MC_avg_xy_16_altivec: half-pel interpolation in both directions
 * (rounding-corrected avg-of-avgs, see the put_xy variants), then
 * averaged into the existing dest contents (prev).
 * NOTE(review): loop braces and ref/dest stride advances appear to have
 * been lost in this extraction. */
1886 void MC_avg_xy_16_altivec (unsigned char * dest, unsigned char * ref,
1887 int stride, int height)
1889 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
1890 vector_u8_t ones, prev;
1892 ones = vec_splat_u8 (1);
1893 permA = vec_lvsl (0, ref);
/* permB = permA + 1: right-shifted copy for the horizontal average. */
1894 permB = vec_add (permA, ones);
1896 height = (height >> 1) - 1;
1898 ref0 = vec_ld (0, ref);
1899 ref1 = vec_ld (16, ref);
1901 A = vec_perm (ref0, ref1, permA);
1902 B = vec_perm (ref0, ref1, permB);
/* Horizontal average and xor (carries the rounding bit). */
1903 avg0 = vec_avg (A, B);
1904 xor0 = vec_xor (A, B);
1906 ref0 = vec_ld (0, ref);
1907 ref1 = vec_ld (16, ref);
1909 prev = vec_ld (0, dest);
1910 A = vec_perm (ref0, ref1, permA);
1911 B = vec_perm (ref0, ref1, permB);
1912 avg1 = vec_avg (A, B);
1913 xor1 = vec_xor (A, B);
/* Rounding-corrected xy interpolation, then averaged with dest. */
1914 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
1915 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1916 vec_xor (avg0, avg1))));
1919 ref0 = vec_ld (0, ref);
1920 ref1 = vec_ld (16, ref);
1922 prev = vec_ld (stride, dest);
1923 vec_st (tmp, 0, dest);
1924 A = vec_perm (ref0, ref1, permA);
1925 B = vec_perm (ref0, ref1, permB);
1926 avg0 = vec_avg (A, B);
1927 xor0 = vec_xor (A, B);
1928 tmp = vec_avg (prev,
1929 vec_sub (vec_avg (avg0, avg1),
1930 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1931 vec_xor (avg0, avg1))));
1933 ref0 = vec_ld (0, ref);
1934 ref1 = vec_ld (16, ref);
/* Load two rows ahead before dest is advanced (pipelined). */
1936 prev = vec_ld (2*stride, dest);
1937 vec_st (tmp, stride, dest);
1939 A = vec_perm (ref0, ref1, permA);
1940 B = vec_perm (ref0, ref1, permB);
1941 avg1 = vec_avg (A, B);
1942 xor1 = vec_xor (A, B);
1943 tmp = vec_avg (prev,
1944 vec_sub (vec_avg (avg0, avg1),
1945 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1946 vec_xor (avg0, avg1))));
/* Epilogue: final pair of rows. */
1949 ref0 = vec_ld (0, ref);
1950 ref1 = vec_ld (16, ref);
1951 prev = vec_ld (stride, dest);
1952 vec_st (tmp, 0, dest);
1953 A = vec_perm (ref0, ref1, permA);
1954 B = vec_perm (ref0, ref1, permB);
1955 avg0 = vec_avg (A, B);
1956 xor0 = vec_xor (A, B);
1957 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
1958 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1959 vec_xor (avg0, avg1))));
1960 vec_st (tmp, stride, dest);
/* MC_avg_xy_8_altivec: 8-byte-wide half-pel interpolation in both
 * directions (rounding-corrected avg-of-avgs), averaged into the
 * existing dest contents (prev).  Per-row alignment permutations.
 * NOTE(review): loop braces and ref/dest stride advances appear to have
 * been lost in this extraction. */
1963 void MC_avg_xy_8_altivec (unsigned char * dest, unsigned char * ref,
1964 int stride, int height)
1966 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
1967 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev;
1969 ones = vec_splat_u8 (1);
/* 8-byte alignment permutations via mergeh/pack; +1 variants select the
 * right-shifted copy. */
1970 perm0A = vec_lvsl (0, ref);
1971 perm0A = vec_mergeh (perm0A, perm0A);
1972 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
1973 perm0B = vec_add (perm0A, ones);
1974 perm1A = vec_lvsl (stride, ref);
1975 perm1A = vec_mergeh (perm1A, perm1A);
1976 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
1977 perm1B = vec_add (perm1A, ones);
1979 height = (height >> 1) - 1;
1981 ref0 = vec_ld (0, ref);
1982 ref1 = vec_ld (8, ref);
1984 A = vec_perm (ref0, ref1, perm0A);
1985 B = vec_perm (ref0, ref1, perm0B);
/* Horizontal average and xor (carries the rounding bit). */
1986 avg0 = vec_avg (A, B);
1987 xor0 = vec_xor (A, B);
1989 ref0 = vec_ld (0, ref);
1990 ref1 = vec_ld (8, ref);
1992 prev = vec_ld (0, dest);
1993 A = vec_perm (ref0, ref1, perm1A);
1994 B = vec_perm (ref0, ref1, perm1B);
1995 avg1 = vec_avg (A, B);
1996 xor1 = vec_xor (A, B);
/* Rounding-corrected xy interpolation, then averaged with dest. */
1997 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
1998 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1999 vec_xor (avg0, avg1))));
2002 ref0 = vec_ld (0, ref);
2003 ref1 = vec_ld (8, ref);
2005 prev = vec_ld (stride, dest);
/* 8-byte store as two 4-byte element stores.
 * NOTE(review): assumes dest is 4-byte aligned -- confirm with callers. */
2006 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
2007 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
2009 A = vec_perm (ref0, ref1, perm0A);
2010 B = vec_perm (ref0, ref1, perm0B);
2011 avg0 = vec_avg (A, B);
2012 xor0 = vec_xor (A, B);
2013 tmp = vec_avg (prev,
2014 vec_sub (vec_avg (avg0, avg1),
2015 vec_and (vec_and (ones, vec_or (xor0, xor1)),
2016 vec_xor (avg0, avg1))));
2018 ref0 = vec_ld (0, ref);
2019 ref1 = vec_ld (8, ref);
2021 prev = vec_ld (stride, dest);
2022 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
2023 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
2025 A = vec_perm (ref0, ref1, perm1A);
2026 B = vec_perm (ref0, ref1, perm1B);
2027 avg1 = vec_avg (A, B);
2028 xor1 = vec_xor (A, B);
2029 tmp = vec_avg (prev,
2030 vec_sub (vec_avg (avg0, avg1),
2031 vec_and (vec_and (ones, vec_or (xor0, xor1)),
2032 vec_xor (avg0, avg1))));
/* Epilogue: final pair of rows. */
2035 ref0 = vec_ld (0, ref);
2036 ref1 = vec_ld (8, ref);
2037 prev = vec_ld (stride, dest);
2038 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
2039 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
2041 A = vec_perm (ref0, ref1, perm0A);
2042 B = vec_perm (ref0, ref1, perm0B);
2043 avg0 = vec_avg (A, B);
2044 xor0 = vec_xor (A, B);
2045 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
2046 vec_and (vec_and (ones, vec_or (xor0, xor1)),
2047 vec_xor (avg0, avg1))));
2048 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
2049 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
2052 #endif /* CAN_COMPILE_C_ALTIVEC || __BUILD_ALTIVEC_ASM__ */
2053 #ifndef __BUILD_ALTIVEC_ASM__
2055 /*****************************************************************************
2056 * Functions exported as capabilities. They are declared as static so that
2057 * we don't pollute the namespace too much.
2058 *****************************************************************************/
/* motion_getfunctions: register the 16 AltiVec motion-compensation
 * entry points (put/avg x 16/8-wide x o/x/y/xy) in the plugin's
 * function list.  Table layout is [avg?][width][sub-pel offset].
 * NOTE(review): array braces and some declaration lines appear to have
 * been lost in this extraction. */
2059 static void motion_getfunctions( function_list_t * p_function_list )
2061 static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *,
2065 /* Copying functions */
2068 MC_put_o_16_altivec, MC_put_x_16_altivec, MC_put_y_16_altivec, MC_put_xy_16_altivec
2072 MC_put_o_8_altivec, MC_put_x_8_altivec, MC_put_y_8_altivec, MC_put_xy_8_altivec
2076 /* Averaging functions */
2079 MC_avg_o_16_altivec, MC_avg_x_16_altivec, MC_avg_y_16_altivec, MC_avg_xy_16_altivec
2083 MC_avg_o_8_altivec, MC_avg_x_8_altivec, MC_avg_y_8_altivec, MC_avg_xy_8_altivec
/* Copy all 16 function pointers into the capability structure. */
2088 #define list p_function_list->functions.motion
2089 memcpy( list.ppppf_motion, ppppf_motion, sizeof( void * ) * 16 );
2095 #endif /* __BUILD_ALTIVEC_ASM__ */