1 /*****************************************************************************
2 * motionaltivec.c : AltiVec motion compensation module for vlc
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: motionaltivec.c,v 1.2 2003/03/30 18:14:37 gbazin Exp $
7 * Authors: Michel Lespinasse <walken@zoy.org>
8 * Paul Mackerras <paulus@linuxcare.com.au>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
25 #ifndef __BUILD_ALTIVEC_ASM__
27 /*****************************************************************************
29 *****************************************************************************/
32 #include <stdlib.h> /* malloc(), free() */
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h> /* int16_t .. */
38 /*****************************************************************************
40 *****************************************************************************/
41 static int Open ( vlc_object_t * );
43 /*****************************************************************************
45 *****************************************************************************/
47 set_description( _("AltiVec motion compensation") );
48 set_capability( "motion compensation", 150 );
49 add_requirement( ALTIVEC );
50 add_shortcut( "altivec" );
51 set_callbacks( Open, NULL );
54 /*****************************************************************************
55 * Motion compensation in AltiVec
56 *****************************************************************************/
58 #ifndef CAN_COMPILE_C_ALTIVEC
61 * The asm code is generated with:
63 * gcc-2.95 -fvec -D__BUILD_ALTIVEC_ASM__ -O9 -fomit-frame-pointer -mregnames -S
64 * motion_comp_altivec.c
66 * sed 's/.L/._L/g' motion_comp_altivec.s |
67 * awk '{args=""; len=split ($2, arg, ",");
68 * for (i=1; i<=len; i++) { a=arg[i]; if (i<len) a=a",";
69 * args = args sprintf ("%-6s", a) }
70 * printf ("\t\"\t%-16s%-24s\\n\"\n", $1, args) }' |
74 static void MC_put_o_16_altivec (uint8_t * dest, uint8_t * ref,
75 int stride, int height)
78 " srawi %r6, %r6, 1 \n"
80 " addi %r6, %r6, -1 \n"
81 " lvsl %v12, 0, %r4 \n"
84 " lvx %v0, %r9, %r4 \n"
85 " add %r0, %r5, %r5 \n"
86 " vperm %v13, %v1, %v0, %v12 \n"
87 " add %r4, %r4, %r5 \n"
91 " lvx %v0, %r9, %r4 \n"
92 " stvx %v13, 0, %r3 \n"
93 " vperm %v13, %v1, %v0, %v12 \n"
94 " add %r4, %r4, %r5 \n"
96 " lvx %v0, %r9, %r4 \n"
97 " stvx %v13, %r5, %r3 \n"
98 " vperm %v13, %v1, %v0, %v12 \n"
99 " add %r4, %r4, %r5 \n"
100 " add %r3, %r3, %r0 \n"
102 " lvx %v0, %r9, %r4 \n"
103 " lvx %v1, 0, %r4 \n"
104 " stvx %v13, 0, %r3 \n"
105 " vperm %v13, %v1, %v0, %v12 \n"
106 " stvx %v13, %r5, %r3 \n"
110 static void MC_put_o_8_altivec (uint8_t * dest, uint8_t * ref,
111 int stride, int height)
114 " lvsl %v12, 0, %r4 \n"
115 " lvsl %v1, %r5, %r4 \n"
116 " vmrghb %v12, %v12, %v12 \n"
117 " srawi %r6, %r6, 1 \n"
119 " vmrghb %v1, %v1, %v1 \n"
120 " addi %r6, %r6, -1 \n"
121 " vpkuhum %v10, %v12, %v12 \n"
122 " lvx %v13, 0, %r4 \n"
124 " vpkuhum %v11, %v1, %v1 \n"
125 " lvx %v0, %r9, %r4 \n"
126 " add %r4, %r4, %r5 \n"
127 " vperm %v12, %v13, %v0, %v10 \n"
130 " lvx %v0, %r9, %r4 \n"
131 " lvx %v13, 0, %r4 \n"
132 " stvewx %v12, 0, %r3 \n"
134 " vperm %v1, %v13, %v0, %v11 \n"
135 " stvewx %v12, %r9, %r3 \n"
136 " add %r4, %r4, %r5 \n"
138 " lvx %v0, %r9, %r4 \n"
139 " lvx %v13, 0, %r4 \n"
140 " add %r3, %r3, %r5 \n"
141 " stvewx %v1, 0, %r3 \n"
142 " vperm %v12, %v13, %v0, %v10 \n"
144 " stvewx %v1, %r9, %r3 \n"
145 " add %r4, %r4, %r5 \n"
146 " add %r3, %r3, %r5 \n"
149 " lvx %v0, %r9, %r4 \n"
150 " lvx %v13, 0, %r4 \n"
151 " stvewx %v12, 0, %r3 \n"
153 " vperm %v1, %v13, %v0, %v11 \n"
154 " stvewx %v12, %r9, %r3 \n"
155 " add %r3, %r3, %r5 \n"
156 " stvewx %v1, 0, %r3 \n"
157 " stvewx %v1, %r9, %r3 \n"
161 static void MC_put_x_16_altivec (uint8_t * dest, uint8_t * ref,
162 int stride, int height)
165 " lvsl %v11, 0, %r4 \n"
166 " vspltisb %v0, 1 \n"
168 " lvx %v12, 0, %r4 \n"
169 " vaddubm %v10, %v11, %v0 \n"
170 " lvx %v13, %r9, %r4 \n"
171 " srawi %r6, %r6, 1 \n"
172 " addi %r6, %r6, -1 \n"
173 " vperm %v1, %v12, %v13, %v10 \n"
174 " vperm %v0, %v12, %v13, %v11 \n"
176 " add %r0, %r5, %r5 \n"
177 " add %r4, %r4, %r5 \n"
178 " vavgub %v0, %v0, %v1 \n"
181 " lvx %v12, 0, %r4 \n"
182 " lvx %v13, %r9, %r4 \n"
183 " stvx %v0, 0, %r3 \n"
184 " vperm %v1, %v12, %v13, %v10 \n"
185 " add %r4, %r4, %r5 \n"
186 " vperm %v0, %v12, %v13, %v11 \n"
187 " lvx %v12, 0, %r4 \n"
188 " lvx %v13, %r9, %r4 \n"
189 " vavgub %v0, %v0, %v1 \n"
190 " stvx %v0, %r5, %r3 \n"
191 " vperm %v1, %v12, %v13, %v10 \n"
192 " add %r4, %r4, %r5 \n"
193 " vperm %v0, %v12, %v13, %v11 \n"
194 " add %r3, %r3, %r0 \n"
195 " vavgub %v0, %v0, %v1 \n"
197 " lvx %v13, %r9, %r4 \n"
198 " lvx %v12, 0, %r4 \n"
199 " stvx %v0, 0, %r3 \n"
200 " vperm %v1, %v12, %v13, %v10 \n"
201 " vperm %v0, %v12, %v13, %v11 \n"
202 " vavgub %v0, %v0, %v1 \n"
203 " stvx %v0, %r5, %r3 \n"
207 static void MC_put_x_8_altivec (uint8_t * dest, uint8_t * ref,
208 int stride, int height)
211 " lvsl %v0, 0, %r4 \n"
212 " vspltisb %v13, 1 \n"
213 " lvsl %v10, %r5, %r4 \n"
214 " vmrghb %v0, %v0, %v0 \n"
216 " lvx %v11, 0, %r4 \n"
217 " vmrghb %v10, %v10, %v10 \n"
218 " vpkuhum %v8, %v0, %v0 \n"
219 " lvx %v12, %r9, %r4 \n"
220 " srawi %r6, %r6, 1 \n"
221 " vpkuhum %v9, %v10, %v10 \n"
222 " vaddubm %v7, %v8, %v13 \n"
223 " addi %r6, %r6, -1 \n"
224 " vperm %v1, %v11, %v12, %v8 \n"
226 " vaddubm %v13, %v9, %v13 \n"
227 " add %r4, %r4, %r5 \n"
228 " vperm %v0, %v11, %v12, %v7 \n"
229 " vavgub %v0, %v1, %v0 \n"
232 " lvx %v12, %r9, %r4 \n"
233 " lvx %v11, 0, %r4 \n"
234 " stvewx %v0, 0, %r3 \n"
236 " vperm %v1, %v11, %v12, %v13 \n"
237 " stvewx %v0, %r9, %r3 \n"
238 " vperm %v0, %v11, %v12, %v9 \n"
239 " add %r4, %r4, %r5 \n"
241 " lvx %v12, %r9, %r4 \n"
242 " vavgub %v10, %v0, %v1 \n"
243 " lvx %v11, 0, %r4 \n"
244 " add %r3, %r3, %r5 \n"
245 " stvewx %v10, 0, %r3 \n"
246 " vperm %v1, %v11, %v12, %v7 \n"
247 " vperm %v0, %v11, %v12, %v8 \n"
249 " stvewx %v10, %r9, %r3 \n"
250 " add %r4, %r4, %r5 \n"
251 " vavgub %v0, %v0, %v1 \n"
252 " add %r3, %r3, %r5 \n"
255 " lvx %v12, %r9, %r4 \n"
256 " lvx %v11, 0, %r4 \n"
257 " stvewx %v0, 0, %r3 \n"
259 " vperm %v1, %v11, %v12, %v13 \n"
260 " stvewx %v0, %r9, %r3 \n"
261 " vperm %v0, %v11, %v12, %v9 \n"
262 " add %r3, %r3, %r5 \n"
263 " vavgub %v10, %v0, %v1 \n"
264 " stvewx %v10, 0, %r3 \n"
265 " stvewx %v10, %r9, %r3 \n"
269 static void MC_put_y_16_altivec (uint8_t * dest, uint8_t * ref,
270 int stride, int height)
274 " lvsl %v10, 0, %r4 \n"
275 " lvx %v13, 0, %r4 \n"
276 " lvx %v1, %r9, %r4 \n"
277 " add %r4, %r4, %r5 \n"
278 " vperm %v12, %v13, %v1, %v10 \n"
279 " srawi %r6, %r6, 1 \n"
280 " lvx %v13, 0, %r4 \n"
281 " lvx %v1, %r9, %r4 \n"
282 " addi %r6, %r6, -1 \n"
283 " vperm %v11, %v13, %v1, %v10 \n"
285 " add %r0, %r5, %r5 \n"
286 " add %r4, %r4, %r5 \n"
287 " vavgub %v0, %v12, %v11 \n"
290 " lvx %v13, 0, %r4 \n"
291 " lvx %v1, %r9, %r4 \n"
292 " stvx %v0, 0, %r3 \n"
293 " vperm %v12, %v13, %v1, %v10 \n"
294 " add %r4, %r4, %r5 \n"
295 " lvx %v13, 0, %r4 \n"
296 " lvx %v1, %r9, %r4 \n"
297 " vavgub %v0, %v12, %v11 \n"
298 " stvx %v0, %r5, %r3 \n"
299 " vperm %v11, %v13, %v1, %v10 \n"
300 " add %r4, %r4, %r5 \n"
301 " add %r3, %r3, %r0 \n"
302 " vavgub %v0, %v12, %v11 \n"
304 " lvx %v1, %r9, %r4 \n"
305 " lvx %v13, 0, %r4 \n"
306 " stvx %v0, 0, %r3 \n"
307 " vperm %v12, %v13, %v1, %v10 \n"
308 " vavgub %v0, %v12, %v11 \n"
309 " stvx %v0, %r5, %r3 \n"
313 static void MC_put_y_8_altivec (uint8_t * dest, uint8_t * ref,
314 int stride, int height)
317 " lvsl %v13, 0, %r4 \n"
318 " lvsl %v11, %r5, %r4 \n"
319 " vmrghb %v13, %v13, %v13 \n"
321 " lvx %v12, 0, %r4 \n"
322 " vmrghb %v11, %v11, %v11 \n"
323 " lvx %v1, %r9, %r4 \n"
324 " vpkuhum %v9, %v13, %v13 \n"
325 " add %r4, %r4, %r5 \n"
326 " vpkuhum %v10, %v11, %v11 \n"
327 " vperm %v13, %v12, %v1, %v9 \n"
328 " srawi %r6, %r6, 1 \n"
329 " lvx %v12, 0, %r4 \n"
330 " lvx %v1, %r9, %r4 \n"
331 " addi %r6, %r6, -1 \n"
332 " vperm %v11, %v12, %v1, %v10 \n"
334 " add %r4, %r4, %r5 \n"
335 " vavgub %v0, %v13, %v11 \n"
338 " lvx %v1, %r9, %r4 \n"
339 " lvx %v12, 0, %r4 \n"
340 " stvewx %v0, 0, %r3 \n"
342 " vperm %v13, %v12, %v1, %v9 \n"
343 " stvewx %v0, %r9, %r3 \n"
344 " add %r4, %r4, %r5 \n"
345 " vavgub %v0, %v13, %v11 \n"
347 " lvx %v1, %r9, %r4 \n"
348 " lvx %v12, 0, %r4 \n"
349 " add %r3, %r3, %r5 \n"
350 " stvewx %v0, 0, %r3 \n"
351 " vperm %v11, %v12, %v1, %v10 \n"
353 " stvewx %v0, %r9, %r3 \n"
354 " vavgub %v0, %v13, %v11 \n"
355 " add %r4, %r4, %r5 \n"
356 " add %r3, %r3, %r5 \n"
359 " lvx %v1, %r9, %r4 \n"
360 " lvx %v12, 0, %r4 \n"
361 " stvewx %v0, 0, %r3 \n"
363 " vperm %v13, %v12, %v1, %v9 \n"
364 " stvewx %v0, %r9, %r3 \n"
365 " add %r3, %r3, %r5 \n"
366 " vavgub %v0, %v13, %v11 \n"
367 " stvewx %v0, 0, %r3 \n"
368 " stvewx %v0, %r9, %r3 \n"
372 static void MC_put_xy_16_altivec (uint8_t * dest, uint8_t * ref,
373 int stride, int height)
376 " lvsl %v5, 0, %r4 \n"
377 " vspltisb %v3, 1 \n"
379 " lvx %v1, 0, %r4 \n"
380 " vaddubm %v4, %v5, %v3 \n"
381 " lvx %v0, %r9, %r4 \n"
382 " add %r4, %r4, %r5 \n"
383 " vperm %v10, %v1, %v0, %v4 \n"
384 " srawi %r6, %r6, 1 \n"
385 " vperm %v11, %v1, %v0, %v5 \n"
386 " addi %r6, %r6, -1 \n"
387 " lvx %v1, 0, %r4 \n"
389 " lvx %v0, %r9, %r4 \n"
390 " vavgub %v9, %v11, %v10 \n"
391 " vxor %v8, %v11, %v10 \n"
392 " add %r0, %r5, %r5 \n"
393 " vperm %v10, %v1, %v0, %v4 \n"
394 " add %r4, %r4, %r5 \n"
395 " vperm %v11, %v1, %v0, %v5 \n"
396 " vxor %v6, %v11, %v10 \n"
397 " vavgub %v7, %v11, %v10 \n"
398 " vor %v0, %v8, %v6 \n"
399 " vxor %v13, %v9, %v7 \n"
400 " vand %v0, %v3, %v0 \n"
401 " vavgub %v1, %v9, %v7 \n"
402 " vand %v0, %v0, %v13 \n"
403 " vsububm %v13, %v1, %v0 \n"
406 " lvx %v1, 0, %r4 \n"
407 " lvx %v0, %r9, %r4 \n"
408 " stvx %v13, 0, %r3 \n"
409 " vperm %v10, %v1, %v0, %v4 \n"
410 " add %r4, %r4, %r5 \n"
411 " vperm %v11, %v1, %v0, %v5 \n"
412 " lvx %v1, 0, %r4 \n"
413 " lvx %v0, %r9, %r4 \n"
414 " vavgub %v9, %v11, %v10 \n"
415 " vxor %v8, %v11, %v10 \n"
416 " add %r4, %r4, %r5 \n"
417 " vperm %v10, %v1, %v0, %v4 \n"
418 " vavgub %v12, %v9, %v7 \n"
419 " vperm %v11, %v1, %v0, %v5 \n"
420 " vor %v13, %v8, %v6 \n"
421 " vxor %v0, %v9, %v7 \n"
422 " vxor %v6, %v11, %v10 \n"
423 " vand %v13, %v3, %v13 \n"
424 " vavgub %v7, %v11, %v10 \n"
425 " vor %v1, %v8, %v6 \n"
426 " vand %v13, %v13, %v0 \n"
427 " vxor %v0, %v9, %v7 \n"
428 " vand %v1, %v3, %v1 \n"
429 " vsububm %v13, %v12, %v13 \n"
430 " vand %v1, %v1, %v0 \n"
431 " stvx %v13, %r5, %r3 \n"
432 " vavgub %v0, %v9, %v7 \n"
433 " add %r3, %r3, %r0 \n"
434 " vsububm %v13, %v0, %v1 \n"
436 " lvx %v0, %r9, %r4 \n"
437 " lvx %v1, 0, %r4 \n"
438 " stvx %v13, 0, %r3 \n"
439 " vperm %v10, %v1, %v0, %v4 \n"
440 " vperm %v11, %v1, %v0, %v5 \n"
441 " vxor %v8, %v11, %v10 \n"
442 " vavgub %v9, %v11, %v10 \n"
443 " vor %v0, %v8, %v6 \n"
444 " vxor %v13, %v9, %v7 \n"
445 " vand %v0, %v3, %v0 \n"
446 " vavgub %v1, %v9, %v7 \n"
447 " vand %v0, %v0, %v13 \n"
448 " vsububm %v13, %v1, %v0 \n"
449 " stvx %v13, %r5, %r3 \n"
453 static void MC_put_xy_8_altivec (uint8_t * dest, uint8_t * ref,
454 int stride, int height)
457 " lvsl %v4, 0, %r4 \n"
458 " vspltisb %v3, 1 \n"
459 " lvsl %v5, %r5, %r4 \n"
460 " vmrghb %v4, %v4, %v4 \n"
462 " vmrghb %v5, %v5, %v5 \n"
463 " lvx %v1, 0, %r4 \n"
464 " vpkuhum %v4, %v4, %v4 \n"
465 " lvx %v0, %r9, %r4 \n"
466 " vpkuhum %v5, %v5, %v5 \n"
467 " add %r4, %r4, %r5 \n"
468 " vaddubm %v2, %v4, %v3 \n"
469 " vperm %v11, %v1, %v0, %v4 \n"
470 " srawi %r6, %r6, 1 \n"
471 " vaddubm %v19, %v5, %v3 \n"
472 " addi %r6, %r6, -1 \n"
473 " vperm %v10, %v1, %v0, %v2 \n"
475 " lvx %v1, 0, %r4 \n"
476 " lvx %v0, %r9, %r4 \n"
477 " vavgub %v9, %v11, %v10 \n"
478 " vxor %v8, %v11, %v10 \n"
479 " add %r4, %r4, %r5 \n"
480 " vperm %v10, %v1, %v0, %v19 \n"
481 " vperm %v11, %v1, %v0, %v5 \n"
482 " vxor %v6, %v11, %v10 \n"
483 " vavgub %v7, %v11, %v10 \n"
484 " vor %v0, %v8, %v6 \n"
485 " vxor %v13, %v9, %v7 \n"
486 " vand %v0, %v3, %v0 \n"
487 " vavgub %v1, %v9, %v7 \n"
488 " vand %v0, %v0, %v13 \n"
489 " vsububm %v13, %v1, %v0 \n"
492 " lvx %v0, %r9, %r4 \n"
493 " lvx %v1, 0, %r4 \n"
494 " stvewx %v13, 0, %r3 \n"
496 " vperm %v10, %v1, %v0, %v2 \n"
497 " stvewx %v13, %r9, %r3 \n"
498 " vperm %v11, %v1, %v0, %v4 \n"
499 " add %r4, %r4, %r5 \n"
501 " vavgub %v9, %v11, %v10 \n"
502 " lvx %v0, %r9, %r4 \n"
503 " vxor %v8, %v11, %v10 \n"
504 " lvx %v1, 0, %r4 \n"
505 " vavgub %v12, %v9, %v7 \n"
506 " vor %v13, %v8, %v6 \n"
507 " add %r3, %r3, %r5 \n"
508 " vperm %v10, %v1, %v0, %v19 \n"
510 " vperm %v11, %v1, %v0, %v5 \n"
511 " vand %v13, %v3, %v13 \n"
512 " add %r4, %r4, %r5 \n"
513 " vxor %v0, %v9, %v7 \n"
514 " vxor %v6, %v11, %v10 \n"
515 " vavgub %v7, %v11, %v10 \n"
516 " vor %v1, %v8, %v6 \n"
517 " vand %v13, %v13, %v0 \n"
518 " vxor %v0, %v9, %v7 \n"
519 " vand %v1, %v3, %v1 \n"
520 " vsububm %v13, %v12, %v13 \n"
521 " vand %v1, %v1, %v0 \n"
522 " stvewx %v13, 0, %r3 \n"
523 " vavgub %v0, %v9, %v7 \n"
524 " stvewx %v13, %r9, %r3 \n"
525 " add %r3, %r3, %r5 \n"
526 " vsububm %v13, %v0, %v1 \n"
529 " lvx %v0, %r9, %r4 \n"
530 " lvx %v1, 0, %r4 \n"
531 " stvewx %v13, 0, %r3 \n"
532 " vperm %v10, %v1, %v0, %v2 \n"
534 " vperm %v11, %v1, %v0, %v4 \n"
535 " stvewx %v13, %r9, %r3 \n"
536 " add %r3, %r3, %r5 \n"
537 " vxor %v8, %v11, %v10 \n"
538 " vavgub %v9, %v11, %v10 \n"
539 " vor %v0, %v8, %v6 \n"
540 " vxor %v13, %v9, %v7 \n"
541 " vand %v0, %v3, %v0 \n"
542 " vavgub %v1, %v9, %v7 \n"
543 " vand %v0, %v0, %v13 \n"
544 " vsububm %v13, %v1, %v0 \n"
545 " stvewx %v13, 0, %r3 \n"
546 " stvewx %v13, %r9, %r3 \n"
550 static void MC_avg_o_16_altivec (uint8_t * dest, uint8_t * ref,
551 int stride, int height)
555 " lvx %v0, %r9, %r4 \n"
556 " lvsl %v11, 0, %r4 \n"
557 " lvx %v1, 0, %r4 \n"
558 " srawi %r6, %r6, 1 \n"
559 " addi %r6, %r6, -1 \n"
560 " vperm %v0, %v1, %v0, %v11 \n"
561 " lvx %v13, 0, %r3 \n"
563 " add %r9, %r5, %r5 \n"
564 " vavgub %v12, %v13, %v0 \n"
565 " add %r4, %r4, %r5 \n"
568 " lvx %v1, 0, %r4 \n"
569 " lvx %v0, %r11, %r4 \n"
570 " lvx %v13, %r5, %r3 \n"
571 " vperm %v0, %v1, %v0, %v11 \n"
572 " stvx %v12, 0, %r3 \n"
573 " add %r4, %r4, %r5 \n"
574 " vavgub %v12, %v13, %v0 \n"
575 " lvx %v1, 0, %r4 \n"
576 " lvx %v0, %r11, %r4 \n"
577 " lvx %v13, %r9, %r3 \n"
578 " vperm %v0, %v1, %v0, %v11 \n"
579 " stvx %v12, %r5, %r3 \n"
580 " add %r4, %r4, %r5 \n"
581 " vavgub %v12, %v13, %v0 \n"
582 " add %r3, %r3, %r9 \n"
584 " lvx %v0, %r11, %r4 \n"
585 " lvx %v1, 0, %r4 \n"
586 " lvx %v13, %r5, %r3 \n"
587 " vperm %v0, %v1, %v0, %v11 \n"
588 " stvx %v12, 0, %r3 \n"
589 " vavgub %v12, %v13, %v0 \n"
590 " stvx %v12, %r5, %r3 \n"
594 static void MC_avg_o_8_altivec (uint8_t * dest, uint8_t * ref,
595 int stride, int height)
598 " lvsl %v12, 0, %r4 \n"
600 " vmrghb %v12, %v12, %v12 \n"
601 " lvsl %v1, %r5, %r4 \n"
602 " lvx %v13, 0, %r4 \n"
603 " vpkuhum %v9, %v12, %v12 \n"
604 " lvx %v0, %r9, %r4 \n"
605 " srawi %r6, %r6, 1 \n"
606 " vmrghb %v1, %v1, %v1 \n"
607 " addi %r6, %r6, -1 \n"
608 " vperm %v0, %v13, %v0, %v9 \n"
609 " lvx %v11, 0, %r3 \n"
611 " vpkuhum %v10, %v1, %v1 \n"
612 " add %r4, %r4, %r5 \n"
613 " vavgub %v12, %v11, %v0 \n"
616 " lvx %v0, %r9, %r4 \n"
617 " lvx %v13, 0, %r4 \n"
618 " lvx %v11, %r5, %r3 \n"
619 " stvewx %v12, 0, %r3 \n"
620 " vperm %v0, %v13, %v0, %v10 \n"
622 " stvewx %v12, %r9, %r3 \n"
623 " vavgub %v1, %v11, %v0 \n"
624 " add %r4, %r4, %r5 \n"
626 " lvx %v0, %r9, %r4 \n"
627 " add %r3, %r3, %r5 \n"
628 " lvx %v13, 0, %r4 \n"
629 " lvx %v11, %r5, %r3 \n"
630 " stvewx %v1, 0, %r3 \n"
631 " vperm %v0, %v13, %v0, %v9 \n"
633 " stvewx %v1, %r9, %r3 \n"
634 " vavgub %v12, %v11, %v0 \n"
635 " add %r4, %r4, %r5 \n"
636 " add %r3, %r3, %r5 \n"
639 " lvx %v0, %r9, %r4 \n"
640 " lvx %v13, 0, %r4 \n"
641 " lvx %v11, %r5, %r3 \n"
642 " stvewx %v12, 0, %r3 \n"
643 " vperm %v0, %v13, %v0, %v10 \n"
645 " stvewx %v12, %r9, %r3 \n"
646 " vavgub %v1, %v11, %v0 \n"
647 " add %r3, %r3, %r5 \n"
648 " stvewx %v1, 0, %r3 \n"
649 " stvewx %v1, %r9, %r3 \n"
653 static void MC_avg_x_16_altivec (uint8_t * dest, uint8_t * ref,
654 int stride, int height)
657 " lvsl %v8, 0, %r4 \n"
658 " vspltisb %v0, 1 \n"
660 " lvx %v12, %r9, %r4 \n"
661 " vaddubm %v7, %v8, %v0 \n"
662 " lvx %v11, 0, %r4 \n"
663 " srawi %r6, %r6, 1 \n"
664 " vperm %v1, %v11, %v12, %v7 \n"
665 " addi %r6, %r6, -1 \n"
666 " vperm %v0, %v11, %v12, %v8 \n"
667 " lvx %v9, 0, %r3 \n"
669 " add %r9, %r5, %r5 \n"
670 " vavgub %v0, %v0, %v1 \n"
671 " add %r4, %r4, %r5 \n"
672 " vavgub %v10, %v9, %v0 \n"
675 " lvx %v11, 0, %r4 \n"
676 " lvx %v12, %r11, %r4 \n"
677 " lvx %v9, %r5, %r3 \n"
678 " stvx %v10, 0, %r3 \n"
679 " vperm %v0, %v11, %v12, %v7 \n"
680 " add %r4, %r4, %r5 \n"
681 " vperm %v1, %v11, %v12, %v8 \n"
682 " lvx %v11, 0, %r4 \n"
683 " lvx %v12, %r11, %r4 \n"
684 " vavgub %v1, %v1, %v0 \n"
685 " add %r4, %r4, %r5 \n"
686 " vperm %v13, %v11, %v12, %v7 \n"
687 " vavgub %v10, %v9, %v1 \n"
688 " vperm %v0, %v11, %v12, %v8 \n"
689 " lvx %v9, %r9, %r3 \n"
690 " stvx %v10, %r5, %r3 \n"
691 " vavgub %v0, %v0, %v13 \n"
692 " add %r3, %r3, %r9 \n"
693 " vavgub %v10, %v9, %v0 \n"
695 " lvx %v12, %r11, %r4 \n"
696 " lvx %v11, 0, %r4 \n"
697 " lvx %v9, %r5, %r3 \n"
698 " vperm %v1, %v11, %v12, %v7 \n"
699 " stvx %v10, 0, %r3 \n"
700 " vperm %v0, %v11, %v12, %v8 \n"
701 " vavgub %v0, %v0, %v1 \n"
702 " vavgub %v10, %v9, %v0 \n"
703 " stvx %v10, %r5, %r3 \n"
707 static void MC_avg_x_8_altivec (uint8_t * dest, uint8_t * ref,
708 int stride, int height)
711 " lvsl %v10, 0, %r4 \n"
712 " vspltisb %v13, 1 \n"
714 " vmrghb %v10, %v10, %v10 \n"
715 " lvx %v11, 0, %r4 \n"
716 " lvx %v12, %r9, %r4 \n"
717 " vpkuhum %v7, %v10, %v10 \n"
718 " srawi %r6, %r6, 1 \n"
719 " lvsl %v10, %r5, %r4 \n"
720 " vaddubm %v6, %v7, %v13 \n"
721 " vperm %v0, %v11, %v12, %v7 \n"
722 " addi %r6, %r6, -1 \n"
723 " vmrghb %v10, %v10, %v10 \n"
724 " lvx %v9, 0, %r3 \n"
726 " vperm %v1, %v11, %v12, %v6 \n"
727 " add %r4, %r4, %r5 \n"
728 " vpkuhum %v8, %v10, %v10 \n"
729 " vavgub %v0, %v0, %v1 \n"
730 " vaddubm %v13, %v8, %v13 \n"
731 " vavgub %v10, %v9, %v0 \n"
734 " lvx %v12, %r9, %r4 \n"
735 " lvx %v11, 0, %r4 \n"
736 " lvx %v9, %r5, %r3 \n"
737 " stvewx %v10, 0, %r3 \n"
738 " vperm %v1, %v11, %v12, %v13 \n"
739 " vperm %v0, %v11, %v12, %v8 \n"
741 " stvewx %v10, %r9, %r3 \n"
742 " add %r4, %r4, %r5 \n"
743 " vavgub %v0, %v0, %v1 \n"
745 " lvx %v12, %r9, %r4 \n"
746 " vavgub %v10, %v9, %v0 \n"
747 " lvx %v11, 0, %r4 \n"
748 " add %r3, %r3, %r5 \n"
749 " vperm %v1, %v11, %v12, %v6 \n"
750 " lvx %v9, %r5, %r3 \n"
751 " vperm %v0, %v11, %v12, %v7 \n"
752 " stvewx %v10, 0, %r3 \n"
754 " vavgub %v0, %v0, %v1 \n"
755 " stvewx %v10, %r9, %r3 \n"
756 " add %r4, %r4, %r5 \n"
757 " add %r3, %r3, %r5 \n"
758 " vavgub %v10, %v9, %v0 \n"
761 " lvx %v12, %r9, %r4 \n"
762 " lvx %v11, 0, %r4 \n"
763 " lvx %v9, %r5, %r3 \n"
764 " vperm %v1, %v11, %v12, %v13 \n"
765 " stvewx %v10, 0, %r3 \n"
766 " vperm %v0, %v11, %v12, %v8 \n"
768 " stvewx %v10, %r9, %r3 \n"
769 " vavgub %v0, %v0, %v1 \n"
770 " add %r3, %r3, %r5 \n"
771 " vavgub %v10, %v9, %v0 \n"
772 " stvewx %v10, 0, %r3 \n"
773 " stvewx %v10, %r9, %r3 \n"
777 static void MC_avg_y_16_altivec (uint8_t * dest, uint8_t * ref,
778 int stride, int height)
782 " lvx %v1, %r9, %r4 \n"
783 " lvsl %v9, 0, %r4 \n"
784 " lvx %v13, 0, %r4 \n"
785 " add %r4, %r4, %r5 \n"
786 " vperm %v11, %v13, %v1, %v9 \n"
788 " lvx %v13, 0, %r4 \n"
789 " lvx %v1, %r11, %r4 \n"
790 " srawi %r6, %r6, 1 \n"
791 " vperm %v10, %v13, %v1, %v9 \n"
792 " addi %r6, %r6, -1 \n"
793 " lvx %v12, 0, %r3 \n"
795 " vavgub %v0, %v11, %v10 \n"
796 " add %r9, %r5, %r5 \n"
797 " add %r4, %r4, %r5 \n"
798 " vavgub %v0, %v12, %v0 \n"
801 " lvx %v13, 0, %r4 \n"
802 " lvx %v1, %r11, %r4 \n"
803 " lvx %v12, %r5, %r3 \n"
804 " vperm %v11, %v13, %v1, %v9 \n"
805 " stvx %v0, 0, %r3 \n"
806 " add %r4, %r4, %r5 \n"
807 " vavgub %v0, %v11, %v10 \n"
808 " lvx %v13, 0, %r4 \n"
809 " lvx %v1, %r11, %r4 \n"
810 " vavgub %v0, %v12, %v0 \n"
811 " add %r4, %r4, %r5 \n"
812 " lvx %v12, %r9, %r3 \n"
813 " vperm %v10, %v13, %v1, %v9 \n"
814 " stvx %v0, %r5, %r3 \n"
815 " vavgub %v0, %v11, %v10 \n"
816 " add %r3, %r3, %r9 \n"
817 " vavgub %v0, %v12, %v0 \n"
819 " lvx %v1, %r11, %r4 \n"
820 " lvx %v13, 0, %r4 \n"
821 " lvx %v12, %r5, %r3 \n"
822 " vperm %v11, %v13, %v1, %v9 \n"
823 " stvx %v0, 0, %r3 \n"
824 " vavgub %v0, %v11, %v10 \n"
825 " vavgub %v0, %v12, %v0 \n"
826 " stvx %v0, %r5, %r3 \n"
830 static void MC_avg_y_8_altivec (uint8_t * dest, uint8_t * ref,
831 int stride, int height)
834 " lvsl %v12, 0, %r4 \n"
835 " lvsl %v9, %r5, %r4 \n"
836 " vmrghb %v12, %v12, %v12 \n"
838 " lvx %v11, 0, %r4 \n"
839 " vmrghb %v9, %v9, %v9 \n"
840 " lvx %v13, %r9, %r4 \n"
841 " vpkuhum %v7, %v12, %v12 \n"
842 " add %r4, %r4, %r5 \n"
843 " vpkuhum %v8, %v9, %v9 \n"
844 " vperm %v12, %v11, %v13, %v7 \n"
845 " srawi %r6, %r6, 1 \n"
846 " lvx %v11, 0, %r4 \n"
847 " lvx %v13, %r9, %r4 \n"
848 " addi %r6, %r6, -1 \n"
849 " vperm %v9, %v11, %v13, %v8 \n"
850 " lvx %v10, 0, %r3 \n"
852 " add %r4, %r4, %r5 \n"
853 " vavgub %v0, %v12, %v9 \n"
854 " vavgub %v1, %v10, %v0 \n"
857 " lvx %v13, %r9, %r4 \n"
858 " lvx %v11, 0, %r4 \n"
859 " lvx %v10, %r5, %r3 \n"
860 " stvewx %v1, 0, %r3 \n"
861 " vperm %v12, %v11, %v13, %v7 \n"
863 " stvewx %v1, %r9, %r3 \n"
864 " vavgub %v0, %v12, %v9 \n"
865 " add %r4, %r4, %r5 \n"
867 " vavgub %v1, %v10, %v0 \n"
868 " lvx %v13, %r9, %r4 \n"
869 " lvx %v11, 0, %r4 \n"
870 " add %r3, %r3, %r5 \n"
871 " vperm %v9, %v11, %v13, %v8 \n"
872 " lvx %v10, %r5, %r3 \n"
873 " stvewx %v1, 0, %r3 \n"
874 " vavgub %v0, %v12, %v9 \n"
876 " stvewx %v1, %r9, %r3 \n"
877 " add %r4, %r4, %r5 \n"
878 " vavgub %v1, %v10, %v0 \n"
879 " add %r3, %r3, %r5 \n"
882 " lvx %v13, %r9, %r4 \n"
883 " lvx %v11, 0, %r4 \n"
884 " lvx %v10, %r5, %r3 \n"
885 " vperm %v12, %v11, %v13, %v7 \n"
886 " stvewx %v1, 0, %r3 \n"
888 " vavgub %v0, %v12, %v9 \n"
889 " stvewx %v1, %r9, %r3 \n"
890 " add %r3, %r3, %r5 \n"
891 " vavgub %v1, %v10, %v0 \n"
892 " stvewx %v1, 0, %r3 \n"
893 " stvewx %v1, %r9, %r3 \n"
897 static void MC_avg_xy_16_altivec (uint8_t * dest, uint8_t * ref,
898 int stride, int height)
901 " lvsl %v4, 0, %r4 \n"
902 " vspltisb %v2, 1 \n"
904 " lvx %v1, %r9, %r4 \n"
905 " vaddubm %v3, %v4, %v2 \n"
906 " lvx %v13, 0, %r4 \n"
907 " add %r4, %r4, %r5 \n"
908 " vperm %v10, %v13, %v1, %v3 \n"
910 " vperm %v11, %v13, %v1, %v4 \n"
911 " srawi %r6, %r6, 1 \n"
912 " lvx %v13, 0, %r4 \n"
913 " lvx %v1, %r11, %r4 \n"
914 " vavgub %v9, %v11, %v10 \n"
915 " vxor %v8, %v11, %v10 \n"
916 " addi %r6, %r6, -1 \n"
917 " vperm %v10, %v13, %v1, %v3 \n"
918 " lvx %v6, 0, %r3 \n"
920 " vperm %v11, %v13, %v1, %v4 \n"
921 " add %r9, %r5, %r5 \n"
922 " add %r4, %r4, %r5 \n"
923 " vxor %v5, %v11, %v10 \n"
924 " vavgub %v7, %v11, %v10 \n"
925 " vor %v1, %v8, %v5 \n"
926 " vxor %v13, %v9, %v7 \n"
927 " vand %v1, %v2, %v1 \n"
928 " vavgub %v0, %v9, %v7 \n"
929 " vand %v1, %v1, %v13 \n"
930 " vsububm %v0, %v0, %v1 \n"
931 " vavgub %v12, %v6, %v0 \n"
934 " lvx %v13, 0, %r4 \n"
935 " lvx %v1, %r11, %r4 \n"
936 " lvx %v6, %r5, %r3 \n"
937 " stvx %v12, 0, %r3 \n"
938 " vperm %v10, %v13, %v1, %v3 \n"
939 " vperm %v11, %v13, %v1, %v4 \n"
940 " add %r4, %r4, %r5 \n"
941 " lvx %v13, 0, %r4 \n"
942 " lvx %v1, %r11, %r4 \n"
943 " vavgub %v9, %v11, %v10 \n"
944 " vxor %v8, %v11, %v10 \n"
945 " add %r4, %r4, %r5 \n"
946 " vperm %v10, %v13, %v1, %v3 \n"
947 " vavgub %v12, %v9, %v7 \n"
948 " vperm %v11, %v13, %v1, %v4 \n"
949 " vor %v0, %v8, %v5 \n"
950 " vxor %v13, %v9, %v7 \n"
951 " vxor %v5, %v11, %v10 \n"
952 " vand %v0, %v2, %v0 \n"
953 " vavgub %v7, %v11, %v10 \n"
954 " vor %v1, %v8, %v5 \n"
955 " vand %v0, %v0, %v13 \n"
956 " vand %v1, %v2, %v1 \n"
957 " vxor %v13, %v9, %v7 \n"
958 " vsububm %v12, %v12, %v0 \n"
959 " vand %v1, %v1, %v13 \n"
960 " vavgub %v0, %v9, %v7 \n"
961 " vavgub %v12, %v6, %v12 \n"
962 " lvx %v6, %r9, %r3 \n"
963 " vsububm %v0, %v0, %v1 \n"
964 " stvx %v12, %r5, %r3 \n"
965 " vavgub %v12, %v6, %v0 \n"
966 " add %r3, %r3, %r9 \n"
968 " lvx %v1, %r11, %r4 \n"
969 " lvx %v13, 0, %r4 \n"
970 " lvx %v6, %r5, %r3 \n"
971 " vperm %v10, %v13, %v1, %v3 \n"
972 " stvx %v12, 0, %r3 \n"
973 " vperm %v11, %v13, %v1, %v4 \n"
974 " vxor %v8, %v11, %v10 \n"
975 " vavgub %v9, %v11, %v10 \n"
976 " vor %v0, %v8, %v5 \n"
977 " vxor %v13, %v9, %v7 \n"
978 " vand %v0, %v2, %v0 \n"
979 " vavgub %v1, %v9, %v7 \n"
980 " vand %v0, %v0, %v13 \n"
981 " vsububm %v1, %v1, %v0 \n"
982 " vavgub %v12, %v6, %v1 \n"
983 " stvx %v12, %r5, %r3 \n"
987 static void MC_avg_xy_8_altivec (uint8_t * dest, uint8_t * ref,
988 int stride, int height)
991 " lvsl %v2, 0, %r4 \n"
992 " vspltisb %v19, 1 \n"
993 " lvsl %v3, %r5, %r4 \n"
994 " vmrghb %v2, %v2, %v2 \n"
996 " vmrghb %v3, %v3, %v3 \n"
997 " lvx %v9, 0, %r4 \n"
998 " vpkuhum %v2, %v2, %v2 \n"
999 " lvx %v1, %r9, %r4 \n"
1000 " vpkuhum %v3, %v3, %v3 \n"
1001 " add %r4, %r4, %r5 \n"
1002 " vaddubm %v18, %v2, %v19 \n"
1003 " vperm %v11, %v9, %v1, %v2 \n"
1004 " srawi %r6, %r6, 1 \n"
1005 " vaddubm %v17, %v3, %v19 \n"
1006 " addi %r6, %r6, -1 \n"
1007 " vperm %v10, %v9, %v1, %v18 \n"
1008 " lvx %v4, 0, %r3 \n"
1010 " lvx %v1, %r9, %r4 \n"
1011 " lvx %v9, 0, %r4 \n"
1012 " vavgub %v8, %v11, %v10 \n"
1013 " vxor %v7, %v11, %v10 \n"
1014 " add %r4, %r4, %r5 \n"
1015 " vperm %v10, %v9, %v1, %v17 \n"
1016 " vperm %v11, %v9, %v1, %v3 \n"
1017 " vxor %v5, %v11, %v10 \n"
1018 " vavgub %v6, %v11, %v10 \n"
1019 " vor %v1, %v7, %v5 \n"
1020 " vxor %v13, %v8, %v6 \n"
1021 " vand %v1, %v19, %v1 \n"
1022 " vavgub %v0, %v8, %v6 \n"
1023 " vand %v1, %v1, %v13 \n"
1024 " vsububm %v0, %v0, %v1 \n"
1025 " vavgub %v13, %v4, %v0 \n"
1028 " lvx %v1, %r9, %r4 \n"
1029 " lvx %v9, 0, %r4 \n"
1030 " lvx %v4, %r5, %r3 \n"
1031 " stvewx %v13, 0, %r3 \n"
1032 " vperm %v10, %v9, %v1, %v18 \n"
1033 " vperm %v11, %v9, %v1, %v2 \n"
1035 " stvewx %v13, %r9, %r3 \n"
1036 " vxor %v7, %v11, %v10 \n"
1037 " add %r4, %r4, %r5 \n"
1039 " vavgub %v8, %v11, %v10 \n"
1040 " lvx %v1, %r9, %r4 \n"
1041 " vor %v0, %v7, %v5 \n"
1042 " lvx %v9, 0, %r4 \n"
1043 " vxor %v12, %v8, %v6 \n"
1044 " vand %v0, %v19, %v0 \n"
1045 " add %r3, %r3, %r5 \n"
1046 " vperm %v10, %v9, %v1, %v17 \n"
1047 " vavgub %v13, %v8, %v6 \n"
1049 " vperm %v11, %v9, %v1, %v3 \n"
1050 " vand %v0, %v0, %v12 \n"
1051 " add %r4, %r4, %r5 \n"
1052 " vxor %v5, %v11, %v10 \n"
1053 " vavgub %v6, %v11, %v10 \n"
1054 " vor %v1, %v7, %v5 \n"
1055 " vsububm %v13, %v13, %v0 \n"
1056 " vxor %v0, %v8, %v6 \n"
1057 " vand %v1, %v19, %v1 \n"
1058 " vavgub %v13, %v4, %v13 \n"
1059 " vand %v1, %v1, %v0 \n"
1060 " lvx %v4, %r5, %r3 \n"
1061 " vavgub %v0, %v8, %v6 \n"
1062 " stvewx %v13, 0, %r3 \n"
1063 " stvewx %v13, %r9, %r3 \n"
1064 " vsububm %v0, %v0, %v1 \n"
1065 " add %r3, %r3, %r5 \n"
1066 " vavgub %v13, %v4, %v0 \n"
1069 " lvx %v1, %r9, %r4 \n"
1070 " lvx %v9, 0, %r4 \n"
1071 " lvx %v4, %r5, %r3 \n"
1072 " vperm %v10, %v9, %v1, %v18 \n"
1073 " stvewx %v13, 0, %r3 \n"
1074 " vperm %v11, %v9, %v1, %v2 \n"
1076 " stvewx %v13, %r9, %r3 \n"
1077 " vxor %v7, %v11, %v10 \n"
1078 " add %r3, %r3, %r5 \n"
1079 " vavgub %v8, %v11, %v10 \n"
1080 " vor %v0, %v7, %v5 \n"
1081 " vxor %v13, %v8, %v6 \n"
1082 " vand %v0, %v19, %v0 \n"
1083 " vavgub %v1, %v8, %v6 \n"
1084 " vand %v0, %v0, %v13 \n"
1085 " vsububm %v1, %v1, %v0 \n"
1086 " vavgub %v13, %v4, %v1 \n"
1087 " stvewx %v13, 0, %r3 \n"
1088 " stvewx %v13, %r9, %r3 \n"
1092 #endif /* !CAN_COMPILE_C_ALTIVEC */
1093 #endif /* __BUILD_ALTIVEC_ASM__ */
1095 #if defined(CAN_COMPILE_C_ALTIVEC) || defined(__BUILD_ALTIVEC_ASM__)
1097 #define vector_s16_t vector signed short
1098 #define vector_u16_t vector unsigned short
1099 #define vector_s8_t vector signed char
1100 #define vector_u8_t vector unsigned char
1101 #define vector_s32_t vector signed int
1102 #define vector_u32_t vector unsigned int
1104 void MC_put_o_16_altivec (unsigned char * dest, unsigned char * ref,
1105 int stride, int height)
1107 vector_u8_t perm, ref0, ref1, tmp;
1109 perm = vec_lvsl (0, ref);
1111 height = (height >> 1) - 1;
1113 ref0 = vec_ld (0, ref);
1114 ref1 = vec_ld (15, ref);
1116 tmp = vec_perm (ref0, ref1, perm);
1119 ref0 = vec_ld (0, ref);
1120 ref1 = vec_ld (15, ref);
1122 vec_st (tmp, 0, dest);
1123 tmp = vec_perm (ref0, ref1, perm);
1125 ref0 = vec_ld (0, ref);
1126 ref1 = vec_ld (15, ref);
1128 vec_st (tmp, stride, dest);
1130 tmp = vec_perm (ref0, ref1, perm);
1133 ref0 = vec_ld (0, ref);
1134 ref1 = vec_ld (15, ref);
1135 vec_st (tmp, 0, dest);
1136 tmp = vec_perm (ref0, ref1, perm);
1137 vec_st (tmp, stride, dest);
1140 void MC_put_o_8_altivec (unsigned char * dest, unsigned char * ref,
1141 int stride, int height)
1143 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1;
1145 tmp0 = vec_lvsl (0, ref);
1146 tmp0 = vec_mergeh (tmp0, tmp0);
1147 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1148 tmp1 = vec_lvsl (stride, ref);
1149 tmp1 = vec_mergeh (tmp1, tmp1);
1150 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1152 height = (height >> 1) - 1;
1154 ref0 = vec_ld (0, ref);
1155 ref1 = vec_ld (7, ref);
1157 tmp0 = vec_perm (ref0, ref1, perm0);
1160 ref0 = vec_ld (0, ref);
1161 ref1 = vec_ld (7, ref);
1163 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1164 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1166 tmp1 = vec_perm (ref0, ref1, perm1);
1168 ref0 = vec_ld (0, ref);
1169 ref1 = vec_ld (7, ref);
1171 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1172 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1174 tmp0 = vec_perm (ref0, ref1, perm0);
1177 ref0 = vec_ld (0, ref);
1178 ref1 = vec_ld (7, ref);
1179 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1180 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1182 tmp1 = vec_perm (ref0, ref1, perm1);
1183 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1184 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1187 void MC_put_x_16_altivec (unsigned char * dest, unsigned char * ref,
1188 int stride, int height)
1190 vector_u8_t permA, permB, ref0, ref1, tmp;
1192 permA = vec_lvsl (0, ref);
1193 permB = vec_add (permA, vec_splat_u8 (1));
1195 height = (height >> 1) - 1;
1197 ref0 = vec_ld (0, ref);
1198 ref1 = vec_ld (16, ref);
1200 tmp = vec_avg (vec_perm (ref0, ref1, permA),
1201 vec_perm (ref0, ref1, permB));
1204 ref0 = vec_ld (0, ref);
1205 ref1 = vec_ld (16, ref);
1207 vec_st (tmp, 0, dest);
1208 tmp = vec_avg (vec_perm (ref0, ref1, permA),
1209 vec_perm (ref0, ref1, permB));
1211 ref0 = vec_ld (0, ref);
1212 ref1 = vec_ld (16, ref);
1214 vec_st (tmp, stride, dest);
1216 tmp = vec_avg (vec_perm (ref0, ref1, permA),
1217 vec_perm (ref0, ref1, permB));
1220 ref0 = vec_ld (0, ref);
1221 ref1 = vec_ld (16, ref);
1222 vec_st (tmp, 0, dest);
1223 tmp = vec_avg (vec_perm (ref0, ref1, permA),
1224 vec_perm (ref0, ref1, permB));
1225 vec_st (tmp, stride, dest);
/*
 * MC_put_x_8_altivec: copy an 8-pixel-wide block with horizontal half-pel
 * interpolation.  Processes two rows per iteration; results are stored as
 * two 4-byte vec_ste stores per row.
 * NOTE(review): loop control and pointer advances are not visible in this
 * extract -- verify against the full source.
 */
1228 void MC_put_x_8_altivec (unsigned char * dest, unsigned char * ref,
1229 int stride, int height)
1231 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
1233 ones = vec_splat_u8 (1);
/* mergeh+pack of the lvsl result builds an 8-byte unaligned-load permute
 * (duplicated into both vector halves); separate permutes are needed for
 * the two rows because ref and ref+stride can have different alignment */
1234 tmp0 = vec_lvsl (0, ref);
1235 tmp0 = vec_mergeh (tmp0, tmp0);
1236 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1237 perm0B = vec_add (perm0A, ones);
1238 tmp1 = vec_lvsl (stride, ref);
1239 tmp1 = vec_mergeh (tmp1, tmp1);
1240 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1241 perm1B = vec_add (perm1A, ones);
/* two rows per iteration; final pair handled after the loop */
1243 height = (height >> 1) - 1;
1245 ref0 = vec_ld (0, ref);
1246 ref1 = vec_ld (8, ref);
1248 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
1249 vec_perm (ref0, ref1, perm0B));
1252 ref0 = vec_ld (0, ref);
1253 ref1 = vec_ld (8, ref);
/* store 8 result bytes as two word-sized element stores */
1255 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1256 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1258 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
1259 vec_perm (ref0, ref1, perm1B));
1261 ref0 = vec_ld (0, ref);
1262 ref1 = vec_ld (8, ref);
1264 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1265 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1267 tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
1268 vec_perm (ref0, ref1, perm0B));
/* epilogue: last two rows */
1271 ref0 = vec_ld (0, ref);
1272 ref1 = vec_ld (8, ref);
1273 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1274 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1276 tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
1277 vec_perm (ref0, ref1, perm1B));
1278 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1279 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
/*
 * MC_put_y_16_altivec: copy a 16-pixel-wide block with vertical half-pel
 * interpolation -- each output row is vec_avg(row, next_row).  tmp0/tmp1
 * alternate as the "previous row" so each row is loaded only once.
 * NOTE(review): loop control and pointer advances are not visible in this
 * extract -- verify against the full source.
 */
1282 void MC_put_y_16_altivec (unsigned char * dest, unsigned char * ref,
1283 int stride, int height)
1285 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp;
/* single permute suffices: all rows share the alignment of ref modulo the
 * stride advance handled by the (not-visible) pointer updates */
1287 perm = vec_lvsl (0, ref);
1289 height = (height >> 1) - 1;
1291 ref0 = vec_ld (0, ref);
1292 ref1 = vec_ld (15, ref);
1294 tmp0 = vec_perm (ref0, ref1, perm);
1295 ref0 = vec_ld (0, ref);
1296 ref1 = vec_ld (15, ref);
1298 tmp1 = vec_perm (ref0, ref1, perm);
1299 tmp = vec_avg (tmp0, tmp1);
1302 ref0 = vec_ld (0, ref);
1303 ref1 = vec_ld (15, ref);
1305 vec_st (tmp, 0, dest);
1306 tmp0 = vec_perm (ref0, ref1, perm);
1307 tmp = vec_avg (tmp0, tmp1);
1309 ref0 = vec_ld (0, ref);
1310 ref1 = vec_ld (15, ref);
1312 vec_st (tmp, stride, dest);
1314 tmp1 = vec_perm (ref0, ref1, perm);
1315 tmp = vec_avg (tmp0, tmp1);
/* epilogue: last two rows */
1318 ref0 = vec_ld (0, ref);
1319 ref1 = vec_ld (15, ref);
1320 vec_st (tmp, 0, dest);
1321 tmp0 = vec_perm (ref0, ref1, perm);
1322 tmp = vec_avg (tmp0, tmp1);
1323 vec_st (tmp, stride, dest);
/*
 * MC_put_y_8_altivec: copy an 8-pixel-wide block with vertical half-pel
 * interpolation (average of each row with the next).  tmp0/tmp1 alternate
 * as the previous row; results are written as two 4-byte vec_ste stores.
 * NOTE(review): loop control and pointer advances are not visible in this
 * extract -- verify against the full source.
 */
1326 void MC_put_y_8_altivec (unsigned char * dest, unsigned char * ref,
1327 int stride, int height)
1329 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1;
/* 8-byte unaligned-load permutes, one per row parity (ref and ref+stride
 * can be aligned differently) */
1331 tmp0 = vec_lvsl (0, ref);
1332 tmp0 = vec_mergeh (tmp0, tmp0);
1333 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1334 tmp1 = vec_lvsl (stride, ref);
1335 tmp1 = vec_mergeh (tmp1, tmp1);
1336 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1338 height = (height >> 1) - 1;
1340 ref0 = vec_ld (0, ref);
1341 ref1 = vec_ld (7, ref);
1343 tmp0 = vec_perm (ref0, ref1, perm0);
1344 ref0 = vec_ld (0, ref);
1345 ref1 = vec_ld (7, ref);
1347 tmp1 = vec_perm (ref0, ref1, perm1);
1348 tmp = vec_avg (tmp0, tmp1);
1351 ref0 = vec_ld (0, ref);
1352 ref1 = vec_ld (7, ref);
1354 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1355 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1357 tmp0 = vec_perm (ref0, ref1, perm0);
1358 tmp = vec_avg (tmp0, tmp1);
1360 ref0 = vec_ld (0, ref);
1361 ref1 = vec_ld (7, ref);
1363 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1364 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1366 tmp1 = vec_perm (ref0, ref1, perm1);
1367 tmp = vec_avg (tmp0, tmp1);
/* epilogue: last two rows */
1370 ref0 = vec_ld (0, ref);
1371 ref1 = vec_ld (7, ref);
1372 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1373 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1375 tmp0 = vec_perm (ref0, ref1, perm0);
1376 tmp = vec_avg (tmp0, tmp1);
1377 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1378 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
/*
 * MC_put_xy_16_altivec: copy a 16-pixel-wide block with half-pel
 * interpolation in both directions: out = (a + b + c + d + 2) >> 2 over
 * the 2x2 neighbourhood.  Because vec_avg rounds up, the nested averages
 * avg(avg(a,b), avg(c,d)) can overshoot by one; the vec_sub term
 * (ones & (xor0|xor1)) & (avg0^avg1) subtracts that bias so the result
 * matches the exact (a+b+c+d+2)>>2 (same trick as in mpeg2dec).
 * NOTE(review): `ones` is used below but is not in the visible declaration
 * on the first line -- its declaration is presumably on a line missing
 * from this extract; verify.  Loop control / pointer advances are also
 * not visible here.
 */
1384 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
1387 ones = vec_splat_u8 (1);
/* permA selects ref[i], permB selects ref[i+1] (horizontal neighbours) */
1388 permA = vec_lvsl (0, ref);
1389 permB = vec_add (permA, ones);
1391 height = (height >> 1) - 1;
1393 ref0 = vec_ld (0, ref);
1394 ref1 = vec_ld (16, ref);
1396 A = vec_perm (ref0, ref1, permA);
1397 B = vec_perm (ref0, ref1, permB);
1398 avg0 = vec_avg (A, B);
1399 xor0 = vec_xor (A, B);
1401 ref0 = vec_ld (0, ref);
1402 ref1 = vec_ld (16, ref);
1404 A = vec_perm (ref0, ref1, permA);
1405 B = vec_perm (ref0, ref1, permB);
1406 avg1 = vec_avg (A, B);
1407 xor1 = vec_xor (A, B);
/* combine the two horizontal averages with round-up bias correction */
1408 tmp = vec_sub (vec_avg (avg0, avg1),
1409 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1410 vec_xor (avg0, avg1)));
1413 ref0 = vec_ld (0, ref);
1414 ref1 = vec_ld (16, ref);
1416 vec_st (tmp, 0, dest);
1417 A = vec_perm (ref0, ref1, permA);
1418 B = vec_perm (ref0, ref1, permB);
1419 avg0 = vec_avg (A, B);
1420 xor0 = vec_xor (A, B);
1421 tmp = vec_sub (vec_avg (avg0, avg1),
1422 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1423 vec_xor (avg0, avg1)));
1425 ref0 = vec_ld (0, ref);
1426 ref1 = vec_ld (16, ref);
1428 vec_st (tmp, stride, dest);
1430 A = vec_perm (ref0, ref1, permA);
1431 B = vec_perm (ref0, ref1, permB);
1432 avg1 = vec_avg (A, B);
1433 xor1 = vec_xor (A, B);
1434 tmp = vec_sub (vec_avg (avg0, avg1),
1435 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1436 vec_xor (avg0, avg1)));
/* epilogue: last two rows */
1439 ref0 = vec_ld (0, ref);
1440 ref1 = vec_ld (16, ref);
1441 vec_st (tmp, 0, dest);
1442 A = vec_perm (ref0, ref1, permA);
1443 B = vec_perm (ref0, ref1, permB);
1444 avg0 = vec_avg (A, B);
1445 xor0 = vec_xor (A, B);
1446 tmp = vec_sub (vec_avg (avg0, avg1),
1447 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1448 vec_xor (avg0, avg1)));
1449 vec_st (tmp, stride, dest);
/*
 * MC_put_xy_8_altivec: copy an 8-pixel-wide block with half-pel
 * interpolation in both directions ((a+b+c+d+2)>>2 over the 2x2
 * neighbourhood).  Uses the same vec_sub correction as the 16-wide
 * variant to undo vec_avg's round-up bias.  Separate permutes per row
 * parity because ref and ref+stride can be aligned differently.
 * NOTE(review): loop control and pointer advances are not visible in this
 * extract -- verify against the full source.
 */
1452 void MC_put_xy_8_altivec (unsigned char * dest, unsigned char * ref,
1453 int stride, int height)
1455 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
1456 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones;
1458 ones = vec_splat_u8 (1);
/* 8-byte unaligned-load permutes; the B variants select byte i+1 */
1459 perm0A = vec_lvsl (0, ref);
1460 perm0A = vec_mergeh (perm0A, perm0A);
1461 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
1462 perm0B = vec_add (perm0A, ones);
1463 perm1A = vec_lvsl (stride, ref);
1464 perm1A = vec_mergeh (perm1A, perm1A);
1465 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
1466 perm1B = vec_add (perm1A, ones);
1468 height = (height >> 1) - 1;
1470 ref0 = vec_ld (0, ref);
1471 ref1 = vec_ld (8, ref);
1473 A = vec_perm (ref0, ref1, perm0A);
1474 B = vec_perm (ref0, ref1, perm0B);
1475 avg0 = vec_avg (A, B);
1476 xor0 = vec_xor (A, B);
1478 ref0 = vec_ld (0, ref);
1479 ref1 = vec_ld (8, ref);
1481 A = vec_perm (ref0, ref1, perm1A);
1482 B = vec_perm (ref0, ref1, perm1B);
1483 avg1 = vec_avg (A, B);
1484 xor1 = vec_xor (A, B);
/* rounded 4-way average with bias correction */
1485 tmp = vec_sub (vec_avg (avg0, avg1),
1486 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1487 vec_xor (avg0, avg1)));
1490 ref0 = vec_ld (0, ref);
1491 ref1 = vec_ld (8, ref);
1493 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1494 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1496 A = vec_perm (ref0, ref1, perm0A);
1497 B = vec_perm (ref0, ref1, perm0B);
1498 avg0 = vec_avg (A, B);
1499 xor0 = vec_xor (A, B);
1500 tmp = vec_sub (vec_avg (avg0, avg1),
1501 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1502 vec_xor (avg0, avg1)));
1504 ref0 = vec_ld (0, ref);
1505 ref1 = vec_ld (8, ref);
1507 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1508 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1510 A = vec_perm (ref0, ref1, perm1A);
1511 B = vec_perm (ref0, ref1, perm1B);
1512 avg1 = vec_avg (A, B);
1513 xor1 = vec_xor (A, B);
1514 tmp = vec_sub (vec_avg (avg0, avg1),
1515 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1516 vec_xor (avg0, avg1)));
/* epilogue: last two rows */
1519 ref0 = vec_ld (0, ref);
1520 ref1 = vec_ld (8, ref);
1521 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1522 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1524 A = vec_perm (ref0, ref1, perm0A);
1525 B = vec_perm (ref0, ref1, perm0B);
1526 avg0 = vec_avg (A, B);
1527 xor0 = vec_xor (A, B);
1528 tmp = vec_sub (vec_avg (avg0, avg1),
1529 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1530 vec_xor (avg0, avg1)));
1531 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1532 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
/*
 * NOTE(review): this is a SECOND definition of MC_put_xy_8_altivec -- a
 * redefinition that cannot compile alongside the one above unless it is
 * excluded by preprocessor guards (e.g. #if 0) that are not visible in
 * this extract; in upstream mpeg2dec this alternate, widening-arithmetic
 * variant is compiled out.  Verify the guards in the full source.
 *
 * The variant computes (a+b+c+d+2)>>2 directly: it zero-extends the four
 * neighbour bytes to 16 bits (vec_mergeh with zero), sums them, adds 2,
 * shifts right by 2, and packs back to bytes.  The body visible here is
 * also incomplete (the final vec_avg result is never stored and no loop
 * control is visible).
 */
1536 void MC_put_xy_8_altivec (unsigned char * dest, unsigned char * ref,
1537 int stride, int height)
1539 vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones;
1540 vector_u16_t splat2, temp;
1542 ones = vec_splat_u8 (1);
1543 permA = vec_lvsl (0, ref);
1544 permB = vec_add (permA, ones);
1546 zero = vec_splat_u8 (0);
1547 splat2 = vec_splat_u16 (2);
1550 ref0 = vec_ld (0, ref);
1551 ref1 = vec_ld (8, ref);
/* A,B = row bytes at i and i+1; C,D = same for the following row */
1553 A = vec_perm (ref0, ref1, permA);
1554 B = vec_perm (ref0, ref1, permB);
1555 ref0 = vec_ld (0, ref);
1556 ref1 = vec_ld (8, ref);
1557 C = vec_perm (ref0, ref1, permA);
1558 D = vec_perm (ref0, ref1, permB);
/* widen to u16, sum the 2x2 neighbourhood, round, and narrow back */
1560 temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A),
1561 (vector_u16_t)vec_mergeh (zero, B)),
1562 vec_add ((vector_u16_t)vec_mergeh (zero, C),
1563 (vector_u16_t)vec_mergeh (zero, D)));
1564 temp = vec_sr (vec_add (temp, splat2), splat2);
1565 tmp = vec_pack (temp, temp);
1567 vec_st (tmp, 0, dest);
/* NOTE(review): this trailing vec_avg result is never stored as shown */
1569 tmp = vec_avg (vec_perm (ref0, ref1, permA),
1570 vec_perm (ref0, ref1, permB));
/*
 * MC_avg_o_16_altivec: average a 16-pixel-wide reference block into the
 * destination (out = avg(dest, ref)) with no sub-pel interpolation.
 * `prev` holds the destination row being averaged; the next row's dest is
 * loaded before the current store to hide latency.
 * NOTE(review): loop control and pointer advances are not visible in this
 * extract -- verify against the full source.
 */
1575 void MC_avg_o_16_altivec (unsigned char * dest, unsigned char * ref,
1576 int stride, int height)
1578 vector_u8_t perm, ref0, ref1, tmp, prev;
1580 perm = vec_lvsl (0, ref);
1582 height = (height >> 1) - 1;
1584 ref0 = vec_ld (0, ref);
1585 ref1 = vec_ld (15, ref);
1587 prev = vec_ld (0, dest);
1588 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
1591 ref0 = vec_ld (0, ref);
1592 ref1 = vec_ld (15, ref);
1594 prev = vec_ld (stride, dest);
1595 vec_st (tmp, 0, dest);
1596 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
1598 ref0 = vec_ld (0, ref);
1599 ref1 = vec_ld (15, ref);
/* prefetch the dest row two strides ahead before storing this one */
1601 prev = vec_ld (2*stride, dest);
1602 vec_st (tmp, stride, dest);
1604 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
/* epilogue: last two rows */
1607 ref0 = vec_ld (0, ref);
1608 ref1 = vec_ld (15, ref);
1609 prev = vec_ld (stride, dest);
1610 vec_st (tmp, 0, dest);
1611 tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
1612 vec_st (tmp, stride, dest);
/*
 * MC_avg_o_8_altivec: average an 8-pixel-wide reference block into the
 * destination (out = avg(dest, ref)), no sub-pel interpolation; results
 * are written with two 4-byte vec_ste stores per row.
 * NOTE(review): loop control and pointer advances are not visible in this
 * extract -- verify against the full source.
 */
1615 void MC_avg_o_8_altivec (unsigned char * dest, unsigned char * ref,
1616 int stride, int height)
1618 vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev;
/* 8-byte unaligned-load permutes, one per row parity */
1620 tmp0 = vec_lvsl (0, ref);
1621 tmp0 = vec_mergeh (tmp0, tmp0);
1622 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1623 tmp1 = vec_lvsl (stride, ref);
1624 tmp1 = vec_mergeh (tmp1, tmp1);
1625 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1627 height = (height >> 1) - 1;
1629 ref0 = vec_ld (0, ref);
1630 ref1 = vec_ld (7, ref);
1632 prev = vec_ld (0, dest);
1633 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
1636 ref0 = vec_ld (0, ref);
1637 ref1 = vec_ld (7, ref);
1639 prev = vec_ld (stride, dest);
1640 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1641 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1643 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
1645 ref0 = vec_ld (0, ref);
1646 ref1 = vec_ld (7, ref);
1648 prev = vec_ld (stride, dest);
1649 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1650 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1652 tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
/* epilogue: last two rows */
1655 ref0 = vec_ld (0, ref);
1656 ref1 = vec_ld (7, ref);
1657 prev = vec_ld (stride, dest);
1658 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1659 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1661 tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
1662 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1663 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
/*
 * MC_avg_x_16_altivec: average into dest with horizontal half-pel
 * interpolation: out = avg(dest, avg(ref[i], ref[i+1])).
 * NOTE(review): loop control and pointer advances are not visible in this
 * extract -- verify against the full source.
 */
1666 void MC_avg_x_16_altivec (unsigned char * dest, unsigned char * ref,
1667 int stride, int height)
1669 vector_u8_t permA, permB, ref0, ref1, tmp, prev;
/* permB selects the byte one to the right of permA */
1671 permA = vec_lvsl (0, ref);
1672 permB = vec_add (permA, vec_splat_u8 (1));
1674 height = (height >> 1) - 1;
1676 ref0 = vec_ld (0, ref);
1677 ref1 = vec_ld (16, ref);
1678 prev = vec_ld (0, dest);
1680 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
1681 vec_perm (ref0, ref1, permB)));
1684 ref0 = vec_ld (0, ref);
1685 ref1 = vec_ld (16, ref);
1687 prev = vec_ld (stride, dest);
1688 vec_st (tmp, 0, dest);
1689 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
1690 vec_perm (ref0, ref1, permB)));
1692 ref0 = vec_ld (0, ref);
1693 ref1 = vec_ld (16, ref);
/* prefetch the dest row two strides ahead before storing this one */
1695 prev = vec_ld (2*stride, dest);
1696 vec_st (tmp, stride, dest);
1698 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
1699 vec_perm (ref0, ref1, permB)));
/* epilogue: last two rows */
1702 ref0 = vec_ld (0, ref);
1703 ref1 = vec_ld (16, ref);
1704 prev = vec_ld (stride, dest);
1705 vec_st (tmp, 0, dest);
1706 tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
1707 vec_perm (ref0, ref1, permB)));
1708 vec_st (tmp, stride, dest);
/*
 * MC_avg_x_8_altivec: average into dest with horizontal half-pel
 * interpolation on an 8-pixel-wide block:
 * out = avg(dest, avg(ref[i], ref[i+1])).
 * NOTE(review): `prev` is used below but is not in the visible declaration
 * line -- its declaration is presumably on a line missing from this
 * extract; verify.  Loop control / pointer advances are also not visible.
 */
1711 void MC_avg_x_8_altivec (unsigned char * dest, unsigned char * ref,
1712 int stride, int height)
1714 vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
1717 ones = vec_splat_u8 (1);
/* 8-byte unaligned-load permutes per row parity; B variants pick byte i+1 */
1718 tmp0 = vec_lvsl (0, ref);
1719 tmp0 = vec_mergeh (tmp0, tmp0);
1720 perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1721 perm0B = vec_add (perm0A, ones);
1722 tmp1 = vec_lvsl (stride, ref);
1723 tmp1 = vec_mergeh (tmp1, tmp1);
1724 perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1725 perm1B = vec_add (perm1A, ones);
1727 height = (height >> 1) - 1;
1729 ref0 = vec_ld (0, ref);
1730 ref1 = vec_ld (8, ref);
1731 prev = vec_ld (0, dest);
1733 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
1734 vec_perm (ref0, ref1, perm0B)));
1737 ref0 = vec_ld (0, ref);
1738 ref1 = vec_ld (8, ref);
1740 prev = vec_ld (stride, dest);
1741 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1742 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1744 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
1745 vec_perm (ref0, ref1, perm1B)));
1747 ref0 = vec_ld (0, ref);
1748 ref1 = vec_ld (8, ref);
1750 prev = vec_ld (stride, dest);
1751 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1752 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
1754 tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
1755 vec_perm (ref0, ref1, perm0B)));
/* epilogue: last two rows */
1758 ref0 = vec_ld (0, ref);
1759 ref1 = vec_ld (8, ref);
1760 prev = vec_ld (stride, dest);
1761 vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
1762 vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
1764 tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
1765 vec_perm (ref0, ref1, perm1B)));
1766 vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
1767 vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
/*
 * MC_avg_y_16_altivec: average into dest with vertical half-pel
 * interpolation: out = avg(dest, avg(row, next_row)).  tmp0/tmp1
 * alternate as the previous reference row so each row is loaded once.
 * NOTE(review): loop control and pointer advances are not visible in this
 * extract -- verify against the full source.
 */
1770 void MC_avg_y_16_altivec (unsigned char * dest, unsigned char * ref,
1771 int stride, int height)
1773 vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev;
1775 perm = vec_lvsl (0, ref);
1777 height = (height >> 1) - 1;
1779 ref0 = vec_ld (0, ref);
1780 ref1 = vec_ld (15, ref);
1782 tmp0 = vec_perm (ref0, ref1, perm);
1783 ref0 = vec_ld (0, ref);
1784 ref1 = vec_ld (15, ref);
1786 prev = vec_ld (0, dest);
1787 tmp1 = vec_perm (ref0, ref1, perm);
1788 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1791 ref0 = vec_ld (0, ref);
1792 ref1 = vec_ld (15, ref);
1794 prev = vec_ld (stride, dest);
1795 vec_st (tmp, 0, dest);
1796 tmp0 = vec_perm (ref0, ref1, perm);
1797 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1799 ref0 = vec_ld (0, ref);
1800 ref1 = vec_ld (15, ref);
/* prefetch the dest row two strides ahead before storing this one */
1802 prev = vec_ld (2*stride, dest);
1803 vec_st (tmp, stride, dest);
1805 tmp1 = vec_perm (ref0, ref1, perm);
1806 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
/* epilogue: last two rows */
1809 ref0 = vec_ld (0, ref);
1810 ref1 = vec_ld (15, ref);
1811 prev = vec_ld (stride, dest);
1812 vec_st (tmp, 0, dest);
1813 tmp0 = vec_perm (ref0, ref1, perm);
1814 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1815 vec_st (tmp, stride, dest);
/*
 * MC_avg_y_8_altivec: average into dest with vertical half-pel
 * interpolation on an 8-pixel-wide block:
 * out = avg(dest, avg(row, next_row)).
 * NOTE(review): loop control and pointer advances are not visible in this
 * extract -- verify against the full source.
 */
1818 void MC_avg_y_8_altivec (unsigned char * dest, unsigned char * ref,
1819 int stride, int height)
1821 vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev;
/* 8-byte unaligned-load permutes, one per row parity */
1823 tmp0 = vec_lvsl (0, ref);
1824 tmp0 = vec_mergeh (tmp0, tmp0);
1825 perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
1826 tmp1 = vec_lvsl (stride, ref);
1827 tmp1 = vec_mergeh (tmp1, tmp1);
1828 perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
1830 height = (height >> 1) - 1;
1832 ref0 = vec_ld (0, ref);
1833 ref1 = vec_ld (7, ref);
1835 tmp0 = vec_perm (ref0, ref1, perm0);
1836 ref0 = vec_ld (0, ref);
1837 ref1 = vec_ld (7, ref);
1839 prev = vec_ld (0, dest);
1840 tmp1 = vec_perm (ref0, ref1, perm1);
1841 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1844 ref0 = vec_ld (0, ref);
1845 ref1 = vec_ld (7, ref);
1847 prev = vec_ld (stride, dest);
1848 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1849 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1851 tmp0 = vec_perm (ref0, ref1, perm0);
1852 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1854 ref0 = vec_ld (0, ref);
1855 ref1 = vec_ld (7, ref);
1857 prev = vec_ld (stride, dest);
1858 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1859 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1861 tmp1 = vec_perm (ref0, ref1, perm1);
1862 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
/* epilogue: last two rows */
1865 ref0 = vec_ld (0, ref);
1866 ref1 = vec_ld (7, ref);
1867 prev = vec_ld (stride, dest);
1868 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1869 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
1871 tmp0 = vec_perm (ref0, ref1, perm0);
1872 tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
1873 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1874 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
/*
 * MC_avg_xy_16_altivec: average into dest with half-pel interpolation in
 * both directions: out = avg(dest, (a+b+c+d+2)>>2).  The vec_sub term
 * (ones & (xor0|xor1)) & (avg0^avg1) corrects the round-up bias of the
 * nested vec_avg calls, as in the put_xy variants.
 * NOTE(review): loop control and pointer advances are not visible in this
 * extract -- verify against the full source.
 */
1877 void MC_avg_xy_16_altivec (unsigned char * dest, unsigned char * ref,
1878 int stride, int height)
1880 vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
1881 vector_u8_t ones, prev;
1883 ones = vec_splat_u8 (1);
/* permA selects ref[i], permB selects ref[i+1] */
1884 permA = vec_lvsl (0, ref);
1885 permB = vec_add (permA, ones);
1887 height = (height >> 1) - 1;
1889 ref0 = vec_ld (0, ref);
1890 ref1 = vec_ld (16, ref);
1892 A = vec_perm (ref0, ref1, permA);
1893 B = vec_perm (ref0, ref1, permB);
1894 avg0 = vec_avg (A, B);
1895 xor0 = vec_xor (A, B);
1897 ref0 = vec_ld (0, ref);
1898 ref1 = vec_ld (16, ref);
1900 prev = vec_ld (0, dest);
1901 A = vec_perm (ref0, ref1, permA);
1902 B = vec_perm (ref0, ref1, permB);
1903 avg1 = vec_avg (A, B);
1904 xor1 = vec_xor (A, B);
1905 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
1906 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1907 vec_xor (avg0, avg1))));
1910 ref0 = vec_ld (0, ref);
1911 ref1 = vec_ld (16, ref);
1913 prev = vec_ld (stride, dest);
1914 vec_st (tmp, 0, dest);
1915 A = vec_perm (ref0, ref1, permA);
1916 B = vec_perm (ref0, ref1, permB);
1917 avg0 = vec_avg (A, B);
1918 xor0 = vec_xor (A, B);
1919 tmp = vec_avg (prev,
1920 vec_sub (vec_avg (avg0, avg1),
1921 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1922 vec_xor (avg0, avg1))));
1924 ref0 = vec_ld (0, ref);
1925 ref1 = vec_ld (16, ref);
/* prefetch the dest row two strides ahead before storing this one */
1927 prev = vec_ld (2*stride, dest);
1928 vec_st (tmp, stride, dest);
1930 A = vec_perm (ref0, ref1, permA);
1931 B = vec_perm (ref0, ref1, permB);
1932 avg1 = vec_avg (A, B);
1933 xor1 = vec_xor (A, B);
1934 tmp = vec_avg (prev,
1935 vec_sub (vec_avg (avg0, avg1),
1936 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1937 vec_xor (avg0, avg1))));
/* epilogue: last two rows */
1940 ref0 = vec_ld (0, ref);
1941 ref1 = vec_ld (16, ref);
1942 prev = vec_ld (stride, dest);
1943 vec_st (tmp, 0, dest);
1944 A = vec_perm (ref0, ref1, permA);
1945 B = vec_perm (ref0, ref1, permB);
1946 avg0 = vec_avg (A, B);
1947 xor0 = vec_xor (A, B);
1948 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
1949 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1950 vec_xor (avg0, avg1))));
1951 vec_st (tmp, stride, dest);
/*
 * MC_avg_xy_8_altivec: average into dest with half-pel interpolation in
 * both directions on an 8-pixel-wide block:
 * out = avg(dest, (a+b+c+d+2)>>2), with the same vec_sub round-up bias
 * correction as the 16-wide variant.  Separate permutes per row parity.
 * NOTE(review): loop control and pointer advances are not visible in this
 * extract -- verify against the full source.
 */
1954 void MC_avg_xy_8_altivec (unsigned char * dest, unsigned char * ref,
1955 int stride, int height)
1957 vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
1958 vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev;
1960 ones = vec_splat_u8 (1);
/* 8-byte unaligned-load permutes; the B variants select byte i+1 */
1961 perm0A = vec_lvsl (0, ref);
1962 perm0A = vec_mergeh (perm0A, perm0A);
1963 perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
1964 perm0B = vec_add (perm0A, ones);
1965 perm1A = vec_lvsl (stride, ref);
1966 perm1A = vec_mergeh (perm1A, perm1A);
1967 perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
1968 perm1B = vec_add (perm1A, ones);
1970 height = (height >> 1) - 1;
1972 ref0 = vec_ld (0, ref);
1973 ref1 = vec_ld (8, ref);
1975 A = vec_perm (ref0, ref1, perm0A);
1976 B = vec_perm (ref0, ref1, perm0B);
1977 avg0 = vec_avg (A, B);
1978 xor0 = vec_xor (A, B);
1980 ref0 = vec_ld (0, ref);
1981 ref1 = vec_ld (8, ref);
1983 prev = vec_ld (0, dest);
1984 A = vec_perm (ref0, ref1, perm1A);
1985 B = vec_perm (ref0, ref1, perm1B);
1986 avg1 = vec_avg (A, B);
1987 xor1 = vec_xor (A, B);
1988 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
1989 vec_and (vec_and (ones, vec_or (xor0, xor1)),
1990 vec_xor (avg0, avg1))));
1993 ref0 = vec_ld (0, ref);
1994 ref1 = vec_ld (8, ref);
1996 prev = vec_ld (stride, dest);
1997 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
1998 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
2000 A = vec_perm (ref0, ref1, perm0A);
2001 B = vec_perm (ref0, ref1, perm0B);
2002 avg0 = vec_avg (A, B);
2003 xor0 = vec_xor (A, B);
2004 tmp = vec_avg (prev,
2005 vec_sub (vec_avg (avg0, avg1),
2006 vec_and (vec_and (ones, vec_or (xor0, xor1)),
2007 vec_xor (avg0, avg1))));
2009 ref0 = vec_ld (0, ref);
2010 ref1 = vec_ld (8, ref);
2012 prev = vec_ld (stride, dest);
2013 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
2014 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
2016 A = vec_perm (ref0, ref1, perm1A);
2017 B = vec_perm (ref0, ref1, perm1B);
2018 avg1 = vec_avg (A, B);
2019 xor1 = vec_xor (A, B);
2020 tmp = vec_avg (prev,
2021 vec_sub (vec_avg (avg0, avg1),
2022 vec_and (vec_and (ones, vec_or (xor0, xor1)),
2023 vec_xor (avg0, avg1))));
/* epilogue: last two rows */
2026 ref0 = vec_ld (0, ref);
2027 ref1 = vec_ld (8, ref);
2028 prev = vec_ld (stride, dest);
2029 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
2030 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
2032 A = vec_perm (ref0, ref1, perm0A);
2033 B = vec_perm (ref0, ref1, perm0B);
2034 avg0 = vec_avg (A, B);
2035 xor0 = vec_xor (A, B);
2036 tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
2037 vec_and (vec_and (ones, vec_or (xor0, xor1)),
2038 vec_xor (avg0, avg1))));
2039 vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
2040 vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
2043 #endif /* CAN_COMPILE_C_ALTIVEC || __BUILD_ALTIVEC_ASM__ */
2044 #ifndef __BUILD_ALTIVEC_ASM__
2046 /*****************************************************************************
2047 * Functions exported as capabilities. They are declared as static so that
2048 * we don't pollute the namespace too much.
2049 *****************************************************************************/
/*
 * Dispatch table of motion-compensation routines handed to the decoder
 * core.  Indexing appears to be [put/avg][16-wide/8-wide][o, x, y, xy]
 * (o = full-pel copy, x = horizontal half-pel, y = vertical half-pel,
 * xy = both) -- TODO confirm against the consumer of p_private.
 * NOTE(review): the initializer's brace lines are not visible in this
 * extract.
 */
2050 static void (* ppppf_motion[2][2][4])( yuv_data_t *, yuv_data_t *, int, int ) =
2052 /* Copying functions */
2055 { MC_put_o_16_altivec, MC_put_x_16_altivec, MC_put_y_16_altivec, MC_put_xy_16_altivec },
2057 { MC_put_o_8_altivec, MC_put_x_8_altivec, MC_put_y_8_altivec, MC_put_xy_8_altivec }
2059 /* Averaging functions */
2062 { MC_avg_o_16_altivec, MC_avg_x_16_altivec, MC_avg_y_16_altivec, MC_avg_xy_16_altivec },
2064 { MC_avg_o_8_altivec, MC_avg_x_8_altivec, MC_avg_y_8_altivec, MC_avg_xy_8_altivec }
/*
 * Open: VLC capability activation callback.  Publishes the AltiVec
 * dispatch table through the object's private pointer so the decoder can
 * call these routines.
 * NOTE(review): the return statement (presumably VLC_SUCCESS) is not
 * visible in this extract -- verify against the full source.
 */
2068 static int Open ( vlc_object_t *p_this )
2070 p_this->p_private = ppppf_motion;
2074 #endif /* __BUILD_ALTIVEC_ASM__ */