1 /*****************************************************************************
2 * memcpyaltivec.c : Altivec memcpy module
3 *****************************************************************************
4 * Copyright (C) 2001 VideoLAN
5 * $Id: memcpyaltivec.c,v 1.4 2002/04/16 23:00:54 massiot Exp $
7 * Authors: Christophe Massiot <massiot@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
24 #ifndef __BUILD_ALTIVEC_ASM__
26 /*****************************************************************************
28 *****************************************************************************/
32 #include <videolan/vlc.h>
34 /*****************************************************************************
35 * Local and extern prototypes.
36 *****************************************************************************/
/* Installs this module's memcpy implementation into the given function list
 * (see the assignment to pf_memcpy in its definition). */
37 static void memcpy_getfunctions( function_list_t * p_function_list );
/* The AltiVec copy routine itself. _M() wraps the symbol name; the only
 * definition of _M visible in this file is the identity one used by the
 * __BUILD_ALTIVEC_ASM__ build. */
38 void * _M( fast_memcpy ) ( void * to, const void * from, size_t len );
40 /*****************************************************************************
41 * Build configuration tree.
42 *****************************************************************************/
/* Module descriptor: a human-readable description, the MEMCPY capability,
 * the ALTIVEC requirement, and the two shortcut names "altivec" and
 * "memcpyaltivec".
 * NOTE(review): the second ADD_CAPABILITY argument (100) is presumably a
 * score or priority -- confirm against the macro definition. The enclosing
 * START and STOP macros are not visible in this excerpt. */
47 SET_DESCRIPTION( "Altivec memcpy module" )
48 ADD_CAPABILITY( MEMCPY, 100 )
49 ADD_REQUIREMENT( ALTIVEC )
50 ADD_SHORTCUT( "altivec" )
51 ADD_SHORTCUT( "memcpyaltivec" )
/* Pass the module's memcpy function table to memcpy_getfunctions, which
 * stores the fast_memcpy pointer in it. */
55 memcpy_getfunctions( &p_module->p_functions->memcpy );
/* The deactivation section is empty: nothing sits between START and STOP. */
58 MODULE_DEACTIVATE_START
59 MODULE_DEACTIVATE_STOP
61 /* Following functions are local */
63 /*****************************************************************************
64 * Functions exported as capabilities. They are declared as static so that
65 * we don't pollute the namespace too much.
66 *****************************************************************************/
/* memcpy_getfunctions: publish the AltiVec copy routine.
 *  p_function_list: function table to fill in; it is dereferenced without a
 *  NULL check, so the caller must pass a valid pointer.
 * The only visible effect is setting functions.memcpy.pf_memcpy to
 * _M( fast_memcpy ). */
67 static void memcpy_getfunctions( function_list_t * p_function_list )
69 p_function_list->functions.memcpy.pf_memcpy = _M( fast_memcpy );
/* Identity version of the symbol-name wrapper: _M( fast_memcpy ) expands to
 * plain fast_memcpy. */
73 # define _M( toto ) toto
/* Local size_t declaration.
 * NOTE(review): presumably needed because the videolan/vlc.h include above is
 * guarded by the ifndef on __BUILD_ALTIVEC_ASM__ and is therefore skipped in
 * this configuration -- confirm; unsigned long is assumed to match the
 * target's real size_t. */
74 typedef unsigned long size_t;
75 #endif /* __BUILD_ALTIVEC_ASM__ */
/* The C-intrinsics implementation below is compiled either when
 * CAN_COMPILE_C_ALTIVEC is defined or when __BUILD_ALTIVEC_ASM__ is defined. */
77 #if defined(CAN_COMPILE_C_ALTIVEC) || defined( __BUILD_ALTIVEC_ASM__ )
/* Short aliases for the AltiVec "vector" types. Of these, only vector_u8_t
 * is referenced by the code visible in this file. */
79 #define vector_s16_t vector signed short
80 #define vector_u16_t vector unsigned short
81 #define vector_s8_t vector signed char
82 #define vector_u8_t vector unsigned char
83 #define vector_s32_t vector signed int
84 #define vector_u32_t vector unsigned int
/* SMALL_MEMCPY(to, from, len): helper used by fast_memcpy for the unaligned
 * head and for the tail of a copy. It starts by computing the end address
 * to + len.
 * NOTE(review): only the first two lines of the macro are visible in this
 * excerpt; the remainder is presumably a byte-by-byte loop running until
 * `to` reaches `end` -- confirm. The arguments are used unparenthesized, so
 * call sites should pass plain identifiers, as the visible ones do. */
87 #define SMALL_MEMCPY(to, from, len) \
89 unsigned char * end = to + len; \
/* fast_memcpy (C intrinsics version): copy len bytes from _from to _to,
 * moving the bulk of the data with AltiVec vector loads and stores.
 *
 * Visible strategy:
 *   1. byte-copy the bytes needed to bring the destination up to the next
 *      MMREG_SIZE boundary;
 *   2. move whole vectors with vec_ld, vec_perm and vec_st, where the
 *      permute compensates for a source address that is not vector-aligned;
 *   3. byte-copy whatever is left.
 *
 * NOTE(review): this excerpt omits the braces, the statements that advance
 * from and to and decrease len, and the return statement; the comments below
 * describe only the lines that are visible. MMREG_SIZE is defined elsewhere;
 * presumably 16, matching the hard-coded 15 in the vec_ld offsets -- confirm. */
96 void * _M( fast_memcpy )(void * _to, const void * _from, size_t len)
/* Byte-typed views of the two buffers. The cast on _from also drops its
 * const qualifier. */
99 unsigned char * to = (unsigned char *)_to;
100 unsigned char * from = (unsigned char *)_from;
104 /* Align destination to an MMREG_SIZE-byte boundary */
105 register unsigned long int delta;
/* Offset of the destination inside its MMREG_SIZE-sized block (the mask
 * assumes MMREG_SIZE is a power of two). */
107 delta = ((unsigned long)to)&(MMREG_SIZE-1);
/* Convert that offset into the number of bytes missing to reach the next
 * boundary, then copy exactly that many bytes with the scalar helper. */
110 delta = MMREG_SIZE - delta;
112 SMALL_MEMCPY(to, from, delta);
/* Take the vector path only if len still holds at least one full
 * MMREG_SIZE chunk. */
115 if( len & ~(MMREG_SIZE-1) )
117 vector_u8_t perm, ref0, ref1, tmp;
/* perm is computed once from the source address and reused for every
 * vec_perm below. vec_lvsl yields the permute mask that shifts data left by
 * the misalignment of its address operand. */
119 perm = vec_lvsl( 0, from );
/* vec_ld ignores the low four address bits, so offsets 0 and 15 fetch the
 * two aligned quadwords that together cover from[0..15] (the same quadword
 * twice when from is already aligned). */
120 ref0 = vec_ld( 0, from );
121 ref1 = vec_ld( 15, from );
/* Assemble the first 16 source bytes; they are stored on the next step. */
124 tmp = vec_perm( ref0, ref1, perm );
125 while( len & ~(MMREG_SIZE-1) )
/* Each pass loads the next pair of source quadwords, stores the vector
 * assembled on the previous pass, then assembles the next one. */
127 ref0 = vec_ld( 0, from );
128 ref1 = vec_ld( 15, from );
131 vec_st( tmp, 0, to );
132 tmp = vec_perm( ref0, ref1, perm );
/* Store the last vector assembled by the loop (or by the set-up code when
 * the loop body never ran). */
135 vec_st( tmp, 0, to );
/* Copy the remaining len bytes with the scalar helper. */
142 SMALL_MEMCPY( to, from, len );
150 #if !defined(CAN_COMPILE_C_ALTIVEC) && !defined(__BUILD_ALTIVEC_ASM__)
153 * The asm code is generated with:
155 * gcc-2.95 -fvec -D__BUILD_ALTIVEC_ASM__ -O9 -fomit-frame-pointer -mregnames -S memcpyaltivec.c
157 * sed 's/.L/._L/g' memcpyaltivec.s |
158 * awk '{args=""; len=split ($2, arg, ",");
159 * for (i=1; i<=len; i++) { a=arg[i]; if (i<len) a=a",";
160 * args = args sprintf ("%-6s", a) }
161 * printf ("\t\"\t%-16s%-24s\\n\"\n", $1, args) }' |
165 void * _M( fast_memcpy )(void * _to, const void * _from, size_t len)
168 " cmplwi %cr0, %r5, 16 \n"
171 " andi. %r0, %r3, 15 \n"
173 " subfic %r0, %r0, 16 \n"
174 " add %r11, %r3, %r0 \n"
175 " cmplw %cr0, %r3, %r11 \n"
176 " subf %r5, %r0, %r5 \n"
179 " lbz %r0, 0(%r4) \n"
180 " stb %r0, 0(%r9) \n"
181 " addi %r9, %r9, 1 \n"
182 " cmplw %cr0, %r9, %r11 \n"
183 " addi %r4, %r4, 1 \n"
186 " rlwinm. %r0, %r5, 0, 0, 27 \n"
188 " addi %r5, %r5, -16 \n"
190 " lvsl %v12, 0, %r4 \n"
191 " lvx %v1, 0, %r4 \n"
192 " lvx %v0, %r11, %r4 \n"
193 " rlwinm. %r0, %r5, 0, 0, 27 \n"
194 " vperm %v13, %v1, %v0, %v12 \n"
195 " addi %r4, %r4, 16 \n"
196 " bc 12, 2, ._L11 \n"
198 " addi %r5, %r5, -16 \n"
200 " lvx %v1, 0, %r4 \n"
201 " lvx %v0, %r11, %r4 \n"
202 " rlwinm. %r0, %r5, 0, 0, 27 \n"
203 " stvx %v13, 0, %r9 \n"
204 " vperm %v13, %v1, %v0, %v12 \n"
205 " addi %r4, %r4, 16 \n"
206 " addi %r9, %r9, 16 \n"
209 " stvx %v13, 0, %r9 \n"
210 " addi %r9, %r9, 16 \n"
212 " cmpwi %cr0, %r5, 0 \n"
214 " add %r5, %r9, %r5 \n"
215 " cmplw %cr0, %r9, %r5 \n"
218 " lbz %r0, 0(%r4) \n"
219 " stb %r0, 0(%r9) \n"
220 " addi %r9, %r9, 1 \n"
221 " cmplw %cr0, %r9, %r5 \n"
222 " addi %r4, %r4, 1 \n"
223 " bc 12, 0, ._L17 \n"