]> git.sesse.net Git - vlc/blob - plugins/memcpy/memcpyaltivec.c
44a389d7fff2423f3b6caef8d0c01608d938eb16
[vlc] / plugins / memcpy / memcpyaltivec.c
1 /*****************************************************************************
2  * memcpyaltivec.c : Altivec memcpy module
3  *****************************************************************************
4  * Copyright (C) 2001 VideoLAN
5  * $Id: memcpyaltivec.c,v 1.5 2002/04/19 13:56:11 sam Exp $
6  *
7  * Authors: Christophe Massiot <massiot@via.ecp.fr>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  * 
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 #ifndef __BUILD_ALTIVEC_ASM__
25
26 /*****************************************************************************
27  * Preamble
28  *****************************************************************************/
29 #include <stdlib.h>
30 #include <string.h>
31
32 #include <videolan/vlc.h>
33
34 /*****************************************************************************
35  * Local and extern prototypes.
36  *****************************************************************************/
/* Fills in the memcpy function list handed over at module activation;
 * defined further down in this file. */
37 static void memcpy_getfunctions( function_list_t * p_function_list );
/* The copy routine this module provides. _M() is supplied from outside this
 * file in the plugin build (presumably it adds a per-module name prefix --
 * TODO confirm); in the __BUILD_ALTIVEC_ASM__ build it is the identity
 * macro. */
38 void *      _M( fast_memcpy )  ( void * to, const void * from, size_t len );
39
40 /*****************************************************************************
41  * Build configuration tree.
42  *****************************************************************************/
/* Configuration section: nothing is declared between the START and STOP
 * macros. */
43 MODULE_CONFIG_START
44 MODULE_CONFIG_STOP
45
/* Module registration. The macros are defined outside this file (presumably
 * in <videolan/vlc.h>); read literally they set the description string,
 * declare the MEMCPY capability with the value 100, require ALTIVEC and add
 * two shortcut names.
 * NOTE(review): the meaning of 100 (likely a score/priority) is not visible
 * here -- confirm against the macro definitions. */
46 MODULE_INIT_START
47     SET_DESCRIPTION( _("AltiVec memcpy module") )
48     ADD_CAPABILITY( MEMCPY, 100 )
49     ADD_REQUIREMENT( ALTIVEC )
50     ADD_SHORTCUT( "altivec" )
51     ADD_SHORTCUT( "memcpyaltivec" )
52 MODULE_INIT_STOP
53
/* Activation: passes the module's memcpy function list to
 * memcpy_getfunctions(), which stores the fast_memcpy pointer in it. */
54 MODULE_ACTIVATE_START
55     memcpy_getfunctions( &p_module->p_functions->memcpy );
56 MODULE_ACTIVATE_STOP
57
/* Deactivation: no statements between START and STOP. */
58 MODULE_DEACTIVATE_START
59 MODULE_DEACTIVATE_STOP
60
61 /* Following functions are local */
62
63 /*****************************************************************************
64  * Functions exported as capabilities. They are declared as static so that
65  * we don't pollute the namespace too much.
66  *****************************************************************************/
67 static void memcpy_getfunctions( function_list_t * p_function_list )
68 {
69     p_function_list->functions.memcpy.pf_memcpy = _M( fast_memcpy );
70 }
71
72 #else
/* __BUILD_ALTIVEC_ASM__ build: none of the headers above are included on
 * this path, so the two names the copy routine needs are supplied here.
 * _M() leaves the function name unchanged. */
73 #   define _M( toto ) toto
74 typedef unsigned long size_t;
75 #endif /* __BUILD_ALTIVEC_ASM__ */
76
77 #if defined(CAN_COMPILE_C_ALTIVEC) || defined( __BUILD_ALTIVEC_ASM__ )
78
/* Shorthand names for the AltiVec vector types. Only vector_u8_t is used in
 * this file. */
79 #define vector_s16_t vector signed short
80 #define vector_u16_t vector unsigned short
81 #define vector_s8_t vector signed char
82 #define vector_u8_t vector unsigned char
83 #define vector_s32_t vector signed int
84 #define vector_u32_t vector unsigned int
/* 16 bytes: the alignment fast_memcpy establishes for the destination before
 * it starts storing whole vectors. */
85 #define MMREG_SIZE 16
86
/*
 * SMALL_MEMCPY: copy len bytes from `from` to `to`, one byte at a time.
 *
 * `to` and `from` must be modifiable byte-pointer lvalues: both are ADVANCED
 * by len as a side effect, and callers rely on that to continue copying
 * where this macro stopped. `len` is evaluated exactly once; `to` and `from`
 * are evaluated several times, so do not pass expressions with side effects.
 *
 * The body is wrapped in do { } while( 0 ) so that an invocation followed by
 * a semicolon is a single statement and can be used in an unbraced if/else.
 * The local end pointer has a deliberately unusual name so that it cannot
 * capture a caller variable passed as one of the arguments.
 */
#define SMALL_MEMCPY(to, from, len)                                         \
do                                                                          \
{                                                                           \
    unsigned char * const p_small_memcpy_end_ = (to) + (len);               \
    while( (to) < p_small_memcpy_end_ )                                     \
    {                                                                       \
        *(to)++ = *(from)++;                                                \
    }                                                                       \
} while( 0 )
95
96 void * _M( fast_memcpy )(void * _to, const void * _from, size_t len)
97 {
98     void * retval = _to;
99     unsigned char * to = (unsigned char *)_to;
100     unsigned char * from = (unsigned char *)_from;
101
102     if( len > 16 )
103     {
104         /* Align destination to MMREG_SIZE -boundary */
105         register unsigned long int delta;
106
107         delta = ((unsigned long)to)&(MMREG_SIZE-1);
108         if( delta )
109         {
110             delta = MMREG_SIZE - delta;
111             len -= delta;
112             SMALL_MEMCPY(to, from, delta);
113         }
114
115         if( len & ~(MMREG_SIZE-1) )
116         {
117             vector_u8_t perm, ref0, ref1, tmp;
118
119             perm = vec_lvsl( 0, from );
120             ref0 = vec_ld( 0, from );
121             ref1 = vec_ld( 15, from );
122             from += 16;
123             len -= 16;
124             tmp = vec_perm( ref0, ref1, perm );
125             while( len & ~(MMREG_SIZE-1) )
126             {
127                 ref0 = vec_ld( 0, from );
128                 ref1 = vec_ld( 15, from );
129                 from += 16;
130                 len -= 16;
131                 vec_st( tmp, 0, to );
132                 tmp = vec_perm( ref0, ref1, perm );
133                 to += 16;
134             }
135             vec_st( tmp, 0, to );
136             to += 16;
137         }
138     }
139
140     if( len )
141     {
142         SMALL_MEMCPY( to, from, len );
143     }
144
145     return retval;
146 }
147
148 #endif
149
150 #if !defined(CAN_COMPILE_C_ALTIVEC) && !defined(__BUILD_ALTIVEC_ASM__)
151
152 /*
153  * The asm code is generated with:
154  *
155  * gcc-2.95 -fvec -D__BUILD_ALTIVEC_ASM__ -O9 -fomit-frame-pointer -mregnames -S memcpyaltivec.c
156  *
157  * sed 's/.L/._L/g' memcpyaltivec.s |
158  * awk '{args=""; len=split ($2, arg, ",");
159  *      for (i=1; i<=len; i++) { a=arg[i]; if (i<len) a=a",";
160  *                               args = args sprintf ("%-6s", a) }
161  *      printf ("\t\"\t%-16s%-24s\\n\"\n", $1, args) }' |
162  * unexpand -a
163  */
164
/*
 * Pre-generated PowerPC/AltiVec assembly version of fast_memcpy, compiled
 * only when neither CAN_COMPILE_C_ALTIVEC nor __BUILD_ALTIVEC_ASM__ is
 * defined.
 *
 * The asm statement has no operands and no clobber list. It uses r3 as the
 * destination address, r4 as the source address and r5 as the byte count,
 * i.e. it assumes the three arguments arrive in those registers, and it
 * overwrites r0, r4, r5, r9, r11, cr0 and v0, v1, v12, v13. r3 is only
 * read and there is no C return statement, so the value returned is
 * whatever r3 holds on exit -- presumably still _to under the PowerPC
 * calling convention.
 * NOTE(review): the two bclr instructions return to the caller from inside
 * the asm statement, which looks like it requires the compiler to emit no
 * stack frame for this function -- confirm against the build flags.
 */
165 void * _M( fast_memcpy )(void * _to, const void * _from, size_t len)
166 {
167     asm ("                                              \n"                     
        /* len <= 16: go straight to the byte-wise tail at ._L3.
         * r9 becomes the working destination pointer. */
168         "       cmplwi          %cr0, %r5,  16          \n"
169         "       mr              %r9,  %r3               \n"
170         "       bc              4,    1,    ._L3        \n"
        /* Destination already 16-byte aligned: skip to ._L4. Otherwise
         * copy 16 - (to & 15) bytes one at a time (loop ._L7) and take
         * them off the length. */
171         "       andi.           %r0,  %r3,  15          \n"
172         "       bc              12,   2,    ._L4        \n"
173         "       subfic          %r0,  %r0,  16          \n"
174         "       add             %r11, %r3,  %r0         \n"
175         "       cmplw           %cr0, %r3,  %r11        \n"
176         "       subf            %r5,  %r0,  %r5         \n"
177         "       bc              4,    0,    ._L4        \n"
178         "       ._L7:                                   \n"
179         "       lbz             %r0,  0(%r4)            \n"
180         "       stb             %r0,  0(%r9)            \n"
181         "       addi            %r9,  %r9,  1           \n"
182         "       cmplw           %cr0, %r9,  %r11        \n"
183         "       addi            %r4,  %r4,  1           \n"
184         "       bc              12,   0,    ._L7        \n"
        /* Fewer than 16 bytes left (len with its low four bits cleared is
         * zero): go to the tail. */
185         "       ._L4:                                   \n"
186         "       rlwinm.         %r0,  %r5,  0,    0,    27    \n"
187         "       bc              12,   2,    ._L3        \n"
        /* First source vector: lvsl yields the permute mask (v12), the two
         * lvx loads at offsets 0 and 15 are merged by vperm into v13. */
188         "       addi            %r5,  %r5,  -16         \n"
189         "       li              %r11, 15                \n"
190         "       lvsl            %v12, 0,    %r4         \n"
191         "       lvx             %v1,  0,    %r4         \n"
192         "       lvx             %v0,  %r11, %r4         \n"
193         "       rlwinm.         %r0,  %r5,  0,    0,    27    \n"
194         "       vperm           %v13, %v1,  %v0,  %v12  \n"
195         "       addi            %r4,  %r4,  16          \n"
196         "       bc              12,   2,    ._L11       \n"
        /* Main loop: load the next 16 source bytes, store the vector
         * assembled in the previous step, then assemble the new one. */
197         "       ._L12:                                  \n"
198         "       addi            %r5,  %r5,  -16         \n"
199         "       li              %r11, 15                \n"
200         "       lvx             %v1,  0,    %r4         \n"
201         "       lvx             %v0,  %r11, %r4         \n"
202         "       rlwinm.         %r0,  %r5,  0,    0,    27    \n"
203         "       stvx            %v13, 0,    %r9         \n"
204         "       vperm           %v13, %v1,  %v0,  %v12  \n"
205         "       addi            %r4,  %r4,  16          \n"
206         "       addi            %r9,  %r9,  16          \n"
207         "       bc              4,    2,    ._L12       \n"
        /* Store the last pending vector. */
208         "       ._L11:                                  \n"
209         "       stvx            %v13, 0,    %r9         \n"
210         "       addi            %r9,  %r9,  16          \n"
        /* Tail: return if no bytes remain, otherwise copy the rest one
         * byte at a time (loop ._L17) up to r9 + r5. */
211         "       ._L3:                                   \n"
212         "       cmpwi           %cr0, %r5,  0           \n"
213         "       bclr            12,   2                 \n"
214         "       add             %r5,  %r9,  %r5         \n"
215         "       cmplw           %cr0, %r9,  %r5         \n"
216         "       bclr            4,    0                 \n"
217         "       ._L17:                                  \n"
218         "       lbz             %r0,  0(%r4)            \n"
219         "       stb             %r0,  0(%r9)            \n"
220         "       addi            %r9,  %r9,  1           \n"
221         "       cmplw           %cr0, %r9,  %r5         \n"
222         "       addi            %r4,  %r4,  1           \n"
223         "       bc              12,   0,    ._L17       \n"
224         );
225 }
226
227 #endif