]> git.sesse.net Git - vlc/blobdiff - modules/altivec/memcpy.c
Move Altivec memcpy to altivec directory
[vlc] / modules / altivec / memcpy.c
diff --git a/modules/altivec/memcpy.c b/modules/altivec/memcpy.c
new file mode 100644 (file)
index 0000000..2d91ea2
--- /dev/null
@@ -0,0 +1,223 @@
+/*****************************************************************************
+ * memcpy.c : AltiVec memcpy module
+ *****************************************************************************
+ * Copyright (C) 2001 the VideoLAN team
+ * $Id$
+ *
+ * Author: Christophe Massiot <massiot@via.ecp.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifndef __BUILD_ALTIVEC_ASM__
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_plugin.h>
+#include <vlc_cpu.h>
+
+#ifdef HAVE_ALTIVEC_H
+#   include <altivec.h>
+#endif
+
+/*****************************************************************************
+ * Local prototypes.
+ *****************************************************************************/
+static void * fast_memcpy ( void * to, const void * from, size_t len );
+
+/*****************************************************************************
+ * Module initializer.
+ *****************************************************************************/
+static int Activate ( vlc_object_t *p_this )
+{
+    VLC_UNUSED(p_this);
+    vlc_fastmem_register( fast_memcpy, NULL );
+    return VLC_SUCCESS;
+}
+
+/*****************************************************************************
+ * Module descriptor.
+ *****************************************************************************/
+vlc_module_begin ()
+    set_description( N_("AltiVec memcpy") )
+    set_category( CAT_ADVANCED )
+    set_subcategory( SUBCAT_ADVANCED_MISC )
+    set_capability( "memcpy", 100 )
+    set_callbacks( Activate, NULL )
+    add_shortcut( "altivec" )
+vlc_module_end ()
+
+#else
+typedef unsigned long size_t;
+#endif /* __BUILD_ALTIVEC_ASM__ */
+
+#if defined(CAN_COMPILE_C_ALTIVEC) || defined( __BUILD_ALTIVEC_ASM__ )
+
+#define vector_s16_t vector signed short
+#define vector_u16_t vector unsigned short
+#define vector_s8_t vector signed char
+#define vector_u8_t vector unsigned char
+#define vector_s32_t vector signed int
+#define vector_u32_t vector unsigned int
+#define MMREG_SIZE 16
+
+#define SMALL_MEMCPY(to, from, len)                                         \
+{                                                                           \
+    unsigned char * end = to + len;                                         \
+    while( to < end )                                                       \
+    {                                                                       \
+        *to++ = *from++;                                                    \
+    }                                                                       \
+}
+
+static void * fast_memcpy( void * _to, const void * _from, size_t len )
+{
+    void * retval = _to;
+    unsigned char * to = (unsigned char *)_to;
+    unsigned char * from = (unsigned char *)_from;
+
+    if( len > 16 )
+    {
+        /* Align destination to MMREG_SIZE -boundary */
+        register unsigned long int delta;
+
+        delta = ((unsigned long)to)&(MMREG_SIZE-1);
+        if( delta )
+        {
+            delta = MMREG_SIZE - delta;
+            len -= delta;
+            SMALL_MEMCPY(to, from, delta);
+        }
+
+        if( len & ~(MMREG_SIZE-1) )
+        {
+            vector_u8_t perm, ref0, ref1, tmp;
+
+            perm = vec_lvsl( 0, from );
+            ref0 = vec_ld( 0, from );
+            ref1 = vec_ld( 15, from );
+            from += 16;
+            len -= 16;
+            tmp = vec_perm( ref0, ref1, perm );
+            while( len & ~(MMREG_SIZE-1) )
+            {
+                ref0 = vec_ld( 0, from );
+                ref1 = vec_ld( 15, from );
+                from += 16;
+                len -= 16;
+                vec_st( tmp, 0, to );
+                tmp = vec_perm( ref0, ref1, perm );
+                to += 16;
+            }
+            vec_st( tmp, 0, to );
+            to += 16;
+        }
+    }
+
+    if( len )
+    {
+        SMALL_MEMCPY( to, from, len );
+    }
+
+    return retval;
+}
+
+#endif
+
+#if !defined(CAN_COMPILE_C_ALTIVEC) && !defined(__BUILD_ALTIVEC_ASM__)
+
+/*
+ * The asm code is generated with:
+ *
+ * gcc-2.95 -fvec -D__BUILD_ALTIVEC_ASM__ -O9 -fomit-frame-pointer -mregnames -S *      memcpyaltivec.c
+ *
+ * sed 's/.L/._L/g' memcpyaltivec.s |
+ * awk '{args=""; len=split ($2, arg, ",");
+ *      for (i=1; i<=len; i++) { a=arg[i]; if (i<len) a=a",";
+ *                               args = args sprintf ("%-6s", a) }
+ *      printf ("\t\"\t%-16s%-24s\\n\"\n", $1, args) }' |
+ * unexpand -a
+ */
+
+static void * fast_memcpy( void * _to, const void * _from, size_t len )
+{
+    asm ("                                              \n"
+    "    cmplwi        %cr0, %r5,  16        \n"
+    "    mr        %r9,  %r3        \n"
+    "    bc        4,    1,    ._L3    \n"
+    "    andi.        %r0,  %r3,  15        \n"
+    "    bc        12,   2,    ._L4    \n"
+    "    subfic        %r0,  %r0,  16        \n"
+    "    add        %r11, %r3,  %r0        \n"
+    "    cmplw        %cr0, %r3,  %r11    \n"
+    "    subf        %r5,  %r0,  %r5        \n"
+    "    bc        4,    0,    ._L4    \n"
+    "    ._L7:                    \n"
+    "    lbz        %r0,  0(%r4)        \n"
+    "    stb        %r0,  0(%r9)        \n"
+    "    addi        %r9,  %r9,  1        \n"
+    "    cmplw        %cr0, %r9,  %r11    \n"
+    "    addi        %r4,  %r4,  1        \n"
+    "    bc        12,   0,    ._L7    \n"
+    "    ._L4:                    \n"
+    "    rlwinm.        %r0,  %r5,  0,      0,    27    \n"
+    "    bc        12,   2,    ._L3    \n"
+    "    addi        %r5,  %r5,  -16        \n"
+    "    li        %r11, 15        \n"
+    "    lvsl        %v12, 0,    %r4        \n"
+    "    lvx        %v1,  0,    %r4        \n"
+    "    lvx        %v0,  %r11, %r4        \n"
+    "    rlwinm.        %r0,  %r5,  0,      0,    27    \n"
+    "    vperm        %v13, %v1,  %v0,  %v12    \n"
+    "    addi        %r4,  %r4,  16        \n"
+    "    bc        12,   2,    ._L11    \n"
+    "    ._L12:                    \n"
+    "    addi        %r5,  %r5,  -16        \n"
+    "    li        %r11, 15        \n"
+    "    lvx        %v1,  0,    %r4        \n"
+    "    lvx        %v0,  %r11, %r4        \n"
+    "    rlwinm.        %r0,  %r5,  0,      0,    27    \n"
+    "    stvx        %v13, 0,    %r9        \n"
+    "    vperm        %v13, %v1,  %v0,  %v12    \n"
+    "    addi        %r4,  %r4,  16        \n"
+    "    addi        %r9,  %r9,  16        \n"
+    "    bc        4,    2,    ._L12    \n"
+    "    ._L11:                    \n"
+    "    stvx        %v13, 0,    %r9        \n"
+    "    addi        %r9,  %r9,  16        \n"
+    "    ._L3:                    \n"
+    "    cmpwi        %cr0, %r5,  0        \n"
+    "    bclr        12,   2            \n"
+    "    add        %r5,  %r9,  %r5        \n"
+    "    cmplw        %cr0, %r9,  %r5        \n"
+    "    bclr        4,    0            \n"
+    "    ._L17:                    \n"
+    "    lbz        %r0,  0(%r4)        \n"
+    "    stb        %r0,  0(%r9)        \n"
+    "    addi        %r9,  %r9,  1        \n"
+    "    cmplw        %cr0, %r9,  %r5        \n"
+    "    addi        %r4,  %r4,  1        \n"
+    "    bc        12,   0,    ._L17    \n"
+        );
+}
+
+#endif