git.sesse.net Git - x264/blob - tools/checkasm-arm.S

   1 /****************************************************************************
   2  * checkasm-arm.S: assembly check tool
   3  *****************************************************************************
   4  * Copyright (C) 2015 x264 project
   5  *
   6  * Authors: Martin Storsjo <martin@martin.st>
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  21  *
  22  * This program is also available under a commercial proprietary license.
  23  * For more information, contact us at licensing@x264.com.
  24  *****************************************************************************/
  25
  26 #include "../common/arm/asm.S"
  27
  28 .section .rodata
  29 .align 4
  30 register_init:
  31 .quad 0x21f86d66c8ca00ce
  32 .quad 0x75b6ba21077c48ad
  33 .quad 0xed56bb2dcb3c7736
  34 .quad 0x8bda43d3fd1a7e06
  35 .quad 0xb64a9c9e5d318408
  36 .quad 0xdf9a54b303f1d3a3
  37 .quad 0x4a75479abd64e097
  38 .quad 0x249214109d5d1c88
  39
  40 error_message:
  41 .asciz "failed to preserve register"
  42
  43 .text
  44
  45 @ max number of args used by any x264 asm function.
  46 #define MAX_ARGS 15
  47
  48 #define ARG_STACK 4*(MAX_ARGS - 2)
  49
  50 .macro clobbercheck variant
  51 .equ pushed, 4*10
  52 function x264_checkasm_call_\variant
  53     push        {r4-r11, lr}
  54 .ifc \variant, neon
  55     vpush       {q4-q7}
  56 .equ pushed, pushed + 16*4
  57 .endif
  58
  59     movrel      r12, register_init
  60 .ifc \variant, neon
  61     vldm        r12, {q4-q7}
  62 .endif
  63     ldm         r12, {r4-r11}
  64
  65     push        {r1}
  66
  67     sub         sp,  sp,  #ARG_STACK
  68 .equ pos, 0
  69 .rept MAX_ARGS-2
  70     ldr         r12, [sp, #ARG_STACK + pushed + 8 + pos]
  71     str         r12, [sp, #pos]
  72 .equ pos, pos + 4
  73 .endr
  74
  75     mov         r12, r0
  76     mov         r0,  r2
  77     mov         r1,  r3
  78     ldrd        r2,  r3,  [sp, #ARG_STACK + pushed]
  79     blx         r12
  80     add         sp,  sp,  #ARG_STACK
  81     pop         {r2}
  82
  83     push        {r0, r1}
  84     movrel      r12, register_init
  85 .ifc \variant, neon
  86     vldm        r12, {q0-q3}
  87     veor        q0,  q0,  q4
  88     veor        q1,  q1,  q5
  89     veor        q2,  q2,  q6
  90     veor        q3,  q3,  q7
  91     vorr        q0,  q0,  q1
  92     vorr        q0,  q0,  q2
  93     vorr        q0,  q0,  q3
  94     vorr        d0,  d0,  d1
  95     vrev64.32   d1,  d0
  96     vorr        d0,  d0,  d1
  97     vmov.32     r3,  d0[0]
  98 .else
  99     mov         r3,  #0
 100 .endif
 101
 102 .macro check_reg reg1, reg2
 103     ldrd        r0,  r1,  [r12], #8
 104     eor         r0,  r0, \reg1
 105     eor         r1,  r1, \reg2
 106     orr         r3,  r3, r0
 107     orr         r3,  r3, r1
 108 .endm
 109     check_reg   r4,  r5
 110     check_reg   r6,  r7
 111     check_reg   r8,  r9
 112     check_reg   r10, r11
 113 .purgem check_reg
 114
 115     cmp         r3,  #0
 116     beq         0f
 117
 118     mov         r12, #0
 119     str         r12, [r2]
 120     movrel      r0, error_message
 121     bl          puts
 122 0:
 123     pop         {r0, r1}
 124 .ifc \variant, neon
 125     vpop        {q4-q7}
 126 .endif
 127     pop         {r4-r11, pc}
 128 endfunc
 129 .endm
 130
 131 clobbercheck neon
 132 clobbercheck noneon