From 59683a97b50b34c6282457a959bb6b3e9e7f8c0d Mon Sep 17 00:00:00 2001
From: =?utf8?q?Martin=20Storsj=C3=B6?=
Date: Tue, 25 Aug 2015 14:38:20 +0300
Subject: [PATCH] checkasm: aarch64: Check register clobbering

Disable this on iOS, since it has got a slightly different ABI for
vararg parameters.
---
 Makefile                 |   1 +
 tools/checkasm-aarch64.S | 156 +++++++++++++++++++++++++++++++++++++++
 tools/checkasm.c         |   6 +-
 3 files changed, 162 insertions(+), 1 deletion(-)
 create mode 100644 tools/checkasm-aarch64.S

diff --git a/Makefile b/Makefile
index 4403a11b..4feef339 100644
--- a/Makefile
+++ b/Makefile
@@ -140,6 +140,7 @@ SRCS += common/aarch64/asm-offsets.c \
           common/aarch64/mc-c.c \
           common/aarch64/predict-c.c
 OBJASM = $(ASMSRC:%.S=%.o)
+OBJCHK += tools/checkasm-aarch64.o
 endif
 endif
 
diff --git a/tools/checkasm-aarch64.S b/tools/checkasm-aarch64.S
new file mode 100644
index 00000000..515c7270
--- /dev/null
+++ b/tools/checkasm-aarch64.S
@@ -0,0 +1,156 @@
+/****************************************************************************
+ * checkasm-aarch64.S: assembly check tool
+ *****************************************************************************
+ * Copyright (C) 2015 x264 project
+ *
+ * Authors: Martin Storsjo
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
+ *****************************************************************************/
+
+#include "../common/aarch64/asm.S"
+
+.section .rodata
+.align 4
+register_init:
+.quad 0x21f86d66c8ca00ce
+.quad 0x75b6ba21077c48ad
+.quad 0xed56bb2dcb3c7736
+.quad 0x8bda43d3fd1a7e06
+.quad 0xb64a9c9e5d318408
+.quad 0xdf9a54b303f1d3a3
+.quad 0x4a75479abd64e097
+.quad 0x249214109d5d1c88
+.quad 0x1a1b2550a612b48c
+.quad 0x79445c159ce79064
+.quad 0x2eed899d5a28ddcd
+.quad 0x86b2536fcd8cf636
+.quad 0xb0856806085e7943
+.quad 0x3f2bf84fc0fcca4e
+.quad 0xacbd382dcf5b8de2
+.quad 0xd229e1f5b281303f
+.quad 0x71aeaff20b095fd9
+.quad 0xab63e2e11fa38ed9
+
+
+error_message:
+.asciz "failed to preserve register"
+
+.text
+
+// max number of args used by any x264 asm function.
+#define MAX_ARGS 15
+
+#define ARG_STACK ((8*(MAX_ARGS - 6) + 15) & ~15)
+
+function x264_checkasm_call, export=1
+    stp  x29, x30, [sp, #-16]!
+    mov  x29, sp
+    stp  x19, x20, [sp, #-16]!
+    stp  x21, x22, [sp, #-16]!
+    stp  x23, x24, [sp, #-16]!
+    stp  x25, x26, [sp, #-16]!
+    stp  x27, x28, [sp, #-16]!
+    stp  d8,  d9,  [sp, #-16]!
+    stp  d10, d11, [sp, #-16]!
+    stp  d12, d13, [sp, #-16]!
+    stp  d14, d15, [sp, #-16]!
+
+    movrel x9, register_init
+    ldp  d8,  d9,  [x9], #16
+    ldp  d10, d11, [x9], #16
+    ldp  d12, d13, [x9], #16
+    ldp  d14, d15, [x9], #16
+    ldp  x19, x20, [x9], #16
+    ldp  x21, x22, [x9], #16
+    ldp  x23, x24, [x9], #16
+    ldp  x25, x26, [x9], #16
+    ldp  x27, x28, [x9], #16
+
+    str  x1, [sp, #-16]!
+
+    sub  sp, sp, #ARG_STACK
+.equ pos, 0
+// first two stacked args are copied to x6, x7
+.rept MAX_ARGS-6
+    ldr  x9, [x29, #16 + 16 + pos]
+    str  x9, [sp, #pos]
+.equ pos, pos + 8
+.endr
+
+    mov  x12, x0
+    mov  x0,  x2
+    mov  x1,  x3
+    mov  x2,  x4
+    mov  x3,  x5
+    mov  x4,  x6
+    mov  x5,  x7
+    ldp  x6, x7, [x29, #16]
+    blr  x12
+    add  sp, sp, #ARG_STACK
+    ldr  x2, [sp]
+    stp  x0, x1, [sp]
+    movrel x9, register_init
+    movi v3.8h, #0
+
+.macro check_reg_neon reg1, reg2
+    ldr  q0, [x9], #16
+    uzp1 v1.2d, v\reg1\().2d, v\reg2\().2d
+    eor  v0.16b, v0.16b, v1.16b
+    orr  v3.16b, v3.16b, v0.16b
+.endm
+    check_reg_neon 8,  9
+    check_reg_neon 10, 11
+    check_reg_neon 12, 13
+    check_reg_neon 14, 15
+    uqxtn v3.8b, v3.8h
+    umov x3, v3.d[0]
+
+.macro check_reg reg1, reg2
+    ldp  x0, x1, [x9], #16
+    eor  x0, x0, \reg1
+    eor  x1, x1, \reg2
+    orr  x3, x3, x0
+    orr  x3, x3, x1
+.endm
+    check_reg x19, x20
+    check_reg x21, x22
+    check_reg x23, x24
+    check_reg x25, x26
+    check_reg x27, x28
+
+    cbz  x3, 0f
+
+    mov  w9, #0
+    str  w9, [x2]
+    movrel x0, error_message
+    bl   puts
+0:
+    ldp  x0, x1, [sp], #16
+    ldp  d14, d15, [sp], #16
+    ldp  d12, d13, [sp], #16
+    ldp  d10, d11, [sp], #16
+    ldp  d8,  d9,  [sp], #16
+    ldp  x27, x28, [sp], #16
+    ldp  x25, x26, [sp], #16
+    ldp  x23, x24, [sp], #16
+    ldp  x21, x22, [sp], #16
+    ldp  x19, x20, [sp], #16
+    ldp  x29, x30, [sp], #16
+    ret
+endfunc
diff --git a/tools/checkasm.c b/tools/checkasm.c
index f4971dfd..183cef5f 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -227,6 +227,10 @@ intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
 #define x264_stack_pagealign( func, align ) func()
 #endif
 
+#if ARCH_AARCH64
+intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
+#endif
+
 #define call_c1(func,...) func(__VA_ARGS__)
 
 #if ARCH_X86_64
@@ -244,7 +248,7 @@ void x264_checkasm_stack_clobber( uint64_t clobber, ... );
         uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \
         x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \
         x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); })
-#elif ARCH_X86
+#elif ARCH_X86 || (ARCH_AARCH64 && !defined(__APPLE__))
 #define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ )
 #else
 #define call_a1 call_c1
-- 
2.39.5
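For reference, the snippet below is a minimal sketch, not part of the patch, of how the wrapper added above is driven from C. In the real tree the call goes through the call_a1() macro shown in the last hunk; here a tiny standalone program is used instead. It assumes a non-Apple AAPCS64 target (the same condition as the !defined(__APPLE__) guard, since Apple passes variadic arguments on the stack) and assumes it is linked against the object assembled from tools/checkasm-aarch64.S; the add2 function and main() are made up purely for illustration.

#include <stdint.h>
#include <stdio.h>

/* Same declaration that the patch adds to tools/checkasm.c for ARCH_AARCH64. */
intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );

/* Hypothetical stand-in for an assembly routine under test; a plain C
 * function preserves the callee-saved registers, so the check should pass. */
static intptr_t add2( intptr_t a, intptr_t b )
{
    return a + b;
}

int main( void )
{
    int ok = 1;
    /* Same shape as call_a1(): func, &ok, then the function's own arguments.
     * The wrapper seeds x19-x28/d8-d15 from register_init, forwards the
     * arguments to func, then compares the registers afterwards; on any
     * mismatch it clears ok and prints "failed to preserve register". */
    intptr_t r = x264_checkasm_call( (intptr_t(*)())add2, &ok,
                                     (intptr_t)2, (intptr_t)3 );
    printf( "result=%ld, registers preserved: %s\n", (long)r, ok ? "yes" : "no" );
    return !(ok && r == 5);
}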