1 ;*****************************************************************************
3 ;*****************************************************************************
4 ;* Copyright (C) 2008 Loren Merritt <lorenm@u.washington.edu>
6 ;* This program is free software; you can redistribute it and/or modify
7 ;* it under the terms of the GNU General Public License as published by
8 ;* the Free Software Foundation; either version 2 of the License, or
9 ;* (at your option) any later version.
11 ;* This program is distributed in the hope that it will be useful,
12 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;* GNU General Public License for more details.
16 ;* You should have received a copy of the GNU General Public License
17 ;* along with this program; if not, write to the Free Software
18 ;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
19 ;*****************************************************************************
26 %include "x86inc-64.asm"
28 %include "x86inc-32.asm"
31 ; Macros to eliminate most code duplication between x86_32 and x86_64:
32 ; Currently this works only for leaf functions which load all their arguments
33 ; into registers at the start, and make no other use of the stack. Luckily that
34 ; covers most of x264's asm.
37 ; %1 = number of arguments. loads them from stack if needed.
38 ; %2 = number of registers used, not including PIC. pushes callee-saved regs if needed.
39 ; %3 = whether global constants are used in this function. inits x86_32 PIC if needed.
40 ; PROLOGUE can also be invoked by adding the same options to cglobal
42 ; TODO Some functions can use some args directly from the stack. If they're the
43 ; last args then you can just not declare them, but if they're in the middle
44 ; we need a more flexible macro.
47 ; Pops anything that was pushed by PROLOGUE
50 ; Same, but if it doesn't pop anything it becomes a 2-byte ret, for Athlons,
51 ; which are slow when a normal ret follows a branch.
57 ; no r%1b, because some regs don't have a byte form, and anyway x264 doesn't need it
62 %macro DECLARE_REG_SIZE 1
90 %assign stack_offset stack_offset+push_size
95 %assign stack_offset stack_offset-push_size
101 %assign stack_offset stack_offset+(%2)
108 %assign stack_offset stack_offset-(%2)
118 %macro movsxdifnidn 2
130 %ifdef WIN64 ;================================================================
; Win64 register map.
; Indices 0-3 name the four integer argument registers of the Microsoft x64
; convention (rcx, rdx, r8, r9); their last field is the 32-bit register name.
; Indices 4-6 instead give a stack slot as the last field: 40, 48 and 56 bytes
; above rsp, i.e. 8 bytes apart starting past the return address and the
; caller's 32-byte shadow space. stack_offset (adjusted elsewhere in this file
; as the stack pointer moves) is added into each slot address.
; NOTE(review): DECLARE_REG's body is not visible in this excerpt; the field
; order is presumably (index, 64-bit, 32-bit, 16-bit, argument location) --
; confirm against the macro definition.
; NOTE(review): rdi and rsi are callee-saved under the Microsoft x64 ABI, so
; the Win64 PROLOGUE/RET must preserve them whenever r4/r5 are used -- the
; macro bodies are not visible here, confirm.
132 DECLARE_REG 0, rcx, ecx, cx, ecx
133 DECLARE_REG 1, rdx, edx, dx, edx
134 DECLARE_REG 2, r8, r8d, r8w, r8d
135 DECLARE_REG 3, r9, r9d, r9w, r9d
136 DECLARE_REG 4, rdi, edi, di, [rsp + stack_offset + 40]
137 DECLARE_REG 5, rsi, esi, si, [rsp + stack_offset + 48]
138 DECLARE_REG 6, rax, eax, ax, [rsp + stack_offset + 56]
; Index 7 gets only a memory-operand alias (the next 8-byte slot), no register.
139 %define r7m [rsp + stack_offset + 64]
141 %macro LOAD_IF_USED 2 ; reg_id, number_of_args
143 mov r%1, [rsp + 8 + %1*8]
150 %assign stack_offset 0
164 %elifdef ARCH_X86_64 ;========================================================
; System V AMD64 register map.
; Indices 0-5 name the six integer argument registers of that convention in
; order (rdi, rsi, rdx, rcx, r8, r9); their last field is the 32-bit register
; name. Index 6 pairs rax with a stack slot 8 bytes above rsp, i.e. the first
; stack-passed argument just past the return address. stack_offset (adjusted
; elsewhere in this file as the stack pointer moves) is added into the slot
; address.
; NOTE(review): DECLARE_REG's body is not visible in this excerpt; the field
; order is presumably (index, 64-bit, 32-bit, 16-bit, argument location) --
; confirm against the macro definition.
166 DECLARE_REG 0, rdi, edi, di, edi
167 DECLARE_REG 1, rsi, esi, si, esi
168 DECLARE_REG 2, rdx, edx, dx, edx
169 DECLARE_REG 3, rcx, ecx, cx, ecx
170 DECLARE_REG 4, r8, r8d, r8w, r8d
171 DECLARE_REG 5, r9, r9d, r9w, r9d
172 DECLARE_REG 6, rax, eax, ax, [rsp + stack_offset + 8]
; Index 7 gets only a memory-operand alias (the next 8-byte slot), no register.
173 %define r7m [rsp + stack_offset + 16]
175 %macro LOAD_IF_USED 2 ; reg_id, number_of_args
177 mov r%1, [rsp - 40 + %1*8]
184 %assign stack_offset 0
196 %else ; X86_32 ;==============================================================
; x86_32 register map.
; Every index carries a stack slot as its last field: slots are 4 bytes apart
; starting at esp+4, each with stack_offset (adjusted elsewhere in this file
; as the stack pointer moves) added in. With only 32-bit registers available,
; the second and third fields repeat the same name.
; NOTE(review): DECLARE_REG's body is not visible in this excerpt; the field
; order is presumably (index, native, 32-bit, 16-bit, argument location) --
; confirm against the macro definition.
; NOTE(review): r3-r6 (ebx, esi, edi, ebp) are presumably the callee-saved
; registers handled by PUSH_IF_USED/POP_IF_USED; those macro bodies are not
; visible here -- confirm.
198 DECLARE_REG 0, eax, eax, ax, [esp + stack_offset + 4]
199 DECLARE_REG 1, ecx, ecx, cx, [esp + stack_offset + 8]
200 DECLARE_REG 2, edx, edx, dx, [esp + stack_offset + 12]
201 DECLARE_REG 3, ebx, ebx, bx, [esp + stack_offset + 16]
202 DECLARE_REG 4, esi, esi, si, [esp + stack_offset + 20]
203 DECLARE_REG 5, edi, edi, di, [esp + stack_offset + 24]
204 DECLARE_REG 6, ebp, ebp, bp, [esp + stack_offset + 28]
; Index 7 gets only a memory-operand alias (the next 4-byte slot), no register.
205 %define r7m [esp + stack_offset + 32]
208 %macro PUSH_IF_USED 1 ; reg_id
211 %assign stack_offset stack_offset+4
215 %macro POP_IF_USED 1 ; reg_id
221 %macro LOAD_IF_USED 2 ; reg_id, number_of_args
223 mov r%1, [esp + stack_offset + 4 + %1*4]
229 %assign stack_offset 0
233 %assign regs_used regs_used+1
236 ASSERT regs_used <= 7
269 %endif ;======================================================================
273 ;=============================================================================
274 ; arch-independent part
275 ;=============================================================================
; Default value of function_align.
; NOTE(review): presumably the byte alignment applied to function entry points
; by the function-declaration macro; the consumer is not visible in this
; excerpt -- confirm.
277 %assign function_align 16
279 ; Symbol prefix for C linkage
281 %ifidn __OUTPUT_FORMAT__,elf
283 global _%1:function hidden
286 global %1:function hidden
322 ; This is needed for ELF, otherwise the GNU linker assumes the stack is
323 ; executable by default.
324 %ifidn __OUTPUT_FORMAT__,elf
325 SECTION ".note.GNU-stack" noalloc noexec nowrite progbits
; Assemble-time stride constants.
; NOTE(review): presumably the row strides, in bytes, of x264's fenc (source)
; and fdec (reconstructed) macroblock buffers, which would have to match the
; values used on the C side -- not verifiable from this excerpt, confirm.
328 %assign FENC_STRIDE 16
329 %assign FDEC_STRIDE 32