1 ;*****************************************************************************
2 ;* x86inc-64.asm: h264 encoder library
3 ;*****************************************************************************
4 ;* Copyright (C) 2005-2008 x264 project
6 ;* Authors: Andrew Dunstan
8 ;* This program is free software; you can redistribute it and/or modify
9 ;* it under the terms of the GNU General Public License as published by
10 ;* the Free Software Foundation; either version 2 of the License, or
11 ;* (at your option) any later version.
13 ;* This program is distributed in the hope that it will be useful,
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;* GNU General Public License for more details.
18 ;* You should have received a copy of the GNU General Public License
19 ;* along with this program; if not, write to the Free Software
20 ;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
21 ;*****************************************************************************
25 ; FIXME: All of the 64-bit asm functions that take a stride as an argument
26 ; via register assume that the high dword of that register is filled with 0.
27 ; This is true in practice (since we never do any 64-bit arithmetic on strides,
28 ; and x264's strides are all positive), but is not guaranteed by the ABI.
30 ; Name of the .rodata section. On OS X we cannot use .rodata because YASM
31 ; is unable to compute address offsets outside of .text, so we use the .text
32 ; section instead until YASM is fixed.
; SECTION_RODATA (takes no arguments): switches to the section used for
; read-only data, aligned to 16 bytes.
33 %macro SECTION_RODATA 0
; When the output format is macho64, the data goes into .text.
34 %ifidn __OUTPUT_FORMAT__,macho64
35 SECTION .text align=16
; NOTE(review): presumably the %else branch, i.e. every other output format
; uses a real .rodata section -- confirm against the full macro.
37 SECTION .rodata align=16
; Arguments 5-8 as qword memory operands at fixed offsets from rsp.
; NOTE(review): offsets 40..64 look like return address (8 bytes) plus the
; 32-byte Win64 shadow space, which would make them valid only while rsp still
; holds its function-entry value -- confirm against the enclosing conditional.
67 %define parm5q [rsp+40]
68 %define parm6q [rsp+48]
69 %define parm7q [rsp+56]
70 %define parm8q [rsp+64]
; dword-sized accesses to the same four argument slots.
75 %define parm5d dword parm5q
76 %define parm6d dword parm6q
77 %define parm7d dword parm7q
78 %define parm8d dword parm8q
; unwindcode(count, code): defines the local label .unwind<count> (the number
; is pasted onto the name with %+) as the constant `code` via EQU. The labels
; are referenced later as `.unwind %+ unwindcount` when the codes are emitted.
90 %define unwindcode(count, code) .unwind %+ count EQU code
; regcode<reg>: numeric code for a register name. The unwind-code lines paste
; their register argument onto this prefix (regcode%1) and shift the result
; into bits 12-15 of the unwind code.
; For the registers listed here the code equals the register's own number.
102 %define regcoder10 10
103 %define regcoder11 11
104 %define regcoder12 12
105 %define regcoder13 13
106 %define regcoder14 14
107 %define regcoder15 15
; xmm registers use the same 0-15 numbering.
108 %define regcodexmm0 0
109 %define regcodexmm1 1
110 %define regcodexmm2 2
111 %define regcodexmm3 3
112 %define regcodexmm4 4
113 %define regcodexmm5 5
114 %define regcodexmm6 6
115 %define regcodexmm7 7
116 %define regcodexmm8 8
117 %define regcodexmm9 9
118 %define regcodexmm10 10
119 %define regcodexmm11 11
120 %define regcodexmm12 12
121 %define regcodexmm13 13
122 %define regcodexmm14 14
123 %define regcodexmm15 15
; Unwind codes describing a stack allocation of %1 bytes. Every unwindcode()
; call is preceded by an increment of unwindcount, so each 16-bit slot gets
; its own .unwind<N> label. In every slot that carries an operation, the low
; part is $-.startfunc (the current offset from .startfunc).
; NOTE(review): three alternative encodings follow; confirm in the enclosing
; %if conditions which allocation sizes select which encoding.
127 %error Stack Allocation must be at least 8 bytes.
; One-slot form: operation 2 in bits 8-11 and (%1-8)/8 in bits 12-15.
; NOTE(review): presumably UWOP_ALLOC_SMALL of the Win64 unwind format -- confirm.
129 %assign unwindcount unwindcount+1
130 unwindcode(unwindcount, $-.startfunc + 0x200 + (((%1-8)/8)<<12))
; Two-slot form: %1/8 in a slot of its own, then operation 1 with bits 12-15
; left at 0.
; NOTE(review): presumably UWOP_ALLOC_LARGE with a scaled 16-bit size -- confirm.
132 %assign unwindcount unwindcount+1
133 unwindcode(unwindcount, %1/8)
134 %assign unwindcount unwindcount+1
135 unwindcode(unwindcount, $-.startfunc + 0x100)
; Three-slot form: the high and low 16-bit halves of %1 in two slots, then
; operation 1 with 1 in bits 12-15.
; NOTE(review): presumably UWOP_ALLOC_LARGE with an unscaled 32-bit size -- confirm.
137 %assign unwindcount unwindcount+1
138 unwindcode(unwindcount, %1>>16)
139 %assign unwindcount unwindcount+1
140 unwindcode(unwindcount, %1 & 0x0000FFFF)
141 %assign unwindcount unwindcount+1
142 unwindcode(unwindcount, $-.startfunc + 0x1100)
; Single unwind code for register %1: offset from .startfunc in the low part,
; operation 0 in bits 8-11, and the register's regcode in bits 12-15.
; NOTE(review): operation 0 is presumably UWOP_PUSH_NONVOL (register pushed
; in the prolog) -- confirm against the enclosing macro.
147 %assign unwindcount unwindcount+1
148 unwindcode(unwindcount, $-.startfunc + 0 + (regcode%1 << 12))
; Frame-register unwind code: %1 is the frame register, %2 the frame offset in
; bytes. Offsets that are not a multiple of 16, or lie outside 0..240, are
; rejected with an error.
152 %if ((%2 % 16) | (%2 > 240) | (%2 < 0))
153 %error Frame offset must be a multiple of 16 between 0 and 240.
; Operation 3 in bits 8-11 with the frame register's regcode in bits 12-15.
; NOTE(review): operation 3 is presumably UWOP_SET_FPREG -- confirm.
155 %assign unwindcount unwindcount+1
156 unwindcode(unwindcount, $-.startfunc + (3 << 8 )+ (regcode%1 << 12))
; framereg = register code + offset. Because the offset is a multiple of 16 it
; only occupies bits 4 and up, leaving the low nibble for the register code.
157 %assign framereg regcode%1 + %2
; Unwind codes that associate register %1 with byte offset %2.
; The offset must be a non-negative multiple of 8, otherwise an error is raised.
; NOTE(review): two alternative encodings follow; confirm in the enclosing %if
; which offsets select which encoding.
161 %if ((%2 % 8) | (%2 < 0))
162 %error Offset must be a positive multiple of 8.
; Short form: %2/8 in a slot of its own, then operation 4 with the register's
; regcode in bits 12-15.
; NOTE(review): presumably UWOP_SAVE_NONVOL -- confirm.
165 %assign unwindcount unwindcount+1
166 unwindcode(unwindcount, %2/8)
167 %assign unwindcount unwindcount +1
168 unwindcode(unwindcount, $-.startfunc + (4 << 8) + (regcode%1 << 12))
; Long form: the high and low 16-bit halves of the unscaled offset in two
; slots, then operation 5.
; NOTE(review): presumably UWOP_SAVE_NONVOL_FAR -- confirm.
170 %assign unwindcount unwindcount+1
171 unwindcode(unwindcount, %2 >> 16)
172 %assign unwindcount unwindcount+1
173 unwindcode(unwindcount, %2 & 0x0000FFFF)
174 %assign unwindcount unwindcount+1
175 unwindcode(unwindcount, $-.startfunc + (5 << 8) + (regcode%1 << 12))
; Unwind codes that associate register %1 with byte offset %2, using a
; 16-byte offset granularity.
; The offset must be a non-negative multiple of 16, otherwise an error is raised.
; NOTE(review): two alternative encodings follow; confirm in the enclosing %if
; which offsets select which encoding.
180 %if ((%2 % 16) | (%2 < 0))
181 %error Offset must be a positive multiple of 16.
; Short form: %2/16 in a slot of its own, then operation 8 with the register's
; regcode in bits 12-15.
; NOTE(review): presumably UWOP_SAVE_XMM128 -- confirm.
184 %assign unwindcount unwindcount+1
185 unwindcode(unwindcount, %2/16)
186 %assign unwindcount unwindcount +1
187 unwindcode(unwindcount, $-.startfunc + (8 << 8) + (regcode%1 << 12))
; Long form: the high and low 16-bit halves of the unscaled offset in two
; slots, then operation 9.
; NOTE(review): presumably UWOP_SAVE_XMM128_FAR -- confirm.
189 %assign unwindcount unwindcount+1
190 unwindcode(unwindcount, %2 >> 16)
191 %assign unwindcount unwindcount+1
192 unwindcode(unwindcount, %2 & 0x0000FFFF)
193 %assign unwindcount unwindcount+1
194 unwindcode(unwindcount, $-.startfunc + (9 << 8) + (regcode%1 << 12))
; One byte holding the distance from .startfunc to .endprolog.
203 db .endprolog-.startfunc
; Emit the highest-numbered unwind code as a 16-bit word, then step
; unwindcount down, so codes come out in reverse order of their definition.
; NOTE(review): presumably repeated for every recorded code by an enclosing
; %rep -- confirm.
207 dw .unwind %+ unwindcount
208 %assign unwindcount unwindcount-1
; Arguments 7 and 8 as qword memory operands at [rsp+8] and [rsp+16].
; NOTE(review): presumably the non-Windows layout, where only the return
; address lies between rsp and the first stack argument at function entry --
; confirm against the enclosing conditional.
230 %define parm7q [rsp+8]
231 %define parm8q [rsp+16]
; dword-sized accesses to the same two argument slots.
238 %define parm7d dword parm7q
239 %define parm8d dword parm8q
270 ; PIC support macros. On x86_64 we just use RIP-relative addressing, which is
271 ; much simpler than the GOT handling we need to perform on x86.
273 ; - GLOBAL should be used as a suffix for global addressing, e.g.
274 ; mov eax, [foo GLOBAL]
; GLOBAL expands to `wrt rip`, so [foo GLOBAL] is assembled as [foo wrt rip].
279 %define GLOBAL wrt rip