git.sesse.net Git - x264/blob - common/x86/x86inc-64.asm

   1 ;*****************************************************************************
   2 ;* x86inc-64.asm: h264 encoder library
   3 ;*****************************************************************************
   4 ;* Copyright (C) 2005-2008 x264 project
   5 ;*
   6 ;* Authors: Andrew Dunstan
   7 ;*
   8 ;* This program is free software; you can redistribute it and/or modify
   9 ;* it under the terms of the GNU General Public License as published by
  10 ;* the Free Software Foundation; either version 2 of the License, or
  11 ;* (at your option) any later version.
  12 ;*
  13 ;* This program is distributed in the hope that it will be useful,
  14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 ;* GNU General Public License for more details.
  17 ;*
  18 ;* You should have received a copy of the GNU General Public License
  19 ;* along with this program; if not, write to the Free Software
  20 ;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  21 ;*****************************************************************************
  22
  23 BITS 64
  24
; FIXME: All of the 64-bit asm functions that take a stride as a register
; argument assume that the high dword of that register is zero.
; This holds in practice (we never do any 64-bit arithmetic on strides, and
; x264's strides are all positive), but it is not guaranteed by the ABI.
  29
  30 ; Name of the .rodata section. On OS X we cannot use .rodata because YASM
  31 ; is unable to compute address offsets outside of .text so we use the .text
  32 ; section instead until YASM is fixed.
  33 %macro SECTION_RODATA 0
  34     %ifidn __OUTPUT_FORMAT__,macho64
  35       SECTION .text align=16
  36     %else
  37       SECTION .rodata align=16
  38     %endif
  39 %endmacro
  40
  41 %macro pad 1
  42     %undef %1
  43     %ifdef PREFIX
  44         %define %1 _%1
  45     %endif
  46     %ifdef WIN64
  47         times 6 nop
  48         align 16
  49         %1
  50         .startfunc
  51         %assign unwindcount 0
  52         %assign framereg 0
  53     %else
  54         align 16
  55         %1
  56     %endif
  57 %endmacro
  58
  59 %ifdef WIN64
  60
  61 %define __PIC__
  62
  63 %define parm1q rcx
  64 %define parm2q rdx
  65 %define parm3q r8
  66 %define parm4q r9
  67 %define parm5q [rsp+40]
  68 %define parm6q [rsp+48]
  69 %define parm7q [rsp+56]
  70 %define parm8q [rsp+64]
  71 %define parm1d ecx
  72 %define parm2d edx
  73 %define parm3d r8d
  74 %define parm4d r9d
  75 %define parm5d dword parm5q
  76 %define parm6d dword parm6q
  77 %define parm7d dword parm7q
  78 %define parm8d dword parm8q
  79
  80 %define temp1q rdi
  81 %define temp2q rsi
  82 %define temp1d edi
  83 %define temp2d esi
  84
  85 %macro firstpush 1
  86     db 0x48
  87     push %1
  88 %endmacro
  89
  90 %define unwindcode(count, code) .unwind %+ count EQU code
  91
  92 %define regcoderax 0
  93 %define regcodercx 1
  94 %define regcoderdx 2
  95 %define regcoderbx 3
  96 %define regcodersp 4
  97 %define regcoderbp 5
  98 %define regcodersi 6
  99 %define regcoderdi 7
 100 %define regcoder8 8
 101 %define regcoder9 9
 102 %define regcoder10 10
 103 %define regcoder11 11
 104 %define regcoder12 12
 105 %define regcoder13 13
 106 %define regcoder14 14
 107 %define regcoder15 15
 108 %define regcodexmm0 0
 109 %define regcodexmm1 1
 110 %define regcodexmm2 2
 111 %define regcodexmm3 3
 112 %define regcodexmm4 4
 113 %define regcodexmm5 5
 114 %define regcodexmm6 6
 115 %define regcodexmm7 7
 116 %define regcodexmm8 8
 117 %define regcodexmm9 9
 118 %define regcodexmm10 10
 119 %define regcodexmm11 11
 120 %define regcodexmm12 12
 121 %define regcodexmm13 13
 122 %define regcodexmm14 14
 123 %define regcodexmm15 15
 124
 125 %macro allocstack 1
 126     %if %1 < 8
 127         %error Stack Allocation must be at least 8 bytes.
 128     %elif %1 < 129
 129         %assign unwindcount unwindcount+1
 130         unwindcode(unwindcount, $-.startfunc + 0x200 + (((%1-8)/8)<<12))
 131     %elif %1 < 524288
 132         %assign unwindcount unwindcount+1
 133         unwindcode(unwindcount, %1/8)
 134         %assign unwindcount unwindcount+1
 135         unwindcode(unwindcount, $-.startfunc + 0x100)
 136     %else
 137         %assign unwindcount unwindcount+1
 138         unwindcode(unwindcount, %1>>16)
 139         %assign unwindcount unwindcount+1
 140         unwindcode(unwindcount, %1 & 0x0000FFFF)
 141         %assign unwindcount unwindcount+1
 142         unwindcode(unwindcount, $-.startfunc + 0x1100)
 143     %endif
 144 %endmacro
 145
 146 %macro pushreg 1
 147     %assign unwindcount unwindcount+1
 148     unwindcode(unwindcount, $-.startfunc + 0 + (regcode%1 << 12))
 149 %endmacro
 150
 151 %macro setframe 2
 152     %if ((%2 % 16) | (%2 > 240) | (%2 < 0))
 153         %error Frame offset must be a multiple of 16 between 0 and 240.
 154     %endif
 155     %assign unwindcount unwindcount+1
 156     unwindcode(unwindcount, $-.startfunc + (3 << 8 )+ (regcode%1 << 12))
 157     %assign framereg regcode%1 + %2
 158 %endmacro
 159
 160 %macro savereg 2
 161     %if ((%2 % 8) | (%2 < 0))
 162         %error Offset must be a positive multiple of 8.
 163     %endif
 164     %if (%2 < 64504)
 165         %assign unwindcount unwindcount+1
 166         unwindcode(unwindcount, %2/8)
 167         %assign unwindcount unwindcount +1
 168         unwindcode(unwindcount, $-.startfunc + (4 << 8) + (regcode%1 << 12))
 169     %else
 170         %assign unwindcount unwindcount+1
 171         unwindcode(unwindcount, %2 >> 16)
 172         %assign unwindcount unwindcount+1
 173         unwindcode(unwindcount, %2 & 0x0000FFFF)
 174         %assign unwindcount unwindcount+1
 175         unwindcode(unwindcount, $-.startfunc + (5 << 8) + (regcode%1 << 12))
 176     %endif
 177 %endmacro
 178
 179 %macro savexmm128 2
 180     %if ((%2 % 16) | (%2 < 0))
 181         %error Offset must be a positive multiple of 16.
 182     %endif
 183     %if (%2 < 64512)
 184         %assign unwindcount unwindcount+1
 185         unwindcode(unwindcount, %2/16)
 186         %assign unwindcount unwindcount +1
 187         unwindcode(unwindcount, $-.startfunc + (8 << 8) + (regcode%1 << 12))
 188     %else
 189         %assign unwindcount unwindcount+1
 190         unwindcode(unwindcount, %2 >> 16)
 191         %assign unwindcount unwindcount+1
 192         unwindcode(unwindcount, %2 & 0x0000FFFF)
 193         %assign unwindcount unwindcount+1
 194         unwindcode(unwindcount, $-.startfunc + (9 << 8) + (regcode%1 << 12))
 195     %endif
 196 %endmacro
 197
 198 %macro endprolog 0
 199 .endprolog:
 200 SECTION .xdata
 201 .unwindinfo:
 202     db 0x01
 203     db .endprolog-.startfunc
 204     db unwindcount
 205     db framereg
 206     %rep unwindcount
 207         dw .unwind %+ unwindcount
 208         %assign unwindcount unwindcount-1
 209     %endrep
 210 align 4,db 0
 211 SECTION .text
 212 %endmacro
 213
 214 %macro endfunc 0
 215 .endfunc:
 216 SECTION .pdata
 217     dd .startfunc
 218     dd .endfunc
 219     dd .unwindinfo
 220 SECTION .text
 221 %endmacro
 222
 223 %else ;linux
 224 %define parm1q rdi
 225 %define parm2q rsi
 226 %define parm3q rdx
 227 %define parm4q rcx
 228 %define parm5q r8
 229 %define parm6q r9
 230 %define parm7q [rsp+8]
 231 %define parm8q [rsp+16]
 232 %define parm1d edi
 233 %define parm2d esi
 234 %define parm3d edx
 235 %define parm4d ecx
 236 %define parm5d r8d
 237 %define parm6d r9d
 238 %define parm7d dword parm7q
 239 %define parm8d dword parm8q
 240
 241 %define temp1q r9
 242 %define temp2q r8
 243 %define temp1d r9d
 244 %define temp2d r8d
 245
 246 %macro allocstack 1
 247 %endmacro
 248
 249 %macro firstpush 1
 250     push %1
 251 %endmacro
 252
 253 %macro pushreg 1
 254 %endmacro
 255
 256 %macro setframe 2
 257 %endmacro
 258
 259 %macro savereg 2
 260 %endmacro
 261
 262 %macro savexmm128 2
 263 %endmacro
 264
 265 %define endprolog
 266 %define endfunc
 267
 268 %endif ;linux
 269
 270 ; PIC support macros. On x86_64 we just use RIP-relative addressing, which is
 271 ; much simpler than the GOT handling we need to perform on x86.
 272 ;
 273 ; - GLOBAL should be used as a suffix for global addressing, eg.
 274 ;     mov eax, [foo GLOBAL]
 275 ;   instead of
 276 ;     mov eax, [foo]
 277 ;
 278 %ifdef __PIC__
 279     %define GLOBAL wrt rip
 280     %define PIC64
 281 %else
 282     %define GLOBAL
 283 %endif
 284
 285 %macro picgetgot 1
 286 %endmacro