%define t5 m11
%define mask0 m12
%define mask1p m13
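+; mask1q lives in memory rather than an xmm register. [rsp-24] is safe on
+; UNIX64 because the SysV ABI's 128-byte red zone below rsp is preserved;
+; WIN64 has no red zone, so it instead uses 0x10 bytes of stack explicitly
+; reserved by cglobal below.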
+%if WIN64
+ %define mask1q [rsp]
+%else
%define mask1q [rsp-24]
+%endif
%define mpb_0 m14
%define mpb_1 m15
%else
;-----------------------------------------------------------------------------
; void deblock_v_luma_intra( uint8_t *pix, intptr_t stride, int alpha, int beta )
;-----------------------------------------------------------------------------
-cglobal deblock_%1_luma_intra, 4,6,16,ARCH_X86_64*0x50-0x50
+cglobal deblock_%1_luma_intra, 4,6,16,0-(1-ARCH_X86_64)*0x50-WIN64*0x10
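+; Per target the stack-size expression evaluates to -0x50 on x86_32 (as
+; before), -0x10 on WIN64 (new: stack backing for the mask1q slot at [rsp])
+; and 0 on UNIX64, which only uses the red zone.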
lea r4, [r1*4]
lea r5, [r1*3] ; 3*stride
dec r2d ; alpha-1
;-----------------------------------------------------------------------------
; void intra_satd_x3_4x4( uint8_t *fenc, uint8_t *fdec, int *res )
;-----------------------------------------------------------------------------
cglobal intra_satd_x3_4x4, 3,3
-%if ARCH_X86_64
+%if UNIX64
; stack is 16-byte aligned because the ABI says so
%define top_1d rsp-8 ; size 8
%define left_1d rsp-16 ; size 8
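+ ; (UNIX64, not ARCH_X86_64: addressing below rsp is only safe under the
+ ; SysV red zone, which WIN64 lacks, so WIN64 now takes the %else path)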
%else
- ; stack is 16 byte aligned at least in gcc, and we've pushed 3 regs + return address, so it's still aligned
- SUB esp, 16
- %define top_1d esp+8
- %define left_1d esp
+ ; WIN64:  stack is 16-byte aligned because the ABI says so
+ ; X86_32: stack is 16-byte aligned at least with gcc, and we've pushed 3 regs + return address, so it's still aligned
+ SUB rsp, 16
+ %define top_1d rsp+8
+ %define left_1d rsp
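+ ; x86inc aliases rsp to esp on x86_32, so one path now covers both WIN64
+ ; and X86_32; uppercase SUB/ADD are x86inc macros that also track the
+ ; stack_offset used for r?m argument references.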
%endif
call hadamard_load
movd [r2+0], m0 ; i4x4_v satd
movd [r2+4], m4 ; i4x4_h satd
movd [r2+8], m5 ; i4x4_dc satd
-%if ARCH_X86_64 == 0
- ADD esp, 16
+%if UNIX64 == 0
+ ADD rsp, 16
%endif
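+; on UNIX64 nothing was subtracted from rsp, so there's nothing to add back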
RET