Define ARCH_X86_64, HIGH_BIT_DEPTH, WIN64, and UNIX64 to numeric 0/1 values and test them with %if rather than %ifdef/%ifndef; this allows combining multiple conditionals in a single statement.
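For example, where the old code needed one nested %ifdef per macro (the pattern appears verbatim in one of the mc hunks below), a single %if expression now suffices:

    ; before: one conditional level per %ifdef
    %ifdef HIGH_BIT_DEPTH
    %if mmsize == 16
        WIN64_SPILL_XMM 8
    %endif
    %endif

    ; after: macros are defined to 0/1, so conditions combine
    %if HIGH_BIT_DEPTH && mmsize == 16
        WIN64_SPILL_XMM 8
    %endif

For this to work, every macro tested this way must always be defined to a numeric value; the Makefile, configure, and x86inc.asm hunks below define ARCH_X86_64, HIGH_BIT_DEPTH, WIN64, and UNIX64 to 0 or 1 in all configurations.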
ifeq ($(ARCH),X86)
ARCH_X86 = yes
ASMSRC = $(X86SRC) common/x86/pixel-32.asm
+ASFLAGS += -DARCH_X86_64=0
endif
ifeq ($(ARCH),X86_64)
ARCH_X86 = yes
ASMSRC = $(X86SRC:-32.asm=-64.asm) common/x86/trellis-64.asm
-ASFLAGS += -DARCH_X86_64
+ASFLAGS += -DARCH_X86_64=1
endif
ifdef ARCH_X86
cextern cabac_renorm_shift
; t3 must be ecx, since it's used for shift.
-%ifdef WIN64
+%if WIN64
DECLARE_REG_TMP 3,1,2,0,6,5,4,2
%define pointer resq
-%elifdef ARCH_X86_64
+%elif ARCH_X86_64
DECLARE_REG_TMP 0,1,2,3,4,5,6,6
%define pointer resq
%else
and t4d, t6d
shr t5d, 6
movifnidn t2d, r2m
-%ifdef WIN64
+%if WIN64
PUSH r7
%endif
LOAD_GLOBAL t5d, cabac_range_lps-4, t5, t4*2
mov t4d, t3d
shr t3d, 3
LOAD_GLOBAL t3d, cabac_renorm_shift, 0, t3
-%ifdef WIN64
+%if WIN64
POP r7
%endif
shl t4d, t3b
lea t7d, [t7*2+t3]
mov t3d, [t0+cb.queue]
inc t3d
-%ifdef UNIX64 ; .putbyte compiles to nothing but a jmp
+%if UNIX64 ; .putbyte compiles to nothing but a jmp
jge cabac_putbyte
%else
jge .putbyte
cabac_putbyte:
; alive: t0=cb t3=queue t6=low
-%ifdef WIN64
+%if WIN64
DECLARE_REG_TMP 3,6,1,0,2,5,4
%endif
mov t1d, -1
mov [rsi], edx
RET
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
;-----------------------------------------------------------------------------
; int cpu_cpuid_test( void )
SWAP %4, %9, %8
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%macro SUB8x8_DCT8 0
cglobal sub8x8_dct8, 3,3,8
SWAP %4, %9, %8
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%macro SUB8x8_DCT8 0
cglobal sub8x8_dct8, 3,3,14
-%ifdef WIN64
+%if WIN64
call .skip_prologue
RET
%endif
%macro ADD8x8_IDCT8 0
cglobal add8x8_idct8, 2,2,16
add r1, 128
-%ifdef WIN64
+%if WIN64
call .skip_prologue
RET
%endif
%if cpuflag(ssse3)
mova m7, [hsub_mul]
%endif
-%ifdef WIN64
+%if WIN64
call .skip_prologue
RET
%endif
%if cpuflag(ssse3)
mova m7, [hsub_mul]
%endif
-%ifdef WIN64
+%if WIN64
call .skip_prologue
RET
%endif
cglobal add8x8_idct8, 2,2,11
add r0, 4*FDEC_STRIDE
pxor m7, m7
-%ifdef WIN64
+%if WIN64
call .skip_prologue
RET
%endif
cglobal add8x8_idct, 2,2,11
add r0, 4*FDEC_STRIDE
pxor m7, m7
-%ifdef WIN64
+%if WIN64
call .skip_prologue
RET
%endif
SWAP %1, %3
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void dct4x4dc( dctcoef d[4][4] )
;-----------------------------------------------------------------------------
RET
%endif ; HIGH_BIT_DEPTH
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void idct4x4dc( int32_t d[4][4] )
;-----------------------------------------------------------------------------
RET
%endif ; HIGH_BIT_DEPTH
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void sub4x4_dct( dctcoef dct[4][4], pixel *pix1, pixel *pix2 )
;-----------------------------------------------------------------------------
SUB_DCT4
%endif ; HIGH_BIT_DEPTH
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void add4x4_idct( pixel *p_dst, dctcoef dct[4][4] )
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
%macro SUB_NxN_DCT 7
cglobal %1, 3,3,%7
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
%if mmsize == 8
pxor m7, m7
%else
add r0, %3
add r1, %4-%5-%6*FENC_STRIDE
add r2, %4-%5-%6*FDEC_STRIDE
-%ifdef WIN64
+%if WIN64
call %2.skip_prologue
RET
%else
; void add8x8_idct( uint8_t *pix, int16_t dct[4][4][4] )
;-----------------------------------------------------------------------------
%macro ADD_NxN_IDCT 6-7
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
cglobal %1, 2,2,%7
%if %3==256
add r1, 128
call %2.skip_prologue
add r0, %4-%5-%6*FDEC_STRIDE
add r1, %3
-%ifdef WIN64
+%if WIN64
call %2.skip_prologue
RET
%else
%endif
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_MMX
SUB_NxN_DCT sub8x8_dct_mmx, sub4x4_dct_mmx, 64, 8, 0, 0, 0
SUB_NxN_DCT sub16x16_dct_mmx, sub8x8_dct_mmx, 64, 16, 8, 8, 0
SUB_NxN_DCT sub16x16_dct8_sse4, sub8x8_dct8_sse4, 256, 16, 0, 0, 14
SUB_NxN_DCT sub16x16_dct8_avx, sub8x8_dct8_avx, 256, 16, 0, 0, 14
%else ; !HIGH_BIT_DEPTH
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX
SUB_NxN_DCT sub8x8_dct_mmx, sub4x4_dct_mmx, 32, 4, 0, 0, 0
ADD_NxN_IDCT add8x8_idct_mmx, add4x4_idct_mmx, 32, 4, 0, 0
SUB_NxN_DCT sub16x16_dct8_avx, sub8x8_dct8_avx, 128, 8, 0, 0, 11
%endif ; HIGH_BIT_DEPTH
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void add8x8_idct_dc( pixel *p_dst, dctcoef *dct2x2 )
;-----------------------------------------------------------------------------
cglobal add16x16_idct_dc_sse2, 2,2,8
call .loop
add r0, FDEC_STRIDE*4
-%ifdef WIN64
+%if WIN64
call .loop
RET
%endif
cglobal add16x16_idct_dc, 2,2,8
call .loop
add r0, FDEC_STRIDE*4
-%ifdef WIN64
+%if WIN64
call .loop
RET
%endif
psubw m0, m1 ; d02-d13 s02-s13 d02+d13 s02+s13
%endmacro
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
INIT_MMX
cglobal sub8x8_dct_dc_mmx2, 3,3
DCTDC_2ROW_MMX m0, m4, 0, 0
paddw %1, m0
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%macro SUB8x8_DCT_DC_10 0
cglobal sub8x8_dct_dc, 3,3,3
DCTDC_4ROW_SSE2 m1, 0
RET
%endmacro
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
INIT_XMM sse2
SCAN_8x8
INIT_XMM ssse3
RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
SCAN_8x8_FRAME 4 , dq, qdq, dq, d
INIT_XMM avx
RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
SCAN_4x4 4 , dq, qdq, dq
INIT_XMM avx
RET
%endif ; !HIGH_BIT_DEPTH
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void zigzag_scan_4x4_field( int32_t level[16], int32_t dct[4][4] )
;-----------------------------------------------------------------------------
mova [r0+60*SIZEOF_DCTCOEF], m7
RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse4
SCAN_8x8 d, dq, qdq, dq, 4
INIT_XMM avx
RET
%endmacro
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
INIT_XMM ssse3
ZIGZAG_SUB_4x4 , frame
ZIGZAG_SUB_4x4 ac, frame
packsswb m5, m6
packsswb m5, m5
pxor m0, m0
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
packsswb m5, m5
%endif
pcmpeqb m5, m0
RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
ZIGZAG_8x8_CAVLC D
INIT_XMM avx
%endif
%endmacro
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
%macro ZIGZAG_8x8_CAVLC 0
cglobal zigzag_interleave_8x8_cavlc, 3,3,8
INTERLEAVE_XMM 0
cextern pw_00ff
cextern pw_pixel_max
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
; out: %4 = |%1-%2|-%3
; clobbers: %5
%macro ABS_SUB 5
RET
%endmacro
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
; in: m0=p1, m1=p0, m2=q0, m3=q1, m8=p2, m9=q2
; m12=alpha, m13=beta
; out: m0=p1', m3=q1', m1=p0', m2=q0'
; %1=p0 %2=p1 %3=p2 %4=p3 %5=q0 %6=q1 %7=mask0
; %8=mask1p %9=2 %10=p0' %11=p1' %12=p2'
%macro LUMA_INTRA_P012 12 ; p0..p3 in memory
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
paddw t0, %3, %2
mova t2, %4
paddw t2, %3
LOAD_AB t0, t1, r2d, r3d
mova %1, t0
LOAD_MASK m0, m1, m2, m3, %1, t1, t0, t2, t3
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
mova %2, t0 ; mask0
psrlw t3, %1, 2
%else
%endif
%endmacro
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
;-----------------------------------------------------------------------------
; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
RET
%endmacro
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX mmx2
DEBLOCK_LUMA
DEBLOCK_LUMA_INTRA
%endif
%endif ; HIGH_BIT_DEPTH
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
; expands to [base],...,[base+7*stride]
%define PASS8ROWS(base, base3, stride, stride3) \
[base], [base+stride], [base+stride*2], [base3], \
; out: %4 = |%1-%2|>%3
; clobbers: %5
%macro DIFF_GT2 5
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
psubusb %5, %2, %1
psubusb %4, %1, %2
%else
mova %4, %2
%endmacro
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
;-----------------------------------------------------------------------------
; void deblock_v_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
lea r8, [r7*3]
lea r6, [r0-4]
lea r5, [r0-4+r8]
-%ifdef WIN64
+%if WIN64
sub rsp, 0x98
%define pix_tmp rsp+0x30
%else
; don't backup r6, r5, r7, r8 because deblock_v_luma_sse2 doesn't use them
lea r0, [pix_tmp+0x30]
mov r1d, 0x10
-%ifdef WIN64
+%if WIN64
mov [rsp+0x20], r4
%endif
call deblock_v_luma
movq m3, [pix_tmp+0x40]
TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r7, r8)
-%ifdef WIN64
+%if WIN64
add rsp, 0x98
%else
add rsp, 0x68
%macro LUMA_INTRA_P012 4 ; p0..p3 in memory
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
pavgb t0, p2, p1
pavgb t1, p0, q0
%else
%endif
pavgb t0, t1 ; ((p2+p1+1)/2 + (p0+q0+1)/2 + 1)/2
mova t5, t1
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
paddb t2, p2, p1
paddb t3, p0, q0
%else
pand t2, mpb_1
psubb t0, t2 ; p1' = (p2+p1+p0+q0+2)/4;
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
pavgb t1, p2, q1
psubb t2, p2, q1
%else
%define t1 m5
%define t2 m6
%define t3 m7
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
%define p2 m8
%define q2 m9
%define t4 m10
; void deblock_v_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal deblock_%1_luma_intra, 4,6,16
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
sub esp, 0x60
%endif
lea r4, [r1*4]
mova p0, [r4+r5]
mova q0, [r0]
mova q1, [r0+r1]
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
pxor mpb_0, mpb_0
mova mpb_1, [pb_1]
LOAD_MASK r2d, r3d, t5 ; m5=beta-1, t5=alpha-1, m7=mask0
LUMA_INTRA_SWAP_PQ
LUMA_INTRA_P012 [r0], [r0+r1], [r0+2*r1], [r0+r5]
.end:
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
add esp, 0x60
%endif
RET
INIT_MMX cpuname
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
;-----------------------------------------------------------------------------
; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
DEBLOCK_LUMA_INTRA v
INIT_XMM avx
DEBLOCK_LUMA_INTRA v
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX mmx2
DEBLOCK_LUMA_INTRA v8
%endif
%endif ; !HIGH_BIT_DEPTH
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
; in: %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp
; out: %1=p0', %2=q0'
%macro CHROMA_DEBLOCK_P0_Q0_INTRA 7
REP_RET
%endmacro ; DEBLOCK_CHROMA
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX mmx2
DEBLOCK_CHROMA
%endif
DEBLOCK_CHROMA
%endif ; HIGH_BIT_DEPTH
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
%macro CHROMA_V_START 0
dec r2d ; alpha-1
dec r3d ; beta-1
DEBLOCK_CHROMA
INIT_XMM avx
DEBLOCK_CHROMA
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX mmx2
DEBLOCK_CHROMA
%endif
INIT_XMM sse2
DEBLOCK_H_CHROMA_420_MBAFF
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX mmx2
DEBLOCK_H_CHROMA_420_MBAFF
%endif
%macro DEBLOCK_H_CHROMA_422 0
cglobal deblock_h_chroma_422, 5,8,8
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
%define cntr r7
%else
%define cntr dword r0m
DEBLOCK_CHROMA_INTRA
INIT_MMX mmx2
DEBLOCK_CHROMA_INTRA_BODY
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
DEBLOCK_CHROMA_INTRA
%endif
; implicit weighted biprediction
;=============================================================================
; assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64
-%ifdef WIN64
+%if WIN64
DECLARE_REG_TMP 0,1,2,3,4,5,4,5
%macro AVG_START 0-1 0
PROLOGUE 5,7,%1
movsxd r5, dword r5m
%endmacro
-%elifdef UNIX64
+%elif UNIX64
DECLARE_REG_TMP 0,1,2,3,4,5,7,8
%macro AVG_START 0-1 0
PROLOGUE 6,9,%1
REP_RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%macro BIWEIGHT_MMX 2
movh m0, %1
SPLATW m3, m3 ; weight_dst,src
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%macro BIWEIGHT_ROW 4
BIWEIGHT [%2], [%3]
%if %4==mmsize/4
cglobal pixel_avg_weight_w%1
BIWEIGHT_START
AVG_START %2
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mova m7, [pw_pixel_max]
%endif
.height_loop:
BIWEIGHT [t2], [t4]
SWAP 0, 6
BIWEIGHT [t2+SIZEOF_PIXEL*t3], [t4+SIZEOF_PIXEL*t5]
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
packssdw m6, m0
CLIPW m6, m5, m7
%else ;!HIGH_BIT_DEPTH
AVG_WEIGHT 4
AVG_WEIGHT 8
AVG_WEIGHT 16
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
AVG_WEIGHT 4, 8
AVG_WEIGHT 8, 8
; P frame explicit weighted prediction
;=============================================================================
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%macro WEIGHT_START 1 ; (width)
mova m0, [r4+ 0] ; 1<<denom
mova m3, [r4+16]
;void mc_weight_wX( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, weight_t *weight, int h )
;-----------------------------------------------------------------------------
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
%define NUMREGS 6
%define LOAD_HEIGHT
%define HEIGHT_REG r5d
%endif
%assign XMMREGS 7
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%assign NUMREGS NUMREGS+1
%assign XMMREGS 8
%endif
WEIGHTER 8
WEIGHTER 16
WEIGHTER 20
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
WEIGHTER 12
INIT_XMM avx
WEIGHTER 8
%macro OFFSET_OP 7
mov%6 m0, [%1]
mov%6 m1, [%2]
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
p%5usw m0, m2
p%5usw m1, m2
%ifidn %5,add
OFFSET_OP (%1+x), (%1+x+r3), (%2+x), (%2+x+r1), %4, u, a
%assign x (x+mmsize)
%else
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
OFFSET_OP (%1+x), (%1+x+r3), (%2+x), (%2+x+r1), %4, h, h
%else
OFFSET_OP (%1+x), (%1+x+r3), (%2+x), (%2+x+r1), %4, d, d
cglobal mc_offset%2_w%1, NUMREGS, NUMREGS
FIX_STRIDES r1, r3
mova m2, [r4]
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%ifidn %2,add
mova m3, [pw_pixel_max]
%endif
OFFSETPN 12
OFFSETPN 16
OFFSETPN 20
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
OFFSETPN 8
INIT_XMM avx
%rep (%1*SIZEOF_PIXEL+mmsize-1)/mmsize
%2 m0, [t2+x]
%2 m1, [t2+x+SIZEOF_PIXEL*t3]
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
pavgw m0, [t4+x]
pavgw m1, [t4+x+SIZEOF_PIXEL*t5]
%else ;!HIGH_BIT_DEPTH
AVG_END
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_MMX mmx2
AVG_FUNC 4, movq, movq
; pixel avg2
;=============================================================================
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void pixel_avg2_wN( uint16_t *dst, int dst_stride,
; uint16_t *src1, int src_stride,
REP_RET
%endif ; HIGH_BIT_DEPTH
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
;-----------------------------------------------------------------------------
; void pixel_avg2_w4( uint8_t *dst, int dst_stride,
; uint8_t *src1, int src_stride,
%endif
%if 0 ; or %1==8 - but the extra branch seems too expensive
ja cachesplit
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
test r4b, 1
%else
test byte r4m, 1
INIT_MMX
AVG_CACHELINE_CHECK 8, 64, mmx2
AVG_CACHELINE_CHECK 12, 64, mmx2
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
AVG_CACHELINE_CHECK 16, 64, mmx2
AVG_CACHELINE_CHECK 20, 64, mmx2
AVG_CACHELINE_CHECK 8, 32, mmx2
%else
lea r6, [avg_w16_addr + r6]
%endif
-%ifdef UNIX64
+%if UNIX64
jmp r6
%else
call r6
lea r5, [r3*3]
lea r4, [r1*3]
je .end
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
%define mova movd
%define movu movd
%endif
;-----------------------------------------------------------------------------
%macro PREFETCH_FENC 1
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
cglobal prefetch_fenc_%1, 5,5
FIX_STRIDES r1d, r3d
and r4d, 3
; chroma MC
;=============================================================================
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
DECLARE_REG_TMP 6,7,8
%else
DECLARE_REG_TMP 0,1,2
%endif
%macro MC_CHROMA_START 1
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
PROLOGUE 0,9,%1
%else
PROLOGUE 0,6,%1
add r3, t0 ; src += (dx>>3) + (dy>>3) * src_stride
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%macro UNPACK_UNALIGNED 4
movu %1, [%4+0]
movu %2, [%4+4]
MC_CHROMA_START 0
FIX_STRIDES r4
and r5d, 7
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
jz .mc1dy
%endif
and t2d, 7
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
jz .mc1dx
%endif
shl r5d, 16
pshufw m5, m5, q1111
jge .width4
%else
-%ifdef WIN64
+%if WIN64
cmp dword r7m, 4 ; flags were clobbered by WIN64_SPILL_XMM
%endif
pshufd m7, m5, q1111
pshufd m5, m5, q1111
jg .width8
%endif
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
add r2, r2
UNPACK_UNALIGNED m0, m1, m2, r3
%else
SWAP 3, 0
ALIGN 4
.loop2:
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
UNPACK_UNALIGNED m0, m1, m2, r3+r4
pmullw m3, m6
%else ; !HIGH_BIT_DEPTH
pmullw m0, m5
paddw m0, m2
psrlw m0, 6
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
movh [r0], m0
%if mmsize == 8
psrlq m0, 32
%if mmsize==8
.width4:
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
mov t0, r0
mov t1, r1
mov t2, r3
%endif
%else
.width8:
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
%define multy0 m8
SWAP 8, 5
%else
%endif
FIX_STRIDES r2
.loopx:
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
UNPACK_UNALIGNED m0, m2, m4, r3
UNPACK_UNALIGNED m1, m3, m5, r3+mmsize
%else
add r3, r4
ALIGN 4
.loop4:
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
UNPACK_UNALIGNED m0, m1, m2, r3
pmaddwd m0, m7
pmaddwd m1, m7
paddw m1, m3
psrlw m0, 6
psrlw m1, 6
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
movh [r0], m0
movh [r0+mmsize/2], m1
%if mmsize==8
jg .width8
REP_RET
.width8:
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
lea r3, [t2+8*SIZEOF_PIXEL]
lea r0, [t0+4*SIZEOF_PIXEL]
lea r1, [t1+4*SIZEOF_PIXEL]
jmp .loopx
%endif
-%ifdef ARCH_X86_64 ; too many regs for x86_32
+%if ARCH_X86_64 ; too many regs for x86_32
RESET_MM_PERMUTATION
-%ifdef WIN64
+%if WIN64
%if xmm_regs_used > 6
%assign stack_offset stack_offset-(xmm_regs_used-6)*16-16
%assign xmm_regs_used 6
movd m5, r5d
mov r6d, 2*SIZEOF_PIXEL
.mc1d:
-%ifdef HIGH_BIT_DEPTH
-%if mmsize == 16
+%if HIGH_BIT_DEPTH && mmsize == 16
WIN64_SPILL_XMM 8
-%endif
%endif
mova m4, [pw_8]
SPLATW m5, m5
shr r5d, 1
%endif
.loop1d_w4:
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%if mmsize == 8
movq m0, [r3+0]
movq m2, [r3+8]
paddw m2, m3
psrlw m0, 3
psrlw m2, 3
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%if mmsize == 8
xchg r4, r8
xchg r2, r7
pshufb m0, m5
movu m1, [r3+8]
pshufb m1, m5
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
SWAP 8, 6
%define mult1 m8
%else
REP_RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_MMX mmx2
MC_CHROMA
INIT_XMM sse2
;%define movntps movaps
;%define sfence
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void hpel_filter_v( uint16_t *dst, uint16_t *src, int16_t *buf, int stride, int width );
;-----------------------------------------------------------------------------
%macro HPEL_FILTER 0
cglobal hpel_filter_v, 5,6,11
FIX_STRIDES r3d, r4d
-%ifdef WIN64
+%if WIN64
movsxd r4, r4d
%endif
lea r5, [r1+r3]
HPEL_FILTER
%endif ; HIGH_BIT_DEPTH
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
%macro HPEL_V 1
;-----------------------------------------------------------------------------
; void hpel_filter_v( uint8_t *dst, uint8_t *src, int16_t *buf, int stride, int width );
;-----------------------------------------------------------------------------
cglobal hpel_filter_v, 5,6,%1
-%ifdef WIN64
+%if WIN64
movsxd r4, r4d
%endif
lea r5, [r1+r3]
%ifnidn cpuname, sse2
mova m7, [pw_32]
%define tpw_32 m7
-%elifdef ARCH_X86_64
+%elif ARCH_X86_64
mova m8, [pw_32]
%define tpw_32 m8
%else
jl .loop
REP_RET
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
;-----------------------------------------------------------------------------
; void hpel_filter_h( uint8_t *dst, uint8_t *src, int width );
;-----------------------------------------------------------------------------
HPEL_V 8
INIT_XMM sse2, misalign
HPEL_C
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_XMM sse2
HPEL_C
INIT_XMM ssse3
HPEL_V 0
%endif
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
%macro DO_FILT_V 5
;The optimum prefetch distance is difficult to determine in checkasm:
;any prefetch seems slower than not prefetching.
; uint8_t *src, int stride, int width, int height)
;-----------------------------------------------------------------------------
cglobal hpel_filter, 7,9,16
-%ifdef WIN64
+%if WIN64
movsxd r4, r4d
movsxd r5, r5d
%endif
%macro INTERLEAVE 4-5 ; dst, srcu, srcv, is_aligned, nt_hint
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%assign x 0
%rep 16/mmsize
mov%4 m0, [%2+(x/2)*mmsize]
%endmacro
%macro DEINTERLEAVE 6 ; dstu, dstv, src, dstv==dstu+8, shuffle constant, is aligned
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%assign n 0
%rep 16/mmsize
mova m0, [%3+(n+0)*mmsize]
; assumes i_dst and w are multiples of 16, and i_dst>2*w
cglobal plane_copy_interleave_core, 7,9
FIX_STRIDES r1d, r3d, r5d, r6d
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mov r1m, r1d
mov r3m, r3d
mov r6m, r6d
lea r0, [r0+r6*2]
add r2, r6
add r4, r6
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
DECLARE_REG_TMP 7,8
%else
DECLARE_REG_TMP 1,3
%endmacro ; PLANE_INTERLEAVE
%macro DEINTERLEAVE_START 0
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mova m4, [pd_ffff]
%elif cpuflag(ssse3)
mova m4, [deinterleave_shuf]
DEINTERLEAVE_START
mov r6d, r6m
FIX_STRIDES r1d, r3d, r5d, r6d
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mov r6m, r6d
%endif
movsxdifnidn r1, r1d
REP_RET
%endmacro ; PLANE_DEINTERLEAVE
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_MMX mmx2
PLANE_INTERLEAVE
INIT_MMX mmx
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
;-----------------------------------------------------------------------------
; void integral_init4h( uint16_t *sum, uint8_t *pix, int stride )
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
%macro FRAME_INIT_LOWRES 0
cglobal frame_init_lowres_core, 6,7,(12-4*(BIT_DEPTH/9)) ; 8 for HIGH_BIT_DEPTH, 12 otherwise
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
shl dword r6m, 1
FIX_STRIDES r5d
shl dword r7m, 1
%endif
-%ifdef WIN64
+%if WIN64
movsxd r5, r5d
%endif
; src += 2*(height-1)*stride + 2*width
shl r6d, 1
PUSH r6
%define src_gap [rsp]
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
pcmpeqw m7, m7
psrld m7, 16
.vloop:
INIT_MMX mmx2
FRAME_INIT_LOWRES
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX cache32, mmx2
FRAME_INIT_LOWRES
%endif
; SSD
;=============================================================================
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; int pixel_ssd_MxN( uint16_t *, int, uint16_t *, int )
;-----------------------------------------------------------------------------
SSD_ONE 16, 16
%endif ; HIGH_BIT_DEPTH
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
%macro SSD_LOAD_FULL 5
mova m1, [t0+%1]
mova m2, [t2+%2]
%else
.startloop:
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
DECLARE_REG_TMP 0,1,2,3
PROLOGUE 0,0,8
%else
; For 10-bit MMX this means width >= 16416 and for XMM >= 32832. At sane
; distortion levels it will take much more than that though.
;-----------------------------------------------------------------------------
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%macro SSD_NV12 0
cglobal pixel_ssd_nv12_core, 6,7,7
shl r4d, 2
%endmacro ; SSD_NV12
%endif ; HIGH_BIT_DEPTH
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
;-----------------------------------------------------------------------------
; void pixel_ssd_nv12_core( uint8_t *pixuv1, int stride1, uint8_t *pixuv2, int stride2,
; int width, int height, uint64_t *ssd_u, uint64_t *ssd_v )
%macro VAR_START 1
pxor m5, m5 ; sum
pxor m6, m6 ; sum squared
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
%if %1
mova m7, [pw_00ff]
%else
%endmacro
%macro VAR_END 2
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%if mmsize == 8 && %1*%2 == 256
HADDUW m5, m2
%else
movd eax, m5
HADDD m6, m1
movd edx, m6
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
shl rdx, 32
add rax, rdx
%endif
%macro VAR_2ROW 2
mov r2d, %2
.loop:
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mova m0, [r0]
mova m1, [r0+mmsize]
mova m3, [r0+%1]
%else
add r0, r1
%endif
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
punpcklbw m3, m7
punpckhbw m4, m7
%endif ; !HIGH_BIT_DEPTH
VAR_2ROW r1, 4
VAR_END 8, 8
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%macro VAR 0
cglobal pixel_var_16x16, 2,3,8
FIX_STRIDES r1
VAR
%endif ; HIGH_BIT_DEPTH
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
%macro VAR 0
cglobal pixel_var_16x16, 2,3,8
VAR_START 1
VAR_START 0
mov r5d, %1
.loop:
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mova m0, [r0]
mova m1, [r0+mmsize]
psubw m0, [r2]
VAR2_END %2
%endmacro
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX mmx2
VAR2_8x8_MMX 8, 6
VAR2_8x8_MMX 16, 7
VAR_START 1
mov r5d, %1/2
.loop:
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mova m0, [r0]
mova m1, [r0+r1*2]
mova m2, [r2]
VAR2_8x8_SSE2 8, 6
VAR2_8x8_SSE2 16, 7
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
%macro VAR2_8x8_SSSE3 2
cglobal pixel_var2_8x%1, 5,6,8
pxor m5, m5 ; sum
%endmacro
%macro SATD_END_MMX 0
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
HADDUW m0, m1
movd eax, m0
%else ; !HIGH_BIT_DEPTH
paddw m0, m1
ret
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%macro SATD_MxN_MMX 3
cglobal pixel_satd_%1x%2, 4,7
SATD_START_MMX
SATD_MxN_MMX 8, 16, 8
%endif ; HIGH_BIT_DEPTH
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
cglobal pixel_satd_16x16, 4,6
SATD_START_MMX
pxor m0, m0
%endmacro
%macro BACKUP_POINTERS 0
-%ifdef ARCH_X86_64
-%ifdef WIN64
+%if ARCH_X86_64
+%if WIN64
PUSH r7
%endif
mov r6, r0
%endmacro
%macro RESTORE_AND_INC_POINTERS 0
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
lea r0, [r6+8]
lea r2, [r7+8]
-%ifdef WIN64
+%if WIN64
POP r7
%endif
%else
SATD_8x4_SSE cpuname, 0, 1, 2, 3, 4, 5, 6
ret
-%ifdef UNIX64 ; 16x8 regresses on phenom win64, 16x16 is almost the same
+%if UNIX64 ; 16x8 regresses on phenom win64, 16x16 is almost the same
cglobal pixel_satd_16x4_internal
LOAD_SUMSUB_16x4P 0, 1, 2, 3, 4, 8, 5, 9, 6, 7, r0, r2, 11
lea r2, [r2+4*r3]
%endmacro ; SATDS_SSE2
%macro SA8D_INTER 0
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
%define lh m10
%define rh m0
%else
%define lh m0
%define rh [esp+48]
%endif
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
HADDUW m0, m1
paddd lh, rh
%else
%endmacro
%macro SA8D 0
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%define vertical 1
%else ; sse2 doesn't seem to like the horizontal way of doing things
%define vertical (cpuflags == cpuflags_sse2)
%endif
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
;-----------------------------------------------------------------------------
; int pixel_sa8d_8x8( uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
mova m7, [hmul_8p]
%endif
call pixel_sa8d_8x8_internal
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
HADDUW m0, m1
%else
HADDW m0, m1
call pixel_sa8d_8x8_internal ; pix[0]
add r2, 8*SIZEOF_PIXEL
add r0, 8*SIZEOF_PIXEL
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
HADDUW m0, m1
%endif
mova m10, m0
call pixel_sa8d_8x8_internal ; pix[8*stride]
SA8D_INTER
SWAP 0, 10
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
HADDUW m0, m1
%endif
movd eax, m0
lea r4, [3*r1]
lea r5, [3*r3]
call pixel_sa8d_8x8_internal
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
HADDUW m0, m1
%else
HADDW m0, m1
lea r0, [r0+4*r1]
lea r2, [r2+4*r3]
%endif
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
HADDUW m0, m1
%endif
mova [esp+48], m0
%endif
mova [esp+64-mmsize], m0
call pixel_sa8d_8x8_internal
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
SA8D_INTER
%else ; !HIGH_BIT_DEPTH
paddusw m0, [esp+64-mmsize]
; intra_sa8d_x3_8x8 and intra_satd_x3_4x4 are obsoleted by x9 on ssse3+,
; and are only retained for old cpus.
%macro INTRA_SA8D_SSE2 0
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
;-----------------------------------------------------------------------------
; void intra_sa8d_x3_8x8( uint8_t *fenc, uint8_t edge[36], int *res )
;-----------------------------------------------------------------------------
INIT_MMX
cglobal hadamard_load
; not really a global, but otherwise cycles get attributed to the wrong function in profiling
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mova m0, [r0+0*FENC_STRIDEB]
mova m1, [r0+1*FENC_STRIDEB]
mova m2, [r0+2*FENC_STRIDEB]
%macro SCALAR_HADAMARD 4-5 ; direction, offset, 3x tmp
%ifidn %1, top
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mova %3, [r1+%2*SIZEOF_PIXEL-FDEC_STRIDEB]
%else
movd %3, [r1+%2*SIZEOF_PIXEL-FDEC_STRIDEB]
pinsrw %3, [r1+%2*SIZEOF_PIXEL-2+0*FDEC_STRIDEB], 0
pinsrw %3, [r1+%2*SIZEOF_PIXEL-2+2*FDEC_STRIDEB], 2
pinsrw %3, [r1+%2*SIZEOF_PIXEL-2+3*FDEC_STRIDEB], 3
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
psrlw %3, 8
%endif
%ifnidn %2, 0
; void intra_satd_x3_4x4( uint8_t *fenc, uint8_t *fdec, int *res )
;-----------------------------------------------------------------------------
cglobal intra_satd_x3_4x4, 3,3
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
; stack is 16 byte aligned because abi says so
%define top_1d rsp-8 ; size 8
%define left_1d rsp-16 ; size 8
movd [r2+0], m0 ; i4x4_v satd
movd [r2+4], m4 ; i4x4_h satd
movd [r2+8], m5 ; i4x4_dc satd
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
ADD esp, 16
%endif
RET
mova [sums+ 0], m7
mova [sums+ 8], m7
mova [sums+16], m7
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mova [sums+24], m7
mova [sums+32], m7
mova [sums+40], m7
add r0, 4*SIZEOF_PIXEL
inc r4
jl .loop_x
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mova m7, [pw_1]
pmaddwd m4, m7
pmaddwd m0, m7
; horizontal sum
movifnidn r2, r2mp
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mova m1, m5
paddd m5, m3
HADDD m5, m7 ; DC satd
ADD rsp, stack_pad
RET
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
%define t0 r6
%else
%define t0 r2
movq m1, [sums+8]
movq m2, [sums+16]
movq m7, m0
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
psrlq m7, 16
HADDW m7, m3
SUM_MM_X3 m0, m1, m2, m3, m4, m5, m6, paddd
%assign pad 0xc0-gprsize-(stack_offset&15)
%define pred_buf rsp
sub rsp, pad
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
INTRA_X9_PRED intrax9a, m8
%else
INTRA_X9_PRED intrax9a, [rsp+0xa0]
paddd m2, m3
paddd m4, m5
paddd m6, m7
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
SWAP 7, 8
pxor m8, m8
%define %%zero m8
RET
%endif ; cpuflag
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
;-----------------------------------------------------------------------------
; int intra_satd_x9_4x4( uint8_t *fenc, uint8_t *fdec, uint16_t *bitcosts )
;-----------------------------------------------------------------------------
%define fenc13 m5
%define fenc46 m6
%define fenc57 m7
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
%define tmp m8
%assign padbase 0x0
%else
ADD rsp, pad
RET
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
;-----------------------------------------------------------------------------
; int intra_sa8d_x9_8x8( uint8_t *fenc, uint8_t *fdec, uint8_t edge[36], uint16_t *bitcosts, uint16_t *satds )
;-----------------------------------------------------------------------------
; out: [tmp]=hadamard4, m0=satd
INIT_MMX mmx2
cglobal hadamard_ac_4x4
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mova m0, [r0]
mova m1, [r0+r1]
mova m2, [r0+r1*2]
ABSW2 m1, m3, m1, m3, m4, m5
HADAMARD 0, max, 0, 2, 4, 5
HADAMARD 0, max, 1, 3, 4, 5
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
pmaddwd m0, m7
pmaddwd m1, m7
paddd m6, m0
ret
%macro AC_PREP 2
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
pmaddwd %1, %2
%endif
%endmacro
%macro AC_PADD 3
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
AC_PREP %2, %3
paddd %1, %2
%else
cglobal hadamard_ac_8x8
mova m6, [mask_ac4]
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mova m7, [pw_1]
%else
pxor m7, m7
AC_PADD m5, m0, m7
sub r3, 40
mova [rsp+gprsize+8], m5 ; save satd
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
pxor m6, m6
%endif
%rep 3
ABSW2 m1, m3, m1, m3, m4, m5
ABSW2 m0, m2, m0, m2, m4, m5
HADAMARD 0, max, 1, 3, 4, 5
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
pand m0, [mask_ac4]
pmaddwd m1, m7
pmaddwd m0, m7
%macro HADAMARD_AC_WXH_SUM_MMX 2
mova m1, [rsp+1*mmsize]
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%if %1*%2 >= 128
paddd m0, [rsp+2*mmsize]
paddd m1, [rsp+3*mmsize]
movd edx, m0
movd eax, m1
shr edx, 1
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
shl rdx, 32
add rax, rdx
%endif
HADAMARD_AC_WXH_MMX 8, 8
%macro LOAD_INC_8x4W_SSE2 5
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
movu m%1, [r0]
movu m%2, [r0+r1]
movu m%3, [r0+r1*2]
; in: r0=pix, r1=stride, r2=stride*3
; out: [esp+16]=sa8d, [esp+32]=satd, r0+=stride*4
cglobal hadamard_ac_8x8
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
%define spill0 m8
%define spill1 m9
%define spill2 m10
%define spill1 [rsp+gprsize+16]
%define spill2 [rsp+gprsize+32]
%endif
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%define vertical 1
%elif cpuflag(ssse3)
%define vertical 0
AC_PREP m2, [pw_1]
AC_PADD m2, m3, [pw_1]
AC_PADD m2, m1, [pw_1]
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
paddd m2, m2
%else
paddw m2, m2
%macro HADAMARD_AC_WXH_SUM_SSE2 2
mova m1, [rsp+2*mmsize]
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%if %1*%2 >= 128
paddd m0, [rsp+3*mmsize]
paddd m1, [rsp+4*mmsize]
movd eax, m1
shr edx, 2 - (%1*%2 >> 8)
shr eax, 1
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
shl rdx, 32
add rax, rdx
%endif
; instantiate satds
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
cextern pixel_sa8d_8x8_internal_mmx2
INIT_MMX mmx2
SA8D
INIT_XMM sse2
SA8D
SATDS_SSE2
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
INTRA_SA8D_SSE2
%endif
INIT_MMX mmx2
%define DIFFOP DIFF_SUMSUB_SSSE3
%define LOAD_DUP_4x8P LOAD_DUP_4x8P_CONROE
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
%define LOAD_INC_8x4W LOAD_INC_8x4W_SSSE3
%define LOAD_SUMSUB_8x4P LOAD_SUMSUB_8x4P_SSSE3
%define LOAD_SUMSUB_16P LOAD_SUMSUB_16P_SSSE3
SATDS_SSE2
SA8D
HADAMARD_AC_SSE2
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
INTRA_X9
INTRA8_X9
%endif
%undef movdqa ; nehalem doesn't like movaps
%undef movdqu ; movups
%undef punpcklqdq ; or movlhps
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
INIT_MMX ssse3
INTRA_X3_MMX
%endif
SATDS_SSE2
SA8D
HADAMARD_AC_SSE2
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
INTRA_X9
INTRA8_X9
%endif
INIT_XMM avx
SATDS_SSE2
SA8D
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
INTRA_X9
INTRA8_X9
%endif
INIT_XMM xop
SATDS_SSE2
SA8D
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
INTRA_X9
; no xop INTRA8_X9. it's slower than avx on bulldozer. dunno why.
%endif
; const uint8_t *pix2, int stride2, int sums[2][4] )
;-----------------------------------------------------------------------------
%macro SSIM_ITER 1
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
movdqu m5, [r0+(%1&1)*r1]
movdqu m6, [r2+(%1&1)*r3]
%else
punpckhdq m5, m3, m4
punpckldq m3, m4
-%ifdef UNIX64
+%if UNIX64
%define t0 r4
%else
%define t0 rax
addps m0, m4
pshuflw m4, m0, q0032
addss m0, m4
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
movd r0m, m0
fld dword r0m
%endif
;=============================================================================
%macro ADS_START 0
-%ifdef WIN64
+%if WIN64
movsxd r5, r5d
%endif
mov r0d, r5d
punpcklqdq xmm6, xmm6
punpckhqdq xmm5, xmm5
punpckhqdq xmm4, xmm4
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
pshuflw xmm8, r6m, 0
punpcklqdq xmm8, xmm8
ADS_START
jge .end
.loopi:
mov r2, [r6+r1]
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
test r2, r2
%else
mov r3, r2
TEST 1
TEST 2
TEST 3
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
shr r2, 32
%else
mov r2d, [r6+r1]
; dest, left, right, src, tmp
; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
%macro PRED8x8_LOWPASS 4-5
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
paddw %2, %3
psrlw %2, 1
pavgw %1, %4, %2
movu m1, [r0-FDEC_STRIDEB]
PSLLPIX m2, m1, 1
mova m0, m1
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
PSRLPIX m1, m1, 1
pshufhw m1, m1, q2210
%else
RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
PREDICT_4x4_DDL
INIT_XMM avx
;-----------------------------------------------------------------------------
; void predict_4x4_vr( pixel *src )
;-----------------------------------------------------------------------------
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
INIT_MMX ssse3
cglobal predict_4x4_vr, 1,1
movd m1, [r0-1*FDEC_STRIDEB] ; ........t3t2t1t0
;-----------------------------------------------------------------------------
%macro PREDICT_4x4 4
cglobal predict_4x4_ddr, 1,1
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
movu m2, [r0-1*FDEC_STRIDEB-8]
pinsrw m2, [r0+0*FDEC_STRIDEB-2], 2
pinsrw m2, [r0+1*FDEC_STRIDEB-2], 1
; void predict_4x4_vr( pixel *src )
;-----------------------------------------------------------------------------
cglobal predict_4x4_vr, 1,1
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
movu m1, [r0-1*FDEC_STRIDEB-8]
pinsrw m1, [r0+0*FDEC_STRIDEB-2], 2
pinsrw m1, [r0+1*FDEC_STRIDEB-2], 1
; void predict_4x4_hd( pixel *src )
;-----------------------------------------------------------------------------
cglobal predict_4x4_hd, 1,1
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
movu m1, [r0-1*FDEC_STRIDEB-8]
PSLLPIX m1, m1, 1
pinsrw m1, [r0+0*FDEC_STRIDEB-2], 3
;-----------------------------------------------------------------------------
; void predict_4x4_ddr( pixel *src )
;-----------------------------------------------------------------------------
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_MMX mmx2
cglobal predict_4x4_ddr, 1,1
mova m0, [r0+1*FDEC_STRIDEB-8]
;-----------------------------------------------------------------------------
; void predict_4x4_hu( pixel *src )
;-----------------------------------------------------------------------------
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_MMX
cglobal predict_4x4_hu_mmx2, 1,1
movq m0, [r0+0*FDEC_STRIDEB-8]
RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
PREDICT_4x4_V1 w
INIT_XMM avx
; void predict_4x4_dc( pixel *src )
;-----------------------------------------------------------------------------
INIT_MMX mmx2
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
cglobal predict_4x4_dc, 1,1
mova m2, [r0+0*FDEC_STRIDEB-4*SIZEOF_PIXEL]
paddw m2, [r0+1*FDEC_STRIDEB-4*SIZEOF_PIXEL]
cglobal predict_8x8_filter, 4,6,6
add r0, 0x58*SIZEOF_PIXEL
%define src r0-0x58*SIZEOF_PIXEL
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
mov r4, r1
%define t1 r4
%define t4 r1
%endif
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
PREDICT_FILTER w, d, q, dq
INIT_XMM ssse3
RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
PREDICT_8x8_V
%else
RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
PREDICT_8x8_H wd, D
%else
;-----------------------------------------------------------------------------
; void predict_8x8_dc( pixel *src, pixel *edge );
;-----------------------------------------------------------------------------
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
cglobal predict_8x8_dc, 2,2
movu m0, [r1+14]
; void predict_8x8_dc_top ( pixel *src, pixel *edge );
; void predict_8x8_dc_left( pixel *src, pixel *edge );
;-----------------------------------------------------------------------------
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%macro PREDICT_8x8_DC 3
cglobal %1, 2,2
%3 m0, [r1+%2]
RET
%endmacro ; PREDICT_8x8_DDLR
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
PREDICT_8x8_DDLR
INIT_XMM ssse3
PREDICT_8x8_DDLR
INIT_XMM ssse3, cache64
PREDICT_8x8_DDLR
-%elifndef ARCH_X86_64
+%elif ARCH_X86_64 == 0
INIT_MMX mmx2
PREDICT_8x8_DDLR
%endif
%macro PREDICT_8x8_HU 2
cglobal predict_8x8_hu, 2,2,8
add r0, 4*FDEC_STRIDEB
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%if cpuflag(ssse3)
movu m5, [r1+7*SIZEOF_PIXEL]
pshufb m5, [pw_reverse]
RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
PREDICT_8x8_HU d, wd
INIT_XMM ssse3
PREDICT_8x8_HU d, wd
INIT_XMM avx
PREDICT_8x8_HU d, wd
-%elifndef ARCH_X86_64
+%elif ARCH_X86_64 == 0
INIT_MMX mmx2
PREDICT_8x8_HU w, bw
%endif
RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
PREDICT_8x8_VR w
INIT_XMM ssse3
PREDICT_8x8_VR w
INIT_XMM avx
PREDICT_8x8_VR w
-%elifndef ARCH_X86_64
+%elif ARCH_X86_64 == 0
INIT_MMX mmx2
PREDICT_8x8_VR b
%endif
%macro LOAD_PLANE_ARGS 0
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
movd mm0, r1d
movd mm2, r2d
movd mm4, r3d
;-----------------------------------------------------------------------------
; void predict_8x8c_p_core( uint8_t *src, int i00, int b, int c )
;-----------------------------------------------------------------------------
-%ifndef ARCH_X86_64
-%ifndef HIGH_BIT_DEPTH
+%if ARCH_X86_64 == 0 && HIGH_BIT_DEPTH == 0
%macro PREDICT_CHROMA_P_MMX 1
cglobal predict_8x%1c_p_core, 1,2
LOAD_PLANE_ARGS
INIT_MMX mmx2
PREDICT_CHROMA_P_MMX 8
PREDICT_CHROMA_P_MMX 16
-%endif ; !HIGH_BIT_DEPTH
-%endif ; !ARCH_X86_64
+%endif ; !ARCH_X86_64 && !HIGH_BIT_DEPTH
%macro PREDICT_CHROMA_P_XMM 1
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
cglobal predict_8x%1c_p_core, 1,2,7
movd m0, r1m
movd m2, r2m
;-----------------------------------------------------------------------------
; void predict_16x16_p_core( uint8_t *src, int i00, int b, int c )
;-----------------------------------------------------------------------------
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX mmx2
cglobal predict_16x16_p_core, 1,2
LOAD_PLANE_ARGS
SPLATW m2, m2, 0
pmullw m3, m1, [pw_76543210]
psllw m1, 3
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
pxor m6, m6
mov r1d, 16
.loop:
INIT_XMM sse2
PREDICT_16x16_P
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
INIT_XMM avx
PREDICT_16x16_P
%endif
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
%macro PREDICT_8x8 0
;-----------------------------------------------------------------------------
; void predict_8x8_ddl( uint8_t *src, uint8_t *edge )
mova [r0+3*FDEC_STRIDEB], m1
RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
PREDICT_8x8_VL_10 w
INIT_XMM ssse3
RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
PREDICT_8x8_HD w, wd
INIT_XMM ssse3
PREDICT_8x8_HD
%endif ; HIGH_BIT_DEPTH
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
;-----------------------------------------------------------------------------
; void predict_8x8_hu( uint8_t *src, uint8_t *edge )
;-----------------------------------------------------------------------------
RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
PREDICT_8x8C_V
%else
PREDICT_8x8C_V
%endif
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_MMX
cglobal predict_8x8c_v_mmx, 1,1
RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
PREDICT_8x16C_V
%else
;-----------------------------------------------------------------------------
; void predict_8x8c_h( uint8_t *src )
;-----------------------------------------------------------------------------
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%macro PREDICT_C_H 1
cglobal predict_8x%1c_h, 1,1
%macro PREDICT_8x8C_DC 0
cglobal predict_8x8c_dc, 1,3
pxor m7, m7
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
movq m0, [r0-FDEC_STRIDEB+0]
movq m1, [r0-FDEC_STRIDEB+8]
HADDW m0, m2
paddw m0, m3
psrlw m0, 2
pavgw m0, m7 ; s0+s2, s1, s3, s1+s3
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%if cpuflag(sse2)
movq2dq xmm0, m0
punpcklwd xmm0, xmm0
INIT_MMX mmx2
PREDICT_8x8C_DC
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_MMX sse2
PREDICT_8x8C_DC
%endif
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%macro STORE_4LINES 3
%if cpuflag(sse2)
movdqa [r0+FDEC_STRIDEB*(%3-4)], %1
%macro PREDICT_8x16C_DC 0
cglobal predict_8x16c_dc, 1,3
pxor m7, m7
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
movq m0, [r0-FDEC_STRIDEB+0]
movq m1, [r0-FDEC_STRIDEB+8]
HADDW m0, m2
psrlw m1, 2
pavgw m0, m7
pavgw m1, m7
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%if cpuflag(sse2)
movq2dq xmm0, m0
movq2dq xmm1, m1
INIT_MMX mmx2
PREDICT_8x16C_DC
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_MMX sse2
PREDICT_8x16C_DC
%endif
%macro PREDICT_C_DC_TOP 1
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM
cglobal predict_8x%1c_dc_top_sse2, 1,1
pxor m2, m2
;-----------------------------------------------------------------------------
; void predict_16x16_v( pixel *src )
;-----------------------------------------------------------------------------
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_MMX
cglobal predict_16x16_v_mmx2, 1,2
mova m0, [r0 - FDEC_STRIDEB+ 0]
%macro PREDICT_16x16_H 0
cglobal predict_16x16_h, 1,2
mov r1, 12*FDEC_STRIDEB
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
.vloop:
%assign Y 0
%rep 4
INIT_MMX mmx2
PREDICT_16x16_H
INIT_XMM sse2
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
PREDICT_16x16_H
%else
;no SSE2 for 8-bit, it's slower than MMX on all systems that don't support SSSE3
;-----------------------------------------------------------------------------
%macro PRED16x16_DC 2
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mova m0, [r0 - FDEC_STRIDEB+ 0]
paddw m0, [r0 - FDEC_STRIDEB+ 8]
paddw m0, [r0 - FDEC_STRIDEB+16]
INIT_MMX mmx2
cglobal predict_16x16_dc_core, 1,2
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
movd m6, r1d
PRED16x16_DC m6, 5
%else
REP_RET
INIT_MMX mmx2
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
cglobal predict_16x16_dc_left_core, 1,2
movd m0, r1m
SPLATW m0, m0
;-----------------------------------------------------------------------------
%macro PRED16x16_DC_SSE2 2
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mova m0, [r0 - FDEC_STRIDEB+ 0]
paddw m0, [r0 - FDEC_STRIDEB+16]
HADDW m0, m2
REP_RET
INIT_XMM sse2
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
cglobal predict_16x16_dc_left_core, 1,2
movd m0, r1m
SPLATW m0, m0
%macro QUANT_DC_START 0
movd m6, r1m ; mf
movd m7, r2m ; bias
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
SPLATD m6, m6
SPLATD m7, m7
%elif cpuflag(sse4) ; ssse3, but not faster on conroe
setne al
%else ; !sse4
xor eax, eax
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
%if mmsize == 16
packsswb m5, m5
%endif
%endif ; cpuflag
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%macro QUANT_ONE_DC 4
%if cpuflag(sse4)
mova m0, [%1]
%endif ; HIGH_BIT_DEPTH
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
%macro QUANT_ONE 4
;;; %1 (m64) dct[y][x]
;;; %2 (m64/mmx) mf[y][x] or mf[0][0] (as uint16_t)
INIT_MMX mmx2
QUANT_DC quant_2x2_dc, 1
-%ifndef ARCH_X86_64 ; not needed because sse2 is faster
+%if ARCH_X86_64 == 0 ; not needed because sse2 is faster
QUANT_DC quant_4x4_dc, 4
INIT_MMX mmx
QUANT_AC quant_4x4, 4
;;; %2,%3 dequant_mf[i_mf][y][x]
;;; m2 i_qbits
mova m0, %2
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
pmaddwd m0, %1
pslld m0, m2
%else
;;; m3 f
;;; m4 0
mova m0, %1
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
pmadcswd m0, m0, %2, m3
psrad m0, m2
%else
%endrep
%endmacro
-%ifdef WIN64
+%if WIN64
DECLARE_REG_TMP 6,3,2
-%elifdef ARCH_X86_64
+%elif ARCH_X86_64
DECLARE_REG_TMP 4,3,2
%else
DECLARE_REG_TMP 2,0,1
sub t2d, t1d
sub t2d, t1d ; i_mf = i_qp % 6
shl t2d, %1
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
add r1, t2 ; dequant_mf[i_mf]
%else
add r1, r1mp ; dequant_mf[i_mf]
psrld m3, 1
DEQUANT_LOOP DEQUANT32_R, %1*%1/4, %3
-%ifndef HIGH_BIT_DEPTH
-%if notcpuflag(avx)
+%if HIGH_BIT_DEPTH == 0 && notcpuflag(avx)
cglobal dequant_%1x%1_flat16, 0,3
movifnidn t2d, r2m
%if %1 == 8
DEQUANT16_FLAT [r1+32], 32, 96
%endif
RET
-%endif ; !AVX
-%endif ; !HIGH_BIT_DEPTH
+%endif ; !HIGH_BIT_DEPTH && !AVX
%endmacro ; DEQUANT
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
DEQUANT 4, 4, 1
DEQUANT 8, 6, 1
DEQUANT 4, 4, 1
DEQUANT 8, 6, 1
%else
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX mmx
DEQUANT 4, 4, 1
DEQUANT 8, 6, 1
psrld m4, 1
movd m2, [r1]
%assign x 0
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
pshufd m2, m2, 0
%rep SIZEOF_PIXEL*32/mmsize
mova m0, [r0+x]
RET
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
INIT_XMM sse2
DEQUANT_DC d, pmaddwd
INIT_XMM xop
DEQUANT_DC d, pmaddwd
%else
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX mmx2
DEQUANT_DC w, pmullw
%endif
%endif
; t4 is eax for return value.
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
DECLARE_REG_TMP 0,1,2,3,6,4 ; Identical for both Windows and *NIX
%else
DECLARE_REG_TMP 4,1,2,3,0,5
%if cpuflag(sse4)
%assign %%regs %%regs-1
%endif
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
%assign %%regs %%regs+1 ; t0-t4 are volatile on x86-64
%endif
cglobal optimize_chroma_2x2_dc, 0,%%regs,7
REP_RET
%endmacro
-%ifndef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH == 0
INIT_XMM sse2
OPTIMIZE_CHROMA_2x2_DC
INIT_XMM ssse3
OPTIMIZE_CHROMA_2x2_DC
%endif ; !HIGH_BIT_DEPTH
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void denoise_dct( int32_t *dct, uint32_t *sum, uint32_t *offset, int size )
;-----------------------------------------------------------------------------
REP_RET
%endmacro
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX mmx
DENOISE_DCT
%endif
REP_RET
%endmacro
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX mmx
DENOISE_DCT
%endif
%macro DECIMATE_MASK 5
%if mmsize==16
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
movdqa xmm0, [%3+ 0]
movdqa xmm1, [%3+32]
packssdw xmm0, [%3+16]
pmovmskb %2, xmm0
%else ; mmsize==8
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
movq mm0, [%3+ 0]
movq mm1, [%3+16]
movq mm2, [%3+32]
%endmacro
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX mmx2
DECIMATE4x4 15
DECIMATE4x4 16
%macro DECIMATE8x8 0
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
cglobal decimate_score64, 1,5
%ifdef PIC
lea r4, [decimate_table8]
%endmacro
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX mmx2
DECIMATE8x8
%endif
%endif
%endmacro
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%macro LAST_MASK 3-4
%if %1 == 4
movq mm0, [%3]
RET
%endmacro
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX mmx2
COEFF_LAST8
%endif
%endmacro
%macro COEFF_LAST48 0
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
cglobal coeff_last4, 1,1
BSR rax, [r0], 0x3f
shr eax, 4
BSR eax, r1d, 0x1f
RET
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
cglobal coeff_last64, 1, 5-mmsize/16
pxor m2, m2
LAST_MASK 16, r2d, r0+SIZEOF_DCTCOEF* 32, r4d
%endif
%endmacro
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX mmx2
COEFF_LAST
%endif
;-----------------------------------------------------------------------------
; t6 = eax for return, t3 = ecx for shift, t[01] = r[01] for x86_64 args
-%ifdef WIN64
+%if WIN64
DECLARE_REG_TMP 3,1,2,0,4,5,6
-%elifdef ARCH_X86_64
+%elif ARCH_X86_64
DECLARE_REG_TMP 0,1,2,3,4,5,6
%else
DECLARE_REG_TMP 6,3,2,1,4,5,0
mov [t1], t4d
.loop:
LZCOUNT t3d, t5d, 0x1f
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mov t2d, [t0+t4*4]
mov [t1+t6+8+16*4], t3b
mov [t1+t6*4+ 8], t2d
%endmacro
INIT_MMX mmx2
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
COEFF_LEVELRUN 15
COEFF_LEVELRUN 16
%endif
COEFF_LEVELRUN 4
COEFF_LEVELRUN 8
INIT_XMM sse2
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
COEFF_LEVELRUN 8
%endif
COEFF_LEVELRUN 15
COEFF_LEVELRUN 16
INIT_XMM sse2, lzcnt
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
COEFF_LEVELRUN 8
%endif
COEFF_LEVELRUN 15
; void pixel_vsad( pixel *src, int stride );
;-----------------------------------------------------------------------------
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
INIT_MMX
cglobal pixel_vsad_mmx2, 3,3
mova m0, [r0]
%endmacro
%macro SAD_X3_END 0
-%ifdef UNIX64
+%if UNIX64
movd [r5+0], mm0
movd [r5+4], mm1
movd [r5+8], mm2
;-----------------------------------------------------------------------------
%macro SAD_X 3
cglobal pixel_sad_x%1_%2x%3_mmx2, %1+2, %1+2
-%ifdef WIN64
+%if WIN64
%assign i %1+1
movsxd r %+ i, r %+ i %+ d
%endif
movq xmm7, [r0]
movq xmm4, [r1]
movq xmm5, [r2]
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
movq xmm6, [r3]
movq xmm8, [r4]
movhps xmm7, [r0+FENC_STRIDE]
movu xmm4, [r1+%2]
movu xmm5, [r2+%2]
movu xmm6, [r3+%2]
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
movu xmm8, [r4+%2]
psadbw xmm4, xmm7
psadbw xmm5, xmm7
paddw xmm0, xmm4
paddw xmm1, xmm5
paddw xmm2, xmm6
-%ifdef UNIX64
+%if UNIX64
movd [r5+0], xmm0
movd [r5+4], xmm1
movd [r5+8], xmm2
;-----------------------------------------------------------------------------
%macro SAD_X_SSE2 3
cglobal pixel_sad_x%1_%2x%3, 2+%1,2+%1,9
-%ifdef WIN64
+%if WIN64
%assign i %1+1
movsxd r %+ i, r %+ i %+ d
%endif
CHECK_SPLIT r3m, %1, %3
jmp pixel_sad_x3_%1x%2_%4
.split:
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
PROLOGUE 6,9
-%ifdef WIN64
+%if WIN64
movsxd r4, r4d
sub rsp, 8
%endif
mov r8, r5
call pixel_sad_%1x%2_cache%3_%5
mov [r8], eax
-%ifdef WIN64
+%if WIN64
mov r2, [rsp]
%else
pop r2
mov r0, r7
call pixel_sad_%1x%2_cache%3_%5
mov [r8+4], eax
-%ifdef WIN64
+%if WIN64
mov r2, [rsp+8]
%else
pop r2
mov r0, r7
call pixel_sad_%1x%2_cache%3_%5
mov [r8+8], eax
-%ifdef WIN64
+%if WIN64
add rsp, 24
%endif
RET
CHECK_SPLIT r4m, %1, %3
jmp pixel_sad_x4_%1x%2_%4
.split:
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
PROLOGUE 6,9
mov r8, r6mp
-%ifdef WIN64
+%if WIN64
movsxd r5, r5d
%endif
push r4
mov r7, r0
call pixel_sad_%1x%2_cache%3_%5
mov [r8], eax
-%ifdef WIN64
+%if WIN64
mov r2, [rsp]
%else
pop r2
mov r0, r7
call pixel_sad_%1x%2_cache%3_%5
mov [r8+4], eax
-%ifdef WIN64
+%if WIN64
mov r2, [rsp+8]
%else
pop r2
mov r0, r7
call pixel_sad_%1x%2_cache%3_%5
mov [r8+8], eax
-%ifdef WIN64
+%if WIN64
mov r2, [rsp+16]
%else
pop r2
mov r0, r7
call pixel_sad_%1x%2_cache%3_%5
mov [r8+12], eax
-%ifdef WIN64
+%if WIN64
add rsp, 24
%endif
RET
; instantiate the aligned sads
INIT_MMX
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
SAD16_CACHELINE_FUNC_MMX2 8, 32
SAD16_CACHELINE_FUNC_MMX2 16, 32
SAD8_CACHELINE_FUNC_MMX2 4, 32
SAD8_CACHELINE_FUNC_MMX2 8, 64
SAD8_CACHELINE_FUNC_MMX2 16, 64
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
SADX34_CACHELINE_FUNC 16, 16, 32, mmx2, mmx2, mmx2
SADX34_CACHELINE_FUNC 16, 8, 32, mmx2, mmx2, mmx2
SADX34_CACHELINE_FUNC 8, 16, 32, mmx2, mmx2, mmx2
SADX34_CACHELINE_FUNC 8, 16, 64, mmx2, mmx2, mmx2
SADX34_CACHELINE_FUNC 8, 8, 64, mmx2, mmx2, mmx2
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
SAD16_CACHELINE_FUNC sse2, 8
SAD16_CACHELINE_FUNC sse2, 16
%assign i 1
HADDW m1, m4
HADDW m2, m5
%endif
-%ifdef UNIX64
+%if UNIX64
movd [r5+0], m0
movd [r5+4], m1
movd [r5+8], m2
cglobal pixel_sad_x%1_%2x%3, 6,7,XMM_REGS
%assign regnum %1+1
%xdefine STRIDE r %+ regnum
-%ifdef WIN64
+%if WIN64
movsxd STRIDE, STRIDE %+ d
%endif
mov r6, %3/2-1
%if cpuflag(ssse3)
pabsd m%1, m%1
pmuludq m%1, m%1
-%elifdef HIGH_BIT_DEPTH
+%elif HIGH_BIT_DEPTH
ABSD m%2, m%1
SWAP %1, %2
pmuludq m%1, m%1
%assign pad 96 + level_tree_size + 16*SIZEOF_NODE + 16-gprsize-(stack_offset&15)
SUB rsp, pad
DEFINE_ARGS unquant_mf, zigzag, lambda2, ii, orig_coefs, quant_coefs, dct, cabac_state_sig, cabac_state_last
-%ifdef WIN64
+%if WIN64
%define level_statem rsp+stack_offset+80 ; r9m, except that we need to index into it (and r10m) as an array
%else
%define level_statem rsp+stack_offset+32
%define zigzagm [stack+8]
mov last_nnzm, iid
mov zigzagm, zigzagq
-%ifndef WIN64
+%if WIN64 == 0
%define orig_coefsm [stack+16]
%define quant_coefsm [stack+24]
mov orig_coefsm, orig_coefsq
movzx r0, word [level_tree + r0*4]
psrld m0, 16
movd m1, [dctq + r2*SIZEOF_DCTCOEF]
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
psignd m0, m1
movd [dctq + r2*SIZEOF_DCTCOEF], m0
%else
%endif
%else
mov r5d, [level_tree + r0*4]
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mov r4d, dword [dctq + r2*SIZEOF_DCTCOEF]
%else
movsx r4d, word [dctq + r2*SIZEOF_DCTCOEF]
shr r5d, 16
xor r5d, r4d
sub r5d, r4d
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mov [dctq + r2*SIZEOF_DCTCOEF], r5d
%else
mov [dctq + r2*SIZEOF_DCTCOEF], r5w
pxor m0, m0
mova [r10+ 0], m0
mova [r10+16], m0
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mova [r10+32], m0
mova [r10+48], m0
%endif
.i_loop%1:
; if( !quant_coefs[i] )
mov r6, quant_coefsm
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mov abs_leveld, dword [r6 + iiq*SIZEOF_DCTCOEF]
%else
movsx abs_leveld, word [r6 + iiq*SIZEOF_DCTCOEF]
movzx zigzagid, byte [zigzagq+iiq]
movd m0, abs_leveld
mov r6, orig_coefsm
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
movd m1, [r6 + zigzagiq*SIZEOF_DCTCOEF]
%else
movd m1, [r6 + zigzagiq*SIZEOF_DCTCOEF - 2]
; int psy_weight = dct_weight_tab[zigzag[i]] * h->mb.i_psy_trellis;
; ssd1[k] -= psy_weight * psy_value;
mov r6, fenc_dctm
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
movd m3, [r6 + zigzagiq*SIZEOF_DCTCOEF]
%else
movd m3, [r6 + zigzagiq*SIZEOF_DCTCOEF - 2]
%define program_name x264
-%ifdef ARCH_X86_64
+%define WIN64 0
+%define UNIX64 0
+%if ARCH_X86_64
%ifidn __OUTPUT_FORMAT__,win32
- %define WIN64
+ %define WIN64 1
%else
- %define UNIX64
+ %define UNIX64 1
%endif
%endif
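Note that once WIN64 and UNIX64 are unconditionally defined (to 0 by default, overridden to 1 above), a leftover %ifdef WIN64 would always evaluate true, so every remaining test must use the numeric %if forms. The payoff is that the three calling conventions collapse into one %if/%elif chain, as in the DECLARE_REG_TMP hunks above:

    %if WIN64
        DECLARE_REG_TMP 3,1,2,0,4,5,6
    %elif ARCH_X86_64
        DECLARE_REG_TMP 0,1,2,3,4,5,6
    %else
        DECLARE_REG_TMP 6,3,2,1,4,5,0
    %endif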
%endif
%endmacro
-%ifdef WIN64
+%if WIN64
%define PIC
-%elifndef ARCH_X86_64
+%elif ARCH_X86_64 == 0
; x86_32 doesn't require PIC.
; Some distros prefer shared objects to be PIC, but nothing breaks if
; the code contains a few textrels, so we'll skip that complexity.
%if %0 == 5
%define r%1m %3
%define r%1mp %2
- %elifdef ARCH_X86_64 ; memory
+ %elif ARCH_X86_64 ; memory
%define r%1m [rsp + stack_offset + %6]
%define r%1mp qword r %+ %1m
%else
%define e%1w %1
%define r%1b %2
%define e%1b %2
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
%define r%1 e%1
%endif
%endmacro
DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
%define gprsize 8
%else
%define gprsize 4
%assign n_arg_names %0
%endmacro
-%ifdef WIN64 ; Windows x64 ;=================================================
+%if WIN64 ; Windows x64 ;=================================================
DECLARE_REG 0, rcx, ecx, cx, cl
DECLARE_REG 1, rdx, edx, dx, dl
%endif
%endmacro
-%elifdef ARCH_X86_64 ; *nix x64 ;=============================================
+%elif ARCH_X86_64 ; *nix x64 ;=============================================
DECLARE_REG 0, rdi, edi, di, dil
DECLARE_REG 1, rsi, esi, si, sil
%endif ;======================================================================
-%ifndef WIN64
+%if WIN64 == 0
%macro WIN64_SPILL_XMM 1
%endmacro
%macro WIN64_RESTORE_XMM 1
%define RESET_MM_PERMUTATION INIT_XMM %1
%define mmsize 16
%define num_mmregs 8
- %ifdef ARCH_X86_64
+ %if ARCH_X86_64
%define num_mmregs 16
%endif
%define mova movdqa
%define RESET_MM_PERMUTATION INIT_YMM %1
%define mmsize 32
%define num_mmregs 8
- %ifdef ARCH_X86_64
+ %if ARCH_X86_64
%define num_mmregs 16
%endif
%define mova vmovaps
%assign SIZEOF_PIXEL 1
%assign SIZEOF_DCTCOEF 2
%define pixel byte
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%assign SIZEOF_PIXEL 2
%assign SIZEOF_DCTCOEF 4
%define pixel word
%assign PIXEL_MAX ((1 << BIT_DEPTH)-1)
%macro FIX_STRIDES 1-*
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
%rep %0
add %1, %1
%rotate 1
%endmacro
%macro TRANSPOSE8x8W 9-11
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
SBUTTERFLY wd, %1, %2, %9
SBUTTERFLY wd, %3, %4, %9
SBUTTERFLY wd, %5, %6, %9
%macro LOAD_DIFF 5
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
mova %1, %4
psubw %1, %5
%elifidn %3, none
; (high depth) in: %1, %2, min to clip, max to clip, mem128
; in: %1, tmp, %3, mem64
%macro STORE_DIFF 4-5
-%ifdef HIGH_BIT_DEPTH
+%if HIGH_BIT_DEPTH
psrad %1, 6
psrad %2, 6
packssdw %1, %2
if [ "$bit_depth" -gt "8" ]; then
define HIGH_BIT_DEPTH
- ASFLAGS="$ASFLAGS -DHIGH_BIT_DEPTH"
+ ASFLAGS="$ASFLAGS -DHIGH_BIT_DEPTH=1"
+else
+ ASFLAGS="$ASFLAGS -DHIGH_BIT_DEPTH=0"
fi
if [ "$chroma_format" != "all" ]; then
error_message: db "failed to preserve register", 0
-%ifdef WIN64
+%if WIN64
; just random numbers to reduce the chance of incidental match
ALIGN 16
x6: ddq 0x79445c159ce790641a1b2550a612b48c
; (max_args % 4) must equal 3 for stack alignment
%define max_args 15
-%ifdef WIN64
+%if WIN64
;-----------------------------------------------------------------------------
; intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... )
ADD rsp, max_args*8
RET
-%elifndef ARCH_X86_64
+%elif ARCH_X86_64 == 0
; just random numbers to reduce the chance of incidental match
%define n3 dword 0x6549315c